diff options
42 files changed, 9254 insertions, 215 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 6758b4b8794..979bfc63572 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -194,6 +194,9 @@ set(METAL_SRC metal/mtl_index_buffer.mm metal/mtl_memory.mm metal/mtl_query.mm + metal/mtl_shader.mm + metal/mtl_shader_generator.mm + metal/mtl_shader_interface.mm metal/mtl_state.mm metal/mtl_texture.mm metal/mtl_texture_util.mm @@ -207,7 +210,14 @@ set(METAL_SRC metal/mtl_framebuffer.hh metal/mtl_index_buffer.hh metal/mtl_memory.hh + metal/mtl_pso_descriptor_state.hh + metal/mtl_primitive.hh metal/mtl_query.hh + metal/mtl_shader.hh + metal/mtl_shader_generator.hh + metal/mtl_shader_interface_type.hh + metal/mtl_shader_interface.hh + metal/mtl_shader_shared.h metal/mtl_state.hh metal/mtl_texture.hh metal/mtl_uniform_buffer.hh @@ -227,6 +237,9 @@ set(LIB ) set(MSL_SRC + shaders/metal/mtl_shader_defines.msl + shaders/metal/mtl_shader_common.msl + metal/kernels/compute_texture_update.msl metal/kernels/compute_texture_read.msl metal/kernels/depth_2d_update_float_frag.glsl @@ -458,21 +471,44 @@ set(GLSL_SRC GPU_shader_shared_utils.h ) -set(GLSL_C) -foreach(GLSL_FILE ${GLSL_SRC}) - data_to_c_simple(${GLSL_FILE} GLSL_C) -endforeach() +set(MTL_BACKEND_GLSL_SRC + metal/kernels/compute_texture_update.msl + metal/kernels/compute_texture_read.msl + metal/kernels/depth_2d_update_float_frag.glsl + metal/kernels/depth_2d_update_int24_frag.glsl + metal/kernels/depth_2d_update_int32_frag.glsl + metal/kernels/depth_2d_update_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_frag.glsl +) +set(MSL_SRC + shaders/metal/mtl_shader_defines.msl + shaders/metal/mtl_shader_common.msl + metal/mtl_shader_shared.h +) if(WITH_METAL_BACKEND) + list(APPEND GLSL_SRC ${MTL_BACKEND_GLSL_SRC}) + set(MSL_C) foreach(MSL_FILE ${MSL_SRC}) data_to_c_simple(${MSL_FILE} MSL_C) endforeach() - list(APPEND GLSL_C ${MSL_C}) endif() 
-blender_add_lib(bf_gpu_shaders "${GLSL_C}" "" "" "") +set(GLSL_C) +foreach(GLSL_FILE ${GLSL_SRC}) + data_to_c_simple(${GLSL_FILE} GLSL_C) +endforeach() + +set(SHADER_C) +list(APPEND SHADER_C ${GLSL_C}) +if(WITH_METAL_BACKEND) + list(APPEND SHADER_C ${MSL_C}) +endif() + +blender_add_lib(bf_gpu_shaders "${SHADER_C}" "" "" "") list(APPEND LIB bf_gpu_shaders @@ -587,6 +623,16 @@ set(SRC_SHADER_CREATE_INFOS shaders/compositor/infos/compositor_split_viewer_info.hh ) +set(SRC_SHADER_CREATE_INFOS_MTL + metal/kernels/depth_2d_update_info.hh + metal/kernels/gpu_shader_fullscreen_blit_info.hh +) + +if(WITH_METAL_BACKEND) + list(APPEND SRC_SHADER_CREATE_INFOS ${SRC_SHADER_CREATE_INFOS_MTL}) +endif() + + set(SHADER_CREATE_INFOS_CONTENT "") foreach(DESCRIPTOR_FILE ${SRC_SHADER_CREATE_INFOS}) string(APPEND SHADER_CREATE_INFOS_CONTENT "#include \"${DESCRIPTOR_FILE}\"\n") @@ -629,6 +675,7 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER) if(APPLE) add_executable(shader_builder intern/gpu_shader_builder.cc + intern/gpu_shader_builder_stubs.cc ${shader_create_info_list_file} ) diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h index 61c60f336e1..91cf14dc792 100644 --- a/source/blender/gpu/GPU_capabilities.h +++ b/source/blender/gpu/GPU_capabilities.h @@ -30,6 +30,7 @@ int GPU_max_batch_indices(void); int GPU_max_batch_vertices(void); int GPU_max_vertex_attribs(void); int GPU_max_varying_floats(void); +int GPU_max_samplers(void); int GPU_max_shader_storage_buffer_bindings(void); int GPU_max_compute_shader_storage_blocks(void); int GPU_max_samplers(void); diff --git a/source/blender/gpu/GPU_shader_shared_utils.h b/source/blender/gpu/GPU_shader_shared_utils.h index 88bdad2bf76..1cfc4f8af31 100644 --- a/source/blender/gpu/GPU_shader_shared_utils.h +++ b/source/blender/gpu/GPU_shader_shared_utils.h @@ -43,20 +43,23 @@ # define sqrtf sqrt # define expf exp -# define float2 vec2 -# define float3 vec3 -# define float4 vec4 -# define float4x4 mat4 -# define 
int2 ivec2 -# define int3 ivec3 -# define int4 ivec4 -# define uint2 uvec2 -# define uint3 uvec3 -# define uint4 uvec4 # define bool1 bool -# define bool2 bvec2 -# define bool3 bvec3 -# define bool4 bvec4 +/* Type name collision with Metal shading language - These typenames are already defined. */ +# ifndef GPU_METAL +# define float2 vec2 +# define float3 vec3 +# define float4 vec4 +# define float4x4 mat4 +# define int2 ivec2 +# define int3 ivec3 +# define int4 ivec4 +# define uint2 uvec2 +# define uint3 uvec3 +# define uint4 uvec4 +# define bool2 bvec2 +# define bool3 bvec3 +# define bool4 bvec4 +# endif #else /* C / C++ */ # pragma once diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index e29b0d5801d..bcc418169b7 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -56,11 +56,15 @@ static void gpu_backend_discard(); namespace blender::gpu { +int Context::context_counter = 0; Context::Context() { thread_ = pthread_self(); is_active_ = false; matrix_state = GPU_matrix_state_create(); + + context_id = Context::context_counter; + Context::context_counter++; } Context::~Context() diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index f823a92893c..2217e5262ed 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -48,6 +48,14 @@ class Context { DebugStack debug_stack; + /* GPUContext counter used to assign a unique ID to each GPUContext. + * NOTE(Metal): This is required by the Metal Backend, as a bug exists in the global OS shader + * cache wherein compilation of identical source from two distinct threads can result in an + * invalid cache collision, result in a broken shader object. Appending the unique context ID + * onto compiled sources ensures the source hashes are different. 
*/ + static int context_counter; + int context_id = 0; + protected: /** Thread on which this context is active. */ pthread_t thread_; diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 2d1b3dc2dca..4d059ae495e 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -95,6 +95,9 @@ static void standard_defines(Vector<const char *> &sources) case GPU_BACKEND_OPENGL: sources.append("#define GPU_OPENGL\n"); break; + case GPU_BACKEND_METAL: + sources.append("#define GPU_METAL\n"); + break; default: BLI_assert(false && "Invalid GPU Backend Type"); break; diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 8236e669288..3884c067c83 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -32,6 +32,7 @@ namespace blender::gpu::shader { #endif enum class Type { + /* Types supported natively across all GPU backends. */ FLOAT = 0, VEC2, VEC3, @@ -47,6 +48,21 @@ enum class Type { IVEC3, IVEC4, BOOL, + /* Additionally supported types to enable data optimisation and native + * support in some GPUBackends. + * NOTE: These types must be representable in all APIs. E.g. VEC3_101010I2 is aliased as vec3 in + * the GL backend, as implicit type conversions from packed normal attribute data to vec3 is + * supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid + * additional data conversions for GPU_COMP_U8 vertex attributes. 
*/ + VEC3_101010I2, + UCHAR, + UCHAR2, + UCHAR3, + UCHAR4, + CHAR, + CHAR2, + CHAR3, + CHAR4 }; /* All of these functions is a bit out of place */ @@ -86,6 +102,40 @@ static inline std::ostream &operator<<(std::ostream &stream, const Type type) return stream << "mat3"; case Type::MAT4: return stream << "mat4"; + case Type::VEC3_101010I2: + return stream << "vec3_1010102_Inorm"; + case Type::UCHAR: + return stream << "uchar"; + case Type::UCHAR2: + return stream << "uchar2"; + case Type::UCHAR3: + return stream << "uchar3"; + case Type::UCHAR4: + return stream << "uchar4"; + case Type::CHAR: + return stream << "char"; + case Type::CHAR2: + return stream << "char2"; + case Type::CHAR3: + return stream << "char3"; + case Type::CHAR4: + return stream << "char4"; + case Type::INT: + return stream << "int"; + case Type::IVEC2: + return stream << "ivec2"; + case Type::IVEC3: + return stream << "ivec3"; + case Type::IVEC4: + return stream << "ivec4"; + case Type::UINT: + return stream << "uint"; + case Type::UVEC2: + return stream << "uvec2"; + case Type::UVEC3: + return stream << "uvec3"; + case Type::UVEC4: + return stream << "uvec4"; default: BLI_assert(0); return stream; @@ -228,6 +278,8 @@ enum class PrimitiveOut { POINTS = 0, LINE_STRIP, TRIANGLE_STRIP, + LINES, + TRIANGLES, }; struct StageInterfaceInfo { diff --git a/source/blender/gpu/metal/kernels/compute_texture_read.msl b/source/blender/gpu/metal/kernels/compute_texture_read.msl index 4bfb48567f9..7b0760d7620 100644 --- a/source/blender/gpu/metal/kernels/compute_texture_read.msl +++ b/source/blender/gpu/metal/kernels/compute_texture_read.msl @@ -74,7 +74,7 @@ template<> uchar convert_type<uchar>(float val) template<> uint convert_type<uint>(float val) { - return uint(val * double(0xFFFFFFFFu)); + return uint(val * float(0xFFFFFFFFu)); } struct TextureReadParams { diff --git a/source/blender/gpu/metal/kernels/compute_texture_update.msl b/source/blender/gpu/metal/kernels/compute_texture_update.msl index 
095c495ac54..43c746e0afa 100644 --- a/source/blender/gpu/metal/kernels/compute_texture_update.msl +++ b/source/blender/gpu/metal/kernels/compute_texture_update.msl @@ -38,22 +38,6 @@ using namespace metal; # define POSITION_TYPE uint3 #endif -float3 mtl_linear_to_srgb_attr(float3 c) -{ - c = max(c, float3(0.0)); - float3 c1 = c * 12.92; - float3 c2 = 1.055 * pow(c, float3(1.0 / 2.4)) - 0.055; - return mix(c1, c2, step(float3(0.0031308), c)); -} - -float3 mtl_srgb_to_linear_attr(float3 c) -{ - c = max(c, float3(0.0)); - float3 c1 = c * (1.0 / 12.92); - float3 c2 = pow((c + 0.055) * (1.0 / 1.055), float3(2.4)); - return mix(c1, c2, step(float3(0.04045), c)); -} - struct TextureUpdateParams { int mip_index; int extent[3]; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl index 9fd54f3f31f..374aedff90d 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl @@ -1,9 +1,4 @@ -uniform sampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; - void main() { gl_FragDepth = textureLod(source_data, texCoord_interp, mip).r; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_info.hh b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh new file mode 100644 index 00000000000..0a3281a98f2 --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(depth_2d_update_iface, "").smooth(Type::VEC2, "texCoord_interp"); + +GPU_SHADER_CREATE_INFO(depth_2d_update_info_base) + .vertex_in(0, Type::VEC2, "pos") + .vertex_out(depth_2d_update_iface) + .fragment_out(0, Type::VEC4, "fragColor") + .push_constant(Type::VEC2, "extent") + .push_constant(Type::VEC2, "offset") + .push_constant(Type::VEC2, 
"size") + .push_constant(Type::INT, "mip") + .sampler(0, ImageType::FLOAT_2D, "source_data", Frequency::PASS) + .vertex_source("depth_2d_update_vert.glsl"); + +GPU_SHADER_CREATE_INFO(depth_2d_update_float) + .fragment_source("depth_2d_update_float_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(depth_2d_update_int24) + .fragment_source("depth_2d_update_int24_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(depth_2d_update_int32) + .fragment_source("depth_2d_update_int32_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl index 7483343503f..a4d9e35d491 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl @@ -1,8 +1,4 @@ -uniform isampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; void main() { diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl index 75d42c57f73..421c25a2e5c 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl @@ -1,9 +1,4 @@ -uniform isampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; - void main() { uint val = textureLod(source_data, texCoord_interp, mip).r; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl index faae68d2f55..def0c1ae9de 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl @@ -1,10 +1,4 @@ -uniform vec2 extent; -uniform vec2 offset; 
-uniform vec2 size; -out vec2 texCoord_interp; -in vec2 pos; - void main() { vec4 rect = vec4(offset.x, offset.y, offset.x + extent.x, offset.y + extent.y); diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl index b1353478593..8c81c5c0d83 100644 --- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl @@ -1,10 +1,5 @@ -in vec4 uvcoordsvar; -uniform sampler2D imageTexture; -uniform int mip; -out vec4 fragColor; - void main() { vec4 tex_color = textureLod(imageTexture, uvcoordsvar.xy, mip); diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh new file mode 100644 index 00000000000..6af67ad44d2 --- /dev/null +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(fullscreen_blit_iface, "").smooth(Type::VEC4, "uvcoordsvar"); + +GPU_SHADER_CREATE_INFO(fullscreen_blit) + .vertex_in(0, Type::VEC2, "pos") + .vertex_out(fullscreen_blit_iface) + .fragment_out(0, Type::VEC4, "fragColor") + .push_constant(Type::VEC2, "fullscreen") + .push_constant(Type::VEC2, "size") + .push_constant(Type::VEC2, "dst_offset") + .push_constant(Type::VEC2, "src_offset") + .push_constant(Type::INT, "mip") + .sampler(0, ImageType::FLOAT_2D, "imageTexture", Frequency::PASS) + .vertex_source("gpu_shader_fullscreen_blit_vert.glsl") + .fragment_source("gpu_shader_fullscreen_blit_frag.glsl") + .do_static_compilation(true);
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl index 8e52868f67d..5d5a0e2ab5f 100644 --- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl @@ -1,12 +1,4 @@ -out vec4 uvcoordsvar; - -in vec2 pos; -uniform vec2 fullscreen; -uniform vec2 size; -uniform vec2 dst_offset; -uniform vec2 src_offset; - void main() { /* The position represents a 0-1 square, we first scale it by the size we want to have it on diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 361b2ca05f5..3cd7794f6c9 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -12,6 +12,7 @@ #include "mtl_framebuffer.hh" #include "mtl_index_buffer.hh" #include "mtl_query.hh" +#include "mtl_shader.hh" #include "mtl_uniform_buffer.hh" #include "gpu_capabilities_private.hh" @@ -71,8 +72,8 @@ QueryPool *MTLBackend::querypool_alloc() Shader *MTLBackend::shader_alloc(const char *name) { - /* TODO(Metal): Implement MTLShader. */ - return nullptr; + MTLContext *mtl_context = MTLContext::get(); + return new MTLShader(mtl_context, name); }; Texture *MTLBackend::texture_alloc(const char *name) @@ -168,7 +169,7 @@ void MTLBackend::platform_init(MTLContext *ctx) eGPUSupportLevel support_level = GPU_SUPPORT_LEVEL_SUPPORTED; BLI_assert(ctx); - id<MTLDevice> mtl_device = nil; /*ctx->device; TODO(Metal): Implement MTLContext. */ + id<MTLDevice> mtl_device = ctx->device; BLI_assert(device); NSString *gpu_name = [mtl_device name]; @@ -187,7 +188,7 @@ void MTLBackend::platform_init(MTLContext *ctx) os = GPU_OS_UNIX; #endif - BLI_assert(os == GPU_OS_MAC && "Platform must be macOS"); + BLI_assert_msg(os == GPU_OS_MAC, "Platform must be macOS"); /* Determine Vendor from name. 
*/ if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) { @@ -334,7 +335,7 @@ bool MTLBackend::metal_is_supported() void MTLBackend::capabilities_init(MTLContext *ctx) { BLI_assert(ctx); - id<MTLDevice> device = nil; /*ctx->device TODO(Metal): Implement MTLContext. */ + id<MTLDevice> device = ctx->device; BLI_assert(device); /* Initialize Capabilities. */ diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh index d56f796e60f..5e34d5352f1 100644 --- a/source/blender/gpu/metal/mtl_capabilities.hh +++ b/source/blender/gpu/metal/mtl_capabilities.hh @@ -14,6 +14,8 @@ namespace gpu { #define MTL_MAX_TEXTURE_SLOTS 128 #define MTL_MAX_SAMPLER_SLOTS MTL_MAX_TEXTURE_SLOTS +/* Max limit without using bindless for samplers. */ +#define MTL_MAX_DEFAULT_SAMPLERS 16 #define MTL_MAX_UNIFORM_BUFFER_BINDINGS 31 #define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31 #define MTL_MAX_UNIFORMS_PER_BLOCK 64 diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh index 44ba786f90f..b6f9c0050a9 100644 --- a/source/blender/gpu/metal/mtl_common.hh +++ b/source/blender/gpu/metal/mtl_common.hh @@ -13,4 +13,6 @@ * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */ #define MTL_NUM_SAFE_FRAMES (MTL_MAX_DRAWABLES + 1) +/* Display debug information about missing attributes and incorrect vertex formats. 
*/ +#define MTL_DEBUG_SHADER_ATTRIBUTES 0 #endif diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index d542f0e1025..ccc648eab2a 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -17,6 +17,8 @@ #include "mtl_common.hh" #include "mtl_framebuffer.hh" #include "mtl_memory.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_texture.hh" #include <Cocoa/Cocoa.h> @@ -32,7 +34,6 @@ namespace blender::gpu { /* Forward Declarations */ class MTLContext; class MTLCommandBufferManager; -class MTLShader; class MTLUniformBuf; /* Structs containing information on current binding state for textures and samplers. */ @@ -40,7 +41,7 @@ struct MTLTextureBinding { bool used; /* Same value as index in bindings array. */ - uint texture_slot_index; + uint slot_index; gpu::MTLTexture *texture_resource; }; @@ -56,9 +57,10 @@ struct MTLSamplerBinding { /* Metal Context Render Pass State -- Used to track active RenderCommandEncoder state based on * bound MTLFrameBuffer's.Owned by MTLContext. */ -struct MTLRenderPassState { +class MTLRenderPassState { friend class MTLContext; + public: MTLRenderPassState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager) : ctx(context), cmd(command_buffer_manager){}; @@ -570,6 +572,11 @@ class MTLContext : public Context { friend class MTLBackend; private: + /* Null buffers for empty/unintialized bindings. + * Null attribute buffer follows default attribute format of OpenGL Backend. */ + id<MTLBuffer> null_buffer_; /* All zero's. */ + id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ + /* Compute and specialization caches. */ MTLContextTextureUtils texture_utils_; @@ -713,6 +720,9 @@ class MTLContext : public Context { { return MTLContext::global_memory_manager; } + /* Uniform Buffer Bindings to command encoders. 
*/ + id<MTLBuffer> get_null_buffer(); + id<MTLBuffer> get_null_attribute_buffer(); }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 26cfe6632ef..f14236bcb58 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,6 +5,8 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_state.hh" #include "DNA_userdef_types.h" @@ -29,19 +31,33 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command /* Init debug. */ debug::mtl_debug_init(); + /* Device creation. + * TODO(Metal): This is a temporary initialisation path to enable testing of features + * and shader compilation tests. Future functionality should fetch the existing device + * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */ + this->device = MTLCreateSystemDefaultDevice(); + /* Initialize command buffer state. */ this->main_command_buffer.prepare(); + /* Initialise imm and pipeline state */ + this->pipeline_state.initialised = false; + /* Frame management. */ is_inside_frame_ = false; current_frame_index_ = 0; + /* Prepare null data buffer */ + null_buffer_ = nil; + null_attribute_buffer_ = nil; + /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); this->front_left = mtl_front_left; this->back_left = mtl_back_left; this->active_fb = this->back_left; + /* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX * initialization). 
*/ MTLBackend::platform_init(this); @@ -93,6 +109,12 @@ MTLContext::~MTLContext() sampler_state_cache_[i] = nil; } } + if (null_buffer_) { + [null_buffer_ release]; + } + if (null_attribute_buffer_) { + [null_attribute_buffer_ release]; + } } void MTLContext::begin_frame() @@ -227,6 +249,50 @@ MTLFrameBuffer *MTLContext::get_default_framebuffer() return static_cast<MTLFrameBuffer *>(this->back_left); } +MTLShader *MTLContext::get_active_shader() +{ + return this->pipeline_state.active_shader; +} + +id<MTLBuffer> MTLContext::get_null_buffer() +{ + if (null_buffer_ != nil) { + return null_buffer_; + } + + static const int null_buffer_size = 4096; + null_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + [null_buffer_ retain]; + uint32_t *null_data = (uint32_t *)calloc(0, null_buffer_size); + memcpy([null_buffer_ contents], null_data, null_buffer_size); + [null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + free(null_data); + + BLI_assert(null_buffer_ != nil); + return null_buffer_; +} + +id<MTLBuffer> MTLContext::get_null_attribute_buffer() +{ + if (null_attribute_buffer_ != nil) { + return null_attribute_buffer_; + } + + /* Allocate Null buffer if it has not yet been created. + * Min buffer size is 256 bytes -- though we only need 64 bytes of data. */ + static const int null_buffer_size = 256; + null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + BLI_assert(null_attribute_buffer_ != nil); + [null_attribute_buffer_ retain]; + float data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4); + [null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + + return null_attribute_buffer_; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -239,20 +305,20 @@ void MTLContext::pipeline_state_init() /*** Initialize state only once. 
***/ if (!this->pipeline_state.initialised) { this->pipeline_state.initialised = true; - this->pipeline_state.active_shader = NULL; + this->pipeline_state.active_shader = nullptr; /* Clear bindings state. */ for (int t = 0; t < GPU_max_textures(); t++) { this->pipeline_state.texture_bindings[t].used = false; - this->pipeline_state.texture_bindings[t].texture_slot_index = t; - this->pipeline_state.texture_bindings[t].texture_resource = NULL; + this->pipeline_state.texture_bindings[t].slot_index = -1; + this->pipeline_state.texture_bindings[t].texture_resource = nullptr; } for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) { this->pipeline_state.sampler_bindings[s].used = false; } for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) { this->pipeline_state.ubo_bindings[u].bound = false; - this->pipeline_state.ubo_bindings[u].ubo = NULL; + this->pipeline_state.ubo_bindings[u].ubo = nullptr; } } @@ -487,52 +553,46 @@ id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_s id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state) { /* Check if sampler already exists for given state. */ - id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state]; - if (st != nil) { - return st; - } - else { - MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; - descriptor.normalizedCoordinates = true; - - MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? - MTLSamplerAddressModeClampToBorderColor : - MTLSamplerAddressModeClampToEdge; - descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? 
- MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; - descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? - MTLSamplerMipFilterLinear : - MTLSamplerMipFilterNotMipmapped; - descriptor.lodMinClamp = -1000; - descriptor.lodMaxClamp = 1000; - float aniso_filter = max_ff(16, U.anisotropic_filter); - descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; - descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? - MTLCompareFunctionLessEqual : - MTLCompareFunctionAlways; - descriptor.supportArgumentBuffers = true; - - id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; - sampler_state_cache_[(uint)sampler_state] = state; - - BLI_assert(state != nil); - [descriptor autorelease]; - return state; - } + MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; + descriptor.normalizedCoordinates = true; + + MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? + MTLSamplerAddressModeClampToBorderColor : + MTLSamplerAddressModeClampToEdge; + descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; + descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? 
+ MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? + MTLSamplerMipFilterLinear : + MTLSamplerMipFilterNotMipmapped; + descriptor.lodMinClamp = -1000; + descriptor.lodMaxClamp = 1000; + float aniso_filter = max_ff(16, U.anisotropic_filter); + descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; + descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? + MTLCompareFunctionLessEqual : + MTLCompareFunctionAlways; + descriptor.supportArgumentBuffers = true; + + id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; + sampler_state_cache_[(uint)sampler_state] = state; + + BLI_assert(state != nil); + [descriptor autorelease]; + return state; } id<MTLSamplerState> MTLContext::get_default_sampler_state() diff --git a/source/blender/gpu/metal/mtl_memory.mm b/source/blender/gpu/metal/mtl_memory.mm index 07da489bdbb..788736bdfad 100644 --- a/source/blender/gpu/metal/mtl_memory.mm +++ b/source/blender/gpu/metal/mtl_memory.mm @@ -73,7 +73,9 @@ gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size, return this->allocate_aligned_with_data(size, 256, cpu_visible, data); } -gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible) +gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, + uint32_t alignment, + bool cpu_visible) { /* Check not required. Main GPU module usage considered thread-safe. 
*/ // BLI_assert(BLI_thread_is_main()); @@ -167,7 +169,7 @@ gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, b } gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size, - uint alignment, + uint32_t alignment, bool cpu_visible, const void *data) { @@ -548,9 +550,10 @@ void gpu::MTLBuffer::set_label(NSString *str) void gpu::MTLBuffer::debug_ensure_used() { /* Debug: If buffer is not flagged as in-use, this is a problem. */ - BLI_assert(in_use_ && - "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer " - "has likely already been freed."); + BLI_assert_msg( + in_use_, + "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer " + "has likely already been freed."); } void gpu::MTLBuffer::flush() @@ -665,9 +668,9 @@ MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligne /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */ alignment = max_uu(alignment, 256); - BLI_assert(current_scratch_buffer_ >= 0 && "Scratch Buffer index not set"); + BLI_assert_msg(current_scratch_buffer_ >= 0, "Scratch Buffer index not set"); MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_]; - BLI_assert(current_scratch_buff != nullptr && "Scratch Buffer does not exist"); + BLI_assert_msg(current_scratch_buff != nullptr, "Scratch Buffer does not exist"); MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size, alignment); BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment); diff --git a/source/blender/gpu/metal/mtl_primitive.hh b/source/blender/gpu/metal/mtl_primitive.hh new file mode 100644 index 00000000000..5aa7a533b95 --- /dev/null +++ b/source/blender/gpu/metal/mtl_primitive.hh @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Encapsulation of Frame-buffer 
states (attached textures, viewport, scissors). + */ + +#pragma once + +#include "BLI_assert.h" + +#include "GPU_primitive.h" + +#include <Metal/Metal.h> + +namespace blender::gpu { + +/** Utility functions **/ +static inline MTLPrimitiveTopologyClass mtl_prim_type_to_topology_class(MTLPrimitiveType prim_type) +{ + switch (prim_type) { + case MTLPrimitiveTypePoint: + return MTLPrimitiveTopologyClassPoint; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + return MTLPrimitiveTopologyClassLine; + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + return MTLPrimitiveTopologyClassTriangle; + } + return MTLPrimitiveTopologyClassUnspecified; +} + +static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type) +{ + switch (prim_type) { + case GPU_PRIM_POINTS: + return MTLPrimitiveTypePoint; + case GPU_PRIM_LINES: + case GPU_PRIM_LINES_ADJ: + case GPU_PRIM_LINE_LOOP: + return MTLPrimitiveTypeLine; + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_STRIP_ADJ: + return MTLPrimitiveTypeLineStrip; + case GPU_PRIM_TRIS: + case GPU_PRIM_TRI_FAN: + case GPU_PRIM_TRIS_ADJ: + return MTLPrimitiveTypeTriangle; + case GPU_PRIM_TRI_STRIP: + return MTLPrimitiveTypeTriangleStrip; + case GPU_PRIM_NONE: + return MTLPrimitiveTypePoint; + }; +} + +/* Certain primitive types are not supported in Metal, and require emulation. + * `GPU_PRIM_LINE_LOOP` and `GPU_PRIM_TRI_FAN` required index buffer patching. + * Adjacency types do not need emulation as the input structure is the same, + * and access is controlled from the vertex shader through SSBO vertex fetch. + * -- These Adj cases are only used in geometry shaders in OpenGL. 
*/ +static inline bool mtl_needs_topology_emulation(GPUPrimType prim_type) +{ + + BLI_assert(prim_type != GPU_PRIM_NONE); + switch (prim_type) { + case GPU_PRIM_LINE_LOOP: + case GPU_PRIM_TRI_FAN: + return true; + default: + return false; + } + return false; +} + +static inline bool mtl_vertex_count_fits_primitive_type(uint32_t vertex_count, + MTLPrimitiveType prim_type) +{ + if (vertex_count == 0) { + return false; + } + + switch (prim_type) { + case MTLPrimitiveTypeLineStrip: + return (vertex_count > 1); + case MTLPrimitiveTypeLine: + return (vertex_count % 2 == 0); + case MTLPrimitiveTypePoint: + return (vertex_count > 0); + case MTLPrimitiveTypeTriangle: + return (vertex_count % 3 == 0); + case MTLPrimitiveTypeTriangleStrip: + return (vertex_count > 2); + } + BLI_assert(false); + return false; +} + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh new file mode 100644 index 00000000000..010349eddbf --- /dev/null +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#pragma once + +#include "GPU_vertex_format.h" + +#include <Metal/Metal.h> + +namespace blender::gpu { + +/** Vertex attribute and buffer descriptor wrappers + * for use in PSO construction and caching. */ +struct MTLVertexAttributeDescriptorPSO { + MTLVertexFormat format; + int offset; + int buffer_index; + GPUVertFetchMode format_conversion_mode; + + bool operator==(const MTLVertexAttributeDescriptorPSO &other) const + { + return (format == other.format) && (offset == other.offset) && + (buffer_index == other.buffer_index) && + (format_conversion_mode == other.format_conversion_mode); + } + + uint64_t hash() const + { + return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ + (this->format_conversion_mode << 12)); + } +}; + +struct MTLVertexBufferLayoutDescriptorPSO { + MTLVertexStepFunction step_function; + int step_rate; + int stride; + + bool operator==(const MTLVertexBufferLayoutDescriptorPSO &other) const + { + return (step_function == other.step_function) && (step_rate == other.step_rate) && + (stride == other.stride); + } + + uint64_t hash() const + { + return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^ + (this->stride << 8)); + } +}; + +/* SSBO attribute state caching. 
*/ +struct MTLSSBOAttribute { + + int mtl_attribute_index; + int vbo_id; + int attribute_offset; + int per_vertex_stride; + int attribute_format; + bool is_instance; + + MTLSSBOAttribute(){}; + MTLSSBOAttribute( + int attribute_ind, int vertexbuffer_ind, int offset, int stride, int format, bool instanced) + : mtl_attribute_index(attribute_ind), + vbo_id(vertexbuffer_ind), + attribute_offset(offset), + per_vertex_stride(stride), + attribute_format(format), + is_instance(instanced) + { + } + + bool operator==(const MTLSSBOAttribute &other) const + { + return (memcmp(this, &other, sizeof(MTLSSBOAttribute)) == 0); + } +}; + +struct MTLVertexDescriptor { + + /* Core Vertex Attributes. */ + MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]; + MTLVertexBufferLayoutDescriptorPSO + buffer_layouts[GPU_BATCH_VBO_MAX_LEN + GPU_BATCH_INST_VBO_MAX_LEN]; + int num_attributes; + int num_vert_buffers; + MTLPrimitiveTopologyClass prim_topology_class; + + /* WORKAROUND: SSBO Vertex-fetch attributes -- These follow the same structure + * but have slightly different binding rules, passed in via uniform + * push constant data block. */ + bool uses_ssbo_vertex_fetch; + MTLSSBOAttribute ssbo_attributes[GPU_VERT_ATTR_MAX_LEN]; + int num_ssbo_attributes; + + bool operator==(const MTLVertexDescriptor &other) const + { + if ((this->num_attributes != other.num_attributes) || + (this->num_vert_buffers != other.num_vert_buffers)) { + return false; + } + if (this->prim_topology_class != other.prim_topology_class) { + return false; + }; + + for (const int a : IndexRange(this->num_attributes)) { + if (!(this->attributes[a] == other.attributes[a])) { + return false; + } + } + + for (const int b : IndexRange(this->num_vert_buffers)) { + if (!(this->buffer_layouts[b] == other.buffer_layouts[b])) { + return false; + } + } + + /* NOTE: No need to compare SSBO attributes, as these will match attribute bindings for the + * given shader. 
These are simply extra pre-resolved properties we want to include in the + * cache. */ + return true; + } + + uint64_t hash() const + { + uint64_t hash = (uint64_t)(this->num_attributes ^ this->num_vert_buffers); + for (const int a : IndexRange(this->num_attributes)) { + hash ^= this->attributes[a].hash() << a; + } + + for (const int b : IndexRange(this->num_vert_buffers)) { + hash ^= this->buffer_layouts[b].hash() << (b + 10); + } + + /* NOTE: SSBO vertex fetch members not hashed as these will match attribute bindings. */ + return hash; + } +}; + +/* Metal Render Pipeline State Descriptor -- All unique information which feeds PSO creation. */ +struct MTLRenderPipelineStateDescriptor { + /* This state descriptor will contain ALL parameters which generate a unique PSO. + * We will then use this state-object to efficiently look-up or create a + * new PSO for the current shader. + * + * Unlike the 'MTLContextGlobalShaderPipelineState', this struct contains a subset of + * parameters used to distinguish between unique PSOs. This struct is hashable and only contains + * those parameters which are required by PSO generation. Non-unique state such as bound + * resources is not tracked here, as it does not require a unique PSO permutation if changed. */ + + /* Input Vertex Descriptor. */ + MTLVertexDescriptor vertex_descriptor; + + /* Render Target attachment state. + * Assign to MTLPixelFormatInvalid if not used. */ + int num_color_attachments; + MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT]; + MTLPixelFormat depth_attachment_format; + MTLPixelFormat stencil_attachment_format; + + /* Render Pipeline State affecting PSO creation. 
*/ + bool blending_enabled; + MTLBlendOperation alpha_blend_op; + MTLBlendOperation rgb_blend_op; + MTLBlendFactor dest_alpha_blend_factor; + MTLBlendFactor dest_rgb_blend_factor; + MTLBlendFactor src_alpha_blend_factor; + MTLBlendFactor src_rgb_blend_factor; + + /* Global colour write mask as this cannot be specified per attachment. */ + MTLColorWriteMask color_write_mask; + + /* Point size required by point primitives. */ + float point_size = 0.0f; + + /* Comparison Operator for caching. */ + bool operator==(const MTLRenderPipelineStateDescriptor &other) const + { + if (!(vertex_descriptor == other.vertex_descriptor)) { + return false; + } + + if ((num_color_attachments != other.num_color_attachments) || + (depth_attachment_format != other.depth_attachment_format) || + (stencil_attachment_format != other.stencil_attachment_format) || + (color_write_mask != other.color_write_mask) || + (blending_enabled != other.blending_enabled) || (alpha_blend_op != other.alpha_blend_op) || + (rgb_blend_op != other.rgb_blend_op) || + (dest_alpha_blend_factor != other.dest_alpha_blend_factor) || + (dest_rgb_blend_factor != other.dest_rgb_blend_factor) || + (src_alpha_blend_factor != other.src_alpha_blend_factor) || + (src_rgb_blend_factor != other.src_rgb_blend_factor) || + (vertex_descriptor.prim_topology_class != other.vertex_descriptor.prim_topology_class) || + (point_size != other.point_size)) { + return false; + } + + /* Attachments can be skipped, so num_color_attachments will not define the range. */ + for (const int c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { + if (color_attachment_format[c] != other.color_attachment_format[c]) { + return false; + } + } + + return true; + } + + uint64_t hash() const + { + /* NOTE(Metal): Current setup aims to minimise overlap of parameters + * which are more likely to be different, to ensure earlier hash + * differences without having to fallback to comparisons. 
+ * Though this could likely be further improved to remove + * has collisions. */ + + uint64_t hash = this->vertex_descriptor.hash(); + hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */ + hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */ + hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */ + hash ^= (uint64_t)(*( + (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ + + /* Only include elements in Hash if they are needed - avoids variable null assignments + * influencing hash. */ + if (this->num_color_attachments > 0) { + hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bitmask. */ + hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */ + hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */ + hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */ + } + + for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { + hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); // up to 555 (9 bits) + } + + hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) + << 62; + hash ^= (uint64_t)this->point_size; + + return hash; + } +}; + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh new file mode 100644 index 00000000000..cdbcd7c68f6 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -0,0 +1,1164 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_shader.h" +#include "GPU_vertex_format.h" + +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> +#include <functional> +#include <unordered_map> + +#include <mutex> +#include <thread> + +#include "mtl_framebuffer.hh" +#include "mtl_shader_interface.hh" +#include "mtl_shader_shared.h" +#include "mtl_state.hh" +#include "mtl_texture.hh" + +#include "gpu_shader_create_info.hh" +#include "gpu_shader_private.hh" + +namespace blender::gpu { + +class MTLShaderInterface; +class MTLContext; + +/* Debug control. */ +#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1 +#define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0 + +/* Separate print used only during development and debugging. */ +#if MTL_SHADER_TRANSLATION_DEBUG_OUTPUT +# define shader_debug_printf printf +#else +# define shader_debug_printf(...) /* Null print. */ +#endif + +/* Desired reflection data for a buffer binding. */ +struct MTLBufferArgumentData { + uint32_t index; + uint32_t size; + uint32_t alignment; + bool active; +}; + +/* Metal Render Pipeline State Instance. */ +struct MTLRenderPipelineStateInstance { + /* Function instances with specialisation. + * Required for argument encoder construction. */ + id<MTLFunction> vert; + id<MTLFunction> frag; + + /* PSO handle. */ + id<MTLRenderPipelineState> pso; + + /** Derived information. */ + /* Unique index for PSO variant. */ + uint32_t shader_pso_index; + /* Base bind index for binding uniform buffers, offset based on other + * bound buffers such as vertex buffers, as the count can vary. 
*/ + int base_uniform_buffer_index; + /* buffer bind slot used for null attributes (-1 if not needed). */ + int null_attribute_buffer_index; + /* buffer bind used for transform feedback output buffer. */ + int transform_feedback_buffer_index; + + /** Reflection Data. + * Currently used to verify whether uniform buffers of incorrect sizes being bound, due to left + * over bindings being used for slots that did not need updating for a particular draw. Metal + * Backend over-generates bindings due to detecting their presence, though in many cases, the + * bindings in the source are not all used for a given shader. + * This information can also be used to eliminate redundant/unused bindings. */ + bool reflection_data_available; + blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_vert; + blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_frag; +}; + +/* MTLShaderBuilder source wrapper used during initial compilation. */ +struct MTLShaderBuilder { + NSString *msl_source_vert_ = @""; + NSString *msl_source_frag_ = @""; + + /* Generated GLSL source used during compilation. */ + std::string glsl_vertex_source_ = ""; + std::string glsl_fragment_source_ = ""; + + /* Indicates whether source code has been provided via MSL directly. */ + bool source_from_msl_ = false; +}; + +/** + * MTLShader implements shader compilation, Pipeline State Object (PSO) + * creation for rendering and uniform data binding. + * Shaders can either be created from native MSL, or generated + * from a GLSL source shader using GPUShaderCreateInfo. + * + * Shader creation process: + * - Create MTLShader: + * - Convert GLSL to MSL source if required. + * - set MSL source. + * - set Vertex/Fragment function names. + * - Create and populate MTLShaderInterface. + **/ +class MTLShader : public Shader { + friend shader::ShaderCreateInfo; + friend shader::StageInterfaceInfo; + + public: + /* Cached SSBO vertex fetch attribute uniform locations. 
*/ + int uni_ssbo_input_prim_type_loc = -1; + int uni_ssbo_input_vert_count_loc = -1; + int uni_ssbo_uses_indexed_rendering = -1; + int uni_ssbo_uses_index_mode_u16 = -1; + + private: + /* Context Handle. */ + MTLContext *context_ = nullptr; + + /** Transform Feedback. */ + /* Transform feedback mode. */ + eGPUShaderTFBType transform_feedback_type_ = GPU_SHADER_TFB_NONE; + /* Transform feedback outputs written to TFB buffer. */ + blender::Vector<std::string> tf_output_name_list_; + /* Whether transform feedback is currently active. */ + bool transform_feedback_active_ = false; + /* Vertex buffer to write transform feedback data into. */ + GPUVertBuf *transform_feedback_vertbuf_ = nullptr; + + /** Shader source code. */ + MTLShaderBuilder *shd_builder_ = nullptr; + NSString *vertex_function_name_ = @""; + NSString *fragment_function_name_ = @""; + + /** Compiled shader resources. */ + id<MTLLibrary> shader_library_vert_ = nil; + id<MTLLibrary> shader_library_frag_ = nil; + bool valid_ = false; + + /** Render pipeline state and PSO caching. */ + /* Metal API Descriptor used for creation of unique PSOs based on rendering state. */ + MTLRenderPipelineDescriptor *pso_descriptor_ = nil; + /* Metal backend struct containing all high-level pipeline state parameters + * which contribute to instantiation of a unique PSO. */ + MTLRenderPipelineStateDescriptor current_pipeline_state_; + /* Cache of compiled PipelineStateObjects. */ + blender::Map<MTLRenderPipelineStateDescriptor, MTLRenderPipelineStateInstance *> pso_cache_; + + /* True to enable multi-layered rendering support. */ + bool uses_mtl_array_index_ = false; + + /** SSBO Vertex fetch pragma options. */ + /* Indicates whether to pass in VertexBuffer's as regular buffer bindings + * and perform vertex assembly manually, rather than using Stage-in. + * This is used to give a vertex shader full access to all of the + * vertex data. 
+ * This is primarily used for optimisation techniques and + * alternative solutions for Geometry-shaders which are unsupported + * by Metal. */ + bool use_ssbo_vertex_fetch_mode_ = false; + /* Output primitive type when rendering sing ssbo_vertex_fetch. */ + MTLPrimitiveType ssbo_vertex_fetch_output_prim_type_; + + /* Output vertices per original vertex shader instance. + * This number will be multiplied by the number of input primitives + * from the source draw call. */ + uint32_t ssbo_vertex_fetch_output_num_verts_ = 0; + + bool ssbo_vertex_attribute_bind_active_ = false; + int ssbo_vertex_attribute_bind_mask_ = 0; + bool ssbo_vbo_slot_used_[MTL_SSBO_VERTEX_FETCH_MAX_VBOS]; + + struct ShaderSSBOAttributeBinding { + int attribute_index = -1; + int uniform_stride; + int uniform_offset; + int uniform_fetchmode; + int uniform_vbo_id; + int uniform_attr_type; + }; + ShaderSSBOAttributeBinding cached_ssbo_attribute_bindings_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES] = {}; + + /* Metal Shader Uniform data store. + * This blocks is used to store current shader push_constant + * data before it is submitted to the GPU. This is currently + * stored per shader instance, though depending on GPU module + * functionality, this could potentially be a global data store. + * This data is associated with the PushConstantBlock, which is + * always at index zero in the UBO list. */ + void *push_constant_data_ = nullptr; + bool push_constant_modified_ = false; + + public: + MTLShader(MTLContext *ctx, const char *name); + MTLShader(MTLContext *ctx, + MTLShaderInterface *interface, + const char *name, + NSString *input_vertex_source, + NSString *input_fragment_source, + NSString *vertex_function_name_, + NSString *fragment_function_name_); + ~MTLShader(); + + /* Assign GLSL source. 
*/ + void vertex_shader_from_glsl(MutableSpan<const char *> sources) override; + void geometry_shader_from_glsl(MutableSpan<const char *> sources) override; + void fragment_shader_from_glsl(MutableSpan<const char *> sources) override; + void compute_shader_from_glsl(MutableSpan<const char *> sources) override; + + /* Compile and build - Return true if successful. */ + bool finalize(const shader::ShaderCreateInfo *info = nullptr) override; + + /* Utility. */ + bool is_valid() + { + return valid_; + } + MTLRenderPipelineStateDescriptor &get_current_pipeline_state() + { + return current_pipeline_state_; + } + MTLShaderInterface *get_interface() + { + return static_cast<MTLShaderInterface *>(this->interface); + } + void *get_push_constant_data() + { + return push_constant_data_; + } + + /* Shader source generators from create-info. + * These aren't all used by Metal, as certain parts of source code generation + * for shader entry-points and resource mapping occur during `finalize`. */ + std::string resources_declare(const shader::ShaderCreateInfo &info) const override; + std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override; + std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override; + + void transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) override; + bool transform_feedback_enable(GPUVertBuf *buf) override; + void transform_feedback_disable() override; + + void bind() override; + void unbind() override; + + void uniform_float(int location, int comp_len, int array_size, const float *data) override; + void uniform_int(int location, int comp_len, int array_size, const int *data) 
override; + bool get_push_constant_is_dirty(); + void push_constant_bindstate_mark_dirty(bool is_dirty); + + void vertformat_from_shader(GPUVertFormat *format) const override; + + /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ + int program_handle_get() const override + { + return -1; + } + + bool get_uses_ssbo_vertex_fetch() + { + return use_ssbo_vertex_fetch_mode_; + } + MTLPrimitiveType get_ssbo_vertex_fetch_output_prim_type() + { + return ssbo_vertex_fetch_output_prim_type_; + } + uint32_t get_ssbo_vertex_fetch_output_num_verts() + { + return ssbo_vertex_fetch_output_num_verts_; + } + static int ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type); + void prepare_ssbo_vertex_fetch_metadata(); + + /* SSBO Vertex Bindings Utility functions. */ + void ssbo_vertex_fetch_bind_attributes_begin(); + void ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr); + void ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder); + + /* Metal shader properties and source mapping. */ + void set_vertex_function_name(NSString *vetex_function_name); + void set_fragment_function_name(NSString *fragment_function_name_); + void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source); + void set_interface(MTLShaderInterface *interface); + MTLRenderPipelineStateInstance *bake_current_pipeline_state(MTLContext *ctx, + MTLPrimitiveTopologyClass prim_type); + + /* Transform Feedback. */ + GPUVertBuf *get_transform_feedback_active_buffer(); + bool has_transform_feedback_varying(std::string str); + + private: + /* Generate MSL shader from GLSL source. */ + bool generate_msl_from_glsl(const shader::ShaderCreateInfo *info); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader"); +}; + +/* Vertex format conversion. + * Determines whether it is possible to resize a vertex attribute type + * during input assembly. 
A conversion is implied by the difference
 * between the input vertex descriptor (from MTLBatch/MTLImmediate)
 * and the type specified in the shader source.
 *
 * e.g. vec3 to vec4 expansion, or vec4 to vec2 truncation.
 * NOTE: Vector expansion will replace empty elements with the values
 * (0,0,0,1).
 *
 * If implicit format resize is not possible, this function
 * returns false.
 *
 * Implicitly supported conversions in Metal are described here:
 * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
 */
inline bool mtl_vertex_format_resize(MTLVertexFormat mtl_format,
                                     uint32_t components,
                                     MTLVertexFormat *r_convertedFormat)
{
  /* Pick the variant of a 1..4-component format family matching `components`.
   * Out-of-range component counts yield MTLVertexFormatInvalid, which makes
   * this function return false (matching the previous per-family switches). */
  auto select_size = [components](MTLVertexFormat size1,
                                  MTLVertexFormat size2,
                                  MTLVertexFormat size3,
                                  MTLVertexFormat size4) -> MTLVertexFormat {
    switch (components) {
      case 1:
        return size1;
      case 2:
        return size2;
      case 3:
        return size3;
      case 4:
        return size4;
      default:
        return MTLVertexFormatInvalid;
    }
  };

  MTLVertexFormat out_vert_format = MTLVertexFormatInvalid;
  switch (mtl_format) {
    /* Char. */
    case MTLVertexFormatChar:
    case MTLVertexFormatChar2:
    case MTLVertexFormatChar3:
    case MTLVertexFormatChar4:
      out_vert_format = select_size(
          MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4);
      break;

    /* Normalized Char. */
    case MTLVertexFormatCharNormalized:
    case MTLVertexFormatChar2Normalized:
    case MTLVertexFormatChar3Normalized:
    case MTLVertexFormatChar4Normalized:
      out_vert_format = select_size(MTLVertexFormatCharNormalized,
                                    MTLVertexFormatChar2Normalized,
                                    MTLVertexFormatChar3Normalized,
                                    MTLVertexFormatChar4Normalized);
      break;

    /* Unsigned Char. */
    case MTLVertexFormatUChar:
    case MTLVertexFormatUChar2:
    case MTLVertexFormatUChar3:
    case MTLVertexFormatUChar4:
      out_vert_format = select_size(MTLVertexFormatUChar,
                                    MTLVertexFormatUChar2,
                                    MTLVertexFormatUChar3,
                                    MTLVertexFormatUChar4);
      break;

    /* Normalized Unsigned Char. */
    case MTLVertexFormatUCharNormalized:
    case MTLVertexFormatUChar2Normalized:
    case MTLVertexFormatUChar3Normalized:
    case MTLVertexFormatUChar4Normalized:
      out_vert_format = select_size(MTLVertexFormatUCharNormalized,
                                    MTLVertexFormatUChar2Normalized,
                                    MTLVertexFormatUChar3Normalized,
                                    MTLVertexFormatUChar4Normalized);
      break;

    /* Short. */
    case MTLVertexFormatShort:
    case MTLVertexFormatShort2:
    case MTLVertexFormatShort3:
    case MTLVertexFormatShort4:
      out_vert_format = select_size(MTLVertexFormatShort,
                                    MTLVertexFormatShort2,
                                    MTLVertexFormatShort3,
                                    MTLVertexFormatShort4);
      break;

    /* Normalized Short. */
    case MTLVertexFormatShortNormalized:
    case MTLVertexFormatShort2Normalized:
    case MTLVertexFormatShort3Normalized:
    case MTLVertexFormatShort4Normalized:
      out_vert_format = select_size(MTLVertexFormatShortNormalized,
                                    MTLVertexFormatShort2Normalized,
                                    MTLVertexFormatShort3Normalized,
                                    MTLVertexFormatShort4Normalized);
      break;

    /* Unsigned Short. */
    case MTLVertexFormatUShort:
    case MTLVertexFormatUShort2:
    case MTLVertexFormatUShort3:
    case MTLVertexFormatUShort4:
      out_vert_format = select_size(MTLVertexFormatUShort,
                                    MTLVertexFormatUShort2,
                                    MTLVertexFormatUShort3,
                                    MTLVertexFormatUShort4);
      break;

    /* Normalized Unsigned Short. */
    case MTLVertexFormatUShortNormalized:
    case MTLVertexFormatUShort2Normalized:
    case MTLVertexFormatUShort3Normalized:
    case MTLVertexFormatUShort4Normalized:
      out_vert_format = select_size(MTLVertexFormatUShortNormalized,
                                    MTLVertexFormatUShort2Normalized,
                                    MTLVertexFormatUShort3Normalized,
                                    MTLVertexFormatUShort4Normalized);
      break;

    /* Integer. */
    case MTLVertexFormatInt:
    case MTLVertexFormatInt2:
    case MTLVertexFormatInt3:
    case MTLVertexFormatInt4:
      out_vert_format = select_size(
          MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4);
      break;

    /* Unsigned Integer. */
    case MTLVertexFormatUInt:
    case MTLVertexFormatUInt2:
    case MTLVertexFormatUInt3:
    case MTLVertexFormatUInt4:
      out_vert_format = select_size(
          MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4);
      break;

    /* Half. */
    case MTLVertexFormatHalf:
    case MTLVertexFormatHalf2:
    case MTLVertexFormatHalf3:
    case MTLVertexFormatHalf4:
      out_vert_format = select_size(
          MTLVertexFormatHalf, MTLVertexFormatHalf2, MTLVertexFormatHalf3, MTLVertexFormatHalf4);
      break;

    /* Float. */
    case MTLVertexFormatFloat:
    case MTLVertexFormatFloat2:
    case MTLVertexFormatFloat3:
    case MTLVertexFormatFloat4:
      out_vert_format = select_size(MTLVertexFormatFloat,
                                    MTLVertexFormatFloat2,
                                    MTLVertexFormatFloat3,
                                    MTLVertexFormatFloat4);
      break;

    /* Other formats are passed through unmodified (no resize applied). */
    default:
      out_vert_format = mtl_format;
      break;
  }
  *r_convertedFormat = out_vert_format;
  return out_vert_format != MTLVertexFormatInvalid;
}

/* Returns whether the METAL API can internally convert between the input type of data in the
 * incoming vertex buffer and the format used by the vertex attribute inside the shader.
 *
 * - Returns TRUE if the type can be converted internally, along with returning the appropriate
 *   type to be passed into the MTLVertexAttributeDescriptorPSO.
 *
 * - Returns FALSE if the type cannot be converted internally e.g. casting Int4 to Float4.
 *
 * If implicit conversion is not possible, then we can fallback to performing manual attribute
 * conversion using the special attribute read function specialisations in the shader.
 * These functions selectively convert between types based on the specified vertex
 * attribute 'GPUVertFetchMode fetch_mode' e.g. GPU_FETCH_INT.
+ */ +inline bool mtl_convert_vertex_format(MTLVertexFormat shader_attrib_format, + GPUVertCompType component_type, + uint32_t component_length, + GPUVertFetchMode fetch_mode, + MTLVertexFormat *r_convertedFormat) +{ + bool normalized = (fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT); + MTLVertexFormat out_vert_format = MTLVertexFormatInvalid; + + switch (component_type) { + + case GPU_COMP_I8: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatChar || + shader_attrib_format == MTLVertexFormatChar2 || + shader_attrib_format == MTLVertexFormatChar3 || + shader_attrib_format == MTLVertexFormatChar4) { + + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_type, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else if (shader_attrib_format == MTLVertexFormatInt4 && component_length == 4) { + /* Allow type expansion - Shader expects MTLVertexFormatInt4, we can supply a type + * with fewer bytes if component count is the same. Sign must also match original type + * -- which is not a problem in this case. 
*/ + out_vert_format = MTLVertexFormatChar4; + } + else if (shader_attrib_format == MTLVertexFormatInt3 && component_length == 3) { + /* Same as above case for matching length and signage (Len=3)*/ + out_vert_format = MTLVertexFormatChar3; + } + else if (shader_attrib_format == MTLVertexFormatInt2 && component_length == 2) { + /* Same as above case for matching length and signage (Len=2)*/ + out_vert_format = MTLVertexFormatChar2; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 1) { + /* Same as above case for matching length and signage (Len=1)*/ + out_vert_format = MTLVertexFormatChar; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + * is equivalent to an Int -- so data will be compatible with the shader interface. */ + out_vert_format = MTLVertexFormatInt; + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Char, Char2, Char3, Char4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integer type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatChar4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. 
*/ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_U8: + switch (fetch_mode) { + /* Fetching INT: Check backing shader format matches source input. */ + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUChar || + shader_attrib_format == MTLVertexFormatUChar2 || + shader_attrib_format == MTLVertexFormatUChar3 || + shader_attrib_format == MTLVertexFormatUChar4) { + + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + /* TODO(Metal): Add other format conversions if needed. Currently no attributes hit + * this path. */ + } + else if (shader_attrib_format == MTLVertexFormatUInt4 && component_length == 4) { + /* Allow type expansion - Shader expects MTLVertexFormatUInt4, we can supply a type + * with fewer bytes if component count is the same. */ + out_vert_format = MTLVertexFormatUChar4; + } + else if (shader_attrib_format == MTLVertexFormatUInt3 && component_length == 3) { + /* Same as above case for matching length and signage (Len=3)*/ + out_vert_format = MTLVertexFormatUChar3; + } + else if (shader_attrib_format == MTLVertexFormatUInt2 && component_length == 2) { + /* Same as above case for matching length and signage (Len=2)*/ + out_vert_format = MTLVertexFormatUChar2; + } + else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 1) { + /* Same as above case for matching length and signage (Len=1)*/ + out_vert_format = MTLVertexFormatUChar; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + * is equivalent to an Int-- so data will be compatible with shader interface. 
*/ + out_vert_format = MTLVertexFormatInt; + } + else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + *is equivalent to a UInt-- so data will be compatible with shader interface. */ + out_vert_format = MTLVertexFormatUInt; + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UChar, UChar2, UChar3, UChar4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatUCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUChar4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_I16: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatShort || + shader_attrib_format == MTLVertexFormatShort2 || + shader_attrib_format == MTLVertexFormatShort3 || + shader_attrib_format == MTLVertexFormatShort4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure conversion successful. 
*/ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Short, Short2, Short3, Short4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatShort4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_U16: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUShort || + shader_attrib_format == MTLVertexFormatUShort2 || + shader_attrib_format == MTLVertexFormatUShort3 || + shader_attrib_format == MTLVertexFormatUShort4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UShort, UShort2, UShort3, UShort4 " + "but format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. 
+ * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatUShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUShort4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_I32: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatInt || + shader_attrib_format == MTLVertexFormatInt2 || + shader_attrib_format == MTLVertexFormatInt3 || + shader_attrib_format == MTLVertexFormatInt4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Int, Int2, Int3, Int4 but format " + "in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + /* Unfortunately we cannot implicitly convert between Int and Float in METAL. 
*/ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_U32: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUInt || + shader_attrib_format == MTLVertexFormatUInt2 || + shader_attrib_format == MTLVertexFormatUInt3 || + shader_attrib_format == MTLVertexFormatUInt4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UInt, UInt2, UInt3, UInt4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + /* Unfortunately we cannot convert between UInt and Float in METAL */ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_F32: + switch (fetch_mode) { + + /* Source data is float. This will be compatible + * if type specified in shader is also float. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (shader_attrib_format == MTLVertexFormatFloat || + shader_attrib_format == MTLVertexFormatFloat2 || + shader_attrib_format == MTLVertexFormatFloat3 || + shader_attrib_format == MTLVertexFormatFloat4) { + /* No conversion Needed (as type matches) - Just a vector resize, if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. 
*/ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Float, Float2, Float3, Float4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + case GPU_FETCH_INT: + /* Unfortunately we cannot convert between Float and Int implicitly in METAL. */ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_I10: + out_vert_format = MTLVertexFormatInt1010102Normalized; + break; + } + *r_convertedFormat = out_vert_format; + return (out_vert_format != MTLVertexFormatInvalid); +} + +inline uint comp_count_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatFloat: + case MTLVertexFormatInt: + case MTLVertexFormatUInt: + case MTLVertexFormatShort: + case MTLVertexFormatUChar: + case MTLVertexFormatUCharNormalized: + return 1; + case MTLVertexFormatUChar2: + case MTLVertexFormatUInt2: + case MTLVertexFormatFloat2: + case MTLVertexFormatInt2: + case MTLVertexFormatUChar2Normalized: + return 2; + case MTLVertexFormatUChar3: + case MTLVertexFormatUInt3: + case MTLVertexFormatFloat3: + case MTLVertexFormatInt3: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatUChar3Normalized: + return 3; + case MTLVertexFormatUChar4: + case MTLVertexFormatFloat4: + case MTLVertexFormatUInt4: + case MTLVertexFormatInt4: + case MTLVertexFormatUChar4Normalized: + case MTLVertexFormatInt1010102Normalized: + + default: + BLI_assert_msg(false, "Unrecognised attribute type. 
Add types to switch as needed."); + return 0; + } +} + +inline GPUVertFetchMode fetchmode_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatFloat: + case MTLVertexFormatFloat2: + case MTLVertexFormatFloat3: + case MTLVertexFormatFloat4: + return GPU_FETCH_FLOAT; + + case MTLVertexFormatUChar: + case MTLVertexFormatUChar2: + case MTLVertexFormatUChar3: + case MTLVertexFormatUChar4: + case MTLVertexFormatChar: + case MTLVertexFormatChar2: + case MTLVertexFormatChar3: + case MTLVertexFormatChar4: + case MTLVertexFormatUShort: + case MTLVertexFormatUShort2: + case MTLVertexFormatUShort3: + case MTLVertexFormatUShort4: + case MTLVertexFormatShort: + case MTLVertexFormatShort2: + case MTLVertexFormatShort3: + case MTLVertexFormatShort4: + case MTLVertexFormatUInt: + case MTLVertexFormatUInt2: + case MTLVertexFormatUInt3: + case MTLVertexFormatUInt4: + case MTLVertexFormatInt: + case MTLVertexFormatInt2: + case MTLVertexFormatInt3: + case MTLVertexFormatInt4: + return GPU_FETCH_INT; + + case MTLVertexFormatUCharNormalized: + case MTLVertexFormatUChar2Normalized: + case MTLVertexFormatUChar3Normalized: + case MTLVertexFormatUChar4Normalized: + case MTLVertexFormatCharNormalized: + case MTLVertexFormatChar2Normalized: + case MTLVertexFormatChar3Normalized: + case MTLVertexFormatChar4Normalized: + case MTLVertexFormatUShortNormalized: + case MTLVertexFormatUShort2Normalized: + case MTLVertexFormatUShort3Normalized: + case MTLVertexFormatUShort4Normalized: + case MTLVertexFormatShortNormalized: + case MTLVertexFormatShort2Normalized: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatShort4Normalized: + case MTLVertexFormatInt1010102Normalized: + return GPU_FETCH_INT_TO_FLOAT_UNIT; + + default: + BLI_assert_msg(false, "Unrecognised attribute type. 
Add types to switch as needed."); + return GPU_FETCH_FLOAT; + } +} + +inline GPUVertCompType comp_type_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatUChar: + case MTLVertexFormatUChar2: + case MTLVertexFormatUChar3: + case MTLVertexFormatUChar4: + case MTLVertexFormatUCharNormalized: + case MTLVertexFormatUChar2Normalized: + case MTLVertexFormatUChar3Normalized: + case MTLVertexFormatUChar4Normalized: + return GPU_COMP_U8; + + case MTLVertexFormatChar: + case MTLVertexFormatChar2: + case MTLVertexFormatChar3: + case MTLVertexFormatChar4: + case MTLVertexFormatCharNormalized: + case MTLVertexFormatChar2Normalized: + case MTLVertexFormatChar3Normalized: + case MTLVertexFormatChar4Normalized: + return GPU_COMP_I8; + + case MTLVertexFormatShort: + case MTLVertexFormatShort2: + case MTLVertexFormatShort3: + case MTLVertexFormatShort4: + case MTLVertexFormatShortNormalized: + case MTLVertexFormatShort2Normalized: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatShort4Normalized: + return GPU_COMP_I16; + + case MTLVertexFormatUShort: + case MTLVertexFormatUShort2: + case MTLVertexFormatUShort3: + case MTLVertexFormatUShort4: + case MTLVertexFormatUShortNormalized: + case MTLVertexFormatUShort2Normalized: + case MTLVertexFormatUShort3Normalized: + case MTLVertexFormatUShort4Normalized: + return GPU_COMP_U16; + + case MTLVertexFormatInt: + case MTLVertexFormatInt2: + case MTLVertexFormatInt3: + case MTLVertexFormatInt4: + return GPU_COMP_I32; + + case MTLVertexFormatUInt: + case MTLVertexFormatUInt2: + case MTLVertexFormatUInt3: + case MTLVertexFormatUInt4: + return GPU_COMP_U32; + + case MTLVertexFormatFloat: + case MTLVertexFormatFloat2: + case MTLVertexFormatFloat3: + case MTLVertexFormatFloat4: + return GPU_COMP_F32; + + case MTLVertexFormatInt1010102Normalized: + return GPU_COMP_I10; + + default: + BLI_assert_msg(false, "Unrecognised attribute type. 
Add types to switch as needed."); + return GPU_COMP_F32; + } +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm new file mode 100644 index 00000000000..1824057c9a2 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -0,0 +1,1263 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" +#include <algorithm> +#include <fstream> +#include <iostream> +#include <map> +#include <mutex> +#include <regex> +#include <sstream> +#include <string> + +#include <cstring> + +#include "GPU_platform.h" +#include "GPU_vertex_format.h" + +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_pso_descriptor_state.hh" +#include "mtl_shader.hh" +#include "mtl_shader_generator.hh" +#include "mtl_shader_interface.hh" +#include "mtl_texture.hh" + +extern char datatoc_mtl_shader_common_msl[]; + +using namespace blender; +using namespace blender::gpu; +using namespace blender::gpu::shader; + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation / Destruction. + * \{ */ + +/* Create empty shader to be populated later. */ +MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name) +{ + context_ = ctx; + + /* Create SHD builder to hold temporary resources until compilation is complete. */ + shd_builder_ = new MTLShaderBuilder(); + +#ifndef NDEBUG + /* Remove invalid symbols from shader name to ensure debug entrypoint function name is valid. */ + for (uint i : IndexRange(strlen(this->name))) { + char c = this->name[i]; + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + } + else { + this->name[i] = '_'; + } + } +#endif +} + +/* Create shader from MSL source. 
*/ +MTLShader::MTLShader(MTLContext *ctx, + MTLShaderInterface *interface, + const char *name, + NSString *input_vertex_source, + NSString *input_fragment_source, + NSString *vert_function_name, + NSString *frag_function_name) + : MTLShader(ctx, name) +{ + BLI_assert([vert_function_name length]); + BLI_assert([frag_function_name length]); + + this->set_vertex_function_name(vert_function_name); + this->set_fragment_function_name(frag_function_name); + this->shader_source_from_msl(input_vertex_source, input_fragment_source); + this->set_interface(interface); + this->finalize(nullptr); +} + +MTLShader::~MTLShader() +{ + if (this->is_valid()) { + + /* Free uniform data block. */ + if (push_constant_data_ != nullptr) { + MEM_freeN(push_constant_data_); + push_constant_data_ = nullptr; + } + + /* Free Metal resources. */ + if (shader_library_vert_ != nil) { + [shader_library_vert_ release]; + shader_library_vert_ = nil; + } + if (shader_library_frag_ != nil) { + [shader_library_frag_ release]; + shader_library_frag_ = nil; + } + + if (pso_descriptor_ != nil) { + [pso_descriptor_ release]; + pso_descriptor_ = nil; + } + + /* Free Pipeline Cache. */ + for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) { + if (pso_inst->vert) { + [pso_inst->vert release]; + } + if (pso_inst->frag) { + [pso_inst->frag release]; + } + if (pso_inst->pso) { + [pso_inst->pso release]; + } + delete pso_inst; + } + pso_cache_.clear(); + + /* NOTE(Metal): ShaderInterface deletion is handled in the super destructor ~Shader(). */ + } + valid_ = false; + + if (shd_builder_ != nullptr) { + delete shd_builder_; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader stage creation. + * \{ */ + +void MTLShader::vertex_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Flag source as not being compiled from native MSL. 
*/ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->source_from_msl_ = false; + + /* Remove #version tag entry. */ + sources[0] = ""; + + /* Consolidate GLSL vertex sources. */ + std::stringstream ss; + for (int i = 0; i < sources.size(); i++) { + ss << sources[i] << std::endl; + } + shd_builder_->glsl_vertex_source_ = ss.str(); +} + +void MTLShader::geometry_shader_from_glsl(MutableSpan<const char *> sources) +{ + MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!\n"); +} + +void MTLShader::fragment_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Flag source as not being compiled from native MSL. */ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->source_from_msl_ = false; + + /* Remove #version tag entry. */ + sources[0] = ""; + + /* Consolidate GLSL fragment sources. */ + std::stringstream ss; + for (int i = 0; i < sources.size(); i++) { + ss << sources[i] << std::endl; + } + shd_builder_->glsl_fragment_source_ = ss.str(); +} + +void MTLShader::compute_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Remove #version tag entry. */ + sources[0] = ""; + + /* TODO(Metal): Support compute shaders in Metal. */ + MTL_LOG_WARNING( + "MTLShader::compute_shader_from_glsl - Compute shaders currently unsupported!\n"); +} + +bool MTLShader::finalize(const shader::ShaderCreateInfo *info) +{ + /* Check if Shader has already been finalized. */ + if (this->is_valid()) { + MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!\n", this, this->name_get()); + } + + /* Perform GLSL to MSL source translation. */ + BLI_assert(shd_builder_ != nullptr); + if (!shd_builder_->source_from_msl_) { + bool success = generate_msl_from_glsl(info); + if (!success) { + /* GLSL to MSL translation has failed, or is unsupported for this shader. */ + valid_ = false; + BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n"); + + /* Create empty interface to allow shader to be silently used. 
*/ + MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get()); + this->set_interface(mtl_interface); + + /* Release temporary compilation resources. */ + delete shd_builder_; + return false; + } + } + + /* Ensure we have a valid shader interface. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + BLI_assert(mtl_interface != nullptr); + + /* Verify Context handle, fetch device and compile shader. */ + BLI_assert(context_); + id<MTLDevice> device = context_->device; + BLI_assert(device != nil); + + /* Ensure source and stage entry-point names are set. */ + BLI_assert([vertex_function_name_ length] > 0); + if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) { + BLI_assert([fragment_function_name_ length] > 0); + } + BLI_assert(shd_builder_ != nullptr); + BLI_assert([shd_builder_->msl_source_vert_ length] > 0); + + @autoreleasepool { + MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease]; + options.languageVersion = MTLLanguageVersion2_2; + options.fastMathEnabled = YES; + + NSString *source_to_compile = shd_builder_->msl_source_vert_; + for (int src_stage = 0; src_stage <= 1; src_stage++) { + + source_to_compile = (src_stage == 0) ? shd_builder_->msl_source_vert_ : + shd_builder_->msl_source_frag_; + + /* Transform feedback, skip compilation. */ + if (src_stage == 1 && (transform_feedback_type_ != GPU_SHADER_TFB_NONE)) { + shader_library_frag_ = nil; + break; + } + + /* Concatenate common src. */ + NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl]; + NSString *source_with_header_a = [str stringByAppendingString:source_to_compile]; + + /* Inject unique context ID to avoid cross-context shader cache collisions. + * Required on macOS 11.0. */ + NSString *source_with_header = source_with_header_a; + if (@available(macos 11.0, *)) { + /* Pass-through. Availability syntax requirement, expression cannot be negated. 
*/ + } + else { + source_with_header = [source_with_header_a + stringByAppendingString:[NSString stringWithFormat:@"\n\n#define MTL_CONTEXT_IND %d\n", + context_->context_id]]; + } + [source_with_header retain]; + + /* Prepare Shader Library. */ + NSError *error = nullptr; + id<MTLLibrary> library = [device newLibraryWithSource:source_with_header + options:options + error:&error]; + if (error) { + /* Only exit out if genuine error and not warning. */ + if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location == + NSNotFound) { + NSLog( + @"Compile Error - Metal Shader Library (Stage: %d), error %@ \n", src_stage, error); + BLI_assert(false); + + /* Release temporary compilation resources. */ + delete shd_builder_; + return false; + } + } + + MTL_LOG_INFO("Successfully compiled Metal Shader Library (Stage: %d) for shader; %s\n", + src_stage, + name); + BLI_assert(library != nil); + if (src_stage == 0) { + /* Retain generated library and assign debug name. */ + shader_library_vert_ = library; + [shader_library_vert_ retain]; + shader_library_vert_.label = [NSString stringWithUTF8String:this->name]; + } + else { + /* Retain generated library for fragment shader and assign debug name. */ + shader_library_frag_ = library; + [shader_library_frag_ retain]; + shader_library_frag_.label = [NSString stringWithUTF8String:this->name]; + } + + [source_with_header autorelease]; + } + pso_descriptor_.label = [NSString stringWithUTF8String:this->name]; + + /* Prepare descriptor. */ + pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init]; + [pso_descriptor_ retain]; + + /* Shader has successfully been created. */ + valid_ = true; + + /* Prepare backing data storage for local uniforms. 
*/ + const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block(); + if (push_constant_block.size > 0) { + push_constant_data_ = MEM_callocN(push_constant_block.size, __func__); + this->push_constant_bindstate_mark_dirty(true); + } + else { + push_constant_data_ = nullptr; + } + } + + /* Release temporary compilation resources. */ + delete shd_builder_; + return true; +} + +void MTLShader::transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) +{ + tf_output_name_list_.clear(); + for (int i = 0; i < name_list.size(); i++) { + tf_output_name_list_.append(std::string(name_list[i])); + } + transform_feedback_type_ = geom_type; +} + +bool MTLShader::transform_feedback_enable(GPUVertBuf *buf) +{ + BLI_assert(transform_feedback_type_ != GPU_SHADER_TFB_NONE); + BLI_assert(buf); + transform_feedback_active_ = true; + transform_feedback_vertbuf_ = buf; + /* TODO(Metal): Enable this assertion once MTLVertBuf lands. */ + /*BLI_assert(static_cast<MTLVertBuf *>(unwrap(transform_feedback_vertbuf_))->get_usage_type() == + GPU_USAGE_DEVICE_ONLY);*/ + return true; +} + +void MTLShader::transform_feedback_disable() +{ + transform_feedback_active_ = false; + transform_feedback_vertbuf_ = nullptr; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader Binding. 
+ * \{ */ + +void MTLShader::bind() +{ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + if (interface == nullptr || !this->is_valid()) { + MTL_LOG_WARNING( + "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be " + "skipped.\n", + this->name_get()); + } + ctx->pipeline_state.active_shader = this; +} + +void MTLShader::unbind() +{ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + ctx->pipeline_state.active_shader = nullptr; +} + +void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data) +{ + BLI_assert(this); + if (!this->is_valid()) { + return; + } + MTLShaderInterface *mtl_interface = get_interface(); + if (location < 0 || location >= mtl_interface->get_total_uniforms()) { + MTL_LOG_WARNING("Uniform location %d is not valid in Shader %s\n", location, this->name_get()); + return; + } + + /* Fetch more information about uniform from interface. */ + const MTLShaderUniform &uniform = mtl_interface->get_uniform(location); + + /* Prepare to copy data into local shader push constant memory block. */ + BLI_assert(push_constant_data_ != nullptr); + uint8_t *dest_ptr = (uint8_t *)push_constant_data_; + dest_ptr += uniform.byte_offset; + uint32_t copy_size = sizeof(float) * comp_len * array_size; + + /* Test per-element size. It is valid to copy less array elements than the total, but each + * array element needs to match. */ + uint32_t source_per_element_size = sizeof(float) * comp_len; + uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len; + BLI_assert_msg( + source_per_element_size <= dest_per_element_size, + "source Per-array-element size must be smaller than destination storage capacity for " + "that data"); + + if (source_per_element_size < dest_per_element_size) { + switch (uniform.type) { + + /* Special case for handling 'vec3' array upload. 
*/ + case MTL_DATATYPE_FLOAT3: { + int numvecs = uniform.array_len; + uint8_t *data_c = (uint8_t *)data; + + /* It is more efficient on the host to only modify data if it has changed. + * Data modifications are small, so memory comparison is cheap. + * If uniforms have remained unchanged, then we avoid both copying + * data into the local uniform struct, and upload of the modified uniform + * contents in the command stream. */ + bool changed = false; + for (int i = 0; i < numvecs; i++) { + changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0); + if (changed) { + memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3); + } + data_c += sizeof(float) * 3; + dest_ptr += sizeof(float) * 4; + } + if (changed) { + this->push_constant_bindstate_mark_dirty(true); + } + return; + } + + /* Special case for handling 'mat3' upload. */ + case MTL_DATATYPE_FLOAT3x3: { + int numvecs = 3 * uniform.array_len; + uint8_t *data_c = (uint8_t *)data; + + /* It is more efficient on the host to only modify data if it has changed. + * Data modifications are small, so memory comparison is cheap. + * If uniforms have remained unchanged, then we avoid both copying + * data into the local uniform struct, and upload of the modified uniform + * contents in the command stream. */ + bool changed = false; + for (int i = 0; i < numvecs; i++) { + changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0); + if (changed) { + memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3); + } + data_c += sizeof(float) * 3; + dest_ptr += sizeof(float) * 4; + } + if (changed) { + this->push_constant_bindstate_mark_dirty(true); + } + return; + } + default: + shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type); + break; + } + } + + /* Debug checks. 
*/ + BLI_assert_msg( + copy_size <= uniform.size_in_bytes, + "Size of provided uniform data is greater than size specified in Shader interface\n"); + + /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified + * local uniform data. */ + bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0); + if (data_changed) { + this->push_constant_bindstate_mark_dirty(true); + memcpy((void *)dest_ptr, (void *)data, copy_size); + } +} + +void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data) +{ + BLI_assert(this); + if (!this->is_valid()) { + return; + } + + /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in + * Metal, as we cannot point a texture binding at a different slot. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + if (location >= mtl_interface->get_total_uniforms() && + location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures())) { + MTL_LOG_WARNING( + "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform " + "location %d)\n", + location); + return; + } + + if (location < 0 || location >= mtl_interface->get_total_uniforms()) { + MTL_LOG_WARNING( + "Uniform is not valid at location %d - Shader %s\n", location, this->name_get()); + return; + } + + /* Fetch more information about uniform from interface. */ + const MTLShaderUniform &uniform = mtl_interface->get_uniform(location); + + /* Determine data location in uniform block. */ + BLI_assert(push_constant_data_ != nullptr); + uint8_t *ptr = (uint8_t *)push_constant_data_; + ptr += uniform.byte_offset; + + /* Copy data into local block. Only flag UBO as modified if data is different + * This can avoid re-binding of unmodified local uniform data, reducing + * the total number of copy operations needed and data transfers between + * CPU and GPU. 
*/ + bool data_changed = (memcmp((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size) != + 0); + if (data_changed) { + this->push_constant_bindstate_mark_dirty(true); + memcpy((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size); + } +} + +bool MTLShader::get_push_constant_is_dirty() +{ + return push_constant_modified_; +} + +void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) +{ + push_constant_modified_ = is_dirty; +} + +void MTLShader::vertformat_from_shader(GPUVertFormat *format) const +{ + GPU_vertformat_clear(format); + + const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); + for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { + const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); + + /* Extract type parameters from Metal type. */ + GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); + uint comp_len = comp_count_from_vert_format(attr.format); + GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); + + GPU_vertformat_attr_add(format, + mtl_interface->get_name_at_offset(attr.name_offset), + comp_type, + comp_len, + fetch_mode); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name METAL Custom behaviour + * \{ */ + +void MTLShader::set_vertex_function_name(NSString *vert_function_name) +{ + vertex_function_name_ = vert_function_name; +} + +void MTLShader::set_fragment_function_name(NSString *frag_function_name) +{ + fragment_function_name_ = frag_function_name; +} + +void MTLShader::shader_source_from_msl(NSString *input_vertex_source, + NSString *input_fragment_source) +{ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->msl_source_vert_ = input_vertex_source; + shd_builder_->msl_source_frag_ = input_fragment_source; + shd_builder_->source_from_msl_ = true; +} + +void MTLShader::set_interface(MTLShaderInterface *interface) +{ + /* Assign 
gpu::Shader superclass interface. */
  Shader::interface = interface;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Bake Pipeline State Objects
 * \{ */

/* Bakes or fetches a pipeline state using the current
 * MTLRenderPipelineStateDescriptor state.
 *
 * This state contains information on shader inputs/outputs, such
 * as the vertex descriptor, used to control vertex assembly for
 * current vertex data, and active render target information,
 * describing the output attachment pixel formats.
 *
 * Other rendering parameters such as global pointsize, blend state, color mask
 * etc; are also used. See mtl_shader.hh for full MTLRenderPipelineStateDescriptor.
 */
MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
    MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
{
  /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
   * be thread-safe due to organisation of high-level renderer. If there are any issues, then
   * access can be guarded as appropriate. */
  BLI_assert(this);
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(mtl_interface);
  BLI_assert(this->is_valid());

  /* NOTE(Metal): Vertex input assembly description will have been populated externally
   * via MTLBatch or MTLImmediate during binding or draw. */

  /* Resolve Context Framebuffer state. */
  MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();

  /* Update global pipeline descriptor. */
  MTLStateManager *state_manager = static_cast<MTLStateManager *>(
      MTLContext::get()->state_manager);
  MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();

  pipeline_descriptor.num_color_attachments = 0;
  for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
    MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);

    if (color_attachment.used) {
      /* If SRGB is disabled and format is SRGB, use colour data directly with no conversions
       * between linear and SRGB. */
      MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
          color_attachment.texture->format_get());
      if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
        mtl_format = MTLPixelFormatRGBA8Unorm;
      }
      pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
    }
    else {
      pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
    }

    pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
  }
  MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
  MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
  pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
                                                    gpu_texture_format_to_metal(
                                                        depth_attachment.texture->format_get()) :
                                                    MTLPixelFormatInvalid;
  pipeline_descriptor.stencil_attachment_format =
      (stencil_attachment.used) ?
          gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
          MTLPixelFormatInvalid;

  /* Resolve Context Pipeline State (required by PSO). */
  pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
  pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
  pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
  pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
  pipeline_descriptor.dest_alpha_blend_factor = ctx->pipeline_state.dest_alpha_blend_factor;
  pipeline_descriptor.dest_rgb_blend_factor = ctx->pipeline_state.dest_rgb_blend_factor;
  pipeline_descriptor.src_alpha_blend_factor = ctx->pipeline_state.src_alpha_blend_factor;
  pipeline_descriptor.src_rgb_blend_factor = ctx->pipeline_state.src_rgb_blend_factor;
  pipeline_descriptor.point_size = ctx->pipeline_state.point_size;

  /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
  bool requires_specific_topology_class = uses_mtl_array_index_ ||
                                          prim_type == MTLPrimitiveTopologyClassPoint;
  pipeline_descriptor.vertex_descriptor.prim_topology_class =
      (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;

  /* Check if current PSO exists in the cache. */
  MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
  MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
  if (pipeline_state != nullptr) {
    return pipeline_state;
  }

  shader_debug_printf("Baking new pipeline variant for shader: %s\n", this->name);

  /* Generate new Render Pipeline State Object (PSO). */
  @autoreleasepool {
    /* Prepare Render Pipeline Descriptor. */

    /* Setup function specialisation constants, used to modify and optimise
     * generated code based on current render pipeline configuration. */
    MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];

    /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
    MTLRenderPipelineStateDescriptor &current_state = pipeline_descriptor;
    MTLRenderPipelineDescriptor *desc = pso_descriptor_;
    [desc reset];
    pso_descriptor_.label = [NSString stringWithUTF8String:this->name];

    /* Offset the bind index for Uniform buffers such that they begin after the VBO
     * buffer bind slots. MTL_uniform_buffer_base_index is passed as a function
     * specialisation constant, customised per unique pipeline state permutation.
     *
     * NOTE: For binding point compaction, we could use the number of VBOs present
     * in the current PSO configuration current_state.vertex_descriptor.num_vert_buffers).
     * However, it is more efficient to simply offset the uniform buffer base index to the
     * maximal number of VBO bind-points, as then UBO bindpoints for similar draw calls
     * will align and avoid the requirement for additional binding. */
    int MTL_uniform_buffer_base_index = GPU_BATCH_VBO_MAX_LEN;

    /* Null buffer index is used if an attribute is not found in the
     * bound VBOs VertexFormat. */
    int null_buffer_index = current_state.vertex_descriptor.num_vert_buffers;
    bool using_null_buffer = false;

    if (this->get_uses_ssbo_vertex_fetch()) {
      /* If using SSBO Vertex fetch mode, no vertex descriptor is required
       * as we wont be using stage-in. */
      desc.vertexDescriptor = nil;
      desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified;

      /* We want to offset the uniform buffer base to allow for sufficient VBO binding slots - We
       * also require +1 slot for the Index buffer. */
      MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1;
    }
    else {
      for (const uint i : IndexRange(current_state.vertex_descriptor.num_attributes)) {

        /* Metal backend attribute descriptor state. */
        MTLVertexAttributeDescriptorPSO &attribute_desc =
            current_state.vertex_descriptor.attributes[i];

        /* Flag format conversion */
        /* In some cases, Metal cannot implicitly convert between data types.
         * In these instances, the fetch mode 'GPUVertFetchMode' as provided in the vertex format
         * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
         *
         * It is then the responsibility of the vertex shader to perform any necessary type
         * casting.
         *
         * See mtl_shader.hh for more information. Relevant Metal API documentation:
         * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc */
        if (attribute_desc.format == MTLVertexFormatInvalid) {
          MTL_LOG_WARNING(
              "MTLShader: baking pipeline state for '%s'- expected input attribute at "
              "index '%d' but none was specified in the current vertex state\n",
              mtl_interface->get_name(),
              i);

          /* Write out null conversion constant if attribute unused. */
          int MTL_attribute_conversion_mode = 0;
          [values setConstantValue:&MTL_attribute_conversion_mode
                              type:MTLDataTypeInt
                          withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
          continue;
        }

        int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
        [values setConstantValue:&MTL_attribute_conversion_mode
                            type:MTLDataTypeInt
                        withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
        if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT ||
            MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT) {
          shader_debug_printf(
              "TODO(Metal): Shader %s needs to support internal format conversion\n",
              mtl_interface->name);
        }

        /* Copy metal backend attribute descriptor state into PSO descriptor.
         * NOTE: need to copy each element due to direct assignment restrictions. */
        MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];

        mtl_attribute.format = attribute_desc.format;
        mtl_attribute.offset = attribute_desc.offset;
        mtl_attribute.bufferIndex = attribute_desc.buffer_index;
      }

      for (const uint i : IndexRange(current_state.vertex_descriptor.num_vert_buffers)) {
        /* Metal backend state buffer layout. */
        const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
            current_state.vertex_descriptor.buffer_layouts[i];
        /* Copy metal backend buffer layout state into PSO descriptor.
         * NOTE: need to copy each element due to copying from internal
         * backend descriptor to Metal API descriptor. */
        MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];

        mtl_buf_layout.stepFunction = buf_layout.step_function;
        mtl_buf_layout.stepRate = buf_layout.step_rate;
        mtl_buf_layout.stride = buf_layout.stride;
      }

      /* Mark empty attribute conversion. */
      for (int i = current_state.vertex_descriptor.num_attributes; i < GPU_VERT_ATTR_MAX_LEN;
           i++) {
        int MTL_attribute_conversion_mode = 0;
        [values setConstantValue:&MTL_attribute_conversion_mode
                            type:MTLDataTypeInt
                        withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
      }

      /* DEBUG: Missing/empty attributes. */
      /* Attributes are normally mapped as part of the state setting based on the used
       * GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
      for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
        const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
        MTLVertexAttributeDescriptor *current_attribute = desc.vertexDescriptor.attributes[i];

        if (current_attribute.format == MTLVertexFormatInvalid) {
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
          MTL_LOG_INFO("-> Filling in unbound attribute '%s' for shader PSO '%s' \n",
                       attribute.name,
                       mtl_interface->name);
#endif
          current_attribute.format = attribute.format;
          current_attribute.offset = 0;
          current_attribute.bufferIndex = null_buffer_index;

          /* Add Null vert buffer binding for invalid attributes. */
          if (!using_null_buffer) {
            MTLVertexBufferLayoutDescriptor *null_buf_layout =
                desc.vertexDescriptor.layouts[null_buffer_index];

            /* Use constant step function such that null buffer can
             * contain just a singular dummy attribute. */
            null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
            null_buf_layout.stepRate = 0;
            null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);

            /* If we are using the maximum number of vertex buffers, or tight binding indices,
             * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
             * index. */
            if (null_buffer_index >= MTL_uniform_buffer_base_index) {
              MTL_uniform_buffer_base_index = null_buffer_index + 1;
            }
            using_null_buffer = true;
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
            MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d\n",
                         null_buffer_index);
#endif
          }
        }
      }

      /* Primitive Topology */
      desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
    }

    /* Update constant value for 'MTL_uniform_buffer_base_index' */
    [values setConstantValue:&MTL_uniform_buffer_base_index
                        type:MTLDataTypeInt
                    withName:@"MTL_uniform_buffer_base_index"];

    /* Transform feedback constant */
    int MTL_transform_feedback_buffer_index = (this->transform_feedback_type_ !=
                                               GPU_SHADER_TFB_NONE) ?
                                                  MTL_uniform_buffer_base_index +
                                                      mtl_interface->get_total_uniform_blocks() :
                                                  -1;
    if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
      [values setConstantValue:&MTL_transform_feedback_buffer_index
                          type:MTLDataTypeInt
                      withName:@"MTL_transform_feedback_buffer_index"];
    }

    /* gl_PointSize constant */
    bool null_pointsize = true;
    float MTL_pointsize = pipeline_descriptor.point_size;
    if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
        MTLPrimitiveTopologyClassPoint) {
      /* IF pointsize is > 0.0, PROGRAM_POINT_SIZE is enabled, and gl_PointSize shader keyword
         overrides the value. Otherwise, if < 0.0, use global constant point size. */
      if (MTL_pointsize < 0.0) {
        MTL_pointsize = fabsf(MTL_pointsize);
        [values setConstantValue:&MTL_pointsize
                            type:MTLDataTypeFloat
                        withName:@"MTL_global_pointsize"];
        null_pointsize = false;
      }
    }

    if (null_pointsize) {
      MTL_pointsize = 0.0f;
      [values setConstantValue:&MTL_pointsize
                          type:MTLDataTypeFloat
                      withName:@"MTL_global_pointsize"];
    }

    /* Compile functions */
    NSError *error = nullptr;
    desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
                                                     constantValues:values
                                                              error:&error];
    if (error) {
      NSLog(@"Compile Error - Metal Shader vertex function, error %@", error);

      /* Only exit out if genuine error and not warning */
      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
          NSNotFound) {
        BLI_assert(false);
        return nullptr;
      }
    }

    /* If transform feedback is used, Vertex-only stage */
    if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
      desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
                                                         constantValues:values
                                                                  error:&error];
      if (error) {
        NSLog(@"Compile Error - Metal Shader fragment function, error %@", error);

        /* Only exit out if genuine error and not warning */
        if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
            NSNotFound) {
          BLI_assert(false);
          return nullptr;
        }
      }
    }
    else {
      desc.fragmentFunction = nil;
      desc.rasterizationEnabled = false;
    }

    /* Setup pixel format state */
    for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
         color_attachment++) {
      /* Fetch colour attachment pixel format in backend pipeline state. */
      MTLPixelFormat pixel_format = current_state.color_attachment_format[color_attachment];
      /* Populate MTL API PSO attachment descriptor. */
      MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
          desc.colorAttachments[color_attachment];

      col_attachment.pixelFormat = pixel_format;
      if (pixel_format != MTLPixelFormatInvalid) {
        bool format_supports_blending = mtl_format_supports_blending(pixel_format);

        col_attachment.writeMask = current_state.color_write_mask;
        col_attachment.blendingEnabled = current_state.blending_enabled &&
                                         format_supports_blending;
        if (format_supports_blending && current_state.blending_enabled) {
          col_attachment.alphaBlendOperation = current_state.alpha_blend_op;
          col_attachment.rgbBlendOperation = current_state.rgb_blend_op;
          col_attachment.destinationAlphaBlendFactor = current_state.dest_alpha_blend_factor;
          col_attachment.destinationRGBBlendFactor = current_state.dest_rgb_blend_factor;
          col_attachment.sourceAlphaBlendFactor = current_state.src_alpha_blend_factor;
          col_attachment.sourceRGBBlendFactor = current_state.src_rgb_blend_factor;
        }
        else {
          if (current_state.blending_enabled && !format_supports_blending) {
            shader_debug_printf(
                "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
                "blending\n",
                *((int *)&pixel_format));
          }
        }
      }
    }
    desc.depthAttachmentPixelFormat = current_state.depth_attachment_format;
    desc.stencilAttachmentPixelFormat = current_state.stencil_attachment_format;

    /* Compile PSO */

    MTLAutoreleasedRenderPipelineReflection reflection_data;
    id<MTLRenderPipelineState> pso = [ctx->device
        newRenderPipelineStateWithDescriptor:desc
                                     options:MTLPipelineOptionBufferTypeInfo
                                  reflection:&reflection_data
                                       error:&error];
    if (error) {
      NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
      BLI_assert(false);
      return nullptr;
    }
    else if (!pso) {
      NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
      BLI_assert(false);
      return nullptr;
    }
    else {
      /* NOTE(review): success log fires on every PSO bake -- consider demoting to a
       * debug-only output channel. */
      NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
    }

    /* Prepare pipeline state instance. */
    MTLRenderPipelineStateInstance *pso_inst = new MTLRenderPipelineStateInstance();
    pso_inst->vert = desc.vertexFunction;
    pso_inst->frag = desc.fragmentFunction;
    pso_inst->pso = pso;
    pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
    pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
    pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
    pso_inst->shader_pso_index = pso_cache_.size();

    pso_inst->reflection_data_available = (reflection_data != nil);
    if (reflection_data != nil) {

      /* Extract shader reflection data for buffer bindings.
       * This reflection data is used to contrast the binding information
       * we know about in the interface against the bindings in the finalized
       * PSO. This accounts for bindings which have been stripped out during
       * optimisation, and allows us to both avoid over-binding and also
       * allows us to verify size-correctness for bindings, to ensure
       * that buffers bound are not smaller than the size of expected data. */
      NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];

      pso_inst->buffer_bindings_reflection_data_vert.clear();
      int buffer_binding_max_ind = 0;

      /* First pass: find the highest shader-relative buffer index so the
       * reflection array can be sized in one go. */
      for (int i = 0; i < [vert_args count]; i++) {
        MTLArgument *arg = [vert_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          if (buf_index >= 0) {
            buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
          }
        }
      }
      pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
      for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
        pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
      }

      /* Second pass: populate per-binding reflection info. */
      for (int i = 0; i < [vert_args count]; i++) {
        MTLArgument *arg = [vert_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;

          if (buf_index >= 0) {
            pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
                (uint32_t)([arg index]),
                (uint32_t)([arg bufferDataSize]),
                (uint32_t)([arg bufferAlignment]),
                ([arg isActive] == YES) ? true : false};
          }
        }
      }

      NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];

      pso_inst->buffer_bindings_reflection_data_frag.clear();
      buffer_binding_max_ind = 0;

      for (int i = 0; i < [frag_args count]; i++) {
        MTLArgument *arg = [frag_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          if (buf_index >= 0) {
            buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
          }
        }
      }
      pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
      for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
        pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
      }

      for (int i = 0; i < [frag_args count]; i++) {
        MTLArgument *arg = [frag_args objectAtIndex:i];
        if ([arg type] == MTLArgumentTypeBuffer) {
          int buf_index = [arg index] - MTL_uniform_buffer_base_index;
          shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
          if (buf_index >= 0) {
            pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
                (uint32_t)([arg index]),
                (uint32_t)([arg bufferDataSize]),
                (uint32_t)([arg bufferAlignment]),
                ([arg isActive] == YES) ? true : false};
          }
        }
      }
    }

    /* Retain functions and PSO beyond the enclosing autorelease pool lifetime. */
    [pso_inst->vert retain];
    [pso_inst->frag retain];
    [pso_inst->pso retain];

    /* Insert into pso cache. */
    pso_cache_.add(pipeline_descriptor, pso_inst);
    shader_debug_printf("PSO CACHE: Stored new variant in PSO cache for shader '%s'\n",
                        this->name);
    return pso_inst;
  }
}
/** \} */

/* -------------------------------------------------------------------- */
/** \name SSBO-vertex-fetch-mode attribute control.
 * \{ */

/* Map a Metal vertex attribute format onto the GPU_SHADER_ATTR_TYPE_* constant
 * understood by the SSBO-vertex-fetch shader utilities. Returns -1 for formats
 * that are not yet supported (asserts in debug builds). */
int MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type)
{
  switch (attribute_type) {
    case MTLVertexFormatFloat:
      return GPU_SHADER_ATTR_TYPE_FLOAT;
    case MTLVertexFormatInt:
      return GPU_SHADER_ATTR_TYPE_INT;
    case MTLVertexFormatUInt:
      return GPU_SHADER_ATTR_TYPE_UINT;
    case MTLVertexFormatShort:
      return GPU_SHADER_ATTR_TYPE_SHORT;
    case MTLVertexFormatUChar:
      return GPU_SHADER_ATTR_TYPE_CHAR;
    case MTLVertexFormatUChar2:
      return GPU_SHADER_ATTR_TYPE_CHAR2;
    case MTLVertexFormatUChar3:
      return GPU_SHADER_ATTR_TYPE_CHAR3;
    case MTLVertexFormatUChar4:
      return GPU_SHADER_ATTR_TYPE_CHAR4;
    case MTLVertexFormatFloat2:
      return GPU_SHADER_ATTR_TYPE_VEC2;
    case MTLVertexFormatFloat3:
      return GPU_SHADER_ATTR_TYPE_VEC3;
    case MTLVertexFormatFloat4:
      return GPU_SHADER_ATTR_TYPE_VEC4;
    case MTLVertexFormatUInt2:
      return GPU_SHADER_ATTR_TYPE_UVEC2;
    case MTLVertexFormatUInt3:
      return GPU_SHADER_ATTR_TYPE_UVEC3;
    case MTLVertexFormatUInt4:
      return GPU_SHADER_ATTR_TYPE_UVEC4;
    case MTLVertexFormatInt2:
      return GPU_SHADER_ATTR_TYPE_IVEC2;
    case MTLVertexFormatInt3:
      return GPU_SHADER_ATTR_TYPE_IVEC3;
    case MTLVertexFormatInt4:
      return GPU_SHADER_ATTR_TYPE_IVEC4;
    case MTLVertexFormatUCharNormalized:
      return GPU_SHADER_ATTR_TYPE_UCHAR_NORM;
    case MTLVertexFormatUChar2Normalized:
      return GPU_SHADER_ATTR_TYPE_UCHAR2_NORM;
    case MTLVertexFormatUChar3Normalized:
      return GPU_SHADER_ATTR_TYPE_UCHAR3_NORM;
    case MTLVertexFormatUChar4Normalized:
      return GPU_SHADER_ATTR_TYPE_UCHAR4_NORM;
    case MTLVertexFormatInt1010102Normalized:
      return GPU_SHADER_ATTR_TYPE_INT1010102_NORM;
    case MTLVertexFormatShort3Normalized:
      return GPU_SHADER_ATTR_TYPE_SHORT3_NORM;
    default:
      BLI_assert_msg(false,
                     "Not yet supported attribute type for SSBO vertex fetch -- Add entry "
                     "GPU_SHADER_ATTR_TYPE_** to shader defines, and in this table");
      return -1;
  }
  /* Unreachable: every switch path (including default) returns above. */
  return -1;
}

void
MTLShader::ssbo_vertex_fetch_bind_attributes_begin()
{
  MTLShaderInterface *mtl_interface = this->get_interface();
  ssbo_vertex_attribute_bind_active_ = true;
  /* All interface attributes start out unbound: one mask bit per attribute. */
  ssbo_vertex_attribute_bind_mask_ = (1 << mtl_interface->get_total_attributes()) - 1;

  /* Reset tracking of actively used vbo bind slots for ssbo vertex fetch mode. */
  for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
    ssbo_vbo_slot_used_[i] = false;
  }
}

/* Bind a single attribute for SSBO-vertex-fetch mode by writing its descriptor
 * properties (offset, stride, step mode, VBO slot, type) to the dedicated shader
 * uniforms cached in `cached_ssbo_attribute_bindings_`. */
void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr)
{
  /* Fetch attribute. */
  MTLShaderInterface *mtl_interface = this->get_interface();
  BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
             ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());

  /* Update bind-mask to verify this attribute has been used. */
  BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
                 (1 << ssbo_attr.mtl_attribute_index) &&
             "Attribute has already been bound");
  ssbo_vertex_attribute_bind_mask_ &= ~(1 << ssbo_attr.mtl_attribute_index);

  /* Fetch attribute uniform addresses from cache. */
  ShaderSSBOAttributeBinding &cached_ssbo_attribute =
      cached_ssbo_attribute_bindings_[ssbo_attr.mtl_attribute_index];
  BLI_assert(cached_ssbo_attribute.attribute_index >= 0);

  /* Write attribute descriptor properties to shader uniforms. */
  this->uniform_int(cached_ssbo_attribute.uniform_offset, 1, 1, &ssbo_attr.attribute_offset);
  this->uniform_int(cached_ssbo_attribute.uniform_stride, 1, 1, &ssbo_attr.per_vertex_stride);
  int inst_val = (ssbo_attr.is_instance ? 1 : 0);
  this->uniform_int(cached_ssbo_attribute.uniform_fetchmode, 1, 1, &inst_val);
  this->uniform_int(cached_ssbo_attribute.uniform_vbo_id, 1, 1, &ssbo_attr.vbo_id);
  BLI_assert(ssbo_attr.attribute_format >= 0);
  this->uniform_int(cached_ssbo_attribute.uniform_attr_type, 1, 1, &ssbo_attr.attribute_format);
  ssbo_vbo_slot_used_[ssbo_attr.vbo_id] = true;
}

/* Finish attribute binding: any attribute left unbound is pointed at the context's
 * null buffer (stride 0 / constant access) so shader reads remain defined.
 * NOTE(review): `active_encoder` is unused in this body -- confirm whether it is
 * required by the interface or can be dropped. */
void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder)
{
  ssbo_vertex_attribute_bind_active_ = false;

  /* If our mask is non-zero, we have unassigned attributes. */
  if (ssbo_vertex_attribute_bind_mask_ != 0) {
    MTLShaderInterface *mtl_interface = this->get_interface();

    /* Determine if there is a free slot we can bind the null buffer to -- We should have at
     * least ONE free slot in this instance. */
    int null_attr_buffer_slot = -1;
    for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
      if (!ssbo_vbo_slot_used_[i]) {
        null_attr_buffer_slot = i;
        break;
      }
    }
    BLI_assert_msg(null_attr_buffer_slot >= 0,
                   "No suitable bind location for a NULL buffer was found");

    for (int i = 0; i < mtl_interface->get_total_attributes(); i++) {
      if (ssbo_vertex_attribute_bind_mask_ & (1 << i)) {
        const MTLShaderInputAttribute *mtl_shader_attribute = &mtl_interface->get_attribute(i);
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
        /* NOTE(review): other call-sites fetch names via
         * get_name_at_offset(name_offset); confirm this debug-only path compiles
         * with MTL_DEBUG_SHADER_ATTRIBUTES enabled. */
        MTL_LOG_WARNING(
            "SSBO Vertex Fetch missing attribute with index: %d. Shader: %s, Attr "
            "Name: "
            "%s - Null buffer bound\n",
            i,
            this->name_get(),
            mtl_shader_attribute->name);
#endif
        /* Bind Attribute with NULL buffer index and stride zero (for constant access). */
        MTLSSBOAttribute ssbo_attr(
            i, null_attr_buffer_slot, 0, 0, GPU_SHADER_ATTR_TYPE_FLOAT, false);
        ssbo_vertex_fetch_bind_attribute(ssbo_attr);
        MTL_LOG_WARNING(
            "Unassigned Shader attribute: %s, Attr Name: %s -- Binding NULL BUFFER to "
            "slot %d\n",
            this->name_get(),
            mtl_interface->get_name_at_offset(mtl_shader_attribute->name_offset),
            null_attr_buffer_slot);
      }
    }

    /* Bind NULL buffer to given VBO slot. */
    MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
    id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
    BLI_assert(null_buf);

    MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();
    rps.bind_vertex_buffer(null_buf, 0, null_attr_buffer_slot);
  }
}

/* Returns the vertex buffer currently capturing transform-feedback output, or
 * nullptr when TF is disabled or not active. */
GPUVertBuf *MTLShader::get_transform_feedback_active_buffer()
{
  if (transform_feedback_type_ == GPU_SHADER_TFB_NONE || !transform_feedback_active_) {
    return nullptr;
  }
  return transform_feedback_vertbuf_;
}

/* True if `str` names one of this shader's registered transform-feedback outputs. */
bool MTLShader::has_transform_feedback_varying(std::string str)
{
  if (this->transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
    return false;
  }

  return (std::find(tf_output_name_list_.begin(), tf_output_name_list_.end(), str) !=
          tf_output_name_list_.end());
}

}  // namespace blender::gpu

diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh
new file mode 100644
index 00000000000..c71504b84b7
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_generator.hh
/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

#include "gpu_shader_create_info.hh"
#include "gpu_shader_private.hh"

/** -- Metal Shader Generator for GLSL -> MSL conversion --
 *
 * The Metal shader generator class is used as a conversion utility for generating
 * a compatible MSL shader from a source GLSL shader.
There are several steps
 * involved in creating a shader, and structural changes which enable the source
 * to function in the same way.
 *
 * 1) Extraction and conversion of shader inputs and outputs to their Metal-compatible
 * version. This is a subtle data transformation from GPUShaderCreateInfo, allowing
 * for Metal-specific parameters.
 *
 * 2) Determine usage of shader features such as GL global variable usage, depth write output,
 * clip distances, multilayered rendering, barycentric coordinates etc;
 *
 * 3) Generate MSL shader.
 *
 * 4) Populate MTLShaderInterface, describing input/output structure, bindpoints, buffer size and
 * alignment, shader feature usage etc; Everything required by the Metal backend to successfully
 * enable use of shaders and GPU backend features.
 *
 *
 *
 * For each shading stage, we generate an MSL shader following these steps:
 *
 * 1) Output custom shader defines describing modes e.g. whether we are using
 * sampler bindings or argument buffers; at the top of the shader.
 *
 * 2) Inject common Metal headers.
 * - mtl_shader_defines.msl is used to map GLSL functions to MSL.
 * - mtl_shader_common.msl is added to ALL MSL shaders to provide
 * common functionality required by the backend. This primarily
 * contains function-constant hooks, used in PSO generation.
 *
 * 3) Create a class Scope which wraps the GLSL shader. This is used to
 * create a global per-thread scope around the shader source, to allow
 * access to common shader members (GLSL globals, shader inputs/outputs etc.)
 *
 * 4) Generate shader interface structs and populate local members where required for:
 * - VertexInputs
 * - VertexOutputs
 * - Uniforms
 * - Uniform Blocks
 * - textures;
 * etc;
 *
 * 5) Inject GLSL source.
 *
 * 6) Generate MSL shader entry point function. Every Metal shader must have a
 * vertex/fragment/kernel entrypoint, which contains the function binding table.
 * This is where bindings are specified and passed into the shader.
 *
 * For converted shaders, the MSL entry-point will also instantiate a shader
 * class per thread, and pass over bound resource references into the class.
 *
 * Finally, the shaders "main()" method will be called, and outputs are copied.
 *
 * NOTE: For position outputs, the default output position will be converted to
 * the Metal coordinate space, which involves flipping the Y coordinate and
 * re-mapping the depth range between 0 and 1, as with Vulkan.
 *
 *
 * The final shader structure looks as follows:
 *
 * -- Shader defines --
 * #define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 0
 * ... etc ...;
 *
 * class MetalShaderVertexImp {
 *
 * -- Common shader interface structs --
 * struct VertexIn {
 * vec4 pos [[attribute(0)]]
 * }
 * struct VertexOut {...}
 * struct PushConstantBlock {...}
 * struct drw_Globals {...}
 * ...
 *
 * -- GLSL source code --
 * ...
 * };
 *
 * vertex MetalShaderVertexImp::VertexOut vertex_function_entry(
 * MetalShaderVertexImp::VertexIn v_in [[stage_in]],
 * constant PushConstantBlock& globals [[buffer(MTL_uniform_buffer_base_index)]]) {
 *
 * MetalShaderVertexImp impl;
 * -- Copy input members into impl instance --
 * -- Execute GLSL main function --
 * impl.main();
 *
 * -- Copy outputs and return --
 * MetalShaderVertexImp::VertexOut v_out;
 * v_out.pos = impl.pos;
 * -- transform position to Metal coordinate system --
 * return v_out;
 * }
 *
 * -- SSBO-vertex-fetchmode --
 *
 * SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
 * as buffers in the shader, rather than using the VertexDescriptor and [[stage_in]] vertex
 * assembly.
 *
 * The purpose of this mode is to enable random-access reading of all vertex data. This is
 * particularly useful for efficiently converting geometry shaders to Metal shading language,
 * as these techniques are not supported natively in Metal.
 *
 * Geometry shaders can be re-created by firing off a vertex shader with the desired number of
 * total output vertices. Each vertex can then read whichever input attributes it needs to
 * achieve the output result.
 * This manual reading is also used to provide support for GPU_provoking_vertex, wherein the
 * output vertex for flat shading needs to change. In these cases, the manual vertex assembly
 * can flip which vertices are read within the primitive.
 *
 * From an efficiency perspective, this is more GPU-friendly than geometry shading, due to improved
 * parallelism throughout the whole pipe, and for Apple hardware specifically, there is no
 * significant performance loss from manual vertex assembly vs under-the-hood assembly.
 *
 * This mode works by passing the required vertex descriptor information into the shader
 * as uniform data, describing the type, stride, offset, stepmode and buffer index of each
 * attribute, such that the shader ssbo-vertex-fetch utility functions know how to extract data.
 *
 * This also works with indexed rendering, by similarly binding the index buffer as a manual
 * buffer.
 *
 * When this mode is used, the code generation and shader interface generation varies to
 * accommodate the required features.
 *
 * This mode can be enabled in a shader with:
 *
 * `#pragma USE_SSBO_VERTEX_FETCH(TriangleList/LineList, output_vertex_count_per_input_primitive)`
 *
 * This mirrors the geometry shader interface `layout(triangle_strip, max_vertices = 3) out;`
 */

/* SSBO vertex fetch attribute uniform parameter names.
 * These uniforms are used to pass the information
 * required to perform manual vertex assembly within
 * the vertex shader.
 * Each vertex attribute requires a number of properties
 * in order to correctly extract data from the bound vertex
 * buffers. */
#ifndef NDEBUG
/* Global.
*/ +# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "uniform_ssbo_uses_indexed_rendering" +# define UNIFORM_SSBO_INDEX_MODE_U16_STR "uniform_ssbo_index_mode_u16" +# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "uniform_ssbo_input_prim_type" +# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "uniform_ssbo_input_vert_count" +/* Per-attribute. */ +# define UNIFORM_SSBO_OFFSET_STR "uniform_ssbo_offset_" +# define UNIFORM_SSBO_STRIDE_STR "uniform_ssbo_stride_" +# define UNIFORM_SSBO_FETCHMODE_STR "uniform_ssbo_fetchmode_" +# define UNIFORM_SSBO_VBO_ID_STR "uniform_ssbo_vbo_id_" +# define UNIFORM_SSBO_TYPE_STR "uniform_ssbo_type_" +#else +/* Global. */ +# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "_ir" +# define UNIFORM_SSBO_INDEX_MODE_U16_STR "_mu" +# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "_pt" +# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "_vc" +/* Per-attribute. */ +# define UNIFORM_SSBO_OFFSET_STR "_so" +# define UNIFORM_SSBO_STRIDE_STR "_ss" +# define UNIFORM_SSBO_FETCHMODE_STR "_sf" +# define UNIFORM_SSBO_VBO_ID_STR "_sv" +# define UNIFORM_SSBO_TYPE_STR "_st" +#endif + +namespace blender::gpu { + +struct MSLUniform { + shader::Type type; + std::string name; + bool is_array; + int array_elems; + ShaderStage stage; + + MSLUniform(shader::Type uniform_type, + std::string uniform_name, + bool is_array_type, + uint32_t num_elems = 1) + : type(uniform_type), name(uniform_name), is_array(is_array_type), array_elems(num_elems) + { + } + + bool operator==(const MSLUniform &right) const + { + return (type == right.type && name == right.name && is_array == right.is_array && + array_elems == right.array_elems); + } +}; + +struct MSLUniformBlock { + std::string type_name; + std::string name; + ShaderStage stage; + bool is_array; + + bool operator==(const MSLUniformBlock &right) const + { + return (type_name == right.type_name && name == right.name); + } +}; + +enum MSLTextureSamplerAccess { + TEXTURE_ACCESS_NONE = 0, + TEXTURE_ACCESS_SAMPLE, + TEXTURE_ACCESS_READ, + 
TEXTURE_ACCESS_WRITE, + TEXTURE_ACCESS_READWRITE, +}; + +struct MSLTextureSampler { + ShaderStage stage; + shader::ImageType type; + std::string name; + MSLTextureSamplerAccess access; + uint location; + + eGPUTextureType get_texture_binding_type() const; + + void resolve_binding_indices(); + + MSLTextureSampler(ShaderStage in_stage, + shader::ImageType in_sampler_type, + std::string in_sampler_name, + MSLTextureSamplerAccess in_access, + uint in_location) + : stage(in_stage), + type(in_sampler_type), + name(in_sampler_name), + access(in_access), + location(in_location) + { + } + + bool operator==(const MSLTextureSampler &right) const + { + /* We do not compare stage as we want to avoid duplication of resources used across multiple + * stages. */ + return (type == right.type && name == right.name && access == right.access); + } + + std::string get_msl_access_str() const + { + switch (access) { + case TEXTURE_ACCESS_SAMPLE: + return "access::sample"; + case TEXTURE_ACCESS_READ: + return "access::read"; + case TEXTURE_ACCESS_WRITE: + return "access::write"; + case TEXTURE_ACCESS_READWRITE: + return "access::read_write"; + default: + BLI_assert(false); + return ""; + } + return ""; + } + + /* Get typestring for wrapped texture class members. + * wrapper struct type contains combined texture and sampler, templated + * against the texture type. + * See `COMBINED_SAMPLER_TYPE` in `mtl_shader_defines.msl`. */ + std::string get_msl_typestring_wrapper(bool is_addr) const + { + std::string str; + str = this->get_msl_wrapper_type_str() + "<" + this->get_msl_return_type_str() + "," + + this->get_msl_access_str() + ">" + ((is_addr) ? "* " : " ") + this->name; + return str; + } + + /* Get raw texture typestring -- used in entry-point function argument table. */ + std::string get_msl_typestring(bool is_addr) const + { + std::string str; + str = this->get_msl_texture_type_str() + "<" + this->get_msl_return_type_str() + "," + + this->get_msl_access_str() + ">" + ((is_addr) ? 
"* " : " ") + this->name; + return str; + } + + std::string get_msl_return_type_str() const; + std::string get_msl_texture_type_str() const; + std::string get_msl_wrapper_type_str() const; +}; + +struct MSLVertexInputAttribute { + /* layout_location of -1 means unspecified and will + * be populated manually. */ + int layout_location; + shader::Type type; + std::string name; + + bool operator==(const MSLVertexInputAttribute &right) const + { + return (layout_location == right.layout_location && type == right.type && name == right.name); + } +}; + +struct MSLVertexOutputAttribute { + std::string type; + std::string name; + /* Instance name specified if attributes belong to a struct. */ + std::string instance_name; + /* Interpolation qualifier can be any of smooth (default), flat, no_perspective. */ + std::string interpolation_qualifier; + bool is_array; + int array_elems; + + bool operator==(const MSLVertexOutputAttribute &right) const + { + return (type == right.type && name == right.name && + interpolation_qualifier == right.interpolation_qualifier && + is_array == right.is_array && array_elems == right.array_elems); + } + std::string get_mtl_interpolation_qualifier() const + { + if (interpolation_qualifier == "" || interpolation_qualifier == "smooth") { + return ""; + } + else if (interpolation_qualifier == "flat") { + return " [[flat]]"; + } + else if (interpolation_qualifier == "noperspective") { + return " [[center_no_perspective]]"; + } + return ""; + } +}; + +struct MSLFragmentOutputAttribute { + /* Explicit output binding location N for [[color(N)]] -1 = unspecified. */ + int layout_location; + /* Output index for dual source blending. -1 = unspecified. 
*/ + int layout_index; + shader::Type type; + std::string name; + + bool operator==(const MSLFragmentOutputAttribute &right) const + { + return (layout_location == right.layout_location && type == right.type && name == right.name && + layout_index == right.layout_index); + } +}; + +class MSLGeneratorInterface { + static char *msl_patch_default; + + public: + /** Shader stage input/output binding information. + * Derived from shader source reflection or GPUShaderCreateInfo. */ + blender::Vector<MSLUniformBlock> uniform_blocks; + blender::Vector<MSLUniform> uniforms; + blender::Vector<MSLTextureSampler> texture_samplers; + blender::Vector<MSLVertexInputAttribute> vertex_input_attributes; + blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings; + /* Should match vertex outputs, but defined separately as + * some shader permutations will not utilise all inputs/outputs. + * Final shader uses the intersection between the two sets. */ + blender::Vector<MSLVertexOutputAttribute> fragment_input_varyings; + blender::Vector<MSLFragmentOutputAttribute> fragment_outputs; + /* Transform feedback interface. */ + blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings_tf; + /* Clip Distances. */ + blender::Vector<std::string> clip_distances; + + /** GL Global usage. */ + /* Whether GL position is used, or an alternative vertex output should be the default. */ + bool uses_gl_Position; + /* Whether gl_FragColor is used, or whether an alternative fragment output + * should be the default. */ + bool uses_gl_FragColor; + /* Whether gl_PointCoord is used in the fragment shader. If so, + * we define float2 gl_PointCoord [[point_coord]]. */ + bool uses_gl_PointCoord; + /* Writes out to gl_PointSize in the vertex shader output. */ + bool uses_gl_PointSize; + bool uses_gl_VertexID; + bool uses_gl_InstanceID; + bool uses_gl_BaseInstanceARB; + bool uses_gl_FrontFacing; + /* Sets the output render target array index when using multilayered rendering. 
*/ + bool uses_gl_FragDepth; + bool uses_mtl_array_index_; + bool uses_transform_feedback; + bool uses_barycentrics; + + /* Parameters. */ + shader::DepthWrite depth_write; + + /* Shader buffer bind indices for argument buffers. */ + int sampler_argument_buffer_bind_index[2] = {-1, -1}; + + /*** SSBO Vertex fetch mode. ***/ + /* Indicates whether to pass in Vertex Buffer's as a regular buffers instead of using vertex + * assembly in the PSO descriptor. Enabled with special pragma. */ + bool uses_ssbo_vertex_fetch_mode; + + private: + /* Parent shader instance. */ + MTLShader &parent_shader_; + + /* If prepared from Create info. */ + const shader::ShaderCreateInfo *create_info_; + + public: + MSLGeneratorInterface(MTLShader &shader) : parent_shader_(shader){}; + + /** Prepare MSLGeneratorInterface from create-info. **/ + void prepare_from_createinfo(const shader::ShaderCreateInfo *info); + + /* When SSBO Vertex Fetch mode is used, uniforms are used to pass on the required information + * about vertex attribute bindings, in order to perform manual vertex assembly and random-access + * vertex lookup throughout the bound VBOs. + * + * Some parameters are global for the shader, others change with the currently bound + * VertexBuffers, and their format, as they do with regular GPUBatch's. + * + * (Where ##attr is the attributes name) + * uniform_ssbo_stride_##attr -- Representing the stride between elements of attribute(attr) + * uniform_ssbo_offset_##attr -- Representing the base offset within the vertex + * uniform_ssbo_fetchmode_##attr -- Whether using per-vertex fetch or per-instance fetch + * (0=vert, 1=inst) uniform_ssbo_vbo_id_##attr -- index of the vertex buffer within which the + * data for this attribute is contained uniform_ssbo_type_##attr - The type of data in the + * currently bound buffer -- Could be a mismatch with the Officially reported type. */ + void prepare_ssbo_vertex_fetch_uniforms(); + + /* Samplers. 
*/ + bool use_argument_buffer_for_samplers() const; + uint32_t num_samplers_for_stage(ShaderStage stage) const; + + /* Returns the bind index, relative to MTL_uniform_buffer_base_index. */ + uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage); + + /* Code generation utility functions. */ + std::string generate_msl_uniform_structs(ShaderStage shader_stage); + std::string generate_msl_vertex_in_struct(); + std::string generate_msl_vertex_out_struct(ShaderStage shader_stage); + std::string generate_msl_vertex_transform_feedback_out_struct(ShaderStage shader_stage); + std::string generate_msl_fragment_out_struct(); + std::string generate_msl_vertex_inputs_string(); + std::string generate_msl_fragment_inputs_string(); + std::string generate_msl_vertex_entry_stub(); + std::string generate_msl_fragment_entry_stub(); + std::string generate_msl_global_uniform_population(ShaderStage stage); + std::string generate_ubo_block_macro_chain(MSLUniformBlock block); + std::string generate_msl_uniform_block_population(ShaderStage stage); + std::string generate_msl_vertex_attribute_input_population(); + std::string generate_msl_vertex_output_population(); + std::string generate_msl_vertex_output_tf_population(); + std::string generate_msl_fragment_input_population(); + std::string generate_msl_fragment_output_population(); + std::string generate_msl_uniform_undefs(ShaderStage stage); + std::string generate_ubo_block_undef_chain(ShaderStage stage); + std::string generate_msl_texture_vars(ShaderStage shader_stage); + void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage); + void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage); + + /* Location is not always specified, so this will resolve outstanding locations. */ + void resolve_input_attribute_locations(); + void resolve_fragment_output_locations(); + + /* Create shader interface for converted GLSL shader. 
*/ + MTLShaderInterface *bake_shader_interface(const char *name); + + /* Fetch combined shader source header. */ + char *msl_patch_default_get(); + + MEM_CXX_CLASS_ALLOC_FUNCS("MSLGeneratorInterface"); +}; + +inline std::string get_stage_class_name(ShaderStage stage) +{ + switch (stage) { + case ShaderStage::VERTEX: + return "MTLShaderVertexImpl"; + case ShaderStage::FRAGMENT: + return "MTLShaderFragmentImpl"; + default: + BLI_assert_unreachable(); + return ""; + } + return ""; +} + +inline bool is_builtin_type(std::string type) +{ + /* Add Types as needed. */ + /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch. + * Though most efficient and maintainable approach to be determined. */ + static std::map<std::string, eMTLDataType> glsl_builtin_types = { + {"float", MTL_DATATYPE_FLOAT}, + {"vec2", MTL_DATATYPE_FLOAT2}, + {"vec3", MTL_DATATYPE_FLOAT3}, + {"vec4", MTL_DATATYPE_FLOAT4}, + {"int", MTL_DATATYPE_INT}, + {"ivec2", MTL_DATATYPE_INT2}, + {"ivec3", MTL_DATATYPE_INT3}, + {"ivec4", MTL_DATATYPE_INT4}, + {"uint32_t", MTL_DATATYPE_UINT}, + {"uvec2", MTL_DATATYPE_UINT2}, + {"uvec3", MTL_DATATYPE_UINT3}, + {"uvec4", MTL_DATATYPE_UINT4}, + {"mat3", MTL_DATATYPE_FLOAT3x3}, + {"mat4", MTL_DATATYPE_FLOAT4x4}, + {"bool", MTL_DATATYPE_INT}, + {"uchar", MTL_DATATYPE_UCHAR}, + {"uchar2", MTL_DATATYPE_UCHAR2}, + {"uchar2", MTL_DATATYPE_UCHAR3}, + {"uchar4", MTL_DATATYPE_UCHAR4}, + {"vec3_1010102_Unorm", MTL_DATATYPE_UINT1010102_NORM}, + {"vec3_1010102_Inorm", MTL_DATATYPE_INT1010102_NORM}, + }; + return (glsl_builtin_types.find(type) != glsl_builtin_types.end()); +} + +inline bool is_matrix_type(const std::string &type) +{ + /* Matrix type support. Add types as necessary. */ + return (type == "mat4"); +} + +inline bool is_matrix_type(const shader::Type &type) +{ + /* Matrix type support. Add types as necessary. 
*/ + return (type == shader::Type::MAT4 || type == shader::Type::MAT3); +} + +inline int get_matrix_location_count(const std::string &type) +{ + /* Matrix type support. Add types as necessary. */ + if (type == "mat4") { + return 4; + } + if (type == "mat3") { + return 3; + } + return 1; +} + +inline int get_matrix_location_count(const shader::Type &type) +{ + /* Matrix type support. Add types as necessary. */ + if (type == shader::Type::MAT4) { + return 4; + } + else if (type == shader::Type::MAT3) { + return 3; + } + return 1; +} + +inline std::string get_matrix_subtype(const std::string &type) +{ + if (type == "mat4") { + return "vec4"; + } + return type; +} + +inline shader::Type get_matrix_subtype(const shader::Type &type) +{ + if (type == shader::Type::MAT4) { + return shader::Type::VEC4; + } + if (type == shader::Type::MAT3) { + return shader::Type::VEC3; + } + return type; +} + +inline std::string get_attribute_conversion_function(bool *uses_conversion, + const shader::Type &type) +{ + /* NOTE(Metal): Add more attribute types as required. 
*/ + if (type == shader::Type::FLOAT) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float"; + } + else if (type == shader::Type::VEC2) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float2"; + } + else if (type == shader::Type::VEC3) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float3"; + } + else if (type == shader::Type::VEC4) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float4"; + } + *uses_conversion = false; + return ""; +} + +inline const char *to_string(const shader::PrimitiveOut &layout) +{ + switch (layout) { + case shader::PrimitiveOut::POINTS: + return "points"; + case shader::PrimitiveOut::LINE_STRIP: + return "line_strip"; + case shader::PrimitiveOut::TRIANGLE_STRIP: + return "triangle_strip"; + default: + BLI_assert(false); + return "unknown"; + } +} + +inline const char *to_string(const shader::PrimitiveIn &layout) +{ + switch (layout) { + case shader::PrimitiveIn::POINTS: + return "points"; + case shader::PrimitiveIn::LINES: + return "lines"; + case shader::PrimitiveIn::LINES_ADJACENCY: + return "lines_adjacency"; + case shader::PrimitiveIn::TRIANGLES: + return "triangles"; + case shader::PrimitiveIn::TRIANGLES_ADJACENCY: + return "triangles_adjacency"; + default: + BLI_assert(false); + return "unknown"; + } +} + +inline const char *to_string(const shader::Interpolation &interp) +{ + switch (interp) { + case shader::Interpolation::SMOOTH: + return "smooth"; + case shader::Interpolation::FLAT: + return "flat"; + case shader::Interpolation::NO_PERSPECTIVE: + return "noperspective"; + default: + BLI_assert(false); + return "unkown"; + } +} + +inline const char *to_string_msl(const shader::Interpolation &interp) +{ + switch (interp) { + case shader::Interpolation::SMOOTH: + return "[[smooth]]"; + case shader::Interpolation::FLAT: + return "[[flat]]"; + case shader::Interpolation::NO_PERSPECTIVE: + return 
"[[center_no_perspective]]";
+    default:
+      return "";
+  }
+}
+
+inline const char *to_string(const shader::Type &type)
+{
+  switch (type) {
+    case shader::Type::FLOAT:
+      return "float";
+    case shader::Type::VEC2:
+      return "vec2";
+    case shader::Type::VEC3:
+      return "vec3";
+    case shader::Type::VEC3_101010I2:
+      return "vec3_1010102_Inorm";
+    case shader::Type::VEC4:
+      return "vec4";
+    case shader::Type::MAT3:
+      return "mat3";
+    case shader::Type::MAT4:
+      return "mat4";
+    case shader::Type::UINT:
+      return "uint32_t";
+    case shader::Type::UVEC2:
+      return "uvec2";
+    case shader::Type::UVEC3:
+      return "uvec3";
+    case shader::Type::UVEC4:
+      return "uvec4";
+    case shader::Type::INT:
+      return "int";
+    case shader::Type::IVEC2:
+      return "ivec2";
+    case shader::Type::IVEC3:
+      return "ivec3";
+    case shader::Type::IVEC4:
+      return "ivec4";
+    case shader::Type::BOOL:
+      return "bool";
+    case shader::Type::UCHAR:
+      return "uchar";
+    case shader::Type::UCHAR2:
+      return "uchar2";
+    case shader::Type::UCHAR3:
+      return "uchar3";
+    case shader::Type::UCHAR4:
+      return "uchar4";
+    case shader::Type::CHAR:
+      return "char";
+    case shader::Type::CHAR2:
+      return "char2";
+    case shader::Type::CHAR3:
+      return "char3";
+    case shader::Type::CHAR4:
+      return "char4";
+    default:
+      BLI_assert(false);
+      /* Fix: was misspelled "unkown"; now matches the other `to_string` fall-backs. */
+      return "unknown";
+  }
+}
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
new file mode 100644
index 00000000000..37c1ddd6e7a
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -0,0 +1,2976 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "BKE_global.h"
+
+#include "BLI_string.h"
+
+/* NOTE(review): a duplicate `#include "BLI_string.h"` was removed here. */
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <string>
+
+#include <cstring>
+
+#include "GPU_platform.h"
+#include "GPU_vertex_format.h"
+
+#include "gpu_shader_dependency_private.h" + +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_shader.hh" +#include "mtl_shader_generator.hh" +#include "mtl_shader_interface.hh" +#include "mtl_texture.hh" + +extern char datatoc_mtl_shader_defines_msl[]; +extern char datatoc_mtl_shader_shared_h[]; + +using namespace blender; +using namespace blender::gpu; +using namespace blender::gpu::shader; + +namespace blender::gpu { + +char *MSLGeneratorInterface::msl_patch_default = nullptr; + +/* -------------------------------------------------------------------- */ +/** \name Shader Translation utility functions. + * \{ */ + +static eMTLDataType to_mtl_type(Type type) +{ + switch (type) { + case Type::FLOAT: + return MTL_DATATYPE_FLOAT; + case Type::VEC2: + return MTL_DATATYPE_FLOAT2; + case Type::VEC3: + return MTL_DATATYPE_FLOAT3; + case Type::VEC4: + return MTL_DATATYPE_FLOAT4; + case Type::MAT3: + return MTL_DATATYPE_FLOAT3x3; + case Type::MAT4: + return MTL_DATATYPE_FLOAT4x4; + case Type::UINT: + return MTL_DATATYPE_UINT; + case Type::UVEC2: + return MTL_DATATYPE_UINT2; + case Type::UVEC3: + return MTL_DATATYPE_UINT3; + case Type::UVEC4: + return MTL_DATATYPE_UINT4; + case Type::INT: + return MTL_DATATYPE_INT; + case Type::IVEC2: + return MTL_DATATYPE_INT2; + case Type::IVEC3: + return MTL_DATATYPE_INT3; + case Type::IVEC4: + return MTL_DATATYPE_INT4; + case Type::VEC3_101010I2: + return MTL_DATATYPE_INT1010102_NORM; + case Type::BOOL: + return MTL_DATATYPE_BOOL; + case Type::UCHAR: + return MTL_DATATYPE_UCHAR; + case Type::UCHAR2: + return MTL_DATATYPE_UCHAR2; + case Type::UCHAR3: + return MTL_DATATYPE_UCHAR3; + case Type::UCHAR4: + return MTL_DATATYPE_UCHAR4; + case Type::CHAR: + return MTL_DATATYPE_CHAR; + case Type::CHAR2: + return MTL_DATATYPE_CHAR2; + case Type::CHAR3: + return MTL_DATATYPE_CHAR3; + case Type::CHAR4: + return MTL_DATATYPE_CHAR4; + default: { + BLI_assert_msg(false, "Unexpected data type"); + } + } + 
return MTL_DATATYPE_FLOAT; +} + +static std::regex remove_non_numeric_characters("[^0-9]"); + +#ifndef NDEBUG +static void remove_multiline_comments_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + bool is_inside_comment = false; + for (char *c = current_str_begin; c < current_str_end; c++) { + if (is_inside_comment) { + if ((*c == '*') && (c < current_str_end - 1) && (*(c + 1) == '/')) { + is_inside_comment = false; + *c = ' '; + *(c + 1) = ' '; + } + else { + *c = ' '; + } + } + else { + if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '*')) { + is_inside_comment = true; + *c = ' '; + } + } + } +} + +static void remove_singleline_comments_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + bool is_inside_comment = false; + for (char *c = current_str_begin; c < current_str_end; c++) { + if (is_inside_comment) { + if (*c == '\n') { + is_inside_comment = false; + } + else { + *c = ' '; + } + } + else { + if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '/')) { + is_inside_comment = true; + *c = ' '; + } + } + } +} +#endif + +static bool is_program_word(const char *chr, int *len) +{ + int numchars = 0; + for (const char *c = chr; *c != '\0'; c++) { + char ch = *c; + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || + (numchars > 0 && ch >= '0' && ch <= '9') || ch == '_') { + numchars++; + } + else { + *len = numchars; + return (numchars > 0); + } + } + *len = numchars; + return true; +} + +/* Replace function parameter patterns containing: + * `out vec3 somevar` with `THD vec3&somevar`. + * which enables pass by reference via resolved macro: + * thread vec3& somevar. 
*/ +static void replace_outvars(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) { + char *start = c; + if (strncmp(c, "out ", 4) == 0) { + if (strncmp(c - 2, "in", 2) == 0) { + start = c - 2; + } + + /* Check that the following are words. */ + int len1, len2; + char *word_base1 = c + 4; + char *word_base2 = word_base1; + + if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) { + word_base2 = word_base1 + len1 + 1; + if (is_program_word(word_base2, &len2)) { + /* Match found. */ + bool is_array = (*(word_base2 + len2) == '['); + + /* Generate outvar pattern of form 'THD type&var' from original 'out vec4 var'. */ + *start = 'T'; + *(start + 1) = 'H'; + *(start + 2) = 'D'; + for (char *clear = start + 3; clear < c + 4; clear++) { + *clear = ' '; + } + *(word_base2 - 1) = is_array ? '*' : '&'; + } + } + } + } +} + +static void replace_array_initializers_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + for (char *c = current_str_begin; c < current_str_end - 6; c++) { + char *base_scan = c; + int typelen = 0; + + if (is_program_word(c, &typelen) && *(c + typelen) == '[') { + + char *array_len_start = c + typelen + 1; + c = array_len_start; + char *closing_square_brace = strchr(c, ']'); + if (closing_square_brace != nullptr) { + c = closing_square_brace; + char *first_bracket = c + 1; + if (*first_bracket == '(') { + c += 1; + char *semi_colon = strchr(c, ';'); + if (semi_colon != nullptr && *(semi_colon - 1) == ')') { + char *closing_bracket = semi_colon - 1; + + /* Resolve to MSL-compatible array formatting. 
*/ + *first_bracket = '{'; + *closing_bracket = '}'; + for (char *clear = base_scan; clear <= closing_square_brace; clear++) { + *clear = ' '; + } + } + } + } + else { + return; + } + } + } +} + +#ifndef NDEBUG + +static bool balanced_braces(char *current_str_begin, char *current_str_end) +{ + int nested_bracket_depth = 0; + for (char *c = current_str_begin; c < current_str_end; c++) { + /* Track whether we are in global scope. */ + if (*c == '{' || *c == '[' || *c == '(') { + nested_bracket_depth++; + continue; + } + if (*c == '}' || *c == ']' || *c == ')') { + nested_bracket_depth--; + continue; + } + } + return (nested_bracket_depth == 0); +} + +/* Certain Constants (such as arrays, or pointer types) declared in Global-scope + * end up being initialised per shader thread, resulting in high + * register pressure within the shader. + * Here we flag occurences of these constants such that + * they can be moved to a place where this is not a problem. + * + * Constants declared within function-scope do not exhibit this problem. */ +static void extract_global_scope_constants(std::string &str, std::stringstream &global_scope_out) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + int nested_bracket_depth = 0; + for (char *c = current_str_begin; c < current_str_end - 6; c++) { + /* Track whether we are in global scope. 
*/ + if (*c == '{' || *c == '[' || *c == '(') { + nested_bracket_depth++; + continue; + } + if (*c == '}' || *c == ']' || *c == ')') { + nested_bracket_depth--; + BLI_assert(nested_bracket_depth >= 0); + continue; + } + + /* Check For global const declarations */ + if (nested_bracket_depth == 0 && strncmp(c, "const ", 6) == 0 && + strncmp(c, "const constant ", 15) != 0) { + char *c_expr_end = strstr(c, ";"); + if (c_expr_end != nullptr && balanced_braces(c, c_expr_end)) { + MTL_LOG_INFO( + "[PERFORMANCE WARNING] Global scope constant expression found - These get allocated " + "per-thread in METAL - Best to use Macro's or uniforms to avoid overhead: '%.*s'\n", + (int)(c_expr_end + 1 - c), + c); + + /* Jump ptr forward as we know we remain in global scope. */ + c = c_expr_end - 1; + continue; + } + } + } +} +#endif + +static bool extract_ssbo_pragma_info(const MTLShader *shader, + const MSLGeneratorInterface &, + const std::string &in_vertex_src, + MTLPrimitiveType &out_prim_tye, + uint32_t &out_num_output_verts) +{ + /* SSBO Vertex-fetch parameter extraction. */ + static std::regex use_ssbo_fetch_mode_find( + "#pragma " + "USE_SSBO_VERTEX_FETCH\\(\\s*(TriangleList|LineList|\\w+)\\s*,\\s*([0-9]+)\\s*\\)"); + + /* Perform regex search if pragma string found. */ + std::smatch vertex_shader_ssbo_flags; + bool uses_ssbo_fetch = false; + if (in_vertex_src.find("#pragma USE_SSBO_VERTEX_FETCH") != std::string::npos) { + uses_ssbo_fetch = std::regex_search( + in_vertex_src, vertex_shader_ssbo_flags, use_ssbo_fetch_mode_find); + } + if (uses_ssbo_fetch) { + /* Extract Expected output primitive type: + * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive) + * + * Supported Primitive Types (Others can be added if needed, but List types for efficiency): + * - TriangleList + * - LineList + * + * Output vertex count is determined by calculating the number of input primitives, and + * multiplying that by the number of output vertices specified. 
*/ + std::string str_output_primitive_type = vertex_shader_ssbo_flags[1].str(); + std::string str_output_prim_count_per_vertex = vertex_shader_ssbo_flags[2].str(); + + /* Ensure output primitive type is valid. */ + if (str_output_primitive_type == "TriangleList") { + out_prim_tye = MTLPrimitiveTypeTriangle; + } + else if (str_output_primitive_type == "LineList") { + out_prim_tye = MTLPrimitiveTypeLine; + } + else { + MTL_LOG_ERROR("Unsupported output primitive type for SSBO VERTEX FETCH MODE. Shader: %s", + shader->name_get()); + return false; + } + + /* Assign output num vertices per primitive. */ + out_num_output_verts = std::stoi( + std::regex_replace(str_output_prim_count_per_vertex, remove_non_numeric_characters, "")); + BLI_assert(out_num_output_verts > 0); + return true; + } + + /* SSBO Vertex fetchmode not used. */ + return false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name MTLShader builtin shader generation utilities. + * \{ */ + +static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res) +{ + switch (res.bind_type) { + case ShaderCreateInfo::Resource::BindType::SAMPLER: + break; + case ShaderCreateInfo::Resource::BindType::IMAGE: + break; + case ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: { + int64_t array_offset = res.uniformbuf.name.find_first_of("["); + if (array_offset == -1) { + /* Create local class member as constant pointer reference to bound UBO buffer. + * Given usage within a shader follows ubo_name.ubo_element syntax, we can + * dereference the pointer as the compiler will optimise this data fetch. + * To do this, we also give the ubo name a postfix of `_local` to avoid + * macro accessor collisions. 
*/ + os << "constant " << res.uniformbuf.type_name << " *" << res.uniformbuf.name + << "_local;\n"; + os << "#define " << res.uniformbuf.name << " (*" << res.uniformbuf.name << "_local)\n"; + } + else { + /* For arrays, we can directly provide the constant access pointer, as the array + * syntax will de-reference this at the correct fetch index. */ + StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset); + os << "constant " << res.uniformbuf.type_name << " *" << name_no_array << ";\n"; + } + break; + } + case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: + break; + } +} + +std::string MTLShader::resources_declare(const ShaderCreateInfo &info) const +{ + /* NOTE(Metal): We only use the upfront preparation functions to populate members which + * would exist in the original non-create-info variant. + * + * This function is only used to generate resource structs. + * Global-scope handles for Uniforms, UBOs, textures and samplers + * are generated during class-wrapper construction in `generate_msl_from_glsl`. */ + std::stringstream ss; + + /* Generate resource stubs for UBOs and textures. */ + ss << "\n/* Pass Resources. */\n"; + for (const ShaderCreateInfo::Resource &res : info.pass_resources_) { + print_resource(ss, res); + } + ss << "\n/* Batch Resources. */\n"; + for (const ShaderCreateInfo::Resource &res : info.batch_resources_) { + print_resource(ss, res); + } + /* Note: Push constant uniform data is generated during `generate_msl_from_glsl` + * as the generated output is needed for all paths. This includes generation + * of the push constant data structure (struct PushConstantBlock). + * As all shader generation paths require creation of this. */ + return ss.str(); +} + +std::string MTLShader::vertex_interface_declare(const shader::ShaderCreateInfo &info) const +{ + /* NOTE(Metal): We only use the upfront preparation functions to populate members which + * would exist in the original non-create-info variant. 
+ * + * Here we generate the variables within class wrapper scope to allow reading of + * input attributes by the main code. */ + std::stringstream ss; + ss << "\n/* Vertex Inputs. */\n"; + for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) { + ss << to_string(attr.type) << " " << attr.name << ";\n"; + } + return ss.str(); +} + +std::string MTLShader::fragment_interface_declare(const shader::ShaderCreateInfo &info) const +{ + /* For shaders generated from MSL, the fragment-output struct is generated as part of the entry + * stub during glsl->MSL conversion in `generate_msl_from_glsl`. + * Here, we can instead generate the global-scope variables which will be populated during + * execution. + * + * NOTE: The output declaration for location and blend index are generated in the entry-point + * struct. This is simply a mirror class member which stores the value during main shader body + * execution. */ + std::stringstream ss; + ss << "\n/* Fragment Outputs. */\n"; + for (const ShaderCreateInfo::FragOut &output : info.fragment_outputs_) { + ss << to_string(output.type) << " " << output.name << ";\n"; + } + ss << "\n"; + + return ss.str(); +} + +std::string MTLShader::MTLShader::geometry_interface_declare( + const shader::ShaderCreateInfo &info) const +{ + BLI_assert_msg(false, "Geometry shading unsupported by Metal"); + return ""; +} + +std::string MTLShader::geometry_layout_declare(const shader::ShaderCreateInfo &info) const +{ + BLI_assert_msg(false, "Geometry shading unsupported by Metal"); + return ""; +} + +std::string MTLShader::compute_layout_declare(const ShaderCreateInfo &info) const +{ + /* TODO(Metal): Metal compute layout pending compute support. */ + BLI_assert_msg(false, "Compute shaders unsupported by Metal"); + return ""; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader Translation. 
+ * \{ */
+
+char *MSLGeneratorInterface::msl_patch_default_get()
+{
+ if (msl_patch_default != nullptr) {
+ return msl_patch_default;
+ }
+
+ std::stringstream ss_patch;
+ ss_patch << datatoc_mtl_shader_shared_h << std::endl;
+ ss_patch << datatoc_mtl_shader_defines_msl << std::endl;
+ size_t len = strlen(ss_patch.str().c_str()) + 1;
+
+ msl_patch_default = (char *)malloc(len * sizeof(char));
+ strcpy(msl_patch_default, ss_patch.str().c_str());
+ return msl_patch_default;
+}
+
+bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
+{
+ /* Verify if create-info is available.
+ * NOTE(Metal): For now, only support creation from CreateInfo.
+ * If needed, we can perform source translation without this using
+ * manual reflection. */
+ bool uses_create_info = info != nullptr;
+ if (!uses_create_info) {
+ MTL_LOG_WARNING("Unable to compile shader %p '%s' as no create-info was provided!\n",
+ this,
+ this->name_get());
+ valid_ = false;
+ return false;
+ }
+
+ /* MSLGeneratorInterface is a class populated to describe all parameters, resources, bindings
+ * and features used by the source GLSL shader. This information is then used to generate the
+ * appropriate Metal entry points and perform any required source translation. */
+ MSLGeneratorInterface msl_iface(*this);
+ BLI_assert(shd_builder_ != nullptr);
+
+ /* Populate MSLGeneratorInterface from Create-Info.
+ * Note this is a separate path as MSLGeneratorInterface can also be manually populated
+ * from parsing, if support for shaders without create-info is required. */
+ msl_iface.prepare_from_createinfo(info);
+
+ /* Verify Source sizes are greater than zero. */
+ BLI_assert(shd_builder_->glsl_vertex_source_.size() > 0);
+ if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
+ BLI_assert(shd_builder_->glsl_fragment_source_.size() > 0);
+ }
+
+ /** Determine use of Transform Feedback.
**/ + msl_iface.uses_transform_feedback = false; + if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) { + /* Ensure TransformFeedback is configured correctly. */ + BLI_assert(tf_output_name_list_.size() > 0); + msl_iface.uses_transform_feedback = true; + } + + /* Concatenate msl_shader_defines to provide functionality mapping + * from GLSL to MSL. Also include additioanl GPU defines for + * optional high-level feature support. */ + const std::string msl_defines_string = + "#define GPU_ARB_texture_cube_map_array 1\n\ + #define GPU_ARB_shader_draw_parameters 1\n\ + #define GPU_ARB_texture_gather 1\n"; + + shd_builder_->glsl_vertex_source_ = msl_defines_string + shd_builder_->glsl_vertex_source_; + if (!msl_iface.uses_transform_feedback) { + shd_builder_->glsl_fragment_source_ = msl_defines_string + shd_builder_->glsl_fragment_source_; + } + + /* Extract SSBO usage information from shader pragma: + * + * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive) + * + * This will determine whether SSBO-vertex-fetch + * mode is ued for this shader. Returns true if used, and populates output reference + * values with the output prim type and output number of vertices. */ + MTLPrimitiveType vertex_fetch_ssbo_output_prim_type = MTLPrimitiveTypeTriangle; + uint32_t vertex_fetch_ssbo_num_output_verts = 0; + msl_iface.uses_ssbo_vertex_fetch_mode = extract_ssbo_pragma_info( + this, + msl_iface, + shd_builder_->glsl_vertex_source_, + vertex_fetch_ssbo_output_prim_type, + vertex_fetch_ssbo_num_output_verts); + + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + shader_debug_printf( + "[Shader] SSBO VERTEX FETCH Enabled for Shader '%s' With Output primitive type: %s, " + "vertex count: %u\n", + this->name_get(), + output_primitive_type.c_str(), + vertex_fetch_ssbo_num_output_verts); + } + + /*** Regex Commands ***/ + /* Source cleanup and syntax replacement. 
*/ + static std::regex remove_excess_newlines("\\n+"); + static std::regex replace_mat3("mat3\\s*\\("); + + /* Special condition - mat3 and array constructor replacement. + * Also replace excessive new lines to ensure cases are not missed. + * NOTE(Metal): May be able to skip excess-newline removal. */ + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, remove_excess_newlines, "\n"); + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, replace_mat3, "MAT3("); + replace_array_initializers_func(shd_builder_->glsl_vertex_source_); + + if (!msl_iface.uses_transform_feedback) { + shd_builder_->glsl_fragment_source_ = std::regex_replace( + shd_builder_->glsl_fragment_source_, remove_excess_newlines, "\n"); + shd_builder_->glsl_fragment_source_ = std::regex_replace( + shd_builder_->glsl_fragment_source_, replace_mat3, "MAT3("); + replace_array_initializers_func(shd_builder_->glsl_fragment_source_); + } + + /**** Extract usage of GL globals. ****/ + /* NOTE(METAL): Currently still performing fallback string scan, as info->builtins_ does + * not always contain the usage flag. This can be removed once all appropriate create-info's + * have been updated. In some cases, this may incur a false positive if access is guarded + * behind a macro. Though in these cases, unused code paths and paramters will be + * optimised out by the Metal shader compiler. */ + + /** Identify usage of vertex-shader builtins. 
*/ + msl_iface.uses_gl_VertexID = bool(info->builtins_ & BuiltinBits::VERTEX_ID) || + shd_builder_->glsl_vertex_source_.find("gl_VertexID") != + std::string::npos; + msl_iface.uses_gl_InstanceID = bool(info->builtins_ & BuiltinBits::INSTANCE_ID) || + shd_builder_->glsl_vertex_source_.find("gl_InstanceID") != + std::string::npos || + shd_builder_->glsl_vertex_source_.find("gpu_InstanceIndex") != + std::string::npos || + msl_iface.uses_ssbo_vertex_fetch_mode; + + /* instance ID in GL is [0, instancecount] in metal it is [base_instance, + * base_instance+instance_count], so we need to offset instanceID by base instance in Metal -- + * Thus we expose the [[base_instance]] attribute if instance ID is used at all. */ + msl_iface.uses_gl_BaseInstanceARB = msl_iface.uses_gl_InstanceID || + shd_builder_->glsl_vertex_source_.find( + "gl_BaseInstanceARB") != std::string::npos || + shd_builder_->glsl_vertex_source_.find("gpu_BaseInstance") != + std::string::npos; + msl_iface.uses_gl_Position = shd_builder_->glsl_vertex_source_.find("gl_Position") != + std::string::npos; + msl_iface.uses_gl_PointSize = shd_builder_->glsl_vertex_source_.find("gl_PointSize") != + std::string::npos; + msl_iface.uses_mtl_array_index_ = shd_builder_->glsl_vertex_source_.find( + "MTLRenderTargetArrayIndex") != std::string::npos; + + /** Identify usage of fragment-shader builtins. */ + if (!msl_iface.uses_transform_feedback) { + std::smatch gl_special_cases; + msl_iface.uses_gl_PointCoord = bool(info->builtins_ & BuiltinBits::POINT_COORD) || + shd_builder_->glsl_fragment_source_.find("gl_PointCoord") != + std::string::npos; + msl_iface.uses_barycentrics = bool(info->builtins_ & BuiltinBits::BARYCENTRIC_COORD); + msl_iface.uses_gl_FrontFacing = bool(info->builtins_ & BuiltinBits::FRONT_FACING) || + shd_builder_->glsl_fragment_source_.find("gl_FrontFacing") != + std::string::npos; + + /* NOTE(Metal): If FragColor is not used, then we treat the first fragment output attachment + * as the primary output. 
*/ + msl_iface.uses_gl_FragColor = shd_builder_->glsl_fragment_source_.find("gl_FragColor") != + std::string::npos; + + /* NOTE(Metal): FragDepth output mode specified in create-info 'DepthWrite depth_write_'. + * If parsing without create-info, manual extraction will be required. */ + msl_iface.uses_gl_FragDepth = shd_builder_->glsl_fragment_source_.find("gl_FragDepth") != + std::string::npos; + msl_iface.depth_write = info->depth_write_; + } + + /* Generate SSBO vertex fetch mode uniform data hooks. */ + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + msl_iface.prepare_ssbo_vertex_fetch_uniforms(); + } + + /* Extract gl_ClipDistances. */ + static std::regex gl_clipdistance_find("gl_ClipDistance\\[([0-9])\\]"); + + std::string clip_search_str = shd_builder_->glsl_vertex_source_; + std::smatch vertex_clip_distances; + + while (std::regex_search(clip_search_str, vertex_clip_distances, gl_clipdistance_find)) { + shader_debug_printf("VERTEX CLIP DISTANCES FOUND: str: %s\n", + vertex_clip_distances[1].str().c_str()); + auto found = std::find(msl_iface.clip_distances.begin(), + msl_iface.clip_distances.end(), + vertex_clip_distances[1].str()); + if (found == msl_iface.clip_distances.end()) { + msl_iface.clip_distances.append(vertex_clip_distances[1].str()); + } + clip_search_str = vertex_clip_distances.suffix(); + } + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, gl_clipdistance_find, "gl_ClipDistance_$1"); + + /* Replace 'out' attribute on function parameters with pass-by-reference. */ + replace_outvars(shd_builder_->glsl_vertex_source_); + if (!msl_iface.uses_transform_feedback) { + replace_outvars(shd_builder_->glsl_fragment_source_); + } + + /**** METAL Shader source generation. ****/ + /* Setup stringstream for populaing generated MSL shader vertex/frag shaders. */ + std::stringstream ss_vertex; + std::stringstream ss_fragment; + + /*** Generate VERTEX Stage ***/ + /* Conditional defines. 
*/ + if (msl_iface.use_argument_buffer_for_samplers()) { + ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; + ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " + << msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl; + } + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS + << std::endl; + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX + << std::endl; + for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { + ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; + } + + /* Macro's */ + ss_vertex << "#define " + "UNIFORM_SSBO_USES_INDEXED_RENDERING_STR " UNIFORM_SSBO_USES_INDEXED_RENDERING_STR + "\n" + "#define UNIFORM_SSBO_INDEX_MODE_U16_STR " UNIFORM_SSBO_INDEX_MODE_U16_STR + "\n" + "#define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR " UNIFORM_SSBO_INPUT_PRIM_TYPE_STR + "\n" + "#define UNIFORM_SSBO_INPUT_VERT_COUNT_STR " UNIFORM_SSBO_INPUT_VERT_COUNT_STR + "\n" + "#define UNIFORM_SSBO_OFFSET_STR " UNIFORM_SSBO_OFFSET_STR + "\n" + "#define UNIFORM_SSBO_STRIDE_STR " UNIFORM_SSBO_STRIDE_STR + "\n" + "#define UNIFORM_SSBO_FETCHMODE_STR " UNIFORM_SSBO_FETCHMODE_STR + "\n" + "#define UNIFORM_SSBO_VBO_ID_STR " UNIFORM_SSBO_VBO_ID_STR + "\n" + "#define UNIFORM_SSBO_TYPE_STR " UNIFORM_SSBO_TYPE_STR "\n"; + } + + /* Inject common Metal header. */ + ss_vertex << msl_iface.msl_patch_default_get() << std::endl << std::endl; + +#ifndef NDEBUG + /* Performance warning: Extract global-scope expressions. + * Note: This is dependent on stripping out comments + * to remove false positives. 
*/ + remove_multiline_comments_func(shd_builder_->glsl_vertex_source_); + remove_singleline_comments_func(shd_builder_->glsl_vertex_source_); + extract_global_scope_constants(shd_builder_->glsl_vertex_source_, ss_vertex); +#endif + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + /* Only generate struct for ones with instance names */ + if (!iface->instance_name.is_empty()) { + ss_vertex << "struct " << iface->name << " {" << std::endl; + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + ss_vertex << to_string(inout.type) << " " << inout.name << " " + << to_string_msl(inout.interp) << ";" << std::endl; + } + ss_vertex << "};" << std::endl; + } + } + + /* Wrap entire GLSL source inside class to create + * a scope within the class to enable use of global variables. + * e.g. global access to attributes, uniforms, UBOs, textures etc; */ + ss_vertex << "class " << get_stage_class_name(ShaderStage::VERTEX) << " {" << std::endl; + ss_vertex << "public:" << std::endl; + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + bool is_inside_struct = false; + if (!iface->instance_name.is_empty()) { + /* If shader stage interface has an instance name, then it + * is using a struct foramt and as such we only need a local + * class member for the struct, not each element. */ + ss_vertex << iface->name << " " << iface->instance_name << ";" << std::endl; + is_inside_struct = true; + } + + /* Generate local variables, populate elems for vertex out struct gen. */ + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + + /* Only output individual elements if they are not part of an interface struct instance. 
*/ + if (!is_inside_struct) { + ss_vertex << to_string(inout.type) << " " << inout.name << ";" << std::endl; + } + + const char *arraystart = strstr(inout.name.c_str(), "["); + bool is_array = (arraystart != nullptr); + int array_len = (is_array) ? std::stoi(std::regex_replace( + arraystart, remove_non_numeric_characters, "")) : + 0; + + /* Remove array from string name. */ + std::string out_name = inout.name.c_str(); + std::size_t pos = out_name.find('['); + if (is_array && pos != std::string::npos) { + out_name.resize(pos); + } + + /* Add to vertex-output interface. */ + msl_iface.vertex_output_varyings.append( + {to_string(inout.type), + out_name.c_str(), + ((is_inside_struct) ? iface->instance_name.c_str() : ""), + to_string(inout.interp), + is_array, + array_len}); + + /* Add to fragment-input interface.*/ + msl_iface.fragment_input_varyings.append( + {to_string(inout.type), + out_name.c_str(), + ((is_inside_struct) ? iface->instance_name.c_str() : ""), + to_string(inout.interp), + is_array, + array_len}); + } + } + + /** Generate structs from MSL Interface. **/ + /* Generate VertexIn struct. */ + if (!msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << msl_iface.generate_msl_vertex_in_struct(); + } + /* Genrate Uniform data structs. */ + ss_vertex << msl_iface.generate_msl_uniform_structs(ShaderStage::VERTEX); + + /* Conditionally use global GL variables. 
*/ + if (msl_iface.uses_gl_Position) { + ss_vertex << "float4 gl_Position;" << std::endl; + } + if (msl_iface.uses_gl_PointSize) { + ss_vertex << "float gl_PointSize = 1.0;" << std::endl; + } + if (msl_iface.uses_gl_VertexID) { + ss_vertex << "int gl_VertexID;" << std::endl; + } + if (msl_iface.uses_gl_InstanceID) { + ss_vertex << "int gl_InstanceID;" << std::endl; + } + if (msl_iface.uses_gl_BaseInstanceARB) { + ss_vertex << "int gl_BaseInstanceARB;" << std::endl; + } + for (const int cd : IndexRange(msl_iface.clip_distances.size())) { + ss_vertex << "float gl_ClipDistance_" << cd << ";" << std::endl; + } + + /* Render target array index if using multilayered rendering. */ + if (msl_iface.uses_mtl_array_index_) { + ss_vertex << "int MTLRenderTargetArrayIndex = 0;" << std::endl; + } + + /* Global vertex data pointers when using SSBO vertex fetch mode. + * Bound vertex buffers passed in via the entry point function + * are assigned to these pointers to be globally accessible + * from any function within the GLSL source shader. */ + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << "constant uchar** MTL_VERTEX_DATA;" << std::endl; + ss_vertex << "constant ushort* MTL_INDEX_DATA_U16 = nullptr;" << std::endl; + ss_vertex << "constant uint32_t* MTL_INDEX_DATA_U32 = nullptr;" << std::endl; + } + + /* Add Texture members. + * These members pack both a texture and a sampler into a single + * struct, as both are needed within texture functions. + * e.g. `_mtl_combined_image_sampler_2d<float, access::read>` + * The exact typename is generated inside `get_msl_typestring_wrapper()`. */ + for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + if (bool(tex.stage & ShaderStage::VERTEX)) { + ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; + } + } + ss_vertex << std::endl; + + /* Inject main GLSL source into output stream. 
*/ + ss_vertex << shd_builder_->glsl_vertex_source_ << std::endl; + + /* Generate VertexOut and TransformFeedbackOutput structs. */ + ss_vertex << msl_iface.generate_msl_vertex_out_struct(ShaderStage::VERTEX); + if (msl_iface.uses_transform_feedback) { + ss_vertex << msl_iface.generate_msl_vertex_transform_feedback_out_struct(ShaderStage::VERTEX); + } + + /* Class Closing Bracket to end shader global scope. */ + ss_vertex << "};" << std::endl; + + /* Generate Vertex shader entrypoint function containing resource bindings. */ + ss_vertex << msl_iface.generate_msl_vertex_entry_stub(); + + /*** Generate FRAGMENT Stage. ***/ + if (!msl_iface.uses_transform_feedback) { + + /* Conditional defines. */ + if (msl_iface.use_argument_buffer_for_samplers()) { + ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; + ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " + << msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl; + } + + /* Inject common Metal header. */ + ss_fragment << msl_iface.msl_patch_default_get() << std::endl << std::endl; + +#ifndef NDEBUG + /* Performance warning: Identify global-scope expressions. + * These cause excessive register pressure due to global + * arrays being instanciated per-thread. + * Note: This is dependent on stripping out comments + * to remove false positives. */ + remove_multiline_comments_func(shd_builder_->glsl_fragment_source_); + remove_singleline_comments_func(shd_builder_->glsl_fragment_source_); + extract_global_scope_constants(shd_builder_->glsl_fragment_source_, ss_fragment); +#endif + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + /* Only generate struct for ones with instance names. 
*/
+ if (!iface->instance_name.is_empty()) {
+ ss_fragment << "struct " << iface->name << " {" << std::endl;
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+ ss_fragment << to_string(inout.type) << " " << inout.name << " "
+ << to_string_msl(inout.interp) << ";" << std::endl;
+ }
+ ss_fragment << "};" << std::endl;
+ }
+ }
+
+ /* Wrap entire GLSL source inside class to create
+ * a scope within the class to enable use of global variables. */
+ ss_fragment << "class " << get_stage_class_name(ShaderStage::FRAGMENT) << " {" << std::endl;
+ ss_fragment << "public:" << std::endl;
+
+ /* In/out interface values */
+ /* Generate additional shader interface struct members from create-info. */
+ for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) {
+ bool is_inside_struct = false;
+ if (!iface->instance_name.is_empty()) {
+ /* Struct local variable. */
+ ss_fragment << iface->name << " " << iface->instance_name << ";" << std::endl;
+ is_inside_struct = true;
+ }
+
+ /* Generate local variables, populate elems for vertex out struct gen. */
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+ /* Only output individual elements if they are not part of an interface struct instance.
+ */
+ if (!is_inside_struct) {
+ ss_fragment << to_string(inout.type) << " " << inout.name << ";" << std::endl;
+ }
+ }
+ }
+
+ /* Generate global structs */
+ ss_fragment << msl_iface.generate_msl_vertex_out_struct(ShaderStage::FRAGMENT);
+ ss_fragment << msl_iface.generate_msl_fragment_out_struct();
+ ss_fragment << msl_iface.generate_msl_uniform_structs(ShaderStage::FRAGMENT);
+
+ /** GL globals. */
+ /* gl_FragCoord will always be assigned to the output position from vertex shading.
*/ + ss_fragment << "float4 gl_FragCoord;" << std::endl; + if (msl_iface.uses_gl_FragColor) { + ss_fragment << "float4 gl_FragColor;" << std::endl; + } + if (msl_iface.uses_gl_FragDepth) { + ss_fragment << "float gl_FragDepth;" << std::endl; + } + if (msl_iface.uses_gl_PointCoord) { + ss_fragment << "float2 gl_PointCoord;" << std::endl; + } + if (msl_iface.uses_gl_FrontFacing) { + ss_fragment << "MTLBOOL gl_FrontFacing;" << std::endl; + } + + /* Add Texture members. */ + for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + if (bool(tex.stage & ShaderStage::FRAGMENT)) { + ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; + } + } + + /* Inject Main GLSL Fragment Source into output stream. */ + ss_fragment << shd_builder_->glsl_fragment_source_ << std::endl; + + /* Class Closing Bracket to end shader global scope. */ + ss_fragment << "};" << std::endl; + + /* Generate Fragment entrypoint function. */ + ss_fragment << msl_iface.generate_msl_fragment_entry_stub(); + } + + /* DEBUG: Export source to file for manual verification. 
*/ +#if MTL_SHADER_DEBUG_EXPORT_SOURCE + NSFileManager *sharedFM = [NSFileManager defaultManager]; + NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL]; + NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent] + URLByAppendingPathComponent:@"Shaders/" + isDirectory:YES]; + [sharedFM createDirectoryAtURL:shader_dir + withIntermediateDirectories:YES + attributes:nil + error:nil]; + const char *path_cstr = [shader_dir fileSystemRepresentation]; + + std::ofstream vertex_fs; + vertex_fs.open( + (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedVertexShader.msl") + .c_str()); + vertex_fs << ss_vertex.str(); + vertex_fs.close(); + + if (!msl_iface.uses_transform_feedback) { + std::ofstream fragment_fs; + fragment_fs.open( + (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedFragmentShader.msl") + .c_str()); + fragment_fs << ss_fragment.str(); + fragment_fs.close(); + } + + shader_debug_printf( + "Vertex Shader Saved to: %s\n", + (std::string(path_cstr) + std::string(this->name) + "_GeneratedFragmentShader.msl").c_str()); +#endif + + /* Set MSL source NSString's. Required by Metal API. */ + NSString *msl_final_vert = [NSString stringWithCString:ss_vertex.str().c_str() + encoding:[NSString defaultCStringEncoding]]; + NSString *msl_final_frag = (msl_iface.uses_transform_feedback) ? + (@"") : + ([NSString stringWithCString:ss_fragment.str().c_str() + encoding:[NSString defaultCStringEncoding]]); + + this->shader_source_from_msl(msl_final_vert, msl_final_frag); + shader_debug_printf("[METAL] BSL Converted into MSL\n"); + +#ifndef NDEBUG + /* In debug mode, we inject the name of the shader into the entrypoint function + * name, as these are what show up in the Xcode GPU debugger. 
*/ + this->set_vertex_function_name( + [[NSString stringWithFormat:@"vertex_function_entry_%s", this->name] retain]); + this->set_fragment_function_name( + [[NSString stringWithFormat:@"fragment_function_entry_%s", this->name] retain]); +#else + this->set_vertex_function_name(@"vertex_function_entry"); + this->set_fragment_function_name(@"fragment_function_entry"); +#endif + + /* Bake shader interface. */ + this->set_interface(msl_iface.bake_shader_interface(this->name)); + + /* Update other shader properties. */ + uses_mtl_array_index_ = msl_iface.uses_mtl_array_index_; + use_ssbo_vertex_fetch_mode_ = msl_iface.uses_ssbo_vertex_fetch_mode; + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ssbo_vertex_fetch_output_prim_type_ = vertex_fetch_ssbo_output_prim_type; + ssbo_vertex_fetch_output_num_verts_ = vertex_fetch_ssbo_num_output_verts; + this->prepare_ssbo_vertex_fetch_metadata(); + } + + /* Successfully completed GLSL to MSL translation. */ + return true; +} + +constexpr size_t const_strlen(const char *str) +{ + return (*str == '\0') ? 0 : const_strlen(str + 1) + 1; +} + +void MTLShader::prepare_ssbo_vertex_fetch_metadata() +{ + BLI_assert(use_ssbo_vertex_fetch_mode_); + + /* Cache global SSBO-vertex-fetch uniforms locations. */ + const ShaderInput *inp_prim_type = interface->uniform_get(UNIFORM_SSBO_INPUT_PRIM_TYPE_STR); + const ShaderInput *inp_vert_count = interface->uniform_get(UNIFORM_SSBO_INPUT_VERT_COUNT_STR); + const ShaderInput *inp_uses_indexed_rendering = interface->uniform_get( + UNIFORM_SSBO_USES_INDEXED_RENDERING_STR); + const ShaderInput *inp_uses_index_mode_u16 = interface->uniform_get( + UNIFORM_SSBO_INDEX_MODE_U16_STR); + + this->uni_ssbo_input_prim_type_loc = (inp_prim_type != nullptr) ? inp_prim_type->location : -1; + this->uni_ssbo_input_vert_count_loc = (inp_vert_count != nullptr) ? inp_vert_count->location : + -1; + this->uni_ssbo_uses_indexed_rendering = (inp_uses_indexed_rendering != nullptr) ? 
+ inp_uses_indexed_rendering->location : + -1; + this->uni_ssbo_uses_index_mode_u16 = (inp_uses_index_mode_u16 != nullptr) ? + inp_uses_index_mode_u16->location : + -1; + + BLI_assert_msg(this->uni_ssbo_input_prim_type_loc != -1, + "uni_ssbo_input_prim_type_loc uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_input_vert_count_loc != -1, + "uni_ssbo_input_vert_count_loc uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_uses_indexed_rendering != -1, + "uni_ssbo_uses_indexed_rendering uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_uses_index_mode_u16 != -1, + "uni_ssbo_uses_index_mode_u16 uniform location invalid!"); + + /* Prepare SSBO-vertex-fetch attribute uniform location cache. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + for (int i = 0; i < mtl_interface->get_total_attributes(); i++) { + const MTLShaderInputAttribute &mtl_shader_attribute = mtl_interface->get_attribute(i); + const char *attr_name = mtl_interface->get_name_at_offset(mtl_shader_attribute.name_offset); + + /* SSBO-vertex-fetch Attribute data is passed via uniforms. here we need to extract the uniform + * address for each attribute, and we can cache it for later use. 
*/ + ShaderSSBOAttributeBinding &cached_ssbo_attr = cached_ssbo_attribute_bindings_[i]; + cached_ssbo_attr.attribute_index = i; + + constexpr int len_UNIFORM_SSBO_STRIDE_STR = const_strlen(UNIFORM_SSBO_STRIDE_STR); + constexpr int len_UNIFORM_SSBO_OFFSET_STR = const_strlen(UNIFORM_SSBO_OFFSET_STR); + constexpr int len_UNIFORM_SSBO_FETCHMODE_STR = const_strlen(UNIFORM_SSBO_FETCHMODE_STR); + constexpr int len_UNIFORM_SSBO_VBO_ID_STR = const_strlen(UNIFORM_SSBO_VBO_ID_STR); + constexpr int len_UNIFORM_SSBO_TYPE_STR = const_strlen(UNIFORM_SSBO_TYPE_STR); + + char strattr_buf_stride[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_STRIDE_STR + 1] = + UNIFORM_SSBO_STRIDE_STR; + char strattr_buf_offset[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_OFFSET_STR + 1] = + UNIFORM_SSBO_OFFSET_STR; + char strattr_buf_fetchmode[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_FETCHMODE_STR + 1] = + UNIFORM_SSBO_FETCHMODE_STR; + char strattr_buf_vbo_id[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_VBO_ID_STR + 1] = + UNIFORM_SSBO_VBO_ID_STR; + char strattr_buf_type[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_TYPE_STR + 1] = + UNIFORM_SSBO_TYPE_STR; + + strcpy(&strattr_buf_stride[len_UNIFORM_SSBO_STRIDE_STR], attr_name); + strcpy(&strattr_buf_offset[len_UNIFORM_SSBO_OFFSET_STR], attr_name); + strcpy(&strattr_buf_fetchmode[len_UNIFORM_SSBO_FETCHMODE_STR], attr_name); + strcpy(&strattr_buf_vbo_id[len_UNIFORM_SSBO_VBO_ID_STR], attr_name); + strcpy(&strattr_buf_type[len_UNIFORM_SSBO_TYPE_STR], attr_name); + + /* Fetch uniform locations and cache for fast access. 
*/ + const ShaderInput *inp_unf_stride = mtl_interface->uniform_get(strattr_buf_stride); + const ShaderInput *inp_unf_offset = mtl_interface->uniform_get(strattr_buf_offset); + const ShaderInput *inp_unf_fetchmode = mtl_interface->uniform_get(strattr_buf_fetchmode); + const ShaderInput *inp_unf_vbo_id = mtl_interface->uniform_get(strattr_buf_vbo_id); + const ShaderInput *inp_unf_attr_type = mtl_interface->uniform_get(strattr_buf_type); + + BLI_assert(inp_unf_stride != nullptr); + BLI_assert(inp_unf_offset != nullptr); + BLI_assert(inp_unf_fetchmode != nullptr); + BLI_assert(inp_unf_vbo_id != nullptr); + BLI_assert(inp_unf_attr_type != nullptr); + + cached_ssbo_attr.uniform_stride = (inp_unf_stride != nullptr) ? inp_unf_stride->location : -1; + cached_ssbo_attr.uniform_offset = (inp_unf_offset != nullptr) ? inp_unf_offset->location : -1; + cached_ssbo_attr.uniform_fetchmode = (inp_unf_fetchmode != nullptr) ? + inp_unf_fetchmode->location : + -1; + cached_ssbo_attr.uniform_vbo_id = (inp_unf_vbo_id != nullptr) ? inp_unf_vbo_id->location : -1; + cached_ssbo_attr.uniform_attr_type = (inp_unf_attr_type != nullptr) ? + inp_unf_attr_type->location : + -1; + + BLI_assert(cached_ssbo_attr.uniform_offset != -1); + BLI_assert(cached_ssbo_attr.uniform_stride != -1); + BLI_assert(cached_ssbo_attr.uniform_fetchmode != -1); + BLI_assert(cached_ssbo_attr.uniform_vbo_id != -1); + BLI_assert(cached_ssbo_attr.uniform_attr_type != -1); + } +} + +void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateInfo *info) +{ + /** Assign info. */ + create_info_ = info; + + /** Prepare Uniforms. */ + for (const shader::ShaderCreateInfo::PushConst &push_constant : create_info_->push_constants_) { + MSLUniform uniform(push_constant.type, + push_constant.name, + bool(push_constant.array_size > 1), + push_constant.array_size); + uniforms.append(uniform); + } + + /** Prepare textures and uniform blocks. 
+ * Perform across both resource categories and extract both + * texture samplers and image types. */ + for (int i = 0; i < 2; i++) { + const Vector<ShaderCreateInfo::Resource> &resources = (i == 0) ? info->pass_resources_ : + info->batch_resources_; + for (const ShaderCreateInfo::Resource &res : resources) { + /* TODO(Metal): Consider adding stage flags to textures in create info. */ + /* Handle sampler types. */ + switch (res.bind_type) { + case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: { + + /* Samplers to have access::sample by default. */ + MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE; + /* TextureBuffers must have read/write/read-write access pattern. */ + if (res.sampler.type == ImageType::FLOAT_BUFFER || + res.sampler.type == ImageType::INT_BUFFER || + res.sampler.type == ImageType::UINT_BUFFER) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; + } + BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); + MSLTextureSampler msl_tex( + ShaderStage::BOTH, res.sampler.type, res.sampler.name, access, res.slot); + texture_samplers.append(msl_tex); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::IMAGE: { + /* Flatten qualifier flags into final access state. 
*/ + MSLTextureSamplerAccess access; + if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READWRITE; + } + else if (bool(res.image.qualifiers & Qualifier::WRITE)) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_WRITE; + } + else { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; + } + BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); + MSLTextureSampler msl_tex( + ShaderStage::BOTH, res.image.type, res.image.name, access, res.slot); + texture_samplers.append(msl_tex); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: { + MSLUniformBlock ubo; + BLI_assert(res.uniformbuf.type_name.size() > 0); + BLI_assert(res.uniformbuf.name.size() > 0); + int64_t array_offset = res.uniformbuf.name.find_first_of("["); + + ubo.type_name = res.uniformbuf.type_name; + ubo.is_array = (array_offset > -1); + if (ubo.is_array) { + /* If is array UBO, strip out array tag from name. */ + StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset); + ubo.name = name_no_array; + } + else { + ubo.name = res.uniformbuf.name; + } + ubo.stage = ShaderStage::VERTEX | ShaderStage::FRAGMENT; + uniform_blocks.append(ubo); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: { + /* TODO(Metal): Support shader storage buffer in Metal. + * Pending compute support. */ + } break; + } + } + } + + /** Vertex Inputs. */ + bool all_attr_location_assigned = true; + for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) { + + /* Validate input. */ + BLI_assert(attr.name.size() > 0); + + /* NOTE(Metal): Input attributes may not have a location specified. + * unset locations are resolved during: `resolve_input_attribute_locations`. 
*/ + MSLVertexInputAttribute msl_attr; + bool attr_location_assigned = (attr.index >= 0); + all_attr_location_assigned = all_attr_location_assigned && attr_location_assigned; + msl_attr.layout_location = attr_location_assigned ? attr.index : -1; + msl_attr.type = attr.type; + msl_attr.name = attr.name; + vertex_input_attributes.append(msl_attr); + } + + /* Ensure all attributes are assigned a location. */ + if (!all_attr_location_assigned) { + this->resolve_input_attribute_locations(); + } + + /** Fragment outputs. */ + for (const shader::ShaderCreateInfo::FragOut &frag_out : create_info_->fragment_outputs_) { + + /* Validate input. */ + BLI_assert(frag_out.name.size() > 0); + BLI_assert(frag_out.index >= 0); + + /* Populate MSLGenerator attribute. */ + MSLFragmentOutputAttribute mtl_frag_out; + mtl_frag_out.layout_location = frag_out.index; + mtl_frag_out.layout_index = (frag_out.blend != DualBlend::NONE) ? + ((frag_out.blend == DualBlend::SRC_0) ? 0 : 1) : + -1; + mtl_frag_out.type = frag_out.type; + mtl_frag_out.name = frag_out.name; + + fragment_outputs.append(mtl_frag_out); + } +} + +bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const +{ + /* We can only use argument buffers IF sampler count exceeds static limit of 16, + * AND we can support more samplers with an argument buffer. */ + return texture_samplers.size() >= 16 && GPU_max_samplers() > 16; +} + +uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const +{ + /* Note: Sampler bindings and argument buffer shared across stages, + in case stages share texture/sampler bindings. 
*/ + return texture_samplers.size(); +} + +uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage) +{ + BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT); + if (sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] >= 0) { + return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)]; + } + sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = + (this->uniform_blocks.size() + 1); + return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)]; +} + +void MSLGeneratorInterface::prepare_ssbo_vertex_fetch_uniforms() +{ + BLI_assert(this->uses_ssbo_vertex_fetch_mode); + + /* Add Special Uniforms for SSBO vertex fetch mode. */ + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_PRIM_TYPE_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_VERT_COUNT_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_USES_INDEXED_RENDERING_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INDEX_MODE_U16_STR, false)); + + for (const MSLVertexInputAttribute &attr : this->vertex_input_attributes) { + const std::string &uname = attr.name; + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_STRIDE_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_OFFSET_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_FETCHMODE_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_VBO_ID_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_TYPE_STR + uname, false)); + } +} + +std::string MSLGeneratorInterface::generate_msl_vertex_entry_stub() +{ + std::stringstream out; + out << std::endl << "/*** AUTO-GENERATED MSL VERETX SHADER STUB. ***/" << std::endl; + + /* Undef texture defines from main source - avoid conflict with MSL texture. 
*/ + out << "#undef texture" << std::endl; + out << "#undef textureLod" << std::endl; + + /* Disable special case for booleans being treated as ints in GLSL. */ + out << "#undef bool" << std::endl; + + /* Undef uniform mappings to avoid name collisions. */ + out << generate_msl_uniform_undefs(ShaderStage::VERTEX); + + /* Generate function entry point signature w/ resource bindings and inputs. */ + out << "vertex "; + if (this->uses_transform_feedback) { + out << "void "; + } + else { + out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexOut "; + } +#ifndef NDEBUG + out << "vertex_function_entry_" << parent_shader_.name_get() << "(\n\t"; +#else + out << "vertex_function_entry(\n\t"; +#endif + + out << this->generate_msl_vertex_inputs_string(); + out << ") {" << std::endl << std::endl; + out << "\tMTLShaderVertexImpl::VertexOut output;" << std::endl + << "\tMTLShaderVertexImpl vertex_shader_instance;" << std::endl; + + /* Copy Vertex Globals. */ + if (this->uses_gl_VertexID) { + out << "vertex_shader_instance.gl_VertexID = gl_VertexID;" << std::endl; + } + if (this->uses_gl_InstanceID) { + out << "vertex_shader_instance.gl_InstanceID = gl_InstanceID-gl_BaseInstanceARB;" << std::endl; + } + if (this->uses_gl_BaseInstanceARB) { + out << "vertex_shader_instance.gl_BaseInstanceARB = gl_BaseInstanceARB;" << std::endl; + } + + /* Copy vertex attributes into local variables. */ + out << this->generate_msl_vertex_attribute_input_population(); + + /* Populate Uniforms and uniform blocks. */ + out << this->generate_msl_texture_vars(ShaderStage::VERTEX); + out << this->generate_msl_global_uniform_population(ShaderStage::VERTEX); + out << this->generate_msl_uniform_block_population(ShaderStage::VERTEX); + + /* Execute original 'main' function within class scope. */ + out << "\t/* Execute Vertex main function */\t" << std::endl + << "\tvertex_shader_instance.main();" << std::endl + << std::endl; + + /* Populate Output values. 
*/ + out << this->generate_msl_vertex_output_population(); + + /* Final point size, + * This is only compiled if the MTL_global_pointsize is specified + * as a function specialisation in the PSO. This is restricted to + * point primitive types. */ + out << "if(is_function_constant_defined(MTL_global_pointsize)){ output.pointsize = " + "(MTL_global_pointsize > 0.0)?MTL_global_pointsize:output.pointsize; }" + << std::endl; + + /* Populate transform feedback buffer. */ + if (this->uses_transform_feedback) { + out << this->generate_msl_vertex_output_tf_population(); + } + else { + out << "\treturn output;" << std::endl; + } + out << "}"; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub() +{ + std::stringstream out; + out << std::endl << "/*** AUTO-GENERATED MSL FRAGMENT SHADER STUB. ***/" << std::endl; + + /* Undef texture defines from main source - avoid conflict with MSL texture*/ + out << "#undef texture" << std::endl; + out << "#undef textureLod" << std::endl; + + /* Disable special case for booleans being treated as ints in GLSL. */ + out << "#undef bool" << std::endl; + + /* Undef uniform mappings to avoid name collisions. */ + out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT); + + /* Generate function entry point signature w/ resource bindings and inputs. */ +#ifndef NDEBUG + out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT) + << "::FragmentOut fragment_function_entry_" << parent_shader_.name_get() << "(\n\t"; +#else + out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT) + << "::FragmentOut fragment_function_entry(\n\t"; +#endif + out << this->generate_msl_fragment_inputs_string(); + out << ") {" << std::endl << std::endl; + out << "\tMTLShaderFragmentImpl::FragmentOut output;" << std::endl + << "\tMTLShaderFragmentImpl fragment_shader_instance;" << std::endl; + + /* Copy Fragment Globals. 
*/
+  if (this->uses_gl_PointCoord) {
+    out << "fragment_shader_instance.gl_PointCoord = gl_PointCoord;" << std::endl;
+  }
+  if (this->uses_gl_FrontFacing) {
+    out << "fragment_shader_instance.gl_FrontFacing = gl_FrontFacing;" << std::endl;
+  }
+
+  /* Copy fragment inputs (interpolated vertex outputs) into local class variables. */
+  out << this->generate_msl_fragment_input_population();
+
+  /* Barycentrics. */
+  if (this->uses_barycentrics) {
+
+    /* Main barycentrics. */
+    out << "fragment_shader_instance.gpu_BaryCoord = mtl_barycentric_coord.xyz;";
+
+    /* barycentricDist represents the world-space distance from the current world-space position
+     * to the opposite edge of the vertex. The rate of change is estimated from screen-space
+     * derivatives (dfdx/dfdy) of both the world position and the barycentric coordinate. */
+    out << "float3 worldPos = fragment_shader_instance.worldPosition.xyz;" << std::endl;
+    out << "float3 wpChange = (length(dfdx(worldPos))+length(dfdy(worldPos)));" << std::endl;
+    out << "float3 bcChange = "
+           "(length(dfdx(mtl_barycentric_coord))+length(dfdy(mtl_barycentric_coord)));"
+        << std::endl;
+    out << "float3 rateOfChange = wpChange/bcChange;" << std::endl;
+
+    /* Distance to edge using inverse barycentric value, as rather than the length of 0.7
+     * contribution, we'd want the distance to the opposite side. */
+    out << "fragment_shader_instance.gpu_BarycentricDist.x = length(rateOfChange * "
+           "(1.0-mtl_barycentric_coord.x));"
+        << std::endl;
+    out << "fragment_shader_instance.gpu_BarycentricDist.y = length(rateOfChange * "
+           "(1.0-mtl_barycentric_coord.y));"
+        << std::endl;
+    out << "fragment_shader_instance.gpu_BarycentricDist.z = length(rateOfChange * "
+           "(1.0-mtl_barycentric_coord.z));"
+        << std::endl;
+  }
+
+  /* Populate Uniforms and uniform blocks. */
+  out << this->generate_msl_texture_vars(ShaderStage::FRAGMENT);
+  out << this->generate_msl_global_uniform_population(ShaderStage::FRAGMENT);
+  out << this->generate_msl_uniform_block_population(ShaderStage::FRAGMENT);
+
+  /* Execute original 'main' function within class scope.
*/ + out << "\t/* Execute Fragment main function */\t" << std::endl + << "\tfragment_shader_instance.main();" << std::endl + << std::endl; + + /* Populate Output values. */ + out << this->generate_msl_fragment_output_population(); + out << " return output;" << std::endl << "}"; + + return out.str(); +} + +void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out, + ShaderStage stage) +{ + BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT); + /* Generate texture signatures. */ + BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert()); + for (const MSLTextureSampler &tex : this->texture_samplers) { + if (bool(tex.stage & stage)) { + out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]"; + } + } + + /* Generate sampler signatures. */ + /* Note: Currently textures and samplers share indices across shading stages, so the limit is + * shared. + * If we exceed the hardware-supported limit, then follow a bindless model using argument + * buffers. */ + if (this->use_argument_buffer_for_samplers()) { + out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+" + << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]"; + } + else { + /* Maximum Limit of samplers defined in the function argument table is + * MTL_MAX_DEFAULT_SAMPLERS=16. */ + BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS); + for (const MSLTextureSampler &tex : this->texture_samplers) { + if (bool(tex.stage & stage)) { + out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]"; + } + } + + /* Fallback. 
*/ + if (this->texture_samplers.size() > 16) { + shader_debug_printf( + "[Metal] Warning: Shader exceeds limit of %u samplers on current hardware\n", + MTL_MAX_DEFAULT_SAMPLERS); + } + } +} + +void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out, + ShaderStage stage) +{ + int ubo_index = 0; + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + if (bool(ubo.stage & stage)) { + /* For literal/existing global types, we do not need the class namespace accessor. */ + out << ",\n\tconstant "; + if (!is_builtin_type(ubo.type_name)) { + out << get_stage_class_name(stage) << "::"; + } + /* UniformBuffer bind indices start at MTL_uniform_buffer_base_index+1, as + * MTL_uniform_buffer_base_index is reserved for the PushConstantBlock (push constants). + * MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs + * bound for the current PSO specialisation. */ + out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+" + << (ubo_index + 1) << ")]]"; + } + ubo_index++; + } +} + +std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string() +{ + std::stringstream out; + + if (this->uses_ssbo_vertex_fetch_mode) { + /* Vertex Buffers bound as raw buffers. */ + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n"; + } + out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],"; + } + else { + if (this->vertex_input_attributes.size() > 0) { + /* Vertex Buffers use input assembly. */ + out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],"; + } + } + out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX) + << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]"; + + this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX); + + /* Transform feedback buffer binding. 
*/ + if (this->uses_transform_feedback) { + out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX) + << "::VertexOut_TF* " + "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]"; + } + + /* Generate texture signatures. */ + this->generate_msl_textures_input_string(out, ShaderStage::VERTEX); + + /* Entry point parameters for gl Globals. */ + if (this->uses_gl_VertexID) { + out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]"; + } + if (this->uses_gl_InstanceID) { + out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]"; + } + if (this->uses_gl_BaseInstanceARB) { + out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]"; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string() +{ + std::stringstream out; + out << get_stage_class_name(ShaderStage::FRAGMENT) + << "::VertexOut v_in [[stage_in]],\n\tconstant " + << get_stage_class_name(ShaderStage::FRAGMENT) + << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]"; + + this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT); + + /* Generate texture signatures. */ + this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT); + + if (this->uses_gl_PointCoord) { + out << ",\n\tconst float2 gl_PointCoord [[point_coord]]"; + } + if (this->uses_gl_FrontFacing) { + out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]"; + } + + /* Barycentrics. */ + if (this->uses_barycentrics) { + out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]"; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + + /* Common Uniforms. 
*/ + out << "typedef struct {" << std::endl; + + for (const MSLUniform &uniform : this->uniforms) { + if (uniform.is_array) { + out << "\t" << to_string(uniform.type) << " " << uniform.name << "[" << uniform.array_elems + << "];" << std::endl; + } + else { + out << "\t" << to_string(uniform.type) << " " << uniform.name << ";" << std::endl; + } + } + out << "} PushConstantBlock;\n\n"; + + /* Member UBO block reference. */ + out << std::endl << "const constant PushConstantBlock *global_uniforms;" << std::endl; + + /* Macro define chain. + * To access uniforms, we generate a macro such that the uniform name can + * be used directly without using the struct's handle. */ + for (const MSLUniform &uniform : this->uniforms) { + out << "#define " << uniform.name << " global_uniforms->" << uniform.name << std::endl; + } + out << std::endl; + return out.str(); +} + +/* Note: Uniform macro definition vars can conflict with other parameters. */ +std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shader_stage) +{ + std::stringstream out; + + /* Macro undef chain. */ + for (const MSLUniform &uniform : this->uniforms) { + out << "#undef " << uniform.name << std::endl; + } + /* UBO block undef. */ + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + out << "#undef " << ubo.name << std::endl; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_in_struct() +{ + std::stringstream out; + + /* Skip struct if no vert attributes. */ + if (this->vertex_input_attributes.size() == 0) { + return ""; + } + + /* Output */ + out << "typedef struct {" << std::endl; + for (const MSLVertexInputAttribute &in_attr : this->vertex_input_attributes) { + /* Matrix and array attributes are not trivially supported and thus + * require each element to be passed as an individual attribute. + * This requires shader source generation of sequential elements. + * The matrix type is then re-packed into a Mat4 inside the entry function. + * + * e.g. 
+ * float4 __internal_modelmatrix_0 [[attribute(0)]]; + * float4 __internal_modelmatrix_1 [[attribute(1)]]; + * float4 __internal_modelmatrix_2 [[attribute(2)]]; + * float4 __internal_modelmatrix_3 [[attribute(3)]]; + */ + if (is_matrix_type(in_attr.type) && !this->uses_ssbo_vertex_fetch_mode) { + for (int elem = 0; elem < get_matrix_location_count(in_attr.type); elem++) { + out << "\t" << get_matrix_subtype(in_attr.type) << " __internal_" << in_attr.name << elem + << " [[attribute(" << (in_attr.layout_location + elem) << ")]];" << std::endl; + } + } + else { + out << "\t" << in_attr.type << " " << in_attr.name << " [[attribute(" + << in_attr.layout_location << ")]];" << std::endl; + } + } + + out << "} VertexIn;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + + /* Vertex output struct. */ + out << "typedef struct {" << std::endl; + + /* If we use GL position, our standard output variable will be mapped to '_default_position_'. + * Otherwise, we use the FIRST element in the output array. + * If transform feedback is enabled, we do not need to output position, unless it + * is explicitly specified as a tf output. */ + bool first_attr_is_position = false; + if (this->uses_gl_Position) { + out << "\tfloat4 _default_position_ [[position]];" << std::endl; + } + else { + if (!this->uses_transform_feedback) { + /* Use first output element for position. */ + BLI_assert(this->vertex_output_varyings.size() > 0); + BLI_assert(this->vertex_output_varyings[0].type == "vec4"); + out << "\tfloat4 " << this->vertex_output_varyings[0].name << " [[position]];" << std::endl; + first_attr_is_position = true; + } + } + + /* Generate other vertex output members. 
*/ + bool skip_first_index = first_attr_is_position; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + + /* Skip first index if used for position. */ + if (skip_first_index) { + skip_first_index = false; + continue; + } + + if (v_out.is_array) { + /* Array types cannot be trivially passed between shading stages. + * Instead we pass each component individually. E.g. vec4 pos[2] + * will be converted to: `vec4 pos_0; vec4 pos_1;` + * The specified interpolation qualifier will be applied per element. */ + /* TODO(Metal): Support array of matrix in-out types if required + * e.g. Mat4 out_matrices[3]. */ + for (int i = 0; i < v_out.array_elems; i++) { + out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name << i + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + /* Matrix types need to be expressed as their vector subcomponents. */ + if (is_matrix_type(v_out.type)) { + BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" && + "Matrix varying types must have [[flat]] interpolation"); + std::string subtype = get_matrix_subtype(v_out.type); + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\t" << subtype << v_out.instance_name << " __matrix_" << v_out.name << elem + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + } + + /* Add gl_PointSize if written to. */ + if (shader_stage == ShaderStage::VERTEX) { + if (this->uses_gl_PointSize) { + /* If gl_PointSize is explicitly written to, + * we will output the written value directly. + * This value can still be overriden by the + * global pointsize value. 
*/ + out << "\tfloat pointsize [[point_size]];" << std::endl; + } + else { + /* Otherwise, if pointsize is not written to inside the shader, + * then its usage is controlled by whether the MTL_global_pointsize + * function constant has been specified. + * This function constant is enabled for all point primitives beign + * rendered. */ + out << "\tfloat pointsize [[point_size, function_constant(MTL_global_pointsize)]];" + << std::endl; + } + } + + /* Add gl_ClipDistance[n]. */ + if (shader_stage == ShaderStage::VERTEX) { + out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl; + if (this->clip_distances.size() > 1) { + /* Output array of clip distances if specified. */ + out << "\tfloat clipdistance [[clip_distance]] [" << this->clip_distances.size() << "];" + << std::endl; + } + else if (this->clip_distances.size() > 0) { + out << "\tfloat clipdistance [[clip_distance]];" << std::endl; + } + out << "#endif" << std::endl; + } + + /* Add MTL render target array index for multilayered rendering support. */ + if (uses_mtl_array_index_) { + out << "\tuint MTLRenderTargetArrayIndex [[render_target_array_index]];" << std::endl; + } + + out << "} VertexOut;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_transform_feedback_out_struct( + ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + vertex_output_varyings_tf.clear(); + + out << "typedef struct {" << std::endl; + + /* If we use GL position, our standard output variable will be mapped to '_default_position_'. 
+ * Otherwise, we use the FIRST element in the output array -- If transform feedback is enabled, + * we do not need to output position */ + bool first_attr_is_position = false; + if (this->uses_gl_Position) { + + if (parent_shader_.has_transform_feedback_varying("gl_Position")) { + out << "\tfloat4 pos [[position]];" << std::endl; + vertex_output_varyings_tf.append({.type = "vec4", + .name = "gl_Position", + .interpolation_qualifier = "", + .is_array = false, + .array_elems = 1}); + } + } + else { + if (!this->uses_transform_feedback) { + /* Use first output element for position */ + BLI_assert(this->vertex_output_varyings.size() > 0); + BLI_assert(this->vertex_output_varyings[0].type == "vec4"); + first_attr_is_position = true; + } + } + + /* Generate other vertex outputs. */ + bool skip_first_index = first_attr_is_position; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + + /* Skip first index if used for position. */ + if (skip_first_index) { + skip_first_index = false; + continue; + } + + if (!parent_shader_.has_transform_feedback_varying(v_out.name)) { + continue; + } + vertex_output_varyings_tf.append(v_out); + + if (v_out.is_array) { + /* TODO(Metal): Support array of matrix types if required. */ + for (int i = 0; i < v_out.array_elems; i++) { + out << "\t" << v_out.type << " " << v_out.name << i + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + /* Matrix types need to be expressed as their vector subcomponents. 
*/ + if (is_matrix_type(v_out.type)) { + BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" && + "Matrix varying types must have [[flat]] interpolation"); + std::string subtype = get_matrix_subtype(v_out.type); + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\t" << subtype << " __matrix_" << v_out.name << elem + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + out << "\t" << v_out.type << " " << v_out.name << v_out.get_mtl_interpolation_qualifier() + << ";" << std::endl; + } + } + } + + out << "} VertexOut_TF;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_out_struct() +{ + std::stringstream out; + + /* Output. */ + out << "typedef struct {" << std::endl; + for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) { + out << "\t" << to_string(this->fragment_outputs[f_output].type) << " " + << this->fragment_outputs[f_output].name << " [[color(" + << this->fragment_outputs[f_output].layout_location << ")"; + if (this->fragment_outputs[f_output].layout_index >= 0) { + out << ", index(" << this->fragment_outputs[f_output].layout_index << ")"; + } + out << "]]" + << ";" << std::endl; + } + /* Add gl_FragDepth output if used. */ + if (this->uses_gl_FragDepth) { + std::string out_depth_argument = ((this->depth_write == DepthWrite::GREATER) ? + "greater" : + ((this->depth_write == DepthWrite::LESS) ? "less" : + "any")); + out << "\tfloat fragdepth [[depth(" << out_depth_argument << ")]];" << std::endl; + } + + out << "} FragmentOut;" << std::endl; + out << std::endl; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage) +{ + /* Populate Global Uniforms. */ + std::stringstream out; + + /* Copy UBO block ref. */ + out << "\t/* Copy Uniform block member reference */" << std::endl; + out << "\t" + << ((stage == ShaderStage::VERTEX) ? 
"vertex_shader_instance." : "fragment_shader_instance.") + << "global_uniforms = uniforms;" << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderStage stage) +{ + /* Populate Global Uniforms. */ + std::stringstream out; + out << "\t/* Copy UBO block references into local class variables */" << std::endl; + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + + /* Only include blocks which are used within this stage. */ + if (bool(ubo.stage & stage)) { + /* Generate UBO reference assignment. + * NOTE(Metal): We append `_local` postfix onto the class member name + * for the ubo to avoid name collision with the UBO accessor macro. + * We only need to add this postfix for the non-array access variant, + * as the array is indexed directly, rather than requiring a dereference. */ + out << "\t" + << ((stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << ubo.name; + if (!ubo.is_array) { + out << "_local"; + } + out << " = " << ubo.name << ";" << std::endl; + } + } + out << std::endl; + return out.str(); +} + +/* Copy input attributes from stage_in into class local variables. */ +std::string MSLGeneratorInterface::generate_msl_vertex_attribute_input_population() +{ + + /* SSBO Vertex Fetch mode does not require local attribute population, + * we only need to pass over the buffer pointer references. */ + if (this->uses_ssbo_vertex_fetch_mode) { + std::stringstream out; + out << "const constant uchar* GLOBAL_MTL_VERTEX_DATA[MTL_SSBO_VERTEX_FETCH_MAX_VBOS] = {" + << std::endl; + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + char delimiter = (i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS - 1) ? 
',' : ' '; + out << "\t\tMTL_VERTEX_DATA_" << i << delimiter << std::endl; + } + out << "};" << std::endl; + out << "\tvertex_shader_instance.MTL_VERTEX_DATA = GLOBAL_MTL_VERTEX_DATA;" << std::endl; + out << "\tvertex_shader_instance.MTL_INDEX_DATA_U16 = MTL_INDEX_DATA;" << std::endl; + out << "\tvertex_shader_instance.MTL_INDEX_DATA_U32 = reinterpret_cast<constant " + "uint32_t*>(MTL_INDEX_DATA);" + << std::endl; + return out.str(); + } + + /* Populate local attribute variables. */ + std::stringstream out; + out << "\t/* Copy Vertex Stage-in attributes into local variables */" << std::endl; + for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) { + + if (is_matrix_type(this->vertex_input_attributes[attribute].type)) { + /* Reading into an internal matrix from split attributes: Should generate the following: + * vertex_shader_instance.mat_attribute_type = + * mat4(v_in.__internal_mat_attribute_type0, + * v_in.__internal_mat_attribute_type1, + * v_in.__internal_mat_attribute_type2, + * v_in.__internal_mat_attribute_type3). */ + out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name << " = " + << this->vertex_input_attributes[attribute].type << "(v_in.__internal_" + << this->vertex_input_attributes[attribute].name << 0; + for (int elem = 1; + elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type); + elem++) { + out << ",\n" + << "v_in.__internal_" << this->vertex_input_attributes[attribute].name << elem; + } + out << ");"; + } + else { + /* OpenGL uses the GPU_FETCH_* functions which can alter how an attribute value is + * interpreted. In Metal, we cannot support all implicit conversions within the vertex + * descriptor/vertex stage-in, so we need to perform value transformation on-read. + * + * This is handled by wrapping attribute reads to local shader registers in a + * suitable conversion function `attribute_conversion_func_name`. 
+ * This conversion function performs a specific transformation on the source + * vertex data, depending on the specified GPU_FETCH_* mode for the current + * vertex format. + * + * The fetch_mode is specified per-attribute using specialisation constants + * on the PSO, wherein a unique set of constants is passed in per vertex + * buffer/format configuration. Efficiently enabling pass-through reads + * if no special fetch is required. */ + bool do_attribute_conversion_on_read = false; + std::string attribute_conversion_func_name = get_attribute_conversion_function( + &do_attribute_conversion_on_read, this->vertex_input_attributes[attribute].type); + + if (do_attribute_conversion_on_read) { + out << "\t" << attribute_conversion_func_name << "(MTL_AttributeConvert" << attribute + << ", v_in." << this->vertex_input_attributes[attribute].name + << ", vertex_shader_instance." << this->vertex_input_attributes[attribute].name << ");" + << std::endl; + } + else { + out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name + << " = v_in." << this->vertex_input_attributes[attribute].name << ";" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +/* Copy post-main, modified, local class variables into vertex-output struct. */ +std::string MSLGeneratorInterface::generate_msl_vertex_output_population() +{ + + std::stringstream out; + out << "\t/* Copy Vertex Outputs into output struct */" << std::endl; + + /* Output gl_Position with conversion to Metal coordinate-space. */ + if (this->uses_gl_Position) { + out << "\toutput._default_position_ = vertex_shader_instance.gl_Position;" << std::endl; + + /* Invert Y and rescale depth range. + * This is an alternative method to modifying all projection matrices. 
*/ + out << "\toutput._default_position_.y = -output._default_position_.y;" << std::endl; + out << "\toutput._default_position_.z = " + "(output._default_position_.z+output._default_position_.w)/2.0;" + << std::endl; + } + + /* Output Pointsize. */ + if (this->uses_gl_PointSize) { + out << "\toutput.pointsize = vertex_shader_instance.gl_PointSize;" << std::endl; + } + + /* Output render target array Index. */ + if (uses_mtl_array_index_) { + out << "\toutput.MTLRenderTargetArrayIndex = " + "vertex_shader_instance.MTLRenderTargetArrayIndex;" + << std::endl; + } + + /* Output clipdistances. */ + out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl; + if (this->clip_distances.size() > 1) { + for (int cd = 0; cd < this->clip_distances.size(); cd++) { + out << "\toutput.clipdistance[" << cd << "] = vertex_shader_instance.gl_ClipDistance_" << cd + << ";" << std::endl; + } + } + else if (this->clip_distances.size() > 0) { + out << "\toutput.clipdistance = vertex_shader_instance.gl_ClipDistance_0;" << std::endl; + } + out << "#endif" << std::endl; + + /* Populate output vertex variables. */ + int output_id = 0; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + if (v_out.is_array) { + + for (int i = 0; i < v_out.array_elems; i++) { + out << "\toutput." << v_out.instance_name << "_" << v_out.name << i + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << "[" << i << "]" + << ";" << std::endl; + } + } + else { + /* Matrix types are split into vectors and need to be reconstructed. */ + if (is_matrix_type(v_out.type)) { + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\toutput." 
<< v_out.instance_name << "__matrix_" << v_out.name << elem + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << "[" << elem << "];" << std::endl; + } + } + else { + /* If we are not using gl_Position, first vertex output is used for position. + * Ensure it is vec4. If transform feedback is enabled, we do not need position. */ + if (!this->uses_gl_Position && output_id == 0 && !this->uses_transform_feedback) { + + out << "\toutput." << v_out.instance_name << "_" << v_out.name + << " = to_vec4(vertex_shader_instance." << v_out.name << ");" << std::endl; + + /* Invert Y */ + out << "\toutput." << v_out.instance_name << "_" << v_out.name << ".y = -output." + << v_out.name << ".y;" << std::endl; + } + else { + + /* Assign vertex output. */ + out << "\toutput." << v_out.instance_name << "_" << v_out.name + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << ";" << std::endl; + } + } + } + output_id++; + } + out << std::endl; + return out.str(); +} + +/* Copy desired output varyings into transform feedback structure */ +std::string MSLGeneratorInterface::generate_msl_vertex_output_tf_population() +{ + + std::stringstream out; + out << "\t/* Copy Vertex TF Outputs into transform feedback buffer */" << std::endl; + + /* Populate output vertex variables */ + /* TODO(Metal): Currently do not need to support output matrix types etc; but may need to + * verify for other configurations if these occur in any cases. */ + for (int v_output = 0; v_output < this->vertex_output_varyings_tf.size(); v_output++) { + out << "transform_feedback_results[gl_VertexID]." + << this->vertex_output_varyings_tf[v_output].name << " = vertex_shader_instance." 
+ << this->vertex_output_varyings_tf[v_output].name << ";" << std::endl; + } + out << std::endl; + return out.str(); +} + +/* Copy fragment stage inputs (Vertex Outputs) into local class variables. */ +std::string MSLGeneratorInterface::generate_msl_fragment_input_population() +{ + + /* Populate local attribute variables. */ + std::stringstream out; + out << "\t/* Copy Fragment input into local variables. */" << std::endl; + + /* Special common case for gl_FragCoord, assigning to input position. */ + if (this->uses_gl_Position) { + out << "\tfragment_shader_instance.gl_FragCoord = v_in._default_position_;" << std::endl; + } + else { + /* When gl_Position is not set, first VertexIn element is used for position. */ + out << "\tfragment_shader_instance.gl_FragCoord = v_in." + << this->vertex_output_varyings[0].name << ";" << std::endl; + } + + /* NOTE: We will only assign to the intersection of the vertex output and fragment input. + * Fragment input represents varying variables which are declared (but are not necessarily + * used). The Vertex out defines the set which is passed into the fragment shader, which + * contains out variables declared in the vertex shader, though these are not necessarily + * consumed by the fragment shader. + * + * In the cases where the fragment shader expects a variable, but it does not exist in the + * vertex shader, a warning will be provided. */ + for (int f_input = (this->uses_gl_Position) ? 
0 : 1; + f_input < this->fragment_input_varyings.size(); + f_input++) { + bool exists_in_vertex_output = false; + for (int v_o = 0; v_o < this->vertex_output_varyings.size() && !exists_in_vertex_output; + v_o++) { + if (this->fragment_input_varyings[f_input].name == this->vertex_output_varyings[v_o].name) { + exists_in_vertex_output = true; + } + } + if (!exists_in_vertex_output) { + shader_debug_printf( + "[Warning] Fragment shader expects varying input '%s', but this is not passed from " + "the " + "vertex shader\n", + this->fragment_input_varyings[f_input].name.c_str()); + continue; + } + if (this->fragment_input_varyings[f_input].is_array) { + for (int i = 0; i < this->fragment_input_varyings[f_input].array_elems; i++) { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << "[" << i << "] = v_in." + << this->fragment_input_varyings[f_input].instance_name << "_" + << this->fragment_input_varyings[f_input].name << i << ";" << std::endl; + } + } + else { + /* Matrix types are split into components and need to be regrouped into a matrix. */ + if (is_matrix_type(this->fragment_input_varyings[f_input].type)) { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << " = " + << this->fragment_input_varyings[f_input].type; + int count = get_matrix_location_count(this->fragment_input_varyings[f_input].type); + for (int elem = 0; elem < count; elem++) { + out << ((elem == 0) ? "(" : "") << "v_in." + << this->fragment_input_varyings[f_input].instance_name << "__matrix_" + << this->fragment_input_varyings[f_input].name << elem + << ((elem < count - 1) ? 
",\n" : ""); + } + out << ");" << std::endl; + } + else { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << " = v_in." + << this->fragment_input_varyings[f_input].instance_name << "_" + << this->fragment_input_varyings[f_input].name << ";" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +/* Copy post-main, modified, local class variables into fragment-output struct. */ +std::string MSLGeneratorInterface::generate_msl_fragment_output_population() +{ + + /* Populate output fragment variables. */ + std::stringstream out; + out << "\t/* Copy Fragment Outputs into output struct. */" << std::endl; + + /* Output gl_FragDepth. */ + if (this->uses_gl_FragDepth) { + out << "\toutput.fragdepth = fragment_shader_instance.gl_FragDepth;" << std::endl; + } + + /* Output attributes. */ + for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) { + + out << "\toutput." << this->fragment_outputs[f_output].name << " = fragment_shader_instance." + << this->fragment_outputs[f_output].name << ";" << std::endl; + } + out << std::endl; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + + std::stringstream out; + out << "\t/* Populate local texture and sampler members */" << std::endl; + for (int i = 0; i < this->texture_samplers.size(); i++) { + if (bool(this->texture_samplers[i].stage & shader_stage)) { + + /* Assign texture reference. */ + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".texture = &" << this->texture_samplers[i].name + << ";" << std::endl; + + /* Assign sampler reference. 
*/ + if (this->use_argument_buffer_for_samplers()) { + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".samp = &samplers.sampler_args[" << i << "];" + << std::endl; + } + else { + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".samp = &" << this->texture_samplers[i].name + << "_sampler;" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +void MSLGeneratorInterface::resolve_input_attribute_locations() +{ + /* Determine used-attribute-location mask. */ + uint32_t used_locations = 0; + for (const MSLVertexInputAttribute &attr : vertex_input_attributes) { + if (attr.layout_location >= 0) { + /* Matrix and array types span multiple location slots. */ + uint32_t location_element_count = get_matrix_location_count(attr.type); + for (uint32_t i = 1; i <= location_element_count; i++) { + /* Ensure our location hasn't already been used. */ + uint32_t location_mask = (i << attr.layout_location); + BLI_assert((used_locations & location_mask) == 0); + used_locations = used_locations | location_mask; + } + } + } + + /* Assign unused location slots to other attributes. */ + for (MSLVertexInputAttribute &attr : vertex_input_attributes) { + if (attr.layout_location == -1) { + /* Determine number of locations required. */ + uint32_t required_attr_slot_count = get_matrix_location_count(attr.type); + + /* Determine free location. + * Starting from 1 is slightly less efficient, however, + * given mutli-sized attributes, an earlier slot may remain free. + * given GPU_VERT_ATTR_MAX_LEN is small, this wont matter. 
*/ + for (int loc = 0; loc < GPU_VERT_ATTR_MAX_LEN - (required_attr_slot_count - 1); loc++) { + + uint32_t location_mask = (1 << loc); + /* Generate sliding mask using location and required number of slots, + * to ensure contiguous slots are free. + * slot mask will be a number containing N binary 1's, where N is the + * number of attributes needed. + * e.g. N=4 -> 1111. */ + uint32_t location_slot_mask = (1 << required_attr_slot_count) - 1; + uint32_t sliding_location_slot_mask = location_slot_mask << location_mask; + if ((used_locations & sliding_location_slot_mask) == 0) { + /* Assign location and update mask. */ + attr.layout_location = loc; + used_locations = used_locations | location_slot_mask; + continue; + } + } + + /* Error if could not assign attribute. */ + MTL_LOG_ERROR("Could not assign attribute location to attribute %s for shader %s\n", + attr.name.c_str(), + this->parent_shader_.name_get()); + } + } +} + +void MSLGeneratorInterface::resolve_fragment_output_locations() +{ + int running_location_ind = 0; + + /* This code works under the assumption that either all layout_locations are set, + * or none are. */ + for (int i = 0; i < this->fragment_outputs.size(); i++) { + BLI_assert_msg( + ((running_location_ind > 0) ? (this->fragment_outputs[i].layout_location == -1) : true), + "Error: Mismatched input attributes, some with location specified, some without"); + if (this->fragment_outputs[i].layout_location == -1) { + this->fragment_outputs[i].layout_location = running_location_ind; + running_location_ind++; + } + } +} + +/* Add string to name buffer. Utility function to be used in bake_shader_interface. + * Returns the offset of the inserted name.*/ +static uint32_t name_buffer_copystr(char **name_buffer_ptr, + const char *str_to_copy, + uint32_t &name_buffer_size, + uint32_t &name_buffer_offset) +{ + /* Verify input is valid. */ + BLI_assert(str_to_copy != nullptr); + + /* Determine length of new string, and ensure name buffer is large enough. 
*/ + uint32_t ret_len = strlen(str_to_copy); + BLI_assert(ret_len > 0); + + /* If required name buffer size is larger, increase by atleast 128 bytes. */ + if (name_buffer_size + ret_len > name_buffer_size) { + name_buffer_size = name_buffer_size + max_ii(128, ret_len); + *name_buffer_ptr = (char *)MEM_reallocN(*name_buffer_ptr, name_buffer_size); + } + + /* Copy string into name buffer. */ + uint32_t insert_offset = name_buffer_offset; + char *current_offset = (*name_buffer_ptr) + insert_offset; + strcpy(current_offset, str_to_copy); + + /* Adjust offset including null terminator. */ + name_buffer_offset += ret_len + 1; + + /* Return offset into name buffer for inserted string. */ + return insert_offset; +} + +MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *name) +{ + MTLShaderInterface *interface = new MTLShaderInterface(name); + interface->init(); + + /* Name buffer. */ + /* Initialise name buffer. */ + uint32_t name_buffer_size = 256; + uint32_t name_buffer_offset = 0; + interface->name_buffer_ = (char *)MEM_mallocN(name_buffer_size, "name_buffer"); + + /* Prepare Interface Input Attributes. */ + int c_offset = 0; + for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) { + + /* We need a special case for handling matrix types, which splits the matrix into its vector + * components. */ + if (is_matrix_type(this->vertex_input_attributes[attribute].type)) { + + eMTLDataType mtl_type = to_mtl_type( + get_matrix_subtype(this->vertex_input_attributes[attribute].type)); + int size = mtl_get_data_type_size(mtl_type); + for (int elem = 0; + elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type); + elem++) { + /* First attribute matches the core name -- subsequent attributes tagged with + * __internal_<name><index>. */ + std::string _internal_name = (elem == 0) ? 
+ this->vertex_input_attributes[attribute].name : + "__internal_" + + this->vertex_input_attributes[attribute].name + + std::to_string(elem); + + /* IF Using SSBO vertex Fetch, we do not need to expose other dummy attributes in the + * shader interface, only the first one for the whole matrix, as we can pass whatever data + * we want in this mode, and do not need to split attributes. */ + if (elem == 0 || !this->uses_ssbo_vertex_fetch_mode) { + interface->add_input_attribute( + name_buffer_copystr(&interface->name_buffer_, + _internal_name.c_str(), + name_buffer_size, + name_buffer_offset), + this->vertex_input_attributes[attribute].layout_location + elem, + mtl_datatype_to_vertex_type(mtl_type), + 0, + size, + c_offset, + (elem == 0) ? + get_matrix_location_count(this->vertex_input_attributes[attribute].type) : + 0); + } + c_offset += size; + } + shader_debug_printf( + "[Note] Matrix Type '%s' added to shader interface as vertex attribute. (Elem Count: " + "%d)\n", + this->vertex_input_attributes[attribute].name.c_str(), + get_matrix_location_count(this->vertex_input_attributes[attribute].type)); + } + else { + + /* Normal attribute types. */ + eMTLDataType mtl_type = to_mtl_type(this->vertex_input_attributes[attribute].type); + int size = mtl_get_data_type_size(mtl_type); + interface->add_input_attribute( + name_buffer_copystr(&interface->name_buffer_, + this->vertex_input_attributes[attribute].name.c_str(), + name_buffer_size, + name_buffer_offset), + this->vertex_input_attributes[attribute].layout_location, + mtl_datatype_to_vertex_type(mtl_type), + 0, + size, + c_offset); + c_offset += size; + } + } + + /* Prepare Interface Default Uniform Block. 
*/ + interface->add_push_constant_block(name_buffer_copystr( + &interface->name_buffer_, "PushConstantBlock", name_buffer_size, name_buffer_offset)); + + for (int uniform = 0; uniform < this->uniforms.size(); uniform++) { + interface->add_uniform( + name_buffer_copystr(&interface->name_buffer_, + this->uniforms[uniform].name.c_str(), + name_buffer_size, + name_buffer_offset), + to_mtl_type(this->uniforms[uniform].type), + (this->uniforms[uniform].is_array) ? this->uniforms[uniform].array_elems : 1); + } + + /* Prepare Interface Uniform Blocks. */ + for (int uniform_block = 0; uniform_block < this->uniform_blocks.size(); uniform_block++) { + interface->add_uniform_block( + name_buffer_copystr(&interface->name_buffer_, + this->uniform_blocks[uniform_block].name.c_str(), + name_buffer_size, + name_buffer_offset), + uniform_block, + 0, + this->uniform_blocks[uniform_block].stage); + } + + /* Texture/sampler bindings to interface. */ + for (const MSLTextureSampler &texture_sampler : this->texture_samplers) { + interface->add_texture(name_buffer_copystr(&interface->name_buffer_, + texture_sampler.name.c_str(), + name_buffer_size, + name_buffer_offset), + texture_sampler.location, + texture_sampler.get_texture_binding_type(), + texture_sampler.stage); + } + + /* Sampler Parameters. */ + interface->set_sampler_properties( + this->use_argument_buffer_for_samplers(), + this->get_sampler_argument_buffer_bind_index(ShaderStage::VERTEX), + this->get_sampler_argument_buffer_bind_index(ShaderStage::FRAGMENT)); + + /* Map Metal bindings to standardised ShaderInput struct name/binding index. */ + interface->prepare_common_shader_inputs(); + + /* Resize name buffer to save some memory. */ + if (name_buffer_offset < name_buffer_size) { + interface->name_buffer_ = (char *)MEM_reallocN(interface->name_buffer_, name_buffer_offset); + } + + return interface; +} + +std::string MSLTextureSampler::get_msl_texture_type_str() const +{ + /* Add Types as needed. 
*/ + switch (this->type) { + case ImageType::FLOAT_1D: { + return "texture1d"; + } + case ImageType::FLOAT_2D: { + return "texture2d"; + } + case ImageType::FLOAT_3D: { + return "texture3d"; + } + case ImageType::FLOAT_CUBE: { + return "texturecube"; + } + case ImageType::FLOAT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::FLOAT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::FLOAT_BUFFER: { + return "texture_buffer"; + } + case ImageType::DEPTH_2D: { + return "depth2d"; + } + case ImageType::SHADOW_2D: { + return "depth2d"; + } + case ImageType::DEPTH_2D_ARRAY: { + return "depth2d_array"; + } + case ImageType::SHADOW_2D_ARRAY: { + return "depth2d_array"; + } + case ImageType::DEPTH_CUBE: { + return "depthcube"; + } + case ImageType::SHADOW_CUBE: { + return "depthcube"; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return "depthcube_array"; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return "depthcube_array"; + } + case ImageType::INT_1D: { + return "texture1d"; + } + case ImageType::INT_2D: { + return "texture2d"; + } + case ImageType::INT_3D: { + return "texture3d"; + } + case ImageType::INT_CUBE: { + return "texturecube"; + } + case ImageType::INT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::INT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::INT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::INT_BUFFER: { + return "texture_buffer"; + } + case ImageType::UINT_1D: { + return "texture1d"; + } + case ImageType::UINT_2D: { + return "texture2d"; + } + case ImageType::UINT_3D: { + return "texture3d"; + } + case ImageType::UINT_CUBE: { + return "texturecube"; + } + case ImageType::UINT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::UINT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::UINT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::UINT_BUFFER: { + return 
"texture_buffer"; + } + default: { + /* Unrecognised type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +std::string MSLTextureSampler::get_msl_wrapper_type_str() const +{ + /* Add Types as needed. */ + switch (this->type) { + case ImageType::FLOAT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::FLOAT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::FLOAT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::FLOAT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::FLOAT_1D_ARRAY: { + return "_mtl_combined_image_sampler_1d_array"; + } + case ImageType::FLOAT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::FLOAT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + case ImageType::DEPTH_2D: { + return "_mtl_combined_image_sampler_depth_2d"; + } + case ImageType::SHADOW_2D: { + return "_mtl_combined_image_sampler_depth_2d"; + } + case ImageType::DEPTH_2D_ARRAY: { + return "_mtl_combined_image_sampler_depth_2d_array"; + } + case ImageType::SHADOW_2D_ARRAY: { + return "_mtl_combined_image_sampler_depth_2d_array"; + } + case ImageType::DEPTH_CUBE: { + return "_mtl_combined_image_sampler_depth_cube"; + } + case ImageType::SHADOW_CUBE: { + return "_mtl_combined_image_sampler_depth_cube"; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_depth_cube_array"; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_depth_cube_array"; + } + case ImageType::INT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::INT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::INT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::INT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::INT_1D_ARRAY: { + return 
"_mtl_combined_image_sampler_1d_array"; + } + case ImageType::INT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::INT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::INT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + case ImageType::UINT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::UINT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::UINT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::UINT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::UINT_1D_ARRAY: { + return "_mtl_combined_image_sampler_1d_array"; + } + case ImageType::UINT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::UINT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::UINT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + default: { + /* Unrecognised type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +std::string MSLTextureSampler::get_msl_return_type_str() const +{ + /* Add Types as needed */ + switch (this->type) { + /* Floating point return. */ + case ImageType::FLOAT_1D: + case ImageType::FLOAT_2D: + case ImageType::FLOAT_3D: + case ImageType::FLOAT_CUBE: + case ImageType::FLOAT_1D_ARRAY: + case ImageType::FLOAT_2D_ARRAY: + case ImageType::FLOAT_CUBE_ARRAY: + case ImageType::FLOAT_BUFFER: + case ImageType::DEPTH_2D: + case ImageType::SHADOW_2D: + case ImageType::DEPTH_2D_ARRAY: + case ImageType::SHADOW_2D_ARRAY: + case ImageType::DEPTH_CUBE: + case ImageType::SHADOW_CUBE: + case ImageType::DEPTH_CUBE_ARRAY: + case ImageType::SHADOW_CUBE_ARRAY: { + return "float"; + } + /* Integer return. 
*/ + case ImageType::INT_1D: + case ImageType::INT_2D: + case ImageType::INT_3D: + case ImageType::INT_CUBE: + case ImageType::INT_1D_ARRAY: + case ImageType::INT_2D_ARRAY: + case ImageType::INT_CUBE_ARRAY: + case ImageType::INT_BUFFER: { + return "int"; + } + + /* Unsigned Integer return. */ + case ImageType::UINT_1D: + case ImageType::UINT_2D: + case ImageType::UINT_3D: + case ImageType::UINT_CUBE: + case ImageType::UINT_1D_ARRAY: + case ImageType::UINT_2D_ARRAY: + case ImageType::UINT_CUBE_ARRAY: + case ImageType::UINT_BUFFER: { + return "uint32_t"; + } + + default: { + /* Unrecognised type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +eGPUTextureType MSLTextureSampler::get_texture_binding_type() const +{ + /* Add Types as needed */ + switch (this->type) { + case ImageType::FLOAT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::FLOAT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::FLOAT_3D: { + return GPU_TEXTURE_3D; + } + case ImageType::FLOAT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::FLOAT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::FLOAT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::FLOAT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + case ImageType::DEPTH_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::SHADOW_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::DEPTH_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::SHADOW_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::DEPTH_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::SHADOW_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::INT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::INT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::INT_3D: { + 
return GPU_TEXTURE_3D; + } + case ImageType::INT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::INT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::INT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::INT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::INT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + case ImageType::UINT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::UINT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::UINT_3D: { + return GPU_TEXTURE_3D; + } + case ImageType::UINT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::UINT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::UINT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::UINT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::UINT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + default: { + BLI_assert_unreachable(); + return GPU_TEXTURE_2D; + } + }; +} + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface.hh b/source/blender/gpu/metal/mtl_shader_interface.hh new file mode 100644 index 00000000000..0f04c04031d --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface.hh @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "BLI_vector.hh" + +#include "gpu_shader_interface.hh" +#include "mtl_capabilities.hh" +#include "mtl_shader_interface_type.hh" + +#include "GPU_common.h" +#include "GPU_common_types.h" +#include "GPU_texture.h" +#include "gpu_texture_private.hh" +#include <Metal/Metal.h> +#include <functional> + +namespace blender::gpu { + +/* MTLShaderInterface describes the layout and properties of a given shader, + * including input and output bindings, and any special properties or modes + * that the shader may require. 
+ * + * -- Shader input/output bindings -- + * + * We require custom datastructures for the binding information in Metal. + * This is because certain bindings contain and require more information to + * be stored than can be tracked solely within the `ShaderInput` struct. + * e.g. data sizes and offsets. + * + * Upon interface completion, `prepare_common_shader_inputs` is used to + * populate the global ShaderInput* array to enable correct functionality + * of shader binding location lookups. These returned locations act as indices + * into the arrays stored here in the MTLShaderInterace, such that extraction + * of required information can be performed within the backend. + * + * e.g. `int loc = GPU_shader_get_uniform(...)` + * `loc` will match the index into the MTLShaderUniform uniforms_[] array + * to fetch the required Metal specific information. + * + * + * + * -- Argument Buffers and Argument Encoders -- + * + * We can use ArgumentBuffers (AB's) in Metal to extend the resource bind limitations + * by providing bindless support. + * + * Argument Buffers are used for sampler bindings when the builtin + * sampler limit of 16 is exceeded, as in all cases for Blender, + * each individual texture is associated with a given sampler, and this + * lower limit would otherwise reduce the total availability of textures + * used in shaders. + * + * In future, argument buffers may be extended to support other resource + * types, if overall bind limits are ever increased within Blender. + * + * The ArgumentEncoder cache used to store the generated ArgumentEncoders for a given + * shader permutation. The ArgumentEncoder is the resource used to write resource binding + * information to a specified buffer, and is unique to the shader's resource interface. 
+ */ + +enum class ShaderStage : uint32_t { + VERTEX = 1 << 0, + FRAGMENT = 1 << 1, + BOTH = (ShaderStage::VERTEX | ShaderStage::FRAGMENT), +}; +ENUM_OPERATORS(ShaderStage, ShaderStage::BOTH); + +inline uint get_shader_stage_index(ShaderStage stage) +{ + switch (stage) { + case ShaderStage::VERTEX: + return 0; + case ShaderStage::FRAGMENT: + return 1; + default: + BLI_assert_unreachable(); + return 0; + } + return 0; +} + +/* Shader input/output binding information. */ +struct MTLShaderInputAttribute { + uint32_t name_offset; + MTLVertexFormat format; + uint32_t index; + uint32_t location; + uint32_t size; + uint32_t buffer_index; + uint32_t offset; + /* For attributes of Matrix/array types, we need to insert "fake" attributes for + * each element, as matrix types are not natively supported. + * + * > 1 if matrix/arrays are used, specifying number of elements. + * = 1 for non-matrix types + * = 0 if used as a dummy slot for "fake" matrix attributes. */ + uint32_t matrix_element_count; +}; + +struct MTLShaderUniformBlock { + uint32_t name_offset; + uint32_t size = 0; + /* Buffer resouce bind index in shader [[buffer(index)]]. */ + uint32_t buffer_index; + + /* Tracking for manual uniform addition. */ + uint32_t current_offset; + ShaderStage stage_mask; +}; + +struct MTLShaderUniform { + uint32_t name_offset; + /* Index of `MTLShaderUniformBlock` this uniform belongs to. */ + uint32_t size_in_bytes; + uint32_t byte_offset; + eMTLDataType type; + uint32_t array_len; +}; + +struct MTLShaderTexture { + bool used; + uint32_t name_offset; + /* Texture resource bind slot in shader [[texture(n)]]. */ + int slot_index; + eGPUTextureType type; + ShaderStage stage_mask; +}; + +struct MTLShaderSampler { + uint32_t name_offset; + /* Sampler resource bind slot in shader [[sampler(n)]]. */ + uint32_t slot_index = 0; +}; + +/* Utility Functions. */ +MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type); + +/** + * Implementation of Shader interface for Metal Backend. 
+ **/ +class MTLShaderInterface : public ShaderInterface { + + private: + /* Argument encoders caching. + * Static size is based on common input permutation variations. */ + static const int ARGUMENT_ENCODERS_CACHE_SIZE = 3; + struct ArgumentEncoderCacheEntry { + id<MTLArgumentEncoder> encoder; + int buffer_index; + }; + ArgumentEncoderCacheEntry arg_encoders_[ARGUMENT_ENCODERS_CACHE_SIZE] = {}; + + /* Vertex input Attribues. */ + uint32_t total_attributes_; + uint32_t total_vert_stride_; + MTLShaderInputAttribute attributes_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES]; + + /* Uniforms. */ + uint32_t total_uniforms_; + MTLShaderUniform uniforms_[MTL_MAX_UNIFORMS_PER_BLOCK]; + + /* Uniform Blocks. */ + uint32_t total_uniform_blocks_; + MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; + MTLShaderUniformBlock push_constant_block_; + + /* Textures. */ + /* Textures support explicit binding indices, so some texture slots + * remain unused. */ + uint32_t total_textures_; + int max_texture_index_; + MTLShaderTexture textures_[MTL_MAX_TEXTURE_SLOTS]; + + /* Whether argument buffers are used for sampler bindings. */ + bool sampler_use_argument_buffer_; + int sampler_argument_buffer_bind_index_vert_; + int sampler_argument_buffer_bind_index_frag_; + + /* Attribute Mask. */ + uint32_t enabled_attribute_mask_; + + /* Debug. 
*/ + char name[256]; + + public: + MTLShaderInterface(const char *name); + ~MTLShaderInterface(); + + void init(); + void add_input_attribute(uint32_t name_offset, + uint32_t attribute_location, + MTLVertexFormat format, + uint32_t buffer_index, + uint32_t size, + uint32_t offset, + int matrix_element_count = 1); + uint32_t add_uniform_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t size, + ShaderStage stage_mask = ShaderStage::BOTH); + void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1); + void add_texture(uint32_t name_offset, + uint32_t texture_slot, + eGPUTextureType tex_binding_type, + ShaderStage stage_mask = ShaderStage::FRAGMENT); + void add_push_constant_block(uint32_t name_offset); + + /* Resolve and cache locations of builtin uniforms and uniform blocks. */ + void map_builtins(); + void set_sampler_properties(bool use_argument_buffer, + uint32_t argument_buffer_bind_index_vert, + uint32_t argument_buffer_bind_index_frag); + + /* Prepare ShaderInput interface for binding resolution. */ + void prepare_common_shader_inputs(); + + /* Fetch Uniforms. */ + const MTLShaderUniform &get_uniform(uint index) const; + uint32_t get_total_uniforms() const; + + /* Fetch Uniform Blocks. */ + const MTLShaderUniformBlock &get_uniform_block(uint index) const; + uint32_t get_total_uniform_blocks() const; + bool has_uniform_block(uint32_t block_index) const; + uint32_t get_uniform_block_size(uint32_t block_index) const; + + /* Push constant uniform data block should always be available. */ + const MTLShaderUniformBlock &get_push_constant_block() const; + + /* Fetch textures. */ + const MTLShaderTexture &get_texture(uint index) const; + uint32_t get_total_textures() const; + uint32_t get_max_texture_index() const; + bool get_use_argument_buffer_for_samplers(int *vertex_arg_buffer_bind_index, + int *fragment_arg_buffer_bind_index) const; + + /* Fetch Attributes. 
*/ + const MTLShaderInputAttribute &get_attribute(uint index) const; + uint32_t get_total_attributes() const; + uint32_t get_total_vertex_stride() const; + uint32_t get_enabled_attribute_mask() const; + + /* Name buffer fetching. */ + const char *get_name_at_offset(uint32_t offset) const; + + /* Interface name. */ + const char *get_name() const + { + return this->name; + } + + /* Argument buffer encoder management. */ + id<MTLArgumentEncoder> find_argument_encoder(int buffer_index) const; + + void insert_argument_encoder(int buffer_index, id encoder); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLShaderInterface"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm new file mode 100644 index 00000000000..1adf1210496 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU shader interface (C --> GLSL) + */ + +#include "BLI_bitmap.h" + +#include "GPU_capabilities.h" + +#include "mtl_common.hh" +#include "mtl_debug.hh" +#include "mtl_shader_interface.hh" +#include "mtl_shader_interface_type.hh" + +#include "BLI_blenlib.h" +#include "BLI_math_base.h" +#include "BLI_utildefines.h" +#include "MEM_guardedalloc.h" + +namespace blender::gpu { + +MTLShaderInterface::MTLShaderInterface(const char *name) +{ + /* Shared ShaderInputs array is populated later on in `prepare_common_shader_inputs` + * after Metal Shader Interface preparation. */ + inputs_ = nullptr; + + if (name != nullptr) { + strcpy(this->name, name); + } + + /* Ensure ShaderInterface parameters are cleared. 
*/ + this->init(); +} + +MTLShaderInterface::~MTLShaderInterface() +{ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + if (arg_encoders_[i].encoder != nil) { + id<MTLArgumentEncoder> enc = arg_encoders_[i].encoder; + [enc release]; + } + } +} + +const char *MTLShaderInterface::get_name_at_offset(uint32_t offset) const +{ + return name_buffer_ + offset; +} + +void MTLShaderInterface::init() +{ + total_attributes_ = 0; + total_uniform_blocks_ = 0; + total_uniforms_ = 0; + total_textures_ = 0; + max_texture_index_ = -1; + enabled_attribute_mask_ = 0; + total_vert_stride_ = 0; + sampler_use_argument_buffer_ = false; + sampler_argument_buffer_bind_index_vert_ = -1; + sampler_argument_buffer_bind_index_frag_ = -1; + + /* NULL initialize uniform location markers for builtins. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + builtins_[u] = -1; + } + for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + builtin_blocks_[ubo] = -1; + } + for (const int tex : IndexRange(MTL_MAX_TEXTURE_SLOTS)) { + textures_[tex].used = false; + textures_[tex].slot_index = -1; + } + + /* Null initialization for argument encoders. 
*/ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + arg_encoders_[i].encoder = nil; + arg_encoders_[i].buffer_index = -1; + } +} + +void MTLShaderInterface::add_input_attribute(uint32_t name_offset, + uint32_t attribute_location, + MTLVertexFormat format, + uint32_t buffer_index, + uint32_t size, + uint32_t offset, + int matrix_element_count) +{ + MTLShaderInputAttribute &input_attr = attributes_[total_attributes_]; + input_attr.name_offset = name_offset; + input_attr.format = format; + input_attr.location = attribute_location; + input_attr.size = size; + input_attr.buffer_index = buffer_index; + input_attr.offset = offset; + input_attr.matrix_element_count = matrix_element_count; + input_attr.index = total_attributes_; + total_attributes_++; + total_vert_stride_ = max_ii(total_vert_stride_, offset + size); + enabled_attribute_mask_ |= (1 << attribute_location); +} + +uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t size, + ShaderStage stage_mask) +{ + /* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */ + if ((size % 16) != 0) { + size += 16 - (size % 16); + } + + MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; + uni_block.name_offset = name_offset; + /* We offset the buffer binding index by one, as the first slot is reserved for push constant + * data. */ + uni_block.buffer_index = buffer_index + 1; + uni_block.size = size; + uni_block.current_offset = 0; + uni_block.stage_mask = ShaderStage::BOTH; + return (total_uniform_blocks_++); +} + +void MTLShaderInterface::add_push_constant_block(uint32_t name_offset) +{ + push_constant_block_.name_offset = name_offset; + /* Push constant data block is always uniform buffer index 0. */ + push_constant_block_.buffer_index = 0; + /* Size starts at zero and grows as uniforms are added. 
*/ + push_constant_block_.size = 0; + + push_constant_block_.current_offset = 0; + push_constant_block_.stage_mask = ShaderStage::BOTH; +} + +void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, int array_len) +{ + BLI_assert(array_len > 0); + BLI_assert(total_uniforms_ < MTL_MAX_UNIFORMS_PER_BLOCK); + if (total_uniforms_ >= MTL_MAX_UNIFORMS_PER_BLOCK) { + MTL_LOG_WARNING( + "[Warning] Cannot add uniform '%s' to shader interface '%s' as the uniform limit of %d " + "has been reached.\n", + name, + name, + MTL_MAX_UNIFORMS_PER_BLOCK); + return; + } + MTLShaderUniform &uniform = uniforms_[total_uniforms_]; + uniform.name_offset = name_offset; + + /* Determine size and offset alignment -- C++ struct alignment rules: Base address of value must + * match alignment of type. GLSL follows minimum type alignment of 4. */ + int data_type_size = mtl_get_data_type_size(type) * array_len; + int data_type_alignment = max_ii(mtl_get_data_type_alignment(type), 4); + int current_offset = push_constant_block_.current_offset; + if ((current_offset % data_type_alignment) != 0) { + current_offset += data_type_alignment - (current_offset % data_type_alignment); + } + + uniform.size_in_bytes = data_type_size; + uniform.byte_offset = current_offset; + uniform.type = type; + uniform.array_len = array_len; + total_uniforms_++; + + /* Update Push constant block-- update offset, re-size and re-align total memory requirement to + * be 16-byte aligned. Following GLSL std140. */ + push_constant_block_.current_offset = current_offset + data_type_size; + if (push_constant_block_.current_offset > push_constant_block_.size) { + push_constant_block_.size = push_constant_block_.current_offset; + if ((push_constant_block_.size % 16) != 0) { + push_constant_block_.size += 16 - (push_constant_block_.size % 16); + } + } + + /* Validate properties. 
*/ + BLI_assert(uniform.size_in_bytes > 0); + BLI_assert_msg( + current_offset + data_type_size <= push_constant_block_.size, + "Uniform size and offset sits outside the specified size range for the uniform block"); +} + +void MTLShaderInterface::add_texture(uint32_t name_offset, + uint32_t texture_slot, + eGPUTextureType tex_binding_type, + ShaderStage stage_mask) +{ + BLI_assert(texture_slot >= 0 && texture_slot < GPU_max_textures()); + if (texture_slot >= 0 && texture_slot < GPU_max_textures()) { + + MTLShaderTexture &tex = textures_[texture_slot]; + BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding"); + tex.name_offset = name_offset; + tex.slot_index = texture_slot; + tex.type = tex_binding_type; + tex.stage_mask = stage_mask; + tex.used = true; + total_textures_++; + max_texture_index_ = max_ii(max_texture_index_, texture_slot); + } + else { + BLI_assert_msg(false, "Exceeding maximum supported texture count."); + MTL_LOG_WARNING( + "Could not add additional texture with index %d to shader interface. Maximum " + "supported texture count is %d\n", + texture_slot, + GPU_max_textures()); + } +} + +void MTLShaderInterface::map_builtins() +{ + /* Clear builtin arrays to NULL locations. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + builtins_[u] = -1; + } + for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + builtin_blocks_[ubo] = -1; + } + + /* Resolve and cache uniform locations for builtin uniforms. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + const ShaderInput *uni = this->uniform_get(builtin_uniform_name((GPUUniformBuiltin)u)); + if (uni != nullptr) { + BLI_assert(uni->location >= 0); + if (uni->location >= 0) { + builtins_[u] = uni->location; + MTL_LOG_INFO("Mapped builtin uniform '%s' NB: '%s' to location: %d\n", + builtin_uniform_name((GPUUniformBuiltin)u), + get_name_at_offset(uni->name_offset), + uni->location); + } + } + } + + /* Resolve and cache uniform locations for builtin uniform blocks. 
*/ + for (const int u : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + const ShaderInput *uni = this->ubo_get(builtin_uniform_block_name((GPUUniformBlockBuiltin)u)); + + if (uni != nullptr) { + BLI_assert(uni->location >= 0); + if (uni->location >= 0) { + builtin_blocks_[u] = uni->binding; + MTL_LOG_INFO("Mapped builtin uniform block '%s' to location %d\n", + builtin_uniform_block_name((GPUUniformBlockBuiltin)u), + uni->location); + } + } + } +} + +/* Populate ShaderInput struct based on interface. */ +void MTLShaderInterface::prepare_common_shader_inputs() +{ + /* ShaderInput inputs_ maps a uniform name to an external + * uniform location, which is used as an array index to look-up + * information in the local MTLShaderInterface input structs. + * + * ShaderInput population follows the ordering rules in gpu_shader_interface. */ + + /* Populate ShaderInterface counts. */ + attr_len_ = this->get_total_attributes(); + ubo_len_ = this->get_total_uniform_blocks(); + uniform_len_ = this->get_total_uniforms() + this->get_total_textures(); + + /* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */ + ssbo_len_ = 0; + + /* Calculate total inputs and allocate ShaderInput array. */ + /* NOTE: We use the existing name_buffer_ allocated for internal input structs. */ + int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_; + inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__); + ShaderInput *current_input = inputs_; + + /* Attributes. */ + for (const int attr_index : IndexRange(total_attributes_)) { + MTLShaderInputAttribute &shd_attr = attributes_[attr_index]; + current_input->name_offset = shd_attr.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_attr.name_offset)); + current_input->location = attr_index; + current_input->binding = attr_index; + current_input++; + } + + /* UBOs. 
*/ + BLI_assert(&inputs_[attr_len_] >= current_input); + current_input = &inputs_[attr_len_]; + for (const int ubo_index : IndexRange(total_uniform_blocks_)) { + MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index]; + current_input->name_offset = shd_ubo.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); + /* Location refers to the index in the ubos_ array. */ + current_input->location = ubo_index; + /* Final binding location refers to the buffer binding index within the shader (Relative to + * MTL_uniform_buffer_base_index). */ + current_input->binding = shd_ubo.buffer_index; + current_input++; + } + + /* Uniforms. */ + BLI_assert(&inputs_[attr_len_ + ubo_len_] >= current_input); + current_input = &inputs_[attr_len_ + ubo_len_]; + for (const int uniform_index : IndexRange(total_uniforms_)) { + MTLShaderUniform &shd_uni = uniforms_[uniform_index]; + current_input->name_offset = shd_uni.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_uni.name_offset)); + current_input->location = uniform_index; + current_input->binding = uniform_index; + current_input++; + } + + /* Textures. + * NOTE(Metal): Textures are externally treated as uniforms in gpu_shader_interface. + * Location for textures resolved as `binding` value. This + * is the index into the local MTLShaderTexture textures[] array. + * + * In MSL, we cannot trivially remap which texture slot a given texture + * handle points to, unlike in GLSL, where a uniform sampler/image can be updated + * and queried as both a texture and a uniform. */ + for (int texture_index = 0; texture_index <= max_texture_index_; texture_index++) { + const MTLShaderTexture &shd_tex = textures_[texture_index]; + + /* Not all texture entries are used when explicit texture locations are specified. 
*/ + if (shd_tex.used) { + BLI_assert_msg(shd_tex.slot_index == texture_index, + "Texture binding slot should match array index for texture."); + current_input->name_offset = shd_tex.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_tex.name_offset)); + + /* Location represents look-up address. + * For Metal, this location is a unique value offset by + * total_uniforms such that it does not overlap. + * + * This range offset allows a check in the uniform look-up + * to ensure texture handles are not treated as standard uniforms in Metal. */ + current_input->location = texture_index + total_uniforms_; + + /* Binding represents texture slot [[texture(n)]]. */ + current_input->binding = shd_tex.slot_index; + current_input++; + } + } + + /* SSBO bindings. + * TODO(Metal): Support SSBOs. Pending compute support. */ + BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input); + current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_]; + + /* Map builtin uniform indices to uniform binding locations. */ + this->map_builtins(); +} + +void MTLShaderInterface::set_sampler_properties(bool use_argument_buffer, + uint32_t argument_buffer_bind_index_vert, + uint32_t argument_buffer_bind_index_frag) +{ + sampler_use_argument_buffer_ = use_argument_buffer; + sampler_argument_buffer_bind_index_vert_ = argument_buffer_bind_index_vert; + sampler_argument_buffer_bind_index_frag_ = argument_buffer_bind_index_frag; +} + +/* Attributes. 
*/ +const MTLShaderInputAttribute &MTLShaderInterface::get_attribute(uint index) const +{ + BLI_assert(index < MTL_MAX_VERTEX_INPUT_ATTRIBUTES); + BLI_assert(index < get_total_attributes()); + return attributes_[index]; +} + +uint32_t MTLShaderInterface::get_total_attributes() const +{ + return total_attributes_; +} + +uint32_t MTLShaderInterface::get_total_vertex_stride() const +{ + return total_vert_stride_; +} + +uint32_t MTLShaderInterface::get_enabled_attribute_mask() const +{ + return enabled_attribute_mask_; +} + +/* Uniforms. */ +const MTLShaderUniform &MTLShaderInterface::get_uniform(uint index) const +{ + BLI_assert(index < MTL_MAX_UNIFORMS_PER_BLOCK); + BLI_assert(index < get_total_uniforms()); + return uniforms_[index]; +} + +uint32_t MTLShaderInterface::get_total_uniforms() const +{ + return total_uniforms_; +} + +/* Uniform Blocks. */ +const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const +{ + BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(index < get_total_uniform_blocks()); + return ubos_[index]; +} + +const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const +{ + return push_constant_block_; +} + +uint32_t MTLShaderInterface::get_total_uniform_blocks() const +{ + return total_uniform_blocks_; +} + +bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const +{ + return (block_index < total_uniform_blocks_); +} + +uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const +{ + return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0; +} + +/* Textures. 
*/ +const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const +{ + BLI_assert(index < MTL_MAX_TEXTURE_SLOTS); + BLI_assert(index <= get_max_texture_index()); + return textures_[index]; +} + +uint32_t MTLShaderInterface::get_total_textures() const +{ + return total_textures_; +} + +uint32_t MTLShaderInterface::get_max_texture_index() const +{ + return max_texture_index_; +} + +bool MTLShaderInterface::get_use_argument_buffer_for_samplers( + int *vertex_arg_buffer_bind_index, int *fragment_arg_buffer_bind_index) const +{ + /* Returns argument buffer binding slot for each shader stage. + * The exact bind slot may be different, as each stage has different buffer inputs. */ + *vertex_arg_buffer_bind_index = sampler_argument_buffer_bind_index_vert_; + *fragment_arg_buffer_bind_index = sampler_argument_buffer_bind_index_frag_; + return sampler_use_argument_buffer_; +} + +id<MTLArgumentEncoder> MTLShaderInterface::find_argument_encoder(int buffer_index) const +{ + id encoder = nil; + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + encoder = arg_encoders_[i].buffer_index == buffer_index ? 
arg_encoders_[i].encoder : encoder; + } + return encoder; +} + +void MTLShaderInterface::insert_argument_encoder(int buffer_index, id encoder) +{ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + if (arg_encoders_[i].encoder == nil) { + arg_encoders_[i].encoder = encoder; + arg_encoders_[i].buffer_index = buffer_index; + return; + } + } + MTL_LOG_WARNING("could not insert encoder into cache!"); +} + +MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + return MTLVertexFormatChar; + case MTL_DATATYPE_UCHAR: + return MTLVertexFormatUChar; + case MTL_DATATYPE_BOOL: + return MTLVertexFormatUChar; + case MTL_DATATYPE_CHAR2: + return MTLVertexFormatChar2; + case MTL_DATATYPE_UCHAR2: + return MTLVertexFormatUChar2; + case MTL_DATATYPE_BOOL2: + return MTLVertexFormatUChar2; + case MTL_DATATYPE_SHORT: + return MTLVertexFormatShort; + case MTL_DATATYPE_USHORT: + return MTLVertexFormatUShort; + case MTL_DATATYPE_CHAR3: + return MTLVertexFormatChar3; + case MTL_DATATYPE_UCHAR3: + return MTLVertexFormatUChar3; + case MTL_DATATYPE_BOOL3: + return MTLVertexFormatUChar3; + case MTL_DATATYPE_CHAR4: + return MTLVertexFormatChar4; + case MTL_DATATYPE_UCHAR4: + return MTLVertexFormatUChar4; + case MTL_DATATYPE_INT: + return MTLVertexFormatInt; + case MTL_DATATYPE_UINT: + return MTLVertexFormatUInt; + case MTL_DATATYPE_BOOL4: + return MTLVertexFormatUChar4; + case MTL_DATATYPE_SHORT2: + return MTLVertexFormatShort2; + case MTL_DATATYPE_USHORT2: + return MTLVertexFormatUShort2; + case MTL_DATATYPE_FLOAT: + return MTLVertexFormatFloat; + case MTL_DATATYPE_HALF2x2: + case MTL_DATATYPE_HALF3x2: + case MTL_DATATYPE_HALF4x2: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_SHORT3: + return MTLVertexFormatShort3; + case MTL_DATATYPE_USHORT3: + return MTLVertexFormatUShort3; + case MTL_DATATYPE_SHORT4: + return MTLVertexFormatShort4; + 
case MTL_DATATYPE_USHORT4: + return MTLVertexFormatUShort4; + case MTL_DATATYPE_INT2: + return MTLVertexFormatInt2; + case MTL_DATATYPE_UINT2: + return MTLVertexFormatUInt2; + case MTL_DATATYPE_FLOAT2: + return MTLVertexFormatFloat2; + case MTL_DATATYPE_LONG: + return MTLVertexFormatInt; + case MTL_DATATYPE_ULONG: + return MTLVertexFormatUInt; + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x2: + case MTL_DATATYPE_FLOAT3x2: + case MTL_DATATYPE_FLOAT4x2: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_INT3: + return MTLVertexFormatInt3; + case MTL_DATATYPE_INT4: + return MTLVertexFormatInt4; + case MTL_DATATYPE_UINT3: + return MTLVertexFormatUInt3; + case MTL_DATATYPE_UINT4: + return MTLVertexFormatUInt4; + case MTL_DATATYPE_FLOAT3: + return MTLVertexFormatFloat3; + case MTL_DATATYPE_FLOAT4: + return MTLVertexFormatFloat4; + case MTL_DATATYPE_LONG2: + return MTLVertexFormatInt2; + case MTL_DATATYPE_ULONG2: + return MTLVertexFormatUInt2; + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_LONG3: + return MTLVertexFormatInt3; + case MTL_DATATYPE_LONG4: + return MTLVertexFormatInt4; + case MTL_DATATYPE_ULONG3: + return MTLVertexFormatUInt3; + case MTL_DATATYPE_ULONG4: + return MTLVertexFormatUInt4; + + /* Special Types */ + case MTL_DATATYPE_UINT1010102_NORM: + return MTLVertexFormatUInt1010102Normalized; + case MTL_DATATYPE_INT1010102_NORM: + return MTLVertexFormatInt1010102Normalized; + + default: + BLI_assert(false); + return MTLVertexFormatInvalid; + }; +} + +} // namespace 
blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface_type.hh b/source/blender/gpu/metal/mtl_shader_interface_type.hh new file mode 100644 index 00000000000..a8e651d8509 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface_type.hh @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#pragma once + +#include "BLI_assert.h" + +enum eMTLDataType { + MTL_DATATYPE_CHAR, + MTL_DATATYPE_CHAR2, + MTL_DATATYPE_CHAR3, + MTL_DATATYPE_CHAR4, + + MTL_DATATYPE_UCHAR, + MTL_DATATYPE_UCHAR2, + MTL_DATATYPE_UCHAR3, + MTL_DATATYPE_UCHAR4, + + MTL_DATATYPE_BOOL, + MTL_DATATYPE_BOOL2, + MTL_DATATYPE_BOOL3, + MTL_DATATYPE_BOOL4, + + MTL_DATATYPE_SHORT, + MTL_DATATYPE_SHORT2, + MTL_DATATYPE_SHORT3, + MTL_DATATYPE_SHORT4, + + MTL_DATATYPE_USHORT, + MTL_DATATYPE_USHORT2, + MTL_DATATYPE_USHORT3, + MTL_DATATYPE_USHORT4, + + MTL_DATATYPE_INT, + MTL_DATATYPE_INT2, + MTL_DATATYPE_INT3, + MTL_DATATYPE_INT4, + + MTL_DATATYPE_UINT, + MTL_DATATYPE_UINT2, + MTL_DATATYPE_UINT3, + MTL_DATATYPE_UINT4, + + MTL_DATATYPE_FLOAT, + MTL_DATATYPE_FLOAT2, + MTL_DATATYPE_FLOAT3, + MTL_DATATYPE_FLOAT4, + + MTL_DATATYPE_LONG, + MTL_DATATYPE_LONG2, + MTL_DATATYPE_LONG3, + MTL_DATATYPE_LONG4, + + MTL_DATATYPE_ULONG, + MTL_DATATYPE_ULONG2, + MTL_DATATYPE_ULONG3, + MTL_DATATYPE_ULONG4, + + MTL_DATATYPE_HALF2x2, + MTL_DATATYPE_HALF2x3, + MTL_DATATYPE_HALF2x4, + MTL_DATATYPE_HALF3x2, + MTL_DATATYPE_HALF3x3, + MTL_DATATYPE_HALF3x4, + MTL_DATATYPE_HALF4x2, + MTL_DATATYPE_HALF4x3, + MTL_DATATYPE_HALF4x4, + + MTL_DATATYPE_FLOAT2x2, + MTL_DATATYPE_FLOAT2x3, + MTL_DATATYPE_FLOAT2x4, + MTL_DATATYPE_FLOAT3x2, + MTL_DATATYPE_FLOAT3x3, + MTL_DATATYPE_FLOAT3x4, + MTL_DATATYPE_FLOAT4x2, + MTL_DATATYPE_FLOAT4x3, + MTL_DATATYPE_FLOAT4x4, + + MTL_DATATYPE_UINT1010102_NORM, + MTL_DATATYPE_INT1010102_NORM +}; + +inline uint mtl_get_data_type_size(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + case MTL_DATATYPE_UCHAR: + case 
MTL_DATATYPE_BOOL: + return 1; + case MTL_DATATYPE_CHAR2: + case MTL_DATATYPE_UCHAR2: + case MTL_DATATYPE_BOOL2: + case MTL_DATATYPE_SHORT: + case MTL_DATATYPE_USHORT: + return 2; + + case MTL_DATATYPE_CHAR3: + case MTL_DATATYPE_UCHAR3: + case MTL_DATATYPE_BOOL3: + return 3; + case MTL_DATATYPE_CHAR4: + case MTL_DATATYPE_UCHAR4: + case MTL_DATATYPE_INT: + case MTL_DATATYPE_UINT: + case MTL_DATATYPE_BOOL4: + case MTL_DATATYPE_SHORT2: + case MTL_DATATYPE_USHORT2: + case MTL_DATATYPE_FLOAT: + case MTL_DATATYPE_UINT1010102_NORM: + case MTL_DATATYPE_INT1010102_NORM: + return 4; + + case MTL_DATATYPE_SHORT3: + case MTL_DATATYPE_USHORT3: + case MTL_DATATYPE_SHORT4: + case MTL_DATATYPE_USHORT4: + case MTL_DATATYPE_INT2: + case MTL_DATATYPE_UINT2: + case MTL_DATATYPE_FLOAT2: + case MTL_DATATYPE_LONG: + case MTL_DATATYPE_ULONG: + case MTL_DATATYPE_HALF2x2: + return 8; + + case MTL_DATATYPE_HALF3x2: + return 12; + + case MTL_DATATYPE_INT3: + case MTL_DATATYPE_INT4: + case MTL_DATATYPE_UINT3: + case MTL_DATATYPE_UINT4: + case MTL_DATATYPE_FLOAT3: + case MTL_DATATYPE_FLOAT4: + case MTL_DATATYPE_LONG2: + case MTL_DATATYPE_ULONG2: + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF4x2: + return 16; + + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_FLOAT3x2: + return 24; + + case MTL_DATATYPE_LONG3: + case MTL_DATATYPE_LONG4: + case MTL_DATATYPE_ULONG3: + case MTL_DATATYPE_ULONG4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT4x2: + return 32; + + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + return 48; + + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + return 64; + default: + BLI_assert(false); + return 0; + }; +} + +inline uint mtl_get_data_type_alignment(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + case MTL_DATATYPE_UCHAR: + case MTL_DATATYPE_BOOL: + return 1; + case 
MTL_DATATYPE_CHAR2: + case MTL_DATATYPE_UCHAR2: + case MTL_DATATYPE_BOOL2: + case MTL_DATATYPE_SHORT: + case MTL_DATATYPE_USHORT: + return 2; + + case MTL_DATATYPE_CHAR3: + case MTL_DATATYPE_UCHAR3: + case MTL_DATATYPE_BOOL3: + return 3; + case MTL_DATATYPE_CHAR4: + case MTL_DATATYPE_UCHAR4: + case MTL_DATATYPE_INT: + case MTL_DATATYPE_UINT: + case MTL_DATATYPE_BOOL4: + case MTL_DATATYPE_SHORT2: + case MTL_DATATYPE_USHORT2: + case MTL_DATATYPE_FLOAT: + case MTL_DATATYPE_HALF2x2: + case MTL_DATATYPE_HALF3x2: + case MTL_DATATYPE_HALF4x2: + case MTL_DATATYPE_UINT1010102_NORM: + case MTL_DATATYPE_INT1010102_NORM: + return 4; + + case MTL_DATATYPE_SHORT3: + case MTL_DATATYPE_USHORT3: + case MTL_DATATYPE_SHORT4: + case MTL_DATATYPE_USHORT4: + case MTL_DATATYPE_INT2: + case MTL_DATATYPE_UINT2: + case MTL_DATATYPE_FLOAT2: + case MTL_DATATYPE_LONG: + case MTL_DATATYPE_ULONG: + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x2: + case MTL_DATATYPE_FLOAT3x2: + case MTL_DATATYPE_FLOAT4x2: + return 8; + + case MTL_DATATYPE_INT3: + case MTL_DATATYPE_INT4: + case MTL_DATATYPE_UINT3: + case MTL_DATATYPE_UINT4: + case MTL_DATATYPE_FLOAT3: + case MTL_DATATYPE_FLOAT4: + case MTL_DATATYPE_LONG2: + case MTL_DATATYPE_ULONG2: + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + return 16; + + case MTL_DATATYPE_LONG3: + case MTL_DATATYPE_LONG4: + case MTL_DATATYPE_ULONG3: + case MTL_DATATYPE_ULONG4: + return 32; + + default: + BLI_assert_msg(false, "Unrecognised MTL datatype."); + return 0; + }; +} diff --git a/source/blender/gpu/metal/mtl_shader_shared.h b/source/blender/gpu/metal/mtl_shader_shared.h new file mode 100644 index 00000000000..f6fd9035001 --- /dev/null +++ 
b/source/blender/gpu/metal/mtl_shader_shared.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* Global parameters. */ +#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS 6 /* buffer bind 0..5 */ +#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX MTL_SSBO_VERTEX_FETCH_MAX_VBOS + +/* Add Types as needed (Also need to be added to mtl_shader.h). */ +#define GPU_SHADER_ATTR_TYPE_FLOAT 0 +#define GPU_SHADER_ATTR_TYPE_INT 1 +#define GPU_SHADER_ATTR_TYPE_SHORT 2 +#define GPU_SHADER_ATTR_TYPE_CHAR 3 +#define GPU_SHADER_ATTR_TYPE_VEC2 4 +#define GPU_SHADER_ATTR_TYPE_VEC3 5 +#define GPU_SHADER_ATTR_TYPE_VEC4 6 +#define GPU_SHADER_ATTR_TYPE_UVEC2 7 +#define GPU_SHADER_ATTR_TYPE_UVEC3 8 +#define GPU_SHADER_ATTR_TYPE_UVEC4 9 +#define GPU_SHADER_ATTR_TYPE_IVEC2 10 +#define GPU_SHADER_ATTR_TYPE_IVEC3 11 +#define GPU_SHADER_ATTR_TYPE_IVEC4 12 +#define GPU_SHADER_ATTR_TYPE_MAT3 13 +#define GPU_SHADER_ATTR_TYPE_MAT4 14 +#define GPU_SHADER_ATTR_TYPE_UCHAR_NORM 15 +#define GPU_SHADER_ATTR_TYPE_UCHAR2_NORM 16 +#define GPU_SHADER_ATTR_TYPE_UCHAR3_NORM 17 +#define GPU_SHADER_ATTR_TYPE_UCHAR4_NORM 18 +#define GPU_SHADER_ATTR_TYPE_INT1010102_NORM 19 +#define GPU_SHADER_ATTR_TYPE_SHORT3_NORM 20 +#define GPU_SHADER_ATTR_TYPE_CHAR2 21 +#define GPU_SHADER_ATTR_TYPE_CHAR3 22 +#define GPU_SHADER_ATTR_TYPE_CHAR4 23 +#define GPU_SHADER_ATTR_TYPE_UINT 24 diff --git a/source/blender/gpu/metal/mtl_state.hh b/source/blender/gpu/metal/mtl_state.hh index e6472491b35..1af56378c5a 100644 --- a/source/blender/gpu/metal/mtl_state.hh +++ b/source/blender/gpu/metal/mtl_state.hh @@ -3,6 +3,7 @@ /** \file * \ingroup gpu */ +#pragma once #include "MEM_guardedalloc.h" @@ -11,6 +12,8 @@ #include "GPU_state.h" #include "gpu_state_private.hh" +#include "mtl_pso_descriptor_state.hh" + namespace blender::gpu { /* Forward Declarations. */ @@ -21,7 +24,7 @@ class MTLContext; * Metal Implementation. **/ class MTLStateManager : public StateManager { - public: + private: /* Current state of the associated MTLContext. 
* Avoids resetting the whole state for every change. */ @@ -29,6 +32,9 @@ class MTLStateManager : public StateManager { GPUStateMutable current_mutable_; MTLContext *context_; + /* Global pipeline descriptors. */ + MTLRenderPipelineStateDescriptor pipeline_descriptor_; + public: MTLStateManager(MTLContext *ctx); @@ -47,6 +53,12 @@ class MTLStateManager : public StateManager { void texture_unpack_row_length_set(uint len) override; + /* Global pipeline descriptors. */ + MTLRenderPipelineStateDescriptor &get_pipeline_descriptor() + { + return pipeline_descriptor_; + } + private: void set_write_mask(const eGPUWriteMask value); void set_depth_test(const eGPUDepthTest value); diff --git a/source/blender/gpu/metal/mtl_state.mm b/source/blender/gpu/metal/mtl_state.mm index 0f2d4d7dc48..85080041246 100644 --- a/source/blender/gpu/metal/mtl_state.mm +++ b/source/blender/gpu/metal/mtl_state.mm @@ -11,6 +11,7 @@ #include "mtl_context.hh" #include "mtl_framebuffer.hh" +#include "mtl_shader_interface_type.hh" #include "mtl_state.hh" namespace blender::gpu { diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index 82a7a20a310..be6f3a3a02b 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -363,20 +363,20 @@ class MTLTexture : public Texture { }; id<MTLComputePipelineState> texture_update_1d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_1d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_2d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_2d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + 
TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_3d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> mtl_texture_update_impl( - TextureUpdateRoutineSpecialisation specialisation_params, + TextureUpdateRoutineSpecialisation specialization_params, blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type); /* Depth Update Utilities */ @@ -384,7 +384,7 @@ class MTLTexture : public Texture { * use a compute shader to write to depth, so we must instead render to a depth target. * These processes use vertex/fragment shaders to render texture data from an intermediate * source, in order to prime the depth buffer*/ - GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialisation); + GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization); void update_sub_depth_2d( int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data); @@ -397,20 +397,20 @@ class MTLTexture : public Texture { }; id<MTLComputePipelineState> texture_read_1d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_1d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_2d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_2d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_3d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + 
TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> mtl_texture_read_impl( - TextureReadRoutineSpecialisation specialisation_params, + TextureReadRoutineSpecialisation specialization_params, blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type); /* fullscreen blit utilities. */ diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 0cb38a3a2b7..2b7c2333bff 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -479,8 +479,8 @@ void gpu::MTLTexture::update_sub( int expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format); int destination_num_channels = get_mtl_format_num_components(destination_format); - /* Prepare specialisation struct (For texture update routine). */ - TextureUpdateRoutineSpecialisation compute_specialisation_kernel = { + /* Prepare specialization struct (For texture update routine). */ + TextureUpdateRoutineSpecialisation compute_specialization_kernel = { tex_data_format_to_msl_type_str(type), /* INPUT DATA FORMAT */ tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */ num_channels, @@ -620,7 +620,7 @@ void gpu::MTLTexture::update_sub( /* Use Compute Based update. 
*/ if (type_ == GPU_TEXTURE_1D) { id<MTLComputePipelineState> pso = texture_update_1d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], 1, 1}, {offset[0], 0, 0}, @@ -637,7 +637,7 @@ void gpu::MTLTexture::update_sub( } else if (type_ == GPU_TEXTURE_1D_ARRAY) { id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], 1}, {offset[0], offset[1], 0}, @@ -694,7 +694,7 @@ void gpu::MTLTexture::update_sub( /* Use Compute texture update. */ if (type_ == GPU_TEXTURE_2D) { id<MTLComputePipelineState> pso = texture_update_2d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], 1}, {offset[0], offset[1], 0}, @@ -712,7 +712,7 @@ void gpu::MTLTexture::update_sub( } else if (type_ == GPU_TEXTURE_2D_ARRAY) { id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], extent[2]}, {offset[0], offset[1], offset[2]}, @@ -752,7 +752,7 @@ void gpu::MTLTexture::update_sub( } else { id<MTLComputePipelineState> pso = texture_update_3d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], extent[2]}, {offset[0], offset[1], offset[2]}, @@ -1216,7 +1216,7 @@ void gpu::MTLTexture::read_internal(int mip, destination_buffer_host_ptr = (void *)((uint8_t *)([destination_buffer contents]) + destination_offset); - /* Prepare specialisation struct (For non-trivial texture read routine). */ + /* Prepare specialization struct (For non-trivial texture read routine). 
*/ int depth_format_mode = 0; if (is_depth_format) { depth_format_mode = 1; @@ -1236,7 +1236,7 @@ void gpu::MTLTexture::read_internal(int mip, } } - TextureReadRoutineSpecialisation compute_specialisation_kernel = { + TextureReadRoutineSpecialisation compute_specialization_kernel = { tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */ tex_data_format_to_msl_type_str(desired_output_format), /* OUTPUT DATA TYPE */ num_channels, /* TEXTURE COMPONENT COUNT */ @@ -1283,7 +1283,7 @@ void gpu::MTLTexture::read_internal(int mip, id<MTLComputeCommandEncoder> compute_encoder = ctx->main_command_buffer.ensure_begin_compute_encoder(); id<MTLComputePipelineState> pso = texture_read_2d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureReadParams params = { mip, {width, height, 1}, @@ -1339,7 +1339,7 @@ void gpu::MTLTexture::read_internal(int mip, id<MTLComputeCommandEncoder> compute_encoder = ctx->main_command_buffer.ensure_begin_compute_encoder(); id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureReadParams params = { mip, {width, height, depth}, diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm index e2f0b3c848e..25b30c6cb0e 100644 --- a/source/blender/gpu/metal/mtl_texture_util.mm +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -305,13 +305,13 @@ bool mtl_format_supports_blending(MTLPixelFormat format) * \{ */ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( - TextureUpdateRoutineSpecialisation specialisation_params, + TextureUpdateRoutineSpecialisation specialization_params, blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type) { /* Check whether the Kernel exists. 
*/ - id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params); + id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params); if (result != nullptr) { return *result; } @@ -332,18 +332,18 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( options.languageVersion = MTLLanguageVersion2_2; options.preprocessorMacros = @{ @"INPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()], @"OUTPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()], @"COMPONENT_COUNT_INPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_input], + [NSNumber numberWithInt:specialization_params.component_count_input], @"COMPONENT_COUNT_OUTPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_output], + [NSNumber numberWithInt:specialization_params.component_count_output], @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))] }; /* Prepare shader library for conversion routine. */ - NSError *error = NULL; + NSError *error = nullptr; id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src options:options error:&error] autorelease]; @@ -370,7 +370,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( /* Store PSO. 
*/ [compute_pso retain]; - specialisation_cache.add_new(specialisation_params, compute_pso); + specialization_cache.add_new(specialization_params, compute_pso); return_pso = compute_pso; } @@ -379,53 +379,53 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_1d_update_compute_psos, GPU_TEXTURE_1D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); return mtl_texture_update_impl( - specialisation, + specialization, mtl_context->get_texture_utils().texture_1d_array_update_compute_psos, GPU_TEXTURE_1D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_2d_update_compute_psos, GPU_TEXTURE_2D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext 
*>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); return mtl_texture_update_impl( - specialisation, + specialization, mtl_context->get_texture_utils().texture_2d_array_update_compute_psos, GPU_TEXTURE_2D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_3d_update_compute_psos, GPU_TEXTURE_3D); } @@ -434,7 +434,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel( * Currently does not appear to be hit. */ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( - DepthTextureUpdateRoutineSpecialisation specialisation) + DepthTextureUpdateRoutineSpecialisation specialization) { /* Check whether the Kernel exists. */ @@ -442,13 +442,13 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( BLI_assert(mtl_context != nullptr); GPUShader **result = mtl_context->get_texture_utils().depth_2d_update_shaders.lookup_ptr( - specialisation); + specialization); if (result != nullptr) { return *result; } const char *fragment_source = nullptr; - switch (specialisation.data_mode) { + switch (specialization.data_mode) { case MTL_DEPTH_UPDATE_MODE_FLOAT: fragment_source = datatoc_depth_2d_update_float_frag_glsl; break; @@ -469,7 +469,7 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( nullptr, nullptr, "depth_2d_update_sh_get"); - mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialisation, shader); + mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader); return shader; } @@ -507,18 +507,18 @@ void gpu::MTLTexture::update_sub_depth_2d( eGPUTextureFormat format = (is_float) ? 
GPU_R32F : GPU_R32I; /* Shader key - Add parameters here for different configurations. */ - DepthTextureUpdateRoutineSpecialisation specialisation; + DepthTextureUpdateRoutineSpecialisation specialization; switch (type) { case GPU_DATA_FLOAT: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT; break; case GPU_DATA_UINT_24_8: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT24; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT24; break; case GPU_DATA_UINT: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT32; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT32; break; default: @@ -544,7 +544,7 @@ void gpu::MTLTexture::update_sub_depth_2d( GPU_framebuffer_clear_stencil(depth_fb_temp, 0); } - GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialisation); + GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialization); BLI_assert(depth_2d_update_sh != nullptr); GPUBatch *quad = GPU_batch_preset_quad(); GPU_batch_set_shader(quad, depth_2d_update_sh); @@ -591,13 +591,13 @@ void gpu::MTLTexture::update_sub_depth_2d( * \{ */ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( - TextureReadRoutineSpecialisation specialisation_params, + TextureReadRoutineSpecialisation specialization_params, blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type) { /* Check whether the Kernel exists. */ - id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params); + id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params); if (result != nullptr) { return *result; } @@ -615,10 +615,10 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( /* Defensive Debug Checks. 
*/ long long int depth_scale_factor = 1; - if (specialisation_params.depth_format_mode > 0) { - BLI_assert(specialisation_params.component_count_input == 1); - BLI_assert(specialisation_params.component_count_output == 1); - switch (specialisation_params.depth_format_mode) { + if (specialization_params.depth_format_mode > 0) { + BLI_assert(specialization_params.component_count_input == 1); + BLI_assert(specialization_params.component_count_output == 1); + switch (specialization_params.depth_format_mode) { case 1: /* FLOAT */ depth_scale_factor = 1; @@ -642,24 +642,24 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( options.languageVersion = MTLLanguageVersion2_2; options.preprocessorMacros = @{ @"INPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()], @"OUTPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()], @"COMPONENT_COUNT_INPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_input], + [NSNumber numberWithInt:specialization_params.component_count_input], @"COMPONENT_COUNT_OUTPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_output], + [NSNumber numberWithInt:specialization_params.component_count_output], @"WRITE_COMPONENT_COUNT" : - [NSNumber numberWithInt:min_ii(specialisation_params.component_count_input, - specialisation_params.component_count_output)], + [NSNumber numberWithInt:min_ii(specialization_params.component_count_input, + specialization_params.component_count_output)], @"IS_DEPTH_FORMAT" : - [NSNumber numberWithInt:((specialisation_params.depth_format_mode > 0) ? 1 : 0)], + [NSNumber numberWithInt:((specialization_params.depth_format_mode > 0) ? 
1 : 0)], @"DEPTH_SCALE_FACTOR" : [NSNumber numberWithLongLong:depth_scale_factor], @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))] }; /* Prepare shader library for conversion routine. */ - NSError *error = NULL; + NSError *error = nullptr; id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src options:options error:&error] autorelease]; @@ -687,7 +687,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( /* Store PSO. */ [compute_pso retain]; - specialisation_cache.add_new(specialisation_params, compute_pso); + specialization_cache.add_new(specialization_params, compute_pso); return_pso = compute_pso; } @@ -696,51 +696,51 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_2d_read_compute_psos, GPU_TEXTURE_2D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_2d_array_read_compute_psos, GPU_TEXTURE_2D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); 
BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_1d_read_compute_psos, GPU_TEXTURE_1D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_1d_array_read_compute_psos, GPU_TEXTURE_1D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_3d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_3d_read_compute_psos, GPU_TEXTURE_3D); } diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index 2375e78d9f1..4814a5ad71b 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -497,6 +497,7 @@ void GLBackend::capabilities_init() glGetIntegerv(GL_NUM_EXTENSIONS, &GCaps.extensions_len); GCaps.extension_get = gl_extension_get; + GCaps.max_samplers = GCaps.max_textures; GCaps.mem_stats_support = epoxy_has_gl_extension("GL_NVX_gpu_memory_info") || epoxy_has_gl_extension("GL_ATI_meminfo"); GCaps.shader_image_load_store_support = epoxy_has_gl_extension("GL_ARB_shader_image_load_store"); diff --git a/source/blender/gpu/shaders/metal/mtl_shader_common.msl b/source/blender/gpu/shaders/metal/mtl_shader_common.msl new file mode 100644 index 00000000000..c504cdbacb1 
--- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_common.msl @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* Common Metal header to be included in all compiled Metal shaders. + * Both native MSL shaders and GLSL shaders. */ + +using namespace metal; + +/* Should match GPUVertFetchMode. */ +typedef enum { + GPU_FETCH_FLOAT = 0, + GPU_FETCH_INT, + GPU_FETCH_INT_TO_FLOAT_UNIT, + GPU_FETCH_INT_TO_FLOAT, +} GPUVertFetchMode; + +/* Constant to flag base binding index of uniform buffers. */ +constant int MTL_uniform_buffer_base_index [[function_constant(0)]]; + +/* Default Point Size. + * Unused if function constant not set. */ +constant float MTL_global_pointsize [[function_constant(1)]]; + +/* Attribute conversions flags (Up to 16 attributes supported in Blender). */ +constant int MTL_AttributeConvert0 [[function_constant(2)]]; +constant int MTL_AttributeConvert1 [[function_constant(3)]]; +constant int MTL_AttributeConvert2 [[function_constant(4)]]; +constant int MTL_AttributeConvert3 [[function_constant(5)]]; +constant int MTL_AttributeConvert4 [[function_constant(6)]]; +constant int MTL_AttributeConvert5 [[function_constant(7)]]; +constant int MTL_AttributeConvert6 [[function_constant(8)]]; +constant int MTL_AttributeConvert7 [[function_constant(9)]]; +constant int MTL_AttributeConvert8 [[function_constant(10)]]; +constant int MTL_AttributeConvert9 [[function_constant(11)]]; +constant int MTL_AttributeConvert10 [[function_constant(12)]]; +constant int MTL_AttributeConvert11 [[function_constant(13)]]; +constant int MTL_AttributeConvert12 [[function_constant(14)]]; +constant int MTL_AttributeConvert13 [[function_constant(15)]]; +constant int MTL_AttributeConvert14 [[function_constant(16)]]; +constant int MTL_AttributeConvert15 [[function_constant(17)]]; + +/* Constant to flag binding index of transform feedback buffer. + * Unused if function constant not set. 
*/ +constant int MTL_transform_feedback_buffer_index [[function_constant(18)]]; + +/** Internal attribute conversion functionality. */ +/* Following descriptions in mtl_shader.hh, Metal only supports some implicit + * attribute type conversions. These conversions occur when there is a difference + * between the type specified in the vertex descriptor (In the input vertex buffers), + * and the attribute type in the shader's VertexIn struct (ShaderInterface). + * + * The supported implicit conversions are described here: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + * + * For unsupported conversions, the mtl_shader_generator will create an attribute reading function + * which performs this conversion manually upon read, depending on the requested fetchmode. + * + * These conversions use the function constants above, so any branching is optimized out during + * backend shader compilation (PSO creation). + * + * NOTE: Not all possibilities have been covered here, any additional conversion routines should + * be added as needed, and mtl_shader_generator should also be updated with any newly required + * read functions. + * + * These paths are only needed for cases where implicit conversion will not happen, in which + * case the value will be read as the type in the shader. 
+ */ +#define internal_vertex_attribute_convert_read_float(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float(as_type<int>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float(as_type<int>(v_in)) / float(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float2(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float2(as_type<int2>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float2(as_type<int2>(v_in)) / float2(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float3(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float3(as_type<int3>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float3(as_type<int3>(v_in)) / float3(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float4(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float4(as_type<int4>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float4(as_type<int4>(v_in)) / float4(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl new file mode 100644 index 00000000000..3b32783620d --- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -0,0 +1,1065 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** Special header for mapping commonly defined tokens to API-specific variations. + * Where possible, this will adhere closely to base GLSL, where semantics are the same. + * However, host code shader code may need modifying to support types where necessary variations + * exist between APIs but are not expressed through the source. (e.g. 
distinction between depth2d + * and texture2d types in metal). + */ + +/* Base instance with offsets. */ +#define gpu_BaseInstance gl_BaseInstanceARB +#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance) + +/* derivative signs. */ +#define DFDX_SIGN 1.0 +#define DFDY_SIGN 1.0 + +/* Type definitions. */ +#define vec2 float2 +#define vec3 float3 +#define vec4 float4 +#define mat2 float2x2 +#define mat2x2 float2x2 +#define mat3 float3x3 +#define mat4 float4x4 +#define ivec2 int2 +#define ivec3 int3 +#define ivec4 int4 +#define uvec2 uint2 +#define uvec3 uint3 +#define uvec4 uint4 +/* MTLBOOL is used for native booleans generated by the Metal backend, to avoid type-emulation + * for GLSL bools, which are treated as integers. */ +#define MTLBOOL bool +#define bool int +#define bvec2 bool2 +#define bvec3 bool3 +#define bvec4 bool4 +#define vec3_1010102_Unorm uint +#define vec3_1010102_Inorm int + +/* Strip GLSL Decorators. */ +#define in +#define flat +#define smooth +#define noperspective +#define layout(std140) struct +#define uniform + +/* Used to replace 'out' in function parameters with threadlocal reference + * shortened to avoid expanding the glsl source string. */ +#define THD thread + +/* Generate wrapper structs for combined texture and sampler type. */ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + constant sampler *samp; \ + } +#else +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + thread sampler *samp; \ + } +#endif + +/* Add any types as needed. 
*/ +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array); + +/* Sampler struct for argument buffer. */ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +struct SStruct { + array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]]; +}; +#endif + +/* Samplers as function parameters. 
*/ +#define sampler1D thread _mtl_combined_image_sampler_1d<float> +#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float> +#define sampler2D thread _mtl_combined_image_sampler_2d<float> +#define depth2D thread _mtl_combined_image_sampler_depth_2d<float> +#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float> +#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define sampler3D thread _mtl_combined_image_sampler_3d<float> +#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read> +#define samplerCube thread _mtl_combined_image_sampler_cube<float> +#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float> + +#define usampler1D thread _mtl_combined_image_sampler_1d<uint> +#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint> +#define usampler2D thread _mtl_combined_image_sampler_2d<uint> +#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint> +#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint> +#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define usampler3D thread _mtl_combined_image_sampler_3d<uint> +#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read> +#define usamplerCube thread _mtl_combined_image_sampler_cube<uint> +#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint> + +#define isampler1D thread _mtl_combined_image_sampler_1d<int> +#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int> +#define isampler2D thread _mtl_combined_image_sampler_2d<int> +#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int> +#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int> +#define 
isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define isampler3D thread _mtl_combined_image_sampler_3d<int> +#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read> +#define isamplerCube thread _mtl_combined_image_sampler_cube<int> +#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int> + +/* Vector accessor aliases. */ +#define st xy + +/* Texture functions. */ +#define texelFetch _texelFetch_internal +#define texelFetchOffset(__tex, __texel, __lod, __offset) \ + _texelFetch_internal(__tex, __texel, __lod, __offset) +#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv) +#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias))) +#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod))) +#define textureLodOffset(__tex, __uv, __lod, __offset) \ + _texture_internal_level(__tex, __uv, level(float(__lod)), __offset) +#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0) +#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp) +#define textureGatherOffset(__tex, __offset, __uv, __comp) \ + _texture_gather_internal(__tex, __uv, __comp, __offset) + +#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME +#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__) +#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__) + +/* Texture-write functions. */ +#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value) + +/* Singular return values from texture functions of type DEPTH are often indexed with either .r or + * .x. This is a lightweight wrapper type for handling this syntax. 
*/ +union _msl_return_float { + float r; + float x; + inline operator float() const + { + return r; + } +}; + +/* Add custom texture sampling/reading routines for each type to account for special return cases, + * e.g. returning a float with an r parameter Note: Cannot use template specialization for input + * type, as return types are specific to the signature of 'tex'. */ +/* Texture Read. */ +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T> +inline vec<S, 4> _texelFetch_internal( + const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + T texel, + uint lod, + T offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, 1> texel, + uint lod, + vec<T, 1> offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. 
*/ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, int n, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, n> texel, + uint lod, + vec<T, n> offset = vec<T, n>(0)) +{ + float w = tex.texture->get_width(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0, 0)) +{ + + float w = tex.texture->get_width(); + float h = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + /* LODs not supported for 1d textures. This must be zero. 
*/ + return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + return tex.texture->read(uint2(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_depth() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint3(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename T, access A> +inline _msl_return_float _texelFetch_internal( 
+ thread _mtl_combined_image_sampler_depth_2d<float, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; + return fl; + } + else { + _msl_return_float fl = {0}; + return fl; + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +/* Sample. 
*/ +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua) +{ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline uint4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw) +{ + return tex.texture->sample(*tex.samp, uvw); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z)); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))}; + return fl; +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), 
uvac.w, level(0))}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs) +{ + return tex.texture->sample(*tex.samp, uvs.xyz); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w)); +} + +/* Sample Level. */ +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, + float2 ua, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. 
*/ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline uint4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, + float3 uvw, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, uvw, options, offset); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)}; + return fl; +} + +inline _msl_return_float _texture_internal_level( + thread 
_mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float4 uvac, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, + float3 uvs, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uvs.xyz, options); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, + float4 coord_a, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options); +} + +/* Sample Bias. */ +template<typename T> +inline vec<T, 4> _texture_internal_bias( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + bias options = bias(0.0), + int offset = 0) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_bias( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0.0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_bias( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +/* Texture Gather. 
*/ +component int_to_component(const int comp) +{ + switch (comp) { + default: + case 0: + return component::x; + case 1: + return component::y; + case 2: + return component::z; + case 3: + return component::w; + } + return component::x; +} + +inline float4 _texture_gather_internal( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +/* Texture write support. */ +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) { + tex.texture->write(value, uint2(_coord.xy)); + } +} + +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + float d = tex.texture->get_depth(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && + _coord.z < d) { + tex.texture->write(value, uint3(_coord.xyz)); + } +} + +/* SSBO Vertex Fetch Mode. 
*/ +#ifdef MTL_SSBO_VERTEX_FETCH +/* Enabled when geometry is passed via raw buffer bindings, rather than using + * vertex assembly in the vertex-descriptor. + * + * To describe the layout of input attribute data, we will generate uniforms (defaulting to 0) + * with the names per unique input attribute with name `attr`: + * + * - uniform_ssbo_stride_##attr -- Representing the stride between elements. + * - uniform_ssbo_offset_##attr -- Representing the base offset within the vertex. + * - uniform_ssbo_fetchmode_##attr - Whether using per-vertex (=0) or per-instance fetch (=1). + * - uniform_ssbo_vbo_id_##attr - buffer binding index for VBO with data for this attribute. + * - uniform_ssbo_type_##attr - The type of data in the currently bound buffer. + * + * If the uniform_ssbo_type_* does not match with the desired type, then it is the responsibility + * of the shader to perform the conversion. Types should always be read as the raw attribute type, + * and then converted. e.g. If the uniform_ssbo_type_* is `int`, but we want to read it to be + * normalized to a float. + * The implementation should query the attribute type using vertex_fetch_get_attr_type(attr_name): + * + * float fweight = 0.0; + * if(vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) { + * int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int); + * fweight = (float)iweight/(float)INT32_MAX; + * } else { + * fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float); + * } + * + * Note: These uniforms are generated as part of the same data block used for regular uniforms + * and attribute data is written prior to each draw call, depending on the configuration of + * the vertex descriptor for an MTLBatch or MTLImmediate call. */ +# define PPCAT_NX(A, B) A##B +# define PPCAT(A, B) PPCAT_NX(A, B) + +# define RESOLVE_VERTEX(v_id) \ + ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \ + ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ?
MTL_INDEX_DATA_U16[v_id] : \ + MTL_INDEX_DATA_U32[v_id]) : \ + v_id) +# define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr) +# define vertex_fetch_attribute_raw(n, attr, type) \ + (reinterpret_cast<constant type *>( \ + &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \ + [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \ + ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \ + PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0] +# define vertex_fetch_attribute(n, attr, type) \ + vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type) +# define vertex_id_from_index_id(n) RESOLVE_VERTEX(n) +# define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR +# define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR +# define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr) + +/* Must mirror GPU_primitive.h. */ +# define GPU_PRIM_POINTS 0 +# define GPU_PRIM_LINES 1 +# define GPU_PRIM_TRIS 2 +# define GPU_PRIM_LINE_STRIP 3 +# define GPU_PRIM_LINE_LOOP 4 +# define GPU_PRIM_TRI_STRIP 5 +# define GPU_PRIM_TRI_FAN 6 +# define GPU_PRIM_LINES_ADJ 7 +# define GPU_PRIM_TRIS_ADJ 8 +# define GPU_PRIM_LINE_STRIP_ADJ 9 +#endif + +/* Common Functions. */ +#define dFdx(x) dfdx(x) +#define dFdy(x) dfdy(x) +#define mod(x, y) _mtlmod(x, y) +#define discard discard_fragment() +#define inversesqrt rsqrt + +inline float radians(float deg) +{ + /* Constant factor: M_PI_F/180.0. */ + return deg * 0.01745329251f; +} + +inline float degrees(float rad) +{ + /* Constant factor: 180.0/M_PI_F. */ + return rad * 57.2957795131; +} + +#define select(A, B, C) mix(A, B, C) + +/* Type conversions and type truncations. */ +inline float4 to_float4(float3 val) +{ + return float4(val, 1.0); +} + +/* Type conversions and type truncations (Utility Functions). 
*/ +inline float3x3 mat4_to_mat3(float4x4 matrix) +{ + return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz); +} + +inline int floatBitsToInt(float f) +{ + return as_type<int>(f); +} + +inline int2 floatBitsToInt(float2 f) +{ + return as_type<int2>(f); +} + +inline int3 floatBitsToInt(float3 f) +{ + return as_type<int3>(f); +} + +inline int4 floatBitsToInt(float4 f) +{ + return as_type<int4>(f); +} + +inline uint floatBitsToUint(float f) +{ + return as_type<uint>(f); +} + +inline uint2 floatBitsToUint(float2 f) +{ + return as_type<uint2>(f); +} + +inline uint3 floatBitsToUint(float3 f) +{ + return as_type<uint3>(f); +} + +inline uint4 floatBitsToUint(float4 f) +{ + return as_type<uint4>(f); +} + +inline float intBitsToFloat(int f) +{ + return as_type<float>(f); +} + +inline float2 intBitsToFloat(int2 f) +{ + return as_type<float2>(f); +} + +inline float3 intBitsToFloat(int3 f) +{ + return as_type<float3>(f); +} + +inline float4 intBitsToFloat(int4 f) +{ + return as_type<float4>(f); +} + +/* Texture size functions. Add texture types as needed. 
*/ +template<typename T, access A> +int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod) +{ + return int(image.texture->get_width()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod) +{ + return int2(image.texture->get_width(), image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_depth(lod)); +} + +/* Equality and comparison functions. 
*/ +#define lessThan(a, b) ((a) < (b)) +#define lessThanEqual(a, b) ((a) <= (b)) +#define greaterThan(a, b) ((a) > (b)) +#define greaterThanEqual(a, b) ((a) >= (b)) +#define equal(a, b) ((a) == (b)) +#define notEqual(a, b) ((a) != (b)) + +template<typename T, int n> bool all(vec<T, n> x) +{ + bool _all = true; + for (int i = 0; i < n; i++) { + _all = _all && (x[i] > 0); + } + return _all; +} + +template<typename T, int n> bool any(vec<T, n> x) +{ + bool _any = false; + for (int i = 0; i < n; i++) { + _any = _any || (x[i] > 0); + } + return _any; +} + +/* Modulo functionality. */ +int _mtlmod(int a, int b) +{ + return a - b * (a / b); +} + +template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y) +{ + return x - y * floor(x / y); +} + +template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y) +{ + return x - vec<T, n>(y) * floor(x / vec<T, n>(y)); +} + +template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y) +{ + return vec<U, n>(x) - y * floor(vec<U, n>(x) / y); +} + +/* Mathematical functions. */ +template<typename T> T atan(T y, T x) +{ + return atan2(y, x); +} + +/* Matrix Inverse. 
*/ +float4x4 inverse(float4x4 a) +{ + float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0]; + float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0]; + float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0]; + float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1]; + float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1]; + float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2]; + float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0]; + float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0]; + float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0]; + float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1]; + float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1]; + float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2]; + + float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); + + return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09, + a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09, + a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03, + a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03, + a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07, + a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07, + a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01, + a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01, + a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06, + a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06, + a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00, + a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00, + a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06, + a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06, + a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00, + a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) * + invdet; +} + +float3x3 inverse(float3x3 m) +{ + + float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) - + m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + + m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2])); + + float3x3 inverse(0); + inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]); + inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]); + inverse[2][0] = +(m[1][0] * m[2][1] - 
m[2][0] * m[1][1]); + inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]); + inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]); + inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]); + inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]); + inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]); + inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]); + inverse = inverse * invdet; + + return inverse; +} + +/* Additional overloads for builtin functions. */ +float distance(float x, float y) +{ + return abs(y - x); +} + +/* Overload for mix(A, B, float ratio). */ +template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val) +{ + return mix(a, b, vec<T, Size>(val)); +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T, int Size> +vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask) +{ + vec<T, Size> result; + for (int i = 0; i < Size; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Using vec<bool, S> does not appear to work, splitting cases. */ +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask) +{ + vec<T, 4> result; + for (int i = 0; i < 4; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask) +{ + vec<T, 3> result; + for (int i = 0; i < 3; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask) +{ + vec<T, 2> result; + for (int i = 0; i < 2; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> T mix(T a, T b, MTLBOOL mask) +{ + return (mask) ? 
b : a; +} + +template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a) +{ + for (int i = 0; i < Size; i++) { + if (a[i] != T(0)) { + return false; + } + } + return true; +} + +/* Matrix conversion fallback. */ +mat3 MAT3(vec3 a, vec3 b, vec3 c) +{ + return mat3(a, b, c); +} +mat3 MAT3(float f) +{ + return mat3(f); +} +mat3 MAT3(mat4 m) +{ + return mat4_to_mat3(m); +}
\ No newline at end of file diff --git a/source/blender/python/gpu/gpu_py_shader_create_info.cc b/source/blender/python/gpu/gpu_py_shader_create_info.cc index fbab39efe24..c9e49c5cc4b 100644 --- a/source/blender/python/gpu/gpu_py_shader_create_info.cc +++ b/source/blender/python/gpu/gpu_py_shader_create_info.cc @@ -673,6 +673,9 @@ static int constant_type_size(Type type) case Type::FLOAT: case Type::INT: case Type::UINT: + case Type::UCHAR4: + case Type::CHAR4: + case blender::gpu::shader::Type::VEC3_101010I2: return 4; break; case Type::VEC2: @@ -695,6 +698,18 @@ static int constant_type_size(Type type) case Type::MAT4: return 64; break; + case blender::gpu::shader::Type::UCHAR: + case blender::gpu::shader::Type::CHAR: + return 1; + break; + case blender::gpu::shader::Type::UCHAR2: + case blender::gpu::shader::Type::CHAR2: + return 2; + break; + case blender::gpu::shader::Type::UCHAR3: + case blender::gpu::shader::Type::CHAR3: + return 3; + break; } BLI_assert(false); return -1; |