author    Thomas Dinges <dingto> 2022-09-01 23:22:32 +0300
committer Clément Foucault <foucault.clem@gmail.com> 2022-09-01 23:28:40 +0300
commit    cc8ea6ac67a108fcb96e4a8373ac02faf9ccea3d (patch)
tree      351d23dc82e72ea34e6f2d630f9746ec76344af5 /source
parent    ac07fb38a1b35fa156b2d0901eb35cd65ed73903 (diff)
Metal: MTLShader and MTLShaderGenerator implementation.
Full support for translation and compilation of shaders in Metal, using
GPUShaderCreateInfo. Includes render pipeline state creation and management,
enabling all standard GPU viewport rendering features in Metal.

Authored by Apple: Michael Parkin-White, Marco Giordano

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D15563
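Shaders declared with GPUShaderCreateInfo are backend-agnostic: the backend (Metal here, OpenGL otherwise) is chosen at context creation, and shader retrieval goes through the public GPU API. As a minimal, hedged sketch of that usage (the shader name comes from the create-infos added in this commit; error handling omitted):

    /* Sketch only: fetch and bind a create-info shader through the public
     * GPU API. The Metal backend transparently translates the GLSL sources
     * to MSL and bakes a PSO on first draw. */
    GPUShader *sh = GPU_shader_create_from_info_name("depth_2d_update_float");
    GPU_shader_bind(sh);
    GPU_shader_uniform_1i(sh, "mip", 0);
    /* ... issue draw call ... */
    GPU_shader_free(sh);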
Diffstat (limited to 'source')
-rw-r--r--  source/blender/gpu/CMakeLists.txt                                      |   59
-rw-r--r--  source/blender/gpu/GPU_capabilities.h                                  |    1
-rw-r--r--  source/blender/gpu/GPU_shader_shared_utils.h                           |   29
-rw-r--r--  source/blender/gpu/intern/gpu_context.cc                               |    4
-rw-r--r--  source/blender/gpu/intern/gpu_context_private.hh                       |    8
-rw-r--r--  source/blender/gpu/intern/gpu_shader.cc                                |    3
-rw-r--r--  source/blender/gpu/intern/gpu_shader_create_info.hh                    |   52
-rw-r--r--  source/blender/gpu/metal/kernels/compute_texture_read.msl              |    2
-rw-r--r--  source/blender/gpu/metal/kernels/compute_texture_update.msl            |   16
-rw-r--r--  source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl       |    5
-rw-r--r--  source/blender/gpu/metal/kernels/depth_2d_update_info.hh               |   35
-rw-r--r--  source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl       |    4
-rw-r--r--  source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl       |    5
-rw-r--r--  source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl             |    6
-rw-r--r--  source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl  |    5
-rw-r--r--  source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh    |   23
-rw-r--r--  source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl  |    8
-rw-r--r--  source/blender/gpu/metal/mtl_backend.mm                                |   11
-rw-r--r--  source/blender/gpu/metal/mtl_capabilities.hh                           |    2
-rw-r--r--  source/blender/gpu/metal/mtl_common.hh                                 |    2
-rw-r--r--  source/blender/gpu/metal/mtl_context.hh                                |   16
-rw-r--r--  source/blender/gpu/metal/mtl_context.mm                                |  160
-rw-r--r--  source/blender/gpu/metal/mtl_memory.mm                                 |   17
-rw-r--r--  source/blender/gpu/metal/mtl_primitive.hh                              |  100
-rw-r--r--  source/blender/gpu/metal/mtl_pso_descriptor_state.hh                   |  250
-rw-r--r--  source/blender/gpu/metal/mtl_shader.hh                                 | 1164
-rw-r--r--  source/blender/gpu/metal/mtl_shader.mm                                 | 1263
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.hh                       |  724
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.mm                       | 2976
-rw-r--r--  source/blender/gpu/metal/mtl_shader_interface.hh                       |  267
-rw-r--r--  source/blender/gpu/metal/mtl_shader_interface.mm                       |  604
-rw-r--r--  source/blender/gpu/metal/mtl_shader_interface_type.hh                  |  251
-rw-r--r--  source/blender/gpu/metal/mtl_shader_shared.h                           |   32
-rw-r--r--  source/blender/gpu/metal/mtl_state.hh                                  |   14
-rw-r--r--  source/blender/gpu/metal/mtl_state.mm                                  |    1
-rw-r--r--  source/blender/gpu/metal/mtl_texture.hh                                |   30
-rw-r--r--  source/blender/gpu/metal/mtl_texture.mm                                |   22
-rw-r--r--  source/blender/gpu/metal/mtl_texture_util.mm                           |  108
-rw-r--r--  source/blender/gpu/opengl/gl_backend.cc                                |    1
-rw-r--r--  source/blender/gpu/shaders/metal/mtl_shader_common.msl                 |  109
-rw-r--r--  source/blender/gpu/shaders/metal/mtl_shader_defines.msl                | 1065
-rw-r--r--  source/blender/python/gpu/gpu_py_shader_create_info.cc                 |   15
42 files changed, 9254 insertions(+), 215 deletions(-)
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 6758b4b8794..979bfc63572 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -194,6 +194,9 @@ set(METAL_SRC
metal/mtl_index_buffer.mm
metal/mtl_memory.mm
metal/mtl_query.mm
+ metal/mtl_shader.mm
+ metal/mtl_shader_generator.mm
+ metal/mtl_shader_interface.mm
metal/mtl_state.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
@@ -207,7 +210,14 @@ set(METAL_SRC
metal/mtl_framebuffer.hh
metal/mtl_index_buffer.hh
metal/mtl_memory.hh
+ metal/mtl_pso_descriptor_state.hh
+ metal/mtl_primitive.hh
metal/mtl_query.hh
+ metal/mtl_shader.hh
+ metal/mtl_shader_generator.hh
+ metal/mtl_shader_interface_type.hh
+ metal/mtl_shader_interface.hh
+ metal/mtl_shader_shared.h
metal/mtl_state.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
@@ -227,6 +237,9 @@ set(LIB
)
set(MSL_SRC
+ shaders/metal/mtl_shader_defines.msl
+ shaders/metal/mtl_shader_common.msl
+
metal/kernels/compute_texture_update.msl
metal/kernels/compute_texture_read.msl
metal/kernels/depth_2d_update_float_frag.glsl
@@ -458,21 +471,44 @@ set(GLSL_SRC
GPU_shader_shared_utils.h
)
-set(GLSL_C)
-foreach(GLSL_FILE ${GLSL_SRC})
- data_to_c_simple(${GLSL_FILE} GLSL_C)
-endforeach()
+set(MTL_BACKEND_GLSL_SRC
+ metal/kernels/compute_texture_update.msl
+ metal/kernels/compute_texture_read.msl
+ metal/kernels/depth_2d_update_float_frag.glsl
+ metal/kernels/depth_2d_update_int24_frag.glsl
+ metal/kernels/depth_2d_update_int32_frag.glsl
+ metal/kernels/depth_2d_update_vert.glsl
+ metal/kernels/gpu_shader_fullscreen_blit_vert.glsl
+ metal/kernels/gpu_shader_fullscreen_blit_frag.glsl
+)
+set(MSL_SRC
+ shaders/metal/mtl_shader_defines.msl
+ shaders/metal/mtl_shader_common.msl
+ metal/mtl_shader_shared.h
+)
if(WITH_METAL_BACKEND)
+ list(APPEND GLSL_SRC ${MTL_BACKEND_GLSL_SRC})
+
set(MSL_C)
foreach(MSL_FILE ${MSL_SRC})
data_to_c_simple(${MSL_FILE} MSL_C)
endforeach()
- list(APPEND GLSL_C ${MSL_C})
endif()
-blender_add_lib(bf_gpu_shaders "${GLSL_C}" "" "" "")
+set(GLSL_C)
+foreach(GLSL_FILE ${GLSL_SRC})
+ data_to_c_simple(${GLSL_FILE} GLSL_C)
+endforeach()
+
+set(SHADER_C)
+list(APPEND SHADER_C ${GLSL_C})
+if(WITH_METAL_BACKEND)
+ list(APPEND SHADER_C ${MSL_C})
+endif()
+
+blender_add_lib(bf_gpu_shaders "${SHADER_C}" "" "" "")
list(APPEND LIB
bf_gpu_shaders
@@ -587,6 +623,16 @@ set(SRC_SHADER_CREATE_INFOS
shaders/compositor/infos/compositor_split_viewer_info.hh
)
+set(SRC_SHADER_CREATE_INFOS_MTL
+ metal/kernels/depth_2d_update_info.hh
+ metal/kernels/gpu_shader_fullscreen_blit_info.hh
+)
+
+if(WITH_METAL_BACKEND)
+ list(APPEND SRC_SHADER_CREATE_INFOS ${SRC_SHADER_CREATE_INFOS_MTL})
+endif()
+
+
set(SHADER_CREATE_INFOS_CONTENT "")
foreach(DESCRIPTOR_FILE ${SRC_SHADER_CREATE_INFOS})
string(APPEND SHADER_CREATE_INFOS_CONTENT "#include \"${DESCRIPTOR_FILE}\"\n")
@@ -629,6 +675,7 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER)
if(APPLE)
add_executable(shader_builder
intern/gpu_shader_builder.cc
+ intern/gpu_shader_builder_stubs.cc
${shader_create_info_list_file}
)
diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h
index 61c60f336e1..91cf14dc792 100644
--- a/source/blender/gpu/GPU_capabilities.h
+++ b/source/blender/gpu/GPU_capabilities.h
@@ -30,6 +30,7 @@ int GPU_max_batch_indices(void);
int GPU_max_batch_vertices(void);
int GPU_max_vertex_attribs(void);
int GPU_max_varying_floats(void);
+int GPU_max_samplers(void);
int GPU_max_shader_storage_buffer_bindings(void);
int GPU_max_compute_shader_storage_blocks(void);
int GPU_max_samplers(void);
diff --git a/source/blender/gpu/GPU_shader_shared_utils.h b/source/blender/gpu/GPU_shader_shared_utils.h
index 88bdad2bf76..1cfc4f8af31 100644
--- a/source/blender/gpu/GPU_shader_shared_utils.h
+++ b/source/blender/gpu/GPU_shader_shared_utils.h
@@ -43,20 +43,23 @@
# define sqrtf sqrt
# define expf exp
-# define float2 vec2
-# define float3 vec3
-# define float4 vec4
-# define float4x4 mat4
-# define int2 ivec2
-# define int3 ivec3
-# define int4 ivec4
-# define uint2 uvec2
-# define uint3 uvec3
-# define uint4 uvec4
# define bool1 bool
-# define bool2 bvec2
-# define bool3 bvec3
-# define bool4 bvec4
+/* Type name collision with Metal shading language - These typenames are already defined. */
+# ifndef GPU_METAL
+# define float2 vec2
+# define float3 vec3
+# define float4 vec4
+# define float4x4 mat4
+# define int2 ivec2
+# define int3 ivec3
+# define int4 ivec4
+# define uint2 uvec2
+# define uint3 uvec3
+# define uint4 uvec4
+# define bool2 bvec2
+# define bool3 bvec3
+# define bool4 bvec4
+# endif
#else /* C / C++ */
# pragma once
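The `GPU_METAL` guard above is needed because MSL already defines `float2`, `int4`, etc. natively; with the guard, a single shared header can be included from C++, GLSL and MSL alike. A hypothetical shared struct illustrating this (the struct itself is not part of the commit):

    /* Hypothetical example: compiles as C++, GLSL and MSL. Under GPU_METAL
     * the aliases above are skipped and MSL's built-in float4/int2 are used. */
    struct ExampleUniforms {
      float4 color;
      int2 offset;
      bool1 flag;
      int _pad;
    };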
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index e29b0d5801d..bcc418169b7 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -56,11 +56,15 @@ static void gpu_backend_discard();
namespace blender::gpu {
+int Context::context_counter = 0;
Context::Context()
{
thread_ = pthread_self();
is_active_ = false;
matrix_state = GPU_matrix_state_create();
+
+ context_id = Context::context_counter;
+ Context::context_counter++;
}
Context::~Context()
diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh
index f823a92893c..2217e5262ed 100644
--- a/source/blender/gpu/intern/gpu_context_private.hh
+++ b/source/blender/gpu/intern/gpu_context_private.hh
@@ -48,6 +48,14 @@ class Context {
DebugStack debug_stack;
+ /* GPUContext counter used to assign a unique ID to each GPUContext.
+ * NOTE(Metal): This is required by the Metal Backend, as a bug exists in the global OS shader
+ * cache wherein compilation of identical source from two distinct threads can result in an
+   * invalid cache collision, resulting in a broken shader object. Appending the unique context ID
+ * onto compiled sources ensures the source hashes are different. */
+ static int context_counter;
+ int context_id = 0;
+
protected:
/** Thread on which this context is active. */
pthread_t thread_;
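The comment above describes the Metal workaround this counter enables: salting compiled sources with the context ID so identical GLSL compiled from two threads never collides in the OS shader cache. A hypothetical sketch of that salting step (the helper name is illustrative, not from this commit):

    /* Illustrative only: append the unique context ID so the source hash
     * differs per context, side-stepping the OS shader-cache bug. */
    static std::string salt_source_with_context_id(const std::string &src, int context_id)
    {
      return src + "\n/* context_id: " + std::to_string(context_id) + " */\n";
    }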
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index 2d1b3dc2dca..4d059ae495e 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -95,6 +95,9 @@ static void standard_defines(Vector<const char *> &sources)
case GPU_BACKEND_OPENGL:
sources.append("#define GPU_OPENGL\n");
break;
+ case GPU_BACKEND_METAL:
+ sources.append("#define GPU_METAL\n");
+ break;
default:
BLI_assert(false && "Invalid GPU Backend Type");
break;
diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh
index 8236e669288..3884c067c83 100644
--- a/source/blender/gpu/intern/gpu_shader_create_info.hh
+++ b/source/blender/gpu/intern/gpu_shader_create_info.hh
@@ -32,6 +32,7 @@ namespace blender::gpu::shader {
#endif
enum class Type {
+ /* Types supported natively across all GPU backends. */
FLOAT = 0,
VEC2,
VEC3,
@@ -47,6 +48,21 @@ enum class Type {
IVEC3,
IVEC4,
BOOL,
+ /* Additionally supported types to enable data optimisation and native
+ * support in some GPUBackends.
+ * NOTE: These types must be representable in all APIs. E.g. VEC3_101010I2 is aliased as vec3 in
+ * the GL backend, as implicit type conversions from packed normal attribute data to vec3 is
+ * supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid
+ * additional data conversions for GPU_COMP_U8 vertex attributes. */
+ VEC3_101010I2,
+ UCHAR,
+ UCHAR2,
+ UCHAR3,
+ UCHAR4,
+ CHAR,
+ CHAR2,
+ CHAR3,
+ CHAR4
};
 /* All of these functions are a bit out of place. */
@@ -86,6 +102,40 @@ static inline std::ostream &operator<<(std::ostream &stream, const Type type)
return stream << "mat3";
case Type::MAT4:
return stream << "mat4";
+ case Type::VEC3_101010I2:
+ return stream << "vec3_1010102_Inorm";
+ case Type::UCHAR:
+ return stream << "uchar";
+ case Type::UCHAR2:
+ return stream << "uchar2";
+ case Type::UCHAR3:
+ return stream << "uchar3";
+ case Type::UCHAR4:
+ return stream << "uchar4";
+ case Type::CHAR:
+ return stream << "char";
+ case Type::CHAR2:
+ return stream << "char2";
+ case Type::CHAR3:
+ return stream << "char3";
+ case Type::CHAR4:
+ return stream << "char4";
+ case Type::INT:
+ return stream << "int";
+ case Type::IVEC2:
+ return stream << "ivec2";
+ case Type::IVEC3:
+ return stream << "ivec3";
+ case Type::IVEC4:
+ return stream << "ivec4";
+ case Type::UINT:
+ return stream << "uint";
+ case Type::UVEC2:
+ return stream << "uvec2";
+ case Type::UVEC3:
+ return stream << "uvec3";
+ case Type::UVEC4:
+ return stream << "uvec4";
default:
BLI_assert(0);
return stream;
@@ -228,6 +278,8 @@ enum class PrimitiveOut {
POINTS = 0,
LINE_STRIP,
TRIANGLE_STRIP,
+ LINES,
+ TRIANGLES,
};
struct StageInterfaceInfo {
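The extended `operator<<` above is what backend code generators use to print a `Type` as its GLSL/MSL typename when emitting declarations. For example (assuming `<sstream>` and the `blender::gpu::shader` namespace):

    std::stringstream ss;
    ss << Type::UCHAR4 << " color;";  /* Emits "uchar4 color;". */
    ss << Type::VEC4 << " pos;";      /* Emits "vec4 pos;". */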
diff --git a/source/blender/gpu/metal/kernels/compute_texture_read.msl b/source/blender/gpu/metal/kernels/compute_texture_read.msl
index 4bfb48567f9..7b0760d7620 100644
--- a/source/blender/gpu/metal/kernels/compute_texture_read.msl
+++ b/source/blender/gpu/metal/kernels/compute_texture_read.msl
@@ -74,7 +74,7 @@ template<> uchar convert_type<uchar>(float val)
template<> uint convert_type<uint>(float val)
{
- return uint(val * double(0xFFFFFFFFu));
+ return uint(val * float(0xFFFFFFFFu));
}
struct TextureReadParams {
diff --git a/source/blender/gpu/metal/kernels/compute_texture_update.msl b/source/blender/gpu/metal/kernels/compute_texture_update.msl
index 095c495ac54..43c746e0afa 100644
--- a/source/blender/gpu/metal/kernels/compute_texture_update.msl
+++ b/source/blender/gpu/metal/kernels/compute_texture_update.msl
@@ -38,22 +38,6 @@ using namespace metal;
# define POSITION_TYPE uint3
#endif
-float3 mtl_linear_to_srgb_attr(float3 c)
-{
- c = max(c, float3(0.0));
- float3 c1 = c * 12.92;
- float3 c2 = 1.055 * pow(c, float3(1.0 / 2.4)) - 0.055;
- return mix(c1, c2, step(float3(0.0031308), c));
-}
-
-float3 mtl_srgb_to_linear_attr(float3 c)
-{
- c = max(c, float3(0.0));
- float3 c1 = c * (1.0 / 12.92);
- float3 c2 = pow((c + 0.055) * (1.0 / 1.055), float3(2.4));
- return mix(c1, c2, step(float3(0.04045), c));
-}
-
struct TextureUpdateParams {
int mip_index;
int extent[3];
diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl
index 9fd54f3f31f..374aedff90d 100644
--- a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl
+++ b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl
@@ -1,9 +1,4 @@
-uniform sampler2D source_data;
-uniform int mip;
-
-in vec2 texCoord_interp;
-
void main()
{
gl_FragDepth = textureLod(source_data, texCoord_interp, mip).r;
diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_info.hh b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh
new file mode 100644
index 00000000000..0a3281a98f2
--- /dev/null
+++ b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_INTERFACE_INFO(depth_2d_update_iface, "").smooth(Type::VEC2, "texCoord_interp");
+
+GPU_SHADER_CREATE_INFO(depth_2d_update_info_base)
+ .vertex_in(0, Type::VEC2, "pos")
+ .vertex_out(depth_2d_update_iface)
+ .fragment_out(0, Type::VEC4, "fragColor")
+ .push_constant(Type::VEC2, "extent")
+ .push_constant(Type::VEC2, "offset")
+ .push_constant(Type::VEC2, "size")
+ .push_constant(Type::INT, "mip")
+ .sampler(0, ImageType::FLOAT_2D, "source_data", Frequency::PASS)
+ .vertex_source("depth_2d_update_vert.glsl");
+
+GPU_SHADER_CREATE_INFO(depth_2d_update_float)
+ .fragment_source("depth_2d_update_float_frag.glsl")
+ .additional_info("depth_2d_update_info_base")
+ .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(depth_2d_update_int24)
+ .fragment_source("depth_2d_update_int24_frag.glsl")
+ .additional_info("depth_2d_update_info_base")
+ .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(depth_2d_update_int32)
+ .fragment_source("depth_2d_update_int32_frag.glsl")
+ .additional_info("depth_2d_update_info_base")
+ .do_static_compilation(true);
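The `additional_info("depth_2d_update_info_base")` calls merge the base declaration (vertex input, interface, push constants, sampler) into each variant, so only the fragment source differs. At runtime the three statically compiled variants can presumably be fetched by name, e.g.:

    /* Sketch: fetching the static-compilation variants declared above. */
    GPUShader *sh_f32 = GPU_shader_create_from_info_name("depth_2d_update_float");
    GPUShader *sh_i24 = GPU_shader_create_from_info_name("depth_2d_update_int24");
    GPUShader *sh_i32 = GPU_shader_create_from_info_name("depth_2d_update_int32");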
diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl
index 7483343503f..a4d9e35d491 100644
--- a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl
+++ b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl
@@ -1,8 +1,4 @@
-uniform isampler2D source_data;
-uniform int mip;
-
-in vec2 texCoord_interp;
void main()
{
diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl
index 75d42c57f73..421c25a2e5c 100644
--- a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl
+++ b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl
@@ -1,9 +1,4 @@
-uniform isampler2D source_data;
-uniform int mip;
-
-in vec2 texCoord_interp;
-
void main()
{
uint val = textureLod(source_data, texCoord_interp, mip).r;
diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl
index faae68d2f55..def0c1ae9de 100644
--- a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl
+++ b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl
@@ -1,10 +1,4 @@
-uniform vec2 extent;
-uniform vec2 offset;
-uniform vec2 size;
-out vec2 texCoord_interp;
-in vec2 pos;
-
void main()
{
vec4 rect = vec4(offset.x, offset.y, offset.x + extent.x, offset.y + extent.y);
diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl
index b1353478593..8c81c5c0d83 100644
--- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl
+++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl
@@ -1,10 +1,5 @@
-in vec4 uvcoordsvar;
-uniform sampler2D imageTexture;
-uniform int mip;
-out vec4 fragColor;
-
void main()
{
vec4 tex_color = textureLod(imageTexture, uvcoordsvar.xy, mip);
diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh
new file mode 100644
index 00000000000..6af67ad44d2
--- /dev/null
+++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_INTERFACE_INFO(fullscreen_blit_iface, "").smooth(Type::VEC4, "uvcoordsvar");
+
+GPU_SHADER_CREATE_INFO(fullscreen_blit)
+ .vertex_in(0, Type::VEC2, "pos")
+ .vertex_out(fullscreen_blit_iface)
+ .fragment_out(0, Type::VEC4, "fragColor")
+ .push_constant(Type::VEC2, "fullscreen")
+ .push_constant(Type::VEC2, "size")
+ .push_constant(Type::VEC2, "dst_offset")
+ .push_constant(Type::VEC2, "src_offset")
+ .push_constant(Type::INT, "mip")
+ .sampler(0, ImageType::FLOAT_2D, "imageTexture", Frequency::PASS)
+ .vertex_source("gpu_shader_fullscreen_blit_vert.glsl")
+ .fragment_source("gpu_shader_fullscreen_blit_frag.glsl")
+ .do_static_compilation(true); \ No newline at end of file
diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl
index 8e52868f67d..5d5a0e2ab5f 100644
--- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl
+++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl
@@ -1,12 +1,4 @@
-out vec4 uvcoordsvar;
-
-in vec2 pos;
-uniform vec2 fullscreen;
-uniform vec2 size;
-uniform vec2 dst_offset;
-uniform vec2 src_offset;
-
void main()
{
/* The position represents a 0-1 square, we first scale it by the size we want to have it on
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index 361b2ca05f5..3cd7794f6c9 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -12,6 +12,7 @@
#include "mtl_framebuffer.hh"
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
+#include "mtl_shader.hh"
#include "mtl_uniform_buffer.hh"
#include "gpu_capabilities_private.hh"
@@ -71,8 +72,8 @@ QueryPool *MTLBackend::querypool_alloc()
Shader *MTLBackend::shader_alloc(const char *name)
{
- /* TODO(Metal): Implement MTLShader. */
- return nullptr;
+ MTLContext *mtl_context = MTLContext::get();
+ return new MTLShader(mtl_context, name);
};
Texture *MTLBackend::texture_alloc(const char *name)
@@ -168,7 +169,7 @@ void MTLBackend::platform_init(MTLContext *ctx)
eGPUSupportLevel support_level = GPU_SUPPORT_LEVEL_SUPPORTED;
BLI_assert(ctx);
- id<MTLDevice> mtl_device = nil; /*ctx->device; TODO(Metal): Implement MTLContext. */
+ id<MTLDevice> mtl_device = ctx->device;
  BLI_assert(mtl_device);
NSString *gpu_name = [mtl_device name];
@@ -187,7 +188,7 @@ void MTLBackend::platform_init(MTLContext *ctx)
os = GPU_OS_UNIX;
#endif
- BLI_assert(os == GPU_OS_MAC && "Platform must be macOS");
+ BLI_assert_msg(os == GPU_OS_MAC, "Platform must be macOS");
/* Determine Vendor from name. */
if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) {
@@ -334,7 +335,7 @@ bool MTLBackend::metal_is_supported()
void MTLBackend::capabilities_init(MTLContext *ctx)
{
BLI_assert(ctx);
- id<MTLDevice> device = nil; /*ctx->device TODO(Metal): Implement MTLContext. */
+ id<MTLDevice> device = ctx->device;
BLI_assert(device);
/* Initialize Capabilities. */
diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh
index d56f796e60f..5e34d5352f1 100644
--- a/source/blender/gpu/metal/mtl_capabilities.hh
+++ b/source/blender/gpu/metal/mtl_capabilities.hh
@@ -14,6 +14,8 @@ namespace gpu {
#define MTL_MAX_TEXTURE_SLOTS 128
#define MTL_MAX_SAMPLER_SLOTS MTL_MAX_TEXTURE_SLOTS
+/* Max limit without using bindless for samplers. */
+#define MTL_MAX_DEFAULT_SAMPLERS 16
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 31
#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31
#define MTL_MAX_UNIFORMS_PER_BLOCK 64
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index 44ba786f90f..b6f9c0050a9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -13,4 +13,6 @@
* Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */
#define MTL_NUM_SAFE_FRAMES (MTL_MAX_DRAWABLES + 1)
+/* Display debug information about missing attributes and incorrect vertex formats. */
+#define MTL_DEBUG_SHADER_ATTRIBUTES 0
#endif
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index d542f0e1025..ccc648eab2a 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -17,6 +17,8 @@
#include "mtl_common.hh"
#include "mtl_framebuffer.hh"
#include "mtl_memory.hh"
+#include "mtl_shader.hh"
+#include "mtl_shader_interface.hh"
#include "mtl_texture.hh"
#include <Cocoa/Cocoa.h>
@@ -32,7 +34,6 @@ namespace blender::gpu {
/* Forward Declarations */
class MTLContext;
class MTLCommandBufferManager;
-class MTLShader;
class MTLUniformBuf;
/* Structs containing information on current binding state for textures and samplers. */
@@ -40,7 +41,7 @@ struct MTLTextureBinding {
bool used;
/* Same value as index in bindings array. */
- uint texture_slot_index;
+ uint slot_index;
gpu::MTLTexture *texture_resource;
};
@@ -56,9 +57,10 @@ struct MTLSamplerBinding {
/* Metal Context Render Pass State -- Used to track active RenderCommandEncoder state based on
* bound MTLFrameBuffer's.Owned by MTLContext. */
-struct MTLRenderPassState {
+class MTLRenderPassState {
friend class MTLContext;
+ public:
MTLRenderPassState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager)
: ctx(context), cmd(command_buffer_manager){};
@@ -570,6 +572,11 @@ class MTLContext : public Context {
friend class MTLBackend;
private:
+  /* Null buffers for empty/uninitialized bindings.
+   * Null attribute buffer follows default attribute format of OpenGL Backend. */
+  id<MTLBuffer> null_buffer_;           /* All zeros. */
+  id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0, 0.0, 0.0, 1.0). */
+
/* Compute and specialization caches. */
MTLContextTextureUtils texture_utils_;
@@ -713,6 +720,9 @@ class MTLContext : public Context {
{
return MTLContext::global_memory_manager;
}
+ /* Uniform Buffer Bindings to command encoders. */
+ id<MTLBuffer> get_null_buffer();
+ id<MTLBuffer> get_null_attribute_buffer();
};
} // namespace blender::gpu
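The null buffers declared above give every unbound slot something valid to read from. A hedged Objective-C++ sketch of how the attribute variant would be bound during draw setup (`active_encoder` and `null_slot` are illustrative names, not from the commit):

    /* Bind the shared null attribute buffer for a vertex attribute with no
     * VBO, so fetches return float4(0, 0, 0, 1) rather than unbound memory. */
    id<MTLBuffer> null_attr_buf = ctx->get_null_attribute_buffer();
    [active_encoder setVertexBuffer:null_attr_buf offset:0 atIndex:null_slot];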
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 26cfe6632ef..f14236bcb58 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,6 +5,8 @@
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
+#include "mtl_shader.hh"
+#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "DNA_userdef_types.h"
@@ -29,19 +31,33 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
/* Init debug. */
debug::mtl_debug_init();
+ /* Device creation.
+ * TODO(Metal): This is a temporary initialisation path to enable testing of features
+ * and shader compilation tests. Future functionality should fetch the existing device
+ * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
+ this->device = MTLCreateSystemDefaultDevice();
+
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
+ /* Initialise imm and pipeline state */
+ this->pipeline_state.initialised = false;
+
/* Frame management. */
is_inside_frame_ = false;
current_frame_index_ = 0;
+ /* Prepare null data buffer */
+ null_buffer_ = nil;
+ null_attribute_buffer_ = nil;
+
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
this->front_left = mtl_front_left;
this->back_left = mtl_back_left;
this->active_fb = this->back_left;
+
/* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX
* initialization). */
MTLBackend::platform_init(this);
@@ -93,6 +109,12 @@ MTLContext::~MTLContext()
sampler_state_cache_[i] = nil;
}
}
+ if (null_buffer_) {
+ [null_buffer_ release];
+ }
+ if (null_attribute_buffer_) {
+ [null_attribute_buffer_ release];
+ }
}
void MTLContext::begin_frame()
@@ -227,6 +249,50 @@ MTLFrameBuffer *MTLContext::get_default_framebuffer()
return static_cast<MTLFrameBuffer *>(this->back_left);
}
+MTLShader *MTLContext::get_active_shader()
+{
+ return this->pipeline_state.active_shader;
+}
+
+id<MTLBuffer> MTLContext::get_null_buffer()
+{
+ if (null_buffer_ != nil) {
+ return null_buffer_;
+ }
+
+ static const int null_buffer_size = 4096;
+ null_buffer_ = [this->device newBufferWithLength:null_buffer_size
+ options:MTLResourceStorageModeManaged];
+ [null_buffer_ retain];
+  uint32_t *null_data = (uint32_t *)calloc(1, null_buffer_size);
+ memcpy([null_buffer_ contents], null_data, null_buffer_size);
+ [null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)];
+ free(null_data);
+
+ BLI_assert(null_buffer_ != nil);
+ return null_buffer_;
+}
+
+id<MTLBuffer> MTLContext::get_null_attribute_buffer()
+{
+ if (null_attribute_buffer_ != nil) {
+ return null_attribute_buffer_;
+ }
+
+ /* Allocate Null buffer if it has not yet been created.
+ * Min buffer size is 256 bytes -- though we only need 64 bytes of data. */
+ static const int null_buffer_size = 256;
+ null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size
+ options:MTLResourceStorageModeManaged];
+ BLI_assert(null_attribute_buffer_ != nil);
+ [null_attribute_buffer_ retain];
+ float data[4] = {0.0f, 0.0f, 0.0f, 1.0f};
+ memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4);
+ [null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)];
+
+ return null_attribute_buffer_;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -239,20 +305,20 @@ void MTLContext::pipeline_state_init()
/*** Initialize state only once. ***/
if (!this->pipeline_state.initialised) {
this->pipeline_state.initialised = true;
- this->pipeline_state.active_shader = NULL;
+ this->pipeline_state.active_shader = nullptr;
/* Clear bindings state. */
for (int t = 0; t < GPU_max_textures(); t++) {
this->pipeline_state.texture_bindings[t].used = false;
- this->pipeline_state.texture_bindings[t].texture_slot_index = t;
- this->pipeline_state.texture_bindings[t].texture_resource = NULL;
+ this->pipeline_state.texture_bindings[t].slot_index = -1;
+ this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
}
for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) {
this->pipeline_state.sampler_bindings[s].used = false;
}
for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) {
this->pipeline_state.ubo_bindings[u].bound = false;
- this->pipeline_state.ubo_bindings[u].ubo = NULL;
+ this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
}
@@ -487,52 +553,46 @@ id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_s
id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state)
{
/* Check if sampler already exists for given state. */
- id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state];
- if (st != nil) {
- return st;
- }
- else {
- MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init];
- descriptor.normalizedCoordinates = true;
-
- MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ?
- MTLSamplerAddressModeClampToBorderColor :
- MTLSamplerAddressModeClampToEdge;
- descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ?
- MTLSamplerAddressModeRepeat :
- clamp_type;
- descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ?
- MTLSamplerAddressModeRepeat :
- clamp_type;
- descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ?
- MTLSamplerAddressModeRepeat :
- clamp_type;
- descriptor.borderColor = MTLSamplerBorderColorTransparentBlack;
- descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ?
- MTLSamplerMinMagFilterLinear :
- MTLSamplerMinMagFilterNearest;
- descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ?
- MTLSamplerMinMagFilterLinear :
- MTLSamplerMinMagFilterNearest;
- descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ?
- MTLSamplerMipFilterLinear :
- MTLSamplerMipFilterNotMipmapped;
- descriptor.lodMinClamp = -1000;
- descriptor.lodMaxClamp = 1000;
- float aniso_filter = max_ff(16, U.anisotropic_filter);
- descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1;
- descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ?
- MTLCompareFunctionLessEqual :
- MTLCompareFunctionAlways;
- descriptor.supportArgumentBuffers = true;
-
- id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor];
- sampler_state_cache_[(uint)sampler_state] = state;
-
- BLI_assert(state != nil);
- [descriptor autorelease];
- return state;
- }
+ MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init];
+ descriptor.normalizedCoordinates = true;
+
+ MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ?
+ MTLSamplerAddressModeClampToBorderColor :
+ MTLSamplerAddressModeClampToEdge;
+ descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ?
+ MTLSamplerAddressModeRepeat :
+ clamp_type;
+ descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ?
+ MTLSamplerAddressModeRepeat :
+ clamp_type;
+ descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ?
+ MTLSamplerAddressModeRepeat :
+ clamp_type;
+ descriptor.borderColor = MTLSamplerBorderColorTransparentBlack;
+ descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ?
+ MTLSamplerMinMagFilterLinear :
+ MTLSamplerMinMagFilterNearest;
+ descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ?
+ MTLSamplerMinMagFilterLinear :
+ MTLSamplerMinMagFilterNearest;
+ descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ?
+ MTLSamplerMipFilterLinear :
+ MTLSamplerMipFilterNotMipmapped;
+ descriptor.lodMinClamp = -1000;
+ descriptor.lodMaxClamp = 1000;
+ float aniso_filter = max_ff(16, U.anisotropic_filter);
+ descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1;
+ descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ?
+ MTLCompareFunctionLessEqual :
+ MTLCompareFunctionAlways;
+ descriptor.supportArgumentBuffers = true;
+
+ id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor];
+ sampler_state_cache_[(uint)sampler_state] = state;
+
+ BLI_assert(state != nil);
+ [descriptor autorelease];
+ return state;
}
id<MTLSamplerState> MTLContext::get_default_sampler_state()
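With the early-out removed from `generate_sampler_from_state()`, the cache lookup presumably lives in `get_sampler_from_state()` (its body is not shown in this hunk); a sketch of the implied split:

    /* Assumed caller-side lookup; generate_sampler_from_state() now only
     * ever builds and caches a new MTLSamplerState. */
    id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_state)
    {
      id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state];
      return (st != nil) ? st : this->generate_sampler_from_state(sampler_state);
    }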
diff --git a/source/blender/gpu/metal/mtl_memory.mm b/source/blender/gpu/metal/mtl_memory.mm
index 07da489bdbb..788736bdfad 100644
--- a/source/blender/gpu/metal/mtl_memory.mm
+++ b/source/blender/gpu/metal/mtl_memory.mm
@@ -73,7 +73,9 @@ gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
}
-gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
+gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,
+ uint32_t alignment,
+ bool cpu_visible)
{
/* Check not required. Main GPU module usage considered thread-safe. */
// BLI_assert(BLI_thread_is_main());
@@ -167,7 +169,7 @@ gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, b
}
gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
- uint alignment,
+ uint32_t alignment,
bool cpu_visible,
const void *data)
{
@@ -548,9 +550,10 @@ void gpu::MTLBuffer::set_label(NSString *str)
void gpu::MTLBuffer::debug_ensure_used()
{
/* Debug: If buffer is not flagged as in-use, this is a problem. */
- BLI_assert(in_use_ &&
- "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
- "has likely already been freed.");
+ BLI_assert_msg(
+ in_use_,
+ "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
+ "has likely already been freed.");
}
void gpu::MTLBuffer::flush()
@@ -665,9 +668,9 @@ MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligne
/* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */
alignment = max_uu(alignment, 256);
- BLI_assert(current_scratch_buffer_ >= 0 && "Scratch Buffer index not set");
+ BLI_assert_msg(current_scratch_buffer_ >= 0, "Scratch Buffer index not set");
MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
- BLI_assert(current_scratch_buff != nullptr && "Scratch Buffer does not exist");
+ BLI_assert_msg(current_scratch_buff != nullptr, "Scratch Buffer does not exist");
MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size,
alignment);
BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment);
diff --git a/source/blender/gpu/metal/mtl_primitive.hh b/source/blender/gpu/metal/mtl_primitive.hh
new file mode 100644
index 00000000000..5aa7a533b95
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_primitive.hh
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Utilities for mapping GPU primitive types to Metal primitive types and topology classes.
+ */
+
+#pragma once
+
+#include "BLI_assert.h"
+
+#include "GPU_primitive.h"
+
+#include <Metal/Metal.h>
+
+namespace blender::gpu {
+
+/** Utility functions **/
+static inline MTLPrimitiveTopologyClass mtl_prim_type_to_topology_class(MTLPrimitiveType prim_type)
+{
+ switch (prim_type) {
+ case MTLPrimitiveTypePoint:
+ return MTLPrimitiveTopologyClassPoint;
+ case MTLPrimitiveTypeLine:
+ case MTLPrimitiveTypeLineStrip:
+ return MTLPrimitiveTopologyClassLine;
+ case MTLPrimitiveTypeTriangle:
+ case MTLPrimitiveTypeTriangleStrip:
+ return MTLPrimitiveTopologyClassTriangle;
+ }
+ return MTLPrimitiveTopologyClassUnspecified;
+}
+
+static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type)
+{
+ switch (prim_type) {
+ case GPU_PRIM_POINTS:
+ return MTLPrimitiveTypePoint;
+ case GPU_PRIM_LINES:
+ case GPU_PRIM_LINES_ADJ:
+ case GPU_PRIM_LINE_LOOP:
+ return MTLPrimitiveTypeLine;
+ case GPU_PRIM_LINE_STRIP:
+ case GPU_PRIM_LINE_STRIP_ADJ:
+ return MTLPrimitiveTypeLineStrip;
+ case GPU_PRIM_TRIS:
+ case GPU_PRIM_TRI_FAN:
+ case GPU_PRIM_TRIS_ADJ:
+ return MTLPrimitiveTypeTriangle;
+ case GPU_PRIM_TRI_STRIP:
+ return MTLPrimitiveTypeTriangleStrip;
+ case GPU_PRIM_NONE:
+ return MTLPrimitiveTypePoint;
+ };
+}
+
+/* Certain primitive types are not supported in Metal, and require emulation.
+ * `GPU_PRIM_LINE_LOOP` and `GPU_PRIM_TRI_FAN` required index buffer patching.
+ * Adjacency types do not need emulation as the input structure is the same,
+ * and access is controlled from the vertex shader through SSBO vertex fetch.
+ * -- These Adj cases are only used in geometry shaders in OpenGL. */
+static inline bool mtl_needs_topology_emulation(GPUPrimType prim_type)
+{
+
+ BLI_assert(prim_type != GPU_PRIM_NONE);
+ switch (prim_type) {
+ case GPU_PRIM_LINE_LOOP:
+ case GPU_PRIM_TRI_FAN:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+static inline bool mtl_vertex_count_fits_primitive_type(uint32_t vertex_count,
+ MTLPrimitiveType prim_type)
+{
+ if (vertex_count == 0) {
+ return false;
+ }
+
+ switch (prim_type) {
+ case MTLPrimitiveTypeLineStrip:
+ return (vertex_count > 1);
+ case MTLPrimitiveTypeLine:
+ return (vertex_count % 2 == 0);
+ case MTLPrimitiveTypePoint:
+ return (vertex_count > 0);
+ case MTLPrimitiveTypeTriangle:
+ return (vertex_count % 3 == 0);
+ case MTLPrimitiveTypeTriangleStrip:
+ return (vertex_count > 2);
+ }
+ BLI_assert(false);
+ return false;
+}
+
+} // namespace blender::gpu \ No newline at end of file
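Typical use of these helpers when encoding a draw, as a short sketch (`vertex_count` is illustrative):

    MTLPrimitiveType mtl_prim = gpu_prim_type_to_metal(GPU_PRIM_TRIS);
    BLI_assert(!mtl_needs_topology_emulation(GPU_PRIM_TRIS));
    if (mtl_vertex_count_fits_primitive_type(vertex_count, mtl_prim)) {
      /* Safe to encode: count is a non-zero multiple of 3 for triangles. */
    }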
diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
new file mode 100644
index 00000000000..010349eddbf
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+#pragma once
+
+#include "GPU_vertex_format.h"
+
+#include <Metal/Metal.h>
+
+namespace blender::gpu {
+
+/** Vertex attribute and buffer descriptor wrappers
+ * for use in PSO construction and caching. */
+struct MTLVertexAttributeDescriptorPSO {
+ MTLVertexFormat format;
+ int offset;
+ int buffer_index;
+ GPUVertFetchMode format_conversion_mode;
+
+ bool operator==(const MTLVertexAttributeDescriptorPSO &other) const
+ {
+ return (format == other.format) && (offset == other.offset) &&
+ (buffer_index == other.buffer_index) &&
+ (format_conversion_mode == other.format_conversion_mode);
+ }
+
+ uint64_t hash() const
+ {
+ return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^
+ (this->format_conversion_mode << 12));
+ }
+};
+
+struct MTLVertexBufferLayoutDescriptorPSO {
+ MTLVertexStepFunction step_function;
+ int step_rate;
+ int stride;
+
+ bool operator==(const MTLVertexBufferLayoutDescriptorPSO &other) const
+ {
+ return (step_function == other.step_function) && (step_rate == other.step_rate) &&
+ (stride == other.stride);
+ }
+
+ uint64_t hash() const
+ {
+ return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^
+ (this->stride << 8));
+ }
+};
+
+/* SSBO attribute state caching. */
+struct MTLSSBOAttribute {
+
+ int mtl_attribute_index;
+ int vbo_id;
+ int attribute_offset;
+ int per_vertex_stride;
+ int attribute_format;
+ bool is_instance;
+
+ MTLSSBOAttribute(){};
+ MTLSSBOAttribute(
+ int attribute_ind, int vertexbuffer_ind, int offset, int stride, int format, bool instanced)
+ : mtl_attribute_index(attribute_ind),
+ vbo_id(vertexbuffer_ind),
+ attribute_offset(offset),
+ per_vertex_stride(stride),
+ attribute_format(format),
+ is_instance(instanced)
+ {
+ }
+
+ bool operator==(const MTLSSBOAttribute &other) const
+ {
+ return (memcmp(this, &other, sizeof(MTLSSBOAttribute)) == 0);
+ }
+};
+
+struct MTLVertexDescriptor {
+
+ /* Core Vertex Attributes. */
+ MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN];
+ MTLVertexBufferLayoutDescriptorPSO
+ buffer_layouts[GPU_BATCH_VBO_MAX_LEN + GPU_BATCH_INST_VBO_MAX_LEN];
+ int num_attributes;
+ int num_vert_buffers;
+ MTLPrimitiveTopologyClass prim_topology_class;
+
+ /* WORKAROUND: SSBO Vertex-fetch attributes -- These follow the same structure
+ * but have slightly different binding rules, passed in via uniform
+ * push constant data block. */
+ bool uses_ssbo_vertex_fetch;
+ MTLSSBOAttribute ssbo_attributes[GPU_VERT_ATTR_MAX_LEN];
+ int num_ssbo_attributes;
+
+ bool operator==(const MTLVertexDescriptor &other) const
+ {
+ if ((this->num_attributes != other.num_attributes) ||
+ (this->num_vert_buffers != other.num_vert_buffers)) {
+ return false;
+ }
+ if (this->prim_topology_class != other.prim_topology_class) {
+ return false;
+ };
+
+ for (const int a : IndexRange(this->num_attributes)) {
+ if (!(this->attributes[a] == other.attributes[a])) {
+ return false;
+ }
+ }
+
+ for (const int b : IndexRange(this->num_vert_buffers)) {
+ if (!(this->buffer_layouts[b] == other.buffer_layouts[b])) {
+ return false;
+ }
+ }
+
+ /* NOTE: No need to compare SSBO attributes, as these will match attribute bindings for the
+ * given shader. These are simply extra pre-resolved properties we want to include in the
+ * cache. */
+ return true;
+ }
+
+ uint64_t hash() const
+ {
+ uint64_t hash = (uint64_t)(this->num_attributes ^ this->num_vert_buffers);
+ for (const int a : IndexRange(this->num_attributes)) {
+ hash ^= this->attributes[a].hash() << a;
+ }
+
+ for (const int b : IndexRange(this->num_vert_buffers)) {
+ hash ^= this->buffer_layouts[b].hash() << (b + 10);
+ }
+
+ /* NOTE: SSBO vertex fetch members not hashed as these will match attribute bindings. */
+ return hash;
+ }
+};
+
+/* Metal Render Pipeline State Descriptor -- All unique information which feeds PSO creation. */
+struct MTLRenderPipelineStateDescriptor {
+ /* This state descriptor will contain ALL parameters which generate a unique PSO.
+ * We will then use this state-object to efficiently look-up or create a
+ * new PSO for the current shader.
+ *
+ * Unlike the 'MTLContextGlobalShaderPipelineState', this struct contains a subset of
+ * parameters used to distinguish between unique PSOs. This struct is hashable and only contains
+ * those parameters which are required by PSO generation. Non-unique state such as bound
+ * resources is not tracked here, as it does not require a unique PSO permutation if changed. */
+
+ /* Input Vertex Descriptor. */
+ MTLVertexDescriptor vertex_descriptor;
+
+ /* Render Target attachment state.
+ * Assign to MTLPixelFormatInvalid if not used. */
+ int num_color_attachments;
+ MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT];
+ MTLPixelFormat depth_attachment_format;
+ MTLPixelFormat stencil_attachment_format;
+
+ /* Render Pipeline State affecting PSO creation. */
+ bool blending_enabled;
+ MTLBlendOperation alpha_blend_op;
+ MTLBlendOperation rgb_blend_op;
+ MTLBlendFactor dest_alpha_blend_factor;
+ MTLBlendFactor dest_rgb_blend_factor;
+ MTLBlendFactor src_alpha_blend_factor;
+ MTLBlendFactor src_rgb_blend_factor;
+
+ /* Global colour write mask as this cannot be specified per attachment. */
+ MTLColorWriteMask color_write_mask;
+
+ /* Point size required by point primitives. */
+ float point_size = 0.0f;
+
+ /* Comparison Operator for caching. */
+ bool operator==(const MTLRenderPipelineStateDescriptor &other) const
+ {
+ if (!(vertex_descriptor == other.vertex_descriptor)) {
+ return false;
+ }
+
+ if ((num_color_attachments != other.num_color_attachments) ||
+ (depth_attachment_format != other.depth_attachment_format) ||
+ (stencil_attachment_format != other.stencil_attachment_format) ||
+ (color_write_mask != other.color_write_mask) ||
+ (blending_enabled != other.blending_enabled) || (alpha_blend_op != other.alpha_blend_op) ||
+ (rgb_blend_op != other.rgb_blend_op) ||
+ (dest_alpha_blend_factor != other.dest_alpha_blend_factor) ||
+ (dest_rgb_blend_factor != other.dest_rgb_blend_factor) ||
+ (src_alpha_blend_factor != other.src_alpha_blend_factor) ||
+ (src_rgb_blend_factor != other.src_rgb_blend_factor) ||
+ (vertex_descriptor.prim_topology_class != other.vertex_descriptor.prim_topology_class) ||
+ (point_size != other.point_size)) {
+ return false;
+ }
+
+ /* Attachments can be skipped, so num_color_attachments will not define the range. */
+ for (const int c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
+ if (color_attachment_format[c] != other.color_attachment_format[c]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ uint64_t hash() const
+ {
+ /* NOTE(Metal): Current setup aims to minimise overlap of parameters
+ * which are more likely to be different, to ensure earlier hash
+   * differences without having to fall back to comparisons.
+   * Though this could likely be further improved to remove
+   * hash collisions. */
+
+ uint64_t hash = this->vertex_descriptor.hash();
+ hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */
+ hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */
+ hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */
+ hash ^= (uint64_t)(*(
+ (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */
+
+ /* Only include elements in Hash if they are needed - avoids variable null assignments
+ * influencing hash. */
+ if (this->num_color_attachments > 0) {
+ hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bitmask. */
+ hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */
+ hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */
+ hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */
+ hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */
+ hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */
+ hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */
+ }
+
+ for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
+      hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); /* Up to 555 (9 bits). */
+ }
+
+ hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0)
+ << 62;
+ hash ^= (uint64_t)this->point_size;
+
+ return hash;
+ }
+};
+
+} // namespace blender::gpu \ No newline at end of file
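The `hash()`/`operator==` pair make the descriptor usable as a key for PSO caching (`mtl_shader.hh` below keys `pso_cache_`, a `blender::Map`, on exactly this struct). A hedged sketch of the lookup flow:

    /* Illustrative cache probe; the real baking logic lives in
     * MTLShader::bake_current_pipeline_state(). */
    MTLRenderPipelineStateInstance **pso_ref = pso_cache_.lookup_ptr(current_pipeline_state_);
    if (pso_ref == nullptr) {
      /* Build a new MTLRenderPipelineState and add it keyed by the descriptor. */
    }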
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
new file mode 100644
index 00000000000..cdbcd7c68f6
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -0,0 +1,1164 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_shader.h"
+#include "GPU_vertex_format.h"
+
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+#include <functional>
+#include <unordered_map>
+
+#include <mutex>
+#include <thread>
+
+#include "mtl_framebuffer.hh"
+#include "mtl_shader_interface.hh"
+#include "mtl_shader_shared.h"
+#include "mtl_state.hh"
+#include "mtl_texture.hh"
+
+#include "gpu_shader_create_info.hh"
+#include "gpu_shader_private.hh"
+
+namespace blender::gpu {
+
+class MTLShaderInterface;
+class MTLContext;
+
+/* Debug control. */
+#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1
+#define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0
+
+/* Separate print used only during development and debugging. */
+#if MTL_SHADER_TRANSLATION_DEBUG_OUTPUT
+# define shader_debug_printf printf
+#else
+# define shader_debug_printf(...) /* Null print. */
+#endif
+
+/* Desired reflection data for a buffer binding. */
+struct MTLBufferArgumentData {
+ uint32_t index;
+ uint32_t size;
+ uint32_t alignment;
+ bool active;
+};
+
+/* Metal Render Pipeline State Instance. */
+struct MTLRenderPipelineStateInstance {
+ /* Function instances with specialisation.
+ * Required for argument encoder construction. */
+ id<MTLFunction> vert;
+ id<MTLFunction> frag;
+
+ /* PSO handle. */
+ id<MTLRenderPipelineState> pso;
+
+ /** Derived information. */
+ /* Unique index for PSO variant. */
+ uint32_t shader_pso_index;
+ /* Base bind index for binding uniform buffers, offset based on other
+ * bound buffers such as vertex buffers, as the count can vary. */
+ int base_uniform_buffer_index;
+ /* buffer bind slot used for null attributes (-1 if not needed). */
+ int null_attribute_buffer_index;
+ /* buffer bind used for transform feedback output buffer. */
+ int transform_feedback_buffer_index;
+
+ /** Reflection Data.
+   * Currently used to verify whether uniform buffers of incorrect sizes are being bound, due to
+   * left-over bindings being used for slots that did not need updating for a particular draw. Metal
+ * Backend over-generates bindings due to detecting their presence, though in many cases, the
+ * bindings in the source are not all used for a given shader.
+ * This information can also be used to eliminate redundant/unused bindings. */
+ bool reflection_data_available;
+ blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_vert;
+ blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_frag;
+};
+
+/* MTLShaderBuilder source wrapper used during initial compilation. */
+struct MTLShaderBuilder {
+ NSString *msl_source_vert_ = @"";
+ NSString *msl_source_frag_ = @"";
+
+ /* Generated GLSL source used during compilation. */
+ std::string glsl_vertex_source_ = "";
+ std::string glsl_fragment_source_ = "";
+
+ /* Indicates whether source code has been provided via MSL directly. */
+ bool source_from_msl_ = false;
+};
+
+/**
+ * MTLShader implements shader compilation, Pipeline State Object (PSO)
+ * creation for rendering and uniform data binding.
+ * Shaders can either be created from native MSL, or generated
+ * from a GLSL source shader using GPUShaderCreateInfo.
+ *
+ * Shader creation process:
+ * - Create MTLShader:
+ * - Convert GLSL to MSL source if required.
+ * - set MSL source.
+ * - set Vertex/Fragment function names.
+ * - Create and populate MTLShaderInterface.
+ **/
+class MTLShader : public Shader {
+ friend shader::ShaderCreateInfo;
+ friend shader::StageInterfaceInfo;
+
+ public:
+ /* Cached SSBO vertex fetch attribute uniform locations. */
+ int uni_ssbo_input_prim_type_loc = -1;
+ int uni_ssbo_input_vert_count_loc = -1;
+ int uni_ssbo_uses_indexed_rendering = -1;
+ int uni_ssbo_uses_index_mode_u16 = -1;
+
+ private:
+ /* Context Handle. */
+ MTLContext *context_ = nullptr;
+
+ /** Transform Feedback. */
+ /* Transform feedback mode. */
+ eGPUShaderTFBType transform_feedback_type_ = GPU_SHADER_TFB_NONE;
+ /* Transform feedback outputs written to TFB buffer. */
+ blender::Vector<std::string> tf_output_name_list_;
+ /* Whether transform feedback is currently active. */
+ bool transform_feedback_active_ = false;
+ /* Vertex buffer to write transform feedback data into. */
+ GPUVertBuf *transform_feedback_vertbuf_ = nullptr;
+
+ /** Shader source code. */
+ MTLShaderBuilder *shd_builder_ = nullptr;
+ NSString *vertex_function_name_ = @"";
+ NSString *fragment_function_name_ = @"";
+
+ /** Compiled shader resources. */
+ id<MTLLibrary> shader_library_vert_ = nil;
+ id<MTLLibrary> shader_library_frag_ = nil;
+ bool valid_ = false;
+
+ /** Render pipeline state and PSO caching. */
+ /* Metal API Descriptor used for creation of unique PSOs based on rendering state. */
+ MTLRenderPipelineDescriptor *pso_descriptor_ = nil;
+ /* Metal backend struct containing all high-level pipeline state parameters
+ * which contribute to instantiation of a unique PSO. */
+ MTLRenderPipelineStateDescriptor current_pipeline_state_;
+ /* Cache of compiled PipelineStateObjects. */
+ blender::Map<MTLRenderPipelineStateDescriptor, MTLRenderPipelineStateInstance *> pso_cache_;
+
+ /* True to enable multi-layered rendering support. */
+ bool uses_mtl_array_index_ = false;
+
+ /** SSBO Vertex fetch pragma options. */
+ /* Indicates whether to pass in VertexBuffer's as regular buffer bindings
+ * and perform vertex assembly manually, rather than using Stage-in.
+ * This is used to give a vertex shader full access to all of the
+ * vertex data.
+ * This is primarily used for optimisation techniques and
+ * alternative solutions for Geometry-shaders which are unsupported
+ * by Metal. */
+ bool use_ssbo_vertex_fetch_mode_ = false;
+  /* Output primitive type when rendering using ssbo_vertex_fetch. */
+ MTLPrimitiveType ssbo_vertex_fetch_output_prim_type_;
+
+ /* Output vertices per original vertex shader instance.
+ * This number will be multiplied by the number of input primitives
+ * from the source draw call. */
+ uint32_t ssbo_vertex_fetch_output_num_verts_ = 0;
+
+ bool ssbo_vertex_attribute_bind_active_ = false;
+ int ssbo_vertex_attribute_bind_mask_ = 0;
+ bool ssbo_vbo_slot_used_[MTL_SSBO_VERTEX_FETCH_MAX_VBOS];
+
+ struct ShaderSSBOAttributeBinding {
+ int attribute_index = -1;
+ int uniform_stride;
+ int uniform_offset;
+ int uniform_fetchmode;
+ int uniform_vbo_id;
+ int uniform_attr_type;
+ };
+ ShaderSSBOAttributeBinding cached_ssbo_attribute_bindings_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES] = {};
+
+ /* Metal Shader Uniform data store.
+   * This block is used to store current shader push_constant
+ * data before it is submitted to the GPU. This is currently
+ * stored per shader instance, though depending on GPU module
+ * functionality, this could potentially be a global data store.
+ * This data is associated with the PushConstantBlock, which is
+ * always at index zero in the UBO list. */
+ void *push_constant_data_ = nullptr;
+ bool push_constant_modified_ = false;
+
+ public:
+ MTLShader(MTLContext *ctx, const char *name);
+ MTLShader(MTLContext *ctx,
+ MTLShaderInterface *interface,
+ const char *name,
+ NSString *input_vertex_source,
+ NSString *input_fragment_source,
+ NSString *vertex_function_name_,
+ NSString *fragment_function_name_);
+ ~MTLShader();
+
+ /* Assign GLSL source. */
+ void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
+ void geometry_shader_from_glsl(MutableSpan<const char *> sources) override;
+ void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
+ void compute_shader_from_glsl(MutableSpan<const char *> sources) override;
+
+ /* Compile and build - Return true if successful. */
+ bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
+
+ /* Utility. */
+ bool is_valid()
+ {
+ return valid_;
+ }
+ MTLRenderPipelineStateDescriptor &get_current_pipeline_state()
+ {
+ return current_pipeline_state_;
+ }
+ MTLShaderInterface *get_interface()
+ {
+ return static_cast<MTLShaderInterface *>(this->interface);
+ }
+ void *get_push_constant_data()
+ {
+ return push_constant_data_;
+ }
+
+ /* Shader source generators from create-info.
+ * These aren't all used by Metal, as certain parts of source code generation
+ * for shader entry-points and resource mapping occur during `finalize`. */
+ std::string resources_declare(const shader::ShaderCreateInfo &info) const override;
+ std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override;
+ std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override;
+ std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override;
+ std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override;
+ std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override;
+
+ void transform_feedback_names_set(Span<const char *> name_list,
+ const eGPUShaderTFBType geom_type) override;
+ bool transform_feedback_enable(GPUVertBuf *buf) override;
+ void transform_feedback_disable() override;
+
+ void bind() override;
+ void unbind() override;
+
+ void uniform_float(int location, int comp_len, int array_size, const float *data) override;
+ void uniform_int(int location, int comp_len, int array_size, const int *data) override;
+ bool get_push_constant_is_dirty();
+ void push_constant_bindstate_mark_dirty(bool is_dirty);
+
+ void vertformat_from_shader(GPUVertFormat *format) const override;
+
+ /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
+ int program_handle_get() const override
+ {
+ return -1;
+ }
+
+ bool get_uses_ssbo_vertex_fetch()
+ {
+ return use_ssbo_vertex_fetch_mode_;
+ }
+ MTLPrimitiveType get_ssbo_vertex_fetch_output_prim_type()
+ {
+ return ssbo_vertex_fetch_output_prim_type_;
+ }
+ uint32_t get_ssbo_vertex_fetch_output_num_verts()
+ {
+ return ssbo_vertex_fetch_output_num_verts_;
+ }
+ static int ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type);
+ void prepare_ssbo_vertex_fetch_metadata();
+
+ /* SSBO Vertex Bindings Utility functions. */
+ void ssbo_vertex_fetch_bind_attributes_begin();
+ void ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr);
+ void ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder);
+
+ /* Metal shader properties and source mapping. */
+  void set_vertex_function_name(NSString *vert_function_name);
+  void set_fragment_function_name(NSString *frag_function_name);
+ void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source);
+ void set_interface(MTLShaderInterface *interface);
+ MTLRenderPipelineStateInstance *bake_current_pipeline_state(MTLContext *ctx,
+ MTLPrimitiveTopologyClass prim_type);
+
+ /* Transform Feedback. */
+ GPUVertBuf *get_transform_feedback_active_buffer();
+ bool has_transform_feedback_varying(std::string str);
+
+ private:
+ /* Generate MSL shader from GLSL source. */
+ bool generate_msl_from_glsl(const shader::ShaderCreateInfo *info);
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader");
+};
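+
+/* Typical backend usage (illustrative sketch only; the exact call sites live in
+ * MTLBatch/MTLImmediate and depend on the draw being encoded):
+ *
+ *   MTLShader *shader = ...;
+ *   shader->bind();
+ *   shader->uniform_float(location, 4, 1, color);  // Cached in push constants.
+ *   MTLRenderPipelineStateInstance *pso = shader->bake_current_pipeline_state(
+ *       ctx, MTLPrimitiveTopologyClassTriangle);
+ */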
+
+/* Vertex format conversion.
+ * Determines whether it is possible to resize a vertex attribute type
+ * during input assembly. A conversion is implied by the difference
+ * between the input vertex descriptor (from MTLBatch/MTLImmediate)
+ * and the type specified in the shader source.
+ *
+ * e.g. vec3 to vec4 expansion, or vec4 to vec2 truncation.
+ * NOTE: Vector expansion will replace unspecified components with the values
+ * (0, 0, 0, 1).
+ *
+ * If implicit format resize is not possible, this function
+ * returns false.
+ *
+ * Implicitly supported conversions in Metal are described here:
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
+ */
+inline bool mtl_vertex_format_resize(MTLVertexFormat mtl_format,
+ uint32_t components,
+ MTLVertexFormat *r_convertedFormat)
+{
+ MTLVertexFormat out_vert_format = MTLVertexFormatInvalid;
+ switch (mtl_format) {
+ /* Char. */
+ case MTLVertexFormatChar:
+ case MTLVertexFormatChar2:
+ case MTLVertexFormatChar3:
+ case MTLVertexFormatChar4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatChar;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatChar2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatChar3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatChar4;
+ break;
+ }
+ break;
+
+ /* Normalized Char. */
+ case MTLVertexFormatCharNormalized:
+ case MTLVertexFormatChar2Normalized:
+ case MTLVertexFormatChar3Normalized:
+ case MTLVertexFormatChar4Normalized:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatCharNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatChar2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatChar3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatChar4Normalized;
+ break;
+ }
+ break;
+
+ /* Unsigned Char. */
+ case MTLVertexFormatUChar:
+ case MTLVertexFormatUChar2:
+ case MTLVertexFormatUChar3:
+ case MTLVertexFormatUChar4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatUChar;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUChar2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUChar3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUChar4;
+ break;
+ }
+ break;
+
+ /* Normalized Unsigned char */
+ case MTLVertexFormatUCharNormalized:
+ case MTLVertexFormatUChar2Normalized:
+ case MTLVertexFormatUChar3Normalized:
+ case MTLVertexFormatUChar4Normalized:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatUCharNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUChar2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUChar3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUChar4Normalized;
+ break;
+ }
+ break;
+
+ /* Short. */
+ case MTLVertexFormatShort:
+ case MTLVertexFormatShort2:
+ case MTLVertexFormatShort3:
+ case MTLVertexFormatShort4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatShort;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatShort2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatShort3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatShort4;
+ break;
+ }
+ break;
+
+ /* Normalized Short. */
+ case MTLVertexFormatShortNormalized:
+ case MTLVertexFormatShort2Normalized:
+ case MTLVertexFormatShort3Normalized:
+ case MTLVertexFormatShort4Normalized:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatShortNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatShort2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatShort3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatShort4Normalized;
+ break;
+ }
+ break;
+
+ /* Unsigned Short. */
+ case MTLVertexFormatUShort:
+ case MTLVertexFormatUShort2:
+ case MTLVertexFormatUShort3:
+ case MTLVertexFormatUShort4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatUShort;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUShort2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUShort3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUShort4;
+ break;
+ }
+ break;
+
+ /* Normalized Unsigned Short. */
+ case MTLVertexFormatUShortNormalized:
+ case MTLVertexFormatUShort2Normalized:
+ case MTLVertexFormatUShort3Normalized:
+ case MTLVertexFormatUShort4Normalized:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatUShortNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUShort2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUShort3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUShort4Normalized;
+ break;
+ }
+ break;
+
+ /* Integer. */
+ case MTLVertexFormatInt:
+ case MTLVertexFormatInt2:
+ case MTLVertexFormatInt3:
+ case MTLVertexFormatInt4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatInt;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatInt2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatInt3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatInt4;
+ break;
+ }
+ break;
+
+ /* Unsigned Integer. */
+ case MTLVertexFormatUInt:
+ case MTLVertexFormatUInt2:
+ case MTLVertexFormatUInt3:
+ case MTLVertexFormatUInt4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatUInt;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUInt2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUInt3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUInt4;
+ break;
+ }
+ break;
+
+ /* Half. */
+ case MTLVertexFormatHalf:
+ case MTLVertexFormatHalf2:
+ case MTLVertexFormatHalf3:
+ case MTLVertexFormatHalf4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatHalf;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatHalf2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatHalf3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatHalf4;
+ break;
+ }
+ break;
+
+ /* Float. */
+ case MTLVertexFormatFloat:
+ case MTLVertexFormatFloat2:
+ case MTLVertexFormatFloat3:
+ case MTLVertexFormatFloat4:
+ switch (components) {
+ case 1:
+ out_vert_format = MTLVertexFormatFloat;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatFloat2;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatFloat3;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatFloat4;
+ break;
+ }
+ break;
+
+ /* Other formats */
+ default:
+ out_vert_format = mtl_format;
+ break;
+ }
+ *r_convertedFormat = out_vert_format;
+ return out_vert_format != MTLVertexFormatInvalid;
+}
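+
+/* Example (illustrative): resizing a shader-side MTLVertexFormatFloat4 input
+ * to the two components supplied by the vertex buffer. This yields
+ * MTLVertexFormatFloat2 and returns true:
+ *
+ *   MTLVertexFormat converted = MTLVertexFormatInvalid;
+ *   if (mtl_vertex_format_resize(MTLVertexFormatFloat4, 2, &converted)) {
+ *     BLI_assert(converted == MTLVertexFormatFloat2);
+ *   }
+ */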
+
+/* Returns whether the Metal API can internally convert between the input type of data in the
+ * incoming vertex buffer and the format used by the vertex attribute inside the shader.
+ *
+ * - Returns TRUE if the type can be converted internally, along with returning the appropriate
+ *   type to be passed into the MTLVertexAttributeDescriptorPSO.
+ *
+ * - Returns FALSE if the type cannot be converted internally e.g. casting Int4 to Float4.
+ *
+ * If implicit conversion is not possible, we can fall back to performing manual attribute
+ * conversion using the special attribute read function specialisations in the shader.
+ * These functions selectively convert between types based on the specified vertex
+ * attribute 'GPUVertFetchMode fetch_mode' e.g. GPU_FETCH_INT.
+ */
+inline bool mtl_convert_vertex_format(MTLVertexFormat shader_attrib_format,
+ GPUVertCompType component_type,
+ uint32_t component_length,
+ GPUVertFetchMode fetch_mode,
+ MTLVertexFormat *r_convertedFormat)
+{
+ bool normalized = (fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT);
+ MTLVertexFormat out_vert_format = MTLVertexFormatInvalid;
+
+ switch (component_type) {
+
+ case GPU_COMP_I8:
+ switch (fetch_mode) {
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatChar ||
+ shader_attrib_format == MTLVertexFormatChar2 ||
+ shader_attrib_format == MTLVertexFormatChar3 ||
+ shader_attrib_format == MTLVertexFormatChar4) {
+
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+            bool can_convert = mtl_vertex_format_resize(
+                shader_attrib_format, component_length, &out_vert_format);
+
+            /* Ensure format resize successful. */
+            BLI_assert(can_convert);
+            UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else if (shader_attrib_format == MTLVertexFormatInt4 && component_length == 4) {
+ /* Allow type expansion - Shader expects MTLVertexFormatInt4, we can supply a type
+ * with fewer bytes if component count is the same. Sign must also match original type
+ * -- which is not a problem in this case. */
+ out_vert_format = MTLVertexFormatChar4;
+ }
+          else if (shader_attrib_format == MTLVertexFormatInt3 && component_length == 3) {
+            /* Same as above case for matching length and signage (Len=3). */
+            out_vert_format = MTLVertexFormatChar3;
+          }
+          else if (shader_attrib_format == MTLVertexFormatInt2 && component_length == 2) {
+            /* Same as above case for matching length and signage (Len=2). */
+            out_vert_format = MTLVertexFormatChar2;
+          }
+          else if (shader_attrib_format == MTLVertexFormatInt && component_length == 1) {
+            /* Same as above case for matching length and signage (Len=1). */
+            out_vert_format = MTLVertexFormatChar;
+          }
+          else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) {
+            /* Special case here, format has been specified as GPU_COMP_I8 with 4 components,
+             * which is equivalent to an Int -- so data will be compatible with the shader
+             * interface. */
+            out_vert_format = MTLVertexFormatInt;
+          }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either Char, Char2, Char3, Char4 but "
+ "format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+
+ /* Source vertex data is integer type, but shader interface type is floating point.
+ * If the input attribute is specified as normalized, we can convert. */
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ if (normalized) {
+ switch (component_length) {
+ case 1:
+ out_vert_format = MTLVertexFormatCharNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatChar2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatChar3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatChar4Normalized;
+ break;
+ default:
+ BLI_assert_msg(false, "invalid vertex format");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ }
+ else {
+ /* Cannot convert. */
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ }
+ break;
+
+ case GPU_COMP_U8:
+ switch (fetch_mode) {
+ /* Fetching INT: Check backing shader format matches source input. */
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatUChar ||
+ shader_attrib_format == MTLVertexFormatUChar2 ||
+ shader_attrib_format == MTLVertexFormatUChar3 ||
+ shader_attrib_format == MTLVertexFormatUChar4) {
+
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Ensure format resize successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ /* TODO(Metal): Add other format conversions if needed. Currently no attributes hit
+ * this path. */
+ }
+ else if (shader_attrib_format == MTLVertexFormatUInt4 && component_length == 4) {
+ /* Allow type expansion - Shader expects MTLVertexFormatUInt4, we can supply a type
+ * with fewer bytes if component count is the same. */
+ out_vert_format = MTLVertexFormatUChar4;
+ }
+          else if (shader_attrib_format == MTLVertexFormatUInt3 && component_length == 3) {
+            /* Same as above case for matching length and signage (Len=3). */
+            out_vert_format = MTLVertexFormatUChar3;
+          }
+          else if (shader_attrib_format == MTLVertexFormatUInt2 && component_length == 2) {
+            /* Same as above case for matching length and signage (Len=2). */
+            out_vert_format = MTLVertexFormatUChar2;
+          }
+          else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 1) {
+            /* Same as above case for matching length and signage (Len=1). */
+            out_vert_format = MTLVertexFormatUChar;
+          }
+          else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) {
+            /* Special case here, format has been specified as GPU_COMP_U8 with 4 components,
+             * which is equivalent to an Int -- so data will be compatible with the shader
+             * interface. */
+            out_vert_format = MTLVertexFormatInt;
+          }
+          else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 4) {
+            /* Special case here, format has been specified as GPU_COMP_U8 with 4 components,
+             * which is equivalent to a UInt -- so data will be compatible with the shader
+             * interface. */
+            out_vert_format = MTLVertexFormatUInt;
+          }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either UChar, UChar2, UChar3, UChar4 but "
+ "format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+
+ /* Source vertex data is integral type, but shader interface type is floating point.
+ * If the input attribute is specified as normalized, we can convert. */
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ if (normalized) {
+ switch (component_length) {
+ case 1:
+ out_vert_format = MTLVertexFormatUCharNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUChar2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUChar3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUChar4Normalized;
+ break;
+ default:
+ BLI_assert_msg(false, "invalid vertex format");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ }
+ else {
+ /* Cannot convert. */
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ }
+ break;
+
+ case GPU_COMP_I16:
+ switch (fetch_mode) {
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatShort ||
+ shader_attrib_format == MTLVertexFormatShort2 ||
+ shader_attrib_format == MTLVertexFormatShort3 ||
+ shader_attrib_format == MTLVertexFormatShort4) {
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Ensure conversion successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either Short, Short2, Short3, Short4 but "
+ "format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+
+ /* Source vertex data is integral type, but shader interface type is floating point.
+ * If the input attribute is specified as normalized, we can convert. */
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ if (normalized) {
+ switch (component_length) {
+ case 1:
+ out_vert_format = MTLVertexFormatShortNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatShort2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatShort3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatShort4Normalized;
+ break;
+ default:
+ BLI_assert_msg(false, "invalid vertex format");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ }
+ else {
+ /* Cannot convert. */
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ }
+ break;
+
+ case GPU_COMP_U16:
+ switch (fetch_mode) {
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatUShort ||
+ shader_attrib_format == MTLVertexFormatUShort2 ||
+ shader_attrib_format == MTLVertexFormatUShort3 ||
+ shader_attrib_format == MTLVertexFormatUShort4) {
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Ensure format resize successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either UShort, UShort2, UShort3, UShort4 "
+ "but format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+
+ /* Source vertex data is integral type, but shader interface type is floating point.
+ * If the input attribute is specified as normalized, we can convert. */
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ if (normalized) {
+ switch (component_length) {
+ case 1:
+ out_vert_format = MTLVertexFormatUShortNormalized;
+ break;
+ case 2:
+ out_vert_format = MTLVertexFormatUShort2Normalized;
+ break;
+ case 3:
+ out_vert_format = MTLVertexFormatUShort3Normalized;
+ break;
+ case 4:
+ out_vert_format = MTLVertexFormatUShort4Normalized;
+ break;
+ default:
+ BLI_assert_msg(false, "invalid vertex format");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ }
+ else {
+ /* Cannot convert. */
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ }
+ break;
+
+ case GPU_COMP_I32:
+ switch (fetch_mode) {
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatInt ||
+ shader_attrib_format == MTLVertexFormatInt2 ||
+ shader_attrib_format == MTLVertexFormatInt3 ||
+ shader_attrib_format == MTLVertexFormatInt4) {
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Verify conversion successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either Int, Int2, Int3, Int4 but format "
+ "in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+          /* Unfortunately we cannot implicitly convert between Int and Float in Metal. */
+ out_vert_format = MTLVertexFormatInvalid;
+ break;
+ }
+ break;
+
+ case GPU_COMP_U32:
+ switch (fetch_mode) {
+ case GPU_FETCH_INT:
+ if (shader_attrib_format == MTLVertexFormatUInt ||
+ shader_attrib_format == MTLVertexFormatUInt2 ||
+ shader_attrib_format == MTLVertexFormatUInt3 ||
+ shader_attrib_format == MTLVertexFormatUInt4) {
+            /* No conversion needed (as type matches) - Just a vector resize if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Verify conversion successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either UInt, UInt2, UInt3, UInt4 but "
+ "format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+          /* Unfortunately we cannot convert between UInt and Float in Metal. */
+ out_vert_format = MTLVertexFormatInvalid;
+ break;
+ }
+ break;
+
+ case GPU_COMP_F32:
+ switch (fetch_mode) {
+
+ /* Source data is float. This will be compatible
+ * if type specified in shader is also float. */
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ if (shader_attrib_format == MTLVertexFormatFloat ||
+ shader_attrib_format == MTLVertexFormatFloat2 ||
+ shader_attrib_format == MTLVertexFormatFloat3 ||
+ shader_attrib_format == MTLVertexFormatFloat4) {
+            /* No conversion needed (as type matches) - Just a vector resize, if needed. */
+ bool can_convert = mtl_vertex_format_resize(
+ shader_attrib_format, component_length, &out_vert_format);
+
+ /* Verify conversion successful. */
+ BLI_assert(can_convert);
+ UNUSED_VARS_NDEBUG(can_convert);
+ }
+ else {
+ BLI_assert_msg(false,
+ "Source vertex data format is either Float, Float2, Float3, Float4 but "
+ "format in shader interface is NOT compatible.\n");
+ out_vert_format = MTLVertexFormatInvalid;
+ }
+ break;
+
+ case GPU_FETCH_INT:
+          /* Unfortunately we cannot convert between Float and Int implicitly in Metal. */
+ out_vert_format = MTLVertexFormatInvalid;
+ break;
+ }
+ break;
+
+ case GPU_COMP_I10:
+ out_vert_format = MTLVertexFormatInt1010102Normalized;
+ break;
+ }
+ *r_convertedFormat = out_vert_format;
+ return (out_vert_format != MTLVertexFormatInvalid);
+}
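+
+/* Example (illustrative): a vertex buffer holding GPU_COMP_U8 data with 4
+ * components, fetched with GPU_FETCH_INT_TO_FLOAT_UNIT into a float4 shader
+ * input, resolves to the normalized UChar4 format:
+ *
+ *   MTLVertexFormat converted = MTLVertexFormatInvalid;
+ *   bool ok = mtl_convert_vertex_format(
+ *       MTLVertexFormatFloat4, GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT, &converted);
+ *   // ok == true, converted == MTLVertexFormatUChar4Normalized.
+ */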
+
+inline uint comp_count_from_vert_format(MTLVertexFormat vert_format)
+{
+ switch (vert_format) {
+ case MTLVertexFormatFloat:
+ case MTLVertexFormatInt:
+ case MTLVertexFormatUInt:
+ case MTLVertexFormatShort:
+ case MTLVertexFormatUChar:
+ case MTLVertexFormatUCharNormalized:
+ return 1;
+ case MTLVertexFormatUChar2:
+ case MTLVertexFormatUInt2:
+ case MTLVertexFormatFloat2:
+ case MTLVertexFormatInt2:
+ case MTLVertexFormatUChar2Normalized:
+ return 2;
+ case MTLVertexFormatUChar3:
+ case MTLVertexFormatUInt3:
+ case MTLVertexFormatFloat3:
+ case MTLVertexFormatInt3:
+ case MTLVertexFormatShort3Normalized:
+ case MTLVertexFormatUChar3Normalized:
+ return 3;
+ case MTLVertexFormatUChar4:
+ case MTLVertexFormatFloat4:
+ case MTLVertexFormatUInt4:
+ case MTLVertexFormatInt4:
+ case MTLVertexFormatUChar4Normalized:
+    case MTLVertexFormatInt1010102Normalized:
+      return 4;
+
+    default:
+ BLI_assert_msg(false, "Unrecognised attribute type. Add types to switch as needed.");
+ return 0;
+ }
+}
+
+inline GPUVertFetchMode fetchmode_from_vert_format(MTLVertexFormat vert_format)
+{
+ switch (vert_format) {
+ case MTLVertexFormatFloat:
+ case MTLVertexFormatFloat2:
+ case MTLVertexFormatFloat3:
+ case MTLVertexFormatFloat4:
+ return GPU_FETCH_FLOAT;
+
+ case MTLVertexFormatUChar:
+ case MTLVertexFormatUChar2:
+ case MTLVertexFormatUChar3:
+ case MTLVertexFormatUChar4:
+ case MTLVertexFormatChar:
+ case MTLVertexFormatChar2:
+ case MTLVertexFormatChar3:
+ case MTLVertexFormatChar4:
+ case MTLVertexFormatUShort:
+ case MTLVertexFormatUShort2:
+ case MTLVertexFormatUShort3:
+ case MTLVertexFormatUShort4:
+ case MTLVertexFormatShort:
+ case MTLVertexFormatShort2:
+ case MTLVertexFormatShort3:
+ case MTLVertexFormatShort4:
+ case MTLVertexFormatUInt:
+ case MTLVertexFormatUInt2:
+ case MTLVertexFormatUInt3:
+ case MTLVertexFormatUInt4:
+ case MTLVertexFormatInt:
+ case MTLVertexFormatInt2:
+ case MTLVertexFormatInt3:
+ case MTLVertexFormatInt4:
+ return GPU_FETCH_INT;
+
+ case MTLVertexFormatUCharNormalized:
+ case MTLVertexFormatUChar2Normalized:
+ case MTLVertexFormatUChar3Normalized:
+ case MTLVertexFormatUChar4Normalized:
+ case MTLVertexFormatCharNormalized:
+ case MTLVertexFormatChar2Normalized:
+ case MTLVertexFormatChar3Normalized:
+ case MTLVertexFormatChar4Normalized:
+ case MTLVertexFormatUShortNormalized:
+ case MTLVertexFormatUShort2Normalized:
+ case MTLVertexFormatUShort3Normalized:
+ case MTLVertexFormatUShort4Normalized:
+ case MTLVertexFormatShortNormalized:
+ case MTLVertexFormatShort2Normalized:
+ case MTLVertexFormatShort3Normalized:
+ case MTLVertexFormatShort4Normalized:
+ case MTLVertexFormatInt1010102Normalized:
+ return GPU_FETCH_INT_TO_FLOAT_UNIT;
+
+ default:
+ BLI_assert_msg(false, "Unrecognised attribute type. Add types to switch as needed.");
+ return GPU_FETCH_FLOAT;
+ }
+}
+
+inline GPUVertCompType comp_type_from_vert_format(MTLVertexFormat vert_format)
+{
+ switch (vert_format) {
+ case MTLVertexFormatUChar:
+ case MTLVertexFormatUChar2:
+ case MTLVertexFormatUChar3:
+ case MTLVertexFormatUChar4:
+ case MTLVertexFormatUCharNormalized:
+ case MTLVertexFormatUChar2Normalized:
+ case MTLVertexFormatUChar3Normalized:
+ case MTLVertexFormatUChar4Normalized:
+ return GPU_COMP_U8;
+
+ case MTLVertexFormatChar:
+ case MTLVertexFormatChar2:
+ case MTLVertexFormatChar3:
+ case MTLVertexFormatChar4:
+ case MTLVertexFormatCharNormalized:
+ case MTLVertexFormatChar2Normalized:
+ case MTLVertexFormatChar3Normalized:
+ case MTLVertexFormatChar4Normalized:
+ return GPU_COMP_I8;
+
+ case MTLVertexFormatShort:
+ case MTLVertexFormatShort2:
+ case MTLVertexFormatShort3:
+ case MTLVertexFormatShort4:
+ case MTLVertexFormatShortNormalized:
+ case MTLVertexFormatShort2Normalized:
+ case MTLVertexFormatShort3Normalized:
+ case MTLVertexFormatShort4Normalized:
+ return GPU_COMP_I16;
+
+ case MTLVertexFormatUShort:
+ case MTLVertexFormatUShort2:
+ case MTLVertexFormatUShort3:
+ case MTLVertexFormatUShort4:
+ case MTLVertexFormatUShortNormalized:
+ case MTLVertexFormatUShort2Normalized:
+ case MTLVertexFormatUShort3Normalized:
+ case MTLVertexFormatUShort4Normalized:
+ return GPU_COMP_U16;
+
+ case MTLVertexFormatInt:
+ case MTLVertexFormatInt2:
+ case MTLVertexFormatInt3:
+ case MTLVertexFormatInt4:
+ return GPU_COMP_I32;
+
+ case MTLVertexFormatUInt:
+ case MTLVertexFormatUInt2:
+ case MTLVertexFormatUInt3:
+ case MTLVertexFormatUInt4:
+ return GPU_COMP_U32;
+
+ case MTLVertexFormatFloat:
+ case MTLVertexFormatFloat2:
+ case MTLVertexFormatFloat3:
+ case MTLVertexFormatFloat4:
+ return GPU_COMP_F32;
+
+ case MTLVertexFormatInt1010102Normalized:
+ return GPU_COMP_I10;
+
+ default:
+ BLI_assert_msg(false, "Unrecognised attribute type. Add types to switch as needed.");
+ return GPU_COMP_F32;
+ }
+}
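+
+/* NOTE: The three helpers above effectively invert the attribute mapping: given
+ * the MTLVertexFormat stored in the shader interface, they recover the
+ * (comp_type, comp_len, fetch_mode) triple used by GPUVertFormat. For example,
+ * MTLVertexFormatUChar4Normalized maps back to
+ * (GPU_COMP_U8, 4, GPU_FETCH_INT_TO_FLOAT_UNIT). */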
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
new file mode 100644
index 00000000000..1824057c9a2
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -0,0 +1,1263 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "BKE_global.h"
+
+#include "BLI_string.h"
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <string>
+
+#include <cstring>
+
+#include "GPU_platform.h"
+#include "GPU_vertex_format.h"
+
+#include "mtl_common.hh"
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_pso_descriptor_state.hh"
+#include "mtl_shader.hh"
+#include "mtl_shader_generator.hh"
+#include "mtl_shader_interface.hh"
+#include "mtl_texture.hh"
+
+extern char datatoc_mtl_shader_common_msl[];
+
+using namespace blender;
+using namespace blender::gpu;
+using namespace blender::gpu::shader;
+
+namespace blender::gpu {
+
+/* -------------------------------------------------------------------- */
+/** \name Creation / Destruction.
+ * \{ */
+
+/* Create empty shader to be populated later. */
+MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name)
+{
+ context_ = ctx;
+
+ /* Create SHD builder to hold temporary resources until compilation is complete. */
+ shd_builder_ = new MTLShaderBuilder();
+
+#ifndef NDEBUG
+ /* Remove invalid symbols from shader name to ensure debug entrypoint function name is valid. */
+  for (uint i : IndexRange(strlen(this->name))) {
+    char c = this->name[i];
+    if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))) {
+      this->name[i] = '_';
+    }
+  }
+#endif
+}
+
+/* Create shader from MSL source. */
+MTLShader::MTLShader(MTLContext *ctx,
+ MTLShaderInterface *interface,
+ const char *name,
+ NSString *input_vertex_source,
+ NSString *input_fragment_source,
+ NSString *vert_function_name,
+ NSString *frag_function_name)
+ : MTLShader(ctx, name)
+{
+ BLI_assert([vert_function_name length]);
+ BLI_assert([frag_function_name length]);
+
+ this->set_vertex_function_name(vert_function_name);
+ this->set_fragment_function_name(frag_function_name);
+ this->shader_source_from_msl(input_vertex_source, input_fragment_source);
+ this->set_interface(interface);
+ this->finalize(nullptr);
+}
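+
+/* Example (illustrative): the MSL constructor above is intended for internal
+ * utility shaders authored directly in Metal Shading Language. The shader name
+ * and entry-point names here are placeholders:
+ *
+ *   MTLShader *shader = new MTLShader(ctx,
+ *                                     interface,
+ *                                     "fullscreen_blit",
+ *                                     vert_src,
+ *                                     frag_src,
+ *                                     @"vertex_main",
+ *                                     @"fragment_main");
+ */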
+
+MTLShader::~MTLShader()
+{
+ if (this->is_valid()) {
+
+ /* Free uniform data block. */
+ if (push_constant_data_ != nullptr) {
+ MEM_freeN(push_constant_data_);
+ push_constant_data_ = nullptr;
+ }
+
+ /* Free Metal resources. */
+ if (shader_library_vert_ != nil) {
+ [shader_library_vert_ release];
+ shader_library_vert_ = nil;
+ }
+ if (shader_library_frag_ != nil) {
+ [shader_library_frag_ release];
+ shader_library_frag_ = nil;
+ }
+
+ if (pso_descriptor_ != nil) {
+ [pso_descriptor_ release];
+ pso_descriptor_ = nil;
+ }
+
+ /* Free Pipeline Cache. */
+ for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
+ if (pso_inst->vert) {
+ [pso_inst->vert release];
+ }
+ if (pso_inst->frag) {
+ [pso_inst->frag release];
+ }
+ if (pso_inst->pso) {
+ [pso_inst->pso release];
+ }
+ delete pso_inst;
+ }
+ pso_cache_.clear();
+
+ /* NOTE(Metal): ShaderInterface deletion is handled in the super destructor ~Shader(). */
+ }
+ valid_ = false;
+
+ if (shd_builder_ != nullptr) {
+ delete shd_builder_;
+ }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Shader stage creation.
+ * \{ */
+
+void MTLShader::vertex_shader_from_glsl(MutableSpan<const char *> sources)
+{
+ /* Flag source as not being compiled from native MSL. */
+ BLI_assert(shd_builder_ != nullptr);
+ shd_builder_->source_from_msl_ = false;
+
+ /* Remove #version tag entry. */
+ sources[0] = "";
+
+ /* Consolidate GLSL vertex sources. */
+ std::stringstream ss;
+ for (int i = 0; i < sources.size(); i++) {
+ ss << sources[i] << std::endl;
+ }
+ shd_builder_->glsl_vertex_source_ = ss.str();
+}
+
+void MTLShader::geometry_shader_from_glsl(MutableSpan<const char *> sources)
+{
+ MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!\n");
+}
+
+void MTLShader::fragment_shader_from_glsl(MutableSpan<const char *> sources)
+{
+ /* Flag source as not being compiled from native MSL. */
+ BLI_assert(shd_builder_ != nullptr);
+ shd_builder_->source_from_msl_ = false;
+
+ /* Remove #version tag entry. */
+ sources[0] = "";
+
+ /* Consolidate GLSL fragment sources. */
+ std::stringstream ss;
+ for (int i = 0; i < sources.size(); i++) {
+ ss << sources[i] << std::endl;
+ }
+ shd_builder_->glsl_fragment_source_ = ss.str();
+}
+
+void MTLShader::compute_shader_from_glsl(MutableSpan<const char *> sources)
+{
+ /* Remove #version tag entry. */
+ sources[0] = "";
+
+ /* TODO(Metal): Support compute shaders in Metal. */
+ MTL_LOG_WARNING(
+ "MTLShader::compute_shader_from_glsl - Compute shaders currently unsupported!\n");
+}
+
+bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
+{
+ /* Check if Shader has already been finalized. */
+ if (this->is_valid()) {
+ MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!\n", this, this->name_get());
+ }
+
+ /* Perform GLSL to MSL source translation. */
+ BLI_assert(shd_builder_ != nullptr);
+ if (!shd_builder_->source_from_msl_) {
+ bool success = generate_msl_from_glsl(info);
+ if (!success) {
+ /* GLSL to MSL translation has failed, or is unsupported for this shader. */
+ valid_ = false;
+ BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n");
+
+ /* Create empty interface to allow shader to be silently used. */
+ MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get());
+ this->set_interface(mtl_interface);
+
+      /* Release temporary compilation resources. */
+      delete shd_builder_;
+      shd_builder_ = nullptr;
+      return false;
+ }
+ }
+
+ /* Ensure we have a valid shader interface. */
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ BLI_assert(mtl_interface != nullptr);
+
+ /* Verify Context handle, fetch device and compile shader. */
+ BLI_assert(context_);
+ id<MTLDevice> device = context_->device;
+ BLI_assert(device != nil);
+
+ /* Ensure source and stage entry-point names are set. */
+ BLI_assert([vertex_function_name_ length] > 0);
+ if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
+ BLI_assert([fragment_function_name_ length] > 0);
+ }
+ BLI_assert(shd_builder_ != nullptr);
+ BLI_assert([shd_builder_->msl_source_vert_ length] > 0);
+
+ @autoreleasepool {
+ MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
+ options.languageVersion = MTLLanguageVersion2_2;
+ options.fastMathEnabled = YES;
+
+    for (int src_stage = 0; src_stage <= 1; src_stage++) {
+      NSString *source_to_compile = (src_stage == 0) ? shd_builder_->msl_source_vert_ :
+                                                       shd_builder_->msl_source_frag_;
+
+ /* Transform feedback, skip compilation. */
+ if (src_stage == 1 && (transform_feedback_type_ != GPU_SHADER_TFB_NONE)) {
+ shader_library_frag_ = nil;
+ break;
+ }
+
+ /* Concatenate common src. */
+ NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl];
+ NSString *source_with_header_a = [str stringByAppendingString:source_to_compile];
+
+      /* Inject unique context ID to avoid cross-context shader cache collisions.
+       * Required on macOS versions prior to 11.0. */
+ NSString *source_with_header = source_with_header_a;
+ if (@available(macos 11.0, *)) {
+ /* Pass-through. Availability syntax requirement, expression cannot be negated. */
+ }
+ else {
+ source_with_header = [source_with_header_a
+ stringByAppendingString:[NSString stringWithFormat:@"\n\n#define MTL_CONTEXT_IND %d\n",
+ context_->context_id]];
+ }
+ [source_with_header retain];
+
+ /* Prepare Shader Library. */
+ NSError *error = nullptr;
+ id<MTLLibrary> library = [device newLibraryWithSource:source_with_header
+ options:options
+ error:&error];
+ if (error) {
+ /* Only exit out if genuine error and not warning. */
+ if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+ NSNotFound) {
+ NSLog(
+ @"Compile Error - Metal Shader Library (Stage: %d), error %@ \n", src_stage, error);
+ BLI_assert(false);
+
+          /* Release temporary compilation resources. */
+          delete shd_builder_;
+          shd_builder_ = nullptr;
+          return false;
+ }
+ }
+
+ MTL_LOG_INFO("Successfully compiled Metal Shader Library (Stage: %d) for shader; %s\n",
+ src_stage,
+ name);
+ BLI_assert(library != nil);
+ if (src_stage == 0) {
+ /* Retain generated library and assign debug name. */
+ shader_library_vert_ = library;
+ [shader_library_vert_ retain];
+ shader_library_vert_.label = [NSString stringWithUTF8String:this->name];
+ }
+ else {
+ /* Retain generated library for fragment shader and assign debug name. */
+ shader_library_frag_ = library;
+ [shader_library_frag_ retain];
+ shader_library_frag_.label = [NSString stringWithUTF8String:this->name];
+ }
+
+ [source_with_header autorelease];
+ }
+    /* Prepare descriptor. */
+    pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init];
+    [pso_descriptor_ retain];
+    pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
+
+ /* Shader has successfully been created. */
+ valid_ = true;
+
+ /* Prepare backing data storage for local uniforms. */
+ const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block();
+ if (push_constant_block.size > 0) {
+ push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
+ this->push_constant_bindstate_mark_dirty(true);
+ }
+ else {
+ push_constant_data_ = nullptr;
+ }
+ }
+
+  /* Release temporary compilation resources. */
+  delete shd_builder_;
+  shd_builder_ = nullptr;
+  return true;
+}
+
+void MTLShader::transform_feedback_names_set(Span<const char *> name_list,
+ const eGPUShaderTFBType geom_type)
+{
+ tf_output_name_list_.clear();
+ for (int i = 0; i < name_list.size(); i++) {
+ tf_output_name_list_.append(std::string(name_list[i]));
+ }
+ transform_feedback_type_ = geom_type;
+}
+
+bool MTLShader::transform_feedback_enable(GPUVertBuf *buf)
+{
+ BLI_assert(transform_feedback_type_ != GPU_SHADER_TFB_NONE);
+ BLI_assert(buf);
+ transform_feedback_active_ = true;
+ transform_feedback_vertbuf_ = buf;
+ /* TODO(Metal): Enable this assertion once MTLVertBuf lands. */
+ /*BLI_assert(static_cast<MTLVertBuf *>(unwrap(transform_feedback_vertbuf_))->get_usage_type() ==
+ GPU_USAGE_DEVICE_ONLY);*/
+ return true;
+}
+
+void MTLShader::transform_feedback_disable()
+{
+ transform_feedback_active_ = false;
+ transform_feedback_vertbuf_ = nullptr;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Shader Binding.
+ * \{ */
+
+void MTLShader::bind()
+{
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ if (interface == nullptr || !this->is_valid()) {
+ MTL_LOG_WARNING(
+ "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
+ "skipped.\n",
+ this->name_get());
+ }
+ ctx->pipeline_state.active_shader = this;
+}
+
+void MTLShader::unbind()
+{
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ ctx->pipeline_state.active_shader = nullptr;
+}
+
+void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data)
+{
+ BLI_assert(this);
+ if (!this->is_valid()) {
+ return;
+ }
+ MTLShaderInterface *mtl_interface = get_interface();
+ if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
+ MTL_LOG_WARNING("Uniform location %d is not valid in Shader %s\n", location, this->name_get());
+ return;
+ }
+
+ /* Fetch more information about uniform from interface. */
+ const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
+
+ /* Prepare to copy data into local shader push constant memory block. */
+ BLI_assert(push_constant_data_ != nullptr);
+ uint8_t *dest_ptr = (uint8_t *)push_constant_data_;
+ dest_ptr += uniform.byte_offset;
+ uint32_t copy_size = sizeof(float) * comp_len * array_size;
+
+  /* Test per-element size. It is valid to copy fewer array elements than the total, but each
+   * array element needs to match. */
+ uint32_t source_per_element_size = sizeof(float) * comp_len;
+ uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len;
+ BLI_assert_msg(
+ source_per_element_size <= dest_per_element_size,
+ "source Per-array-element size must be smaller than destination storage capacity for "
+ "that data");
+
+ if (source_per_element_size < dest_per_element_size) {
+ switch (uniform.type) {
+
+ /* Special case for handling 'vec3' array upload. */
+ case MTL_DATATYPE_FLOAT3: {
+ int numvecs = uniform.array_len;
+ uint8_t *data_c = (uint8_t *)data;
+
+ /* It is more efficient on the host to only modify data if it has changed.
+ * Data modifications are small, so memory comparison is cheap.
+ * If uniforms have remained unchanged, then we avoid both copying
+ * data into the local uniform struct, and upload of the modified uniform
+ * contents in the command stream. */
+ bool changed = false;
+ for (int i = 0; i < numvecs; i++) {
+ changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
+ if (changed) {
+ memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
+ }
+ data_c += sizeof(float) * 3;
+ dest_ptr += sizeof(float) * 4;
+ }
+ if (changed) {
+ this->push_constant_bindstate_mark_dirty(true);
+ }
+ return;
+ }
+
+ /* Special case for handling 'mat3' upload. */
+ case MTL_DATATYPE_FLOAT3x3: {
+ int numvecs = 3 * uniform.array_len;
+ uint8_t *data_c = (uint8_t *)data;
+
+ /* It is more efficient on the host to only modify data if it has changed.
+ * Data modifications are small, so memory comparison is cheap.
+ * If uniforms have remained unchanged, then we avoid both copying
+ * data into the local uniform struct, and upload of the modified uniform
+ * contents in the command stream. */
+ bool changed = false;
+ for (int i = 0; i < numvecs; i++) {
+ changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0);
+ if (changed) {
+ memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3);
+ }
+ data_c += sizeof(float) * 3;
+ dest_ptr += sizeof(float) * 4;
+ }
+ if (changed) {
+ this->push_constant_bindstate_mark_dirty(true);
+ }
+ return;
+ }
+ default:
+ shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type);
+ break;
+ }
+ }
+
+ /* Debug checks. */
+ BLI_assert_msg(
+ copy_size <= uniform.size_in_bytes,
+ "Size of provided uniform data is greater than size specified in Shader interface\n");
+
+ /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified
+ * local uniform data. */
+ bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0);
+ if (data_changed) {
+ this->push_constant_bindstate_mark_dirty(true);
+ memcpy((void *)dest_ptr, (void *)data, copy_size);
+ }
+}
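+
+/* A worked example of the padding handled above (illustrative; 'u_dir' is a
+ * placeholder name): a GLSL 'vec3 u_dir[2]' uniform occupies two float4 slots
+ * in the Metal push-constant block (32 bytes), while uniform_float() receives
+ * two tightly-packed float3 values (24 bytes). The copy loop therefore steps
+ * the source pointer by 12 bytes and the destination pointer by 16 bytes per
+ * element, leaving the padding component of each element untouched. */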
+
+void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data)
+{
+ BLI_assert(this);
+ if (!this->is_valid()) {
+ return;
+ }
+
+ /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in
+ * Metal, as we cannot point a texture binding at a different slot. */
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ if (location >= mtl_interface->get_total_uniforms() &&
+ location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures())) {
+ MTL_LOG_WARNING(
+ "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform "
+ "location %d)\n",
+ location);
+ return;
+ }
+
+ if (location < 0 || location >= mtl_interface->get_total_uniforms()) {
+ MTL_LOG_WARNING(
+ "Uniform is not valid at location %d - Shader %s\n", location, this->name_get());
+ return;
+ }
+
+ /* Fetch more information about uniform from interface. */
+ const MTLShaderUniform &uniform = mtl_interface->get_uniform(location);
+
+ /* Determine data location in uniform block. */
+ BLI_assert(push_constant_data_ != nullptr);
+ uint8_t *ptr = (uint8_t *)push_constant_data_;
+ ptr += uniform.byte_offset;
+
+  /* Copy data into local block. Only flag UBO as modified if data is different.
+   * This can avoid re-binding of unmodified local uniform data, reducing
+   * the total number of copy operations needed and data transfers between
+   * CPU and GPU. */
+ bool data_changed = (memcmp((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size) !=
+ 0);
+ if (data_changed) {
+ this->push_constant_bindstate_mark_dirty(true);
+ memcpy((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size);
+ }
+}
+
+bool MTLShader::get_push_constant_is_dirty()
+{
+ return push_constant_modified_;
+}
+
+void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
+{
+ push_constant_modified_ = is_dirty;
+}
+
+void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
+{
+ GPU_vertformat_clear(format);
+
+ const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
+ for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
+ const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
+
+ /* Extract type parameters from Metal type. */
+ GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
+ uint comp_len = comp_count_from_vert_format(attr.format);
+ GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
+
+ GPU_vertformat_attr_add(format,
+ mtl_interface->get_name_at_offset(attr.name_offset),
+ comp_type,
+ comp_len,
+ fetch_mode);
+ }
+}
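+
+/* Example (illustrative): for a shader interface listing a single attribute
+ * "pos" of format MTLVertexFormatFloat3, the extracted format is equivalent
+ * to:
+ *
+ *   GPU_vertformat_clear(format);
+ *   GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ */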
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name METAL Custom behaviour
+ * \{ */
+
+void MTLShader::set_vertex_function_name(NSString *vert_function_name)
+{
+ vertex_function_name_ = vert_function_name;
+}
+
+void MTLShader::set_fragment_function_name(NSString *frag_function_name)
+{
+ fragment_function_name_ = frag_function_name;
+}
+
+void MTLShader::shader_source_from_msl(NSString *input_vertex_source,
+ NSString *input_fragment_source)
+{
+ BLI_assert(shd_builder_ != nullptr);
+ shd_builder_->msl_source_vert_ = input_vertex_source;
+ shd_builder_->msl_source_frag_ = input_fragment_source;
+ shd_builder_->source_from_msl_ = true;
+}
+
+void MTLShader::set_interface(MTLShaderInterface *interface)
+{
+ /* Assign gpu::Shader superclass interface. */
+ Shader::interface = interface;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Bake Pipeline State Objects
+ * \{ */
+/* Bakes or fetches a pipeline state using the current
+ * MTLRenderPipelineStateDescriptor state.
+ *
+ * This state contains information on shader inputs/outputs, such
+ * as the vertex descriptor, used to control vertex assembly for
+ * current vertex data, and active render target information,
+ * describing the output attachment pixel formats.
+ *
+ * Other rendering parameters such as global point size, blend state, color mask
+ * etc. are also used. See mtl_shader.hh for the full MTLRenderPipelineStateDescriptor.
+ */
+MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
+ MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
+{
+ /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
+ * be thread-safe due to organisation of high-level renderer. If there are any issues, then
+ * access can be guarded as appropriate. */
+ BLI_assert(this);
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ BLI_assert(mtl_interface);
+ BLI_assert(this->is_valid());
+
+ /* NOTE(Metal): Vertex input assembly description will have been populated externally
+ * via MTLBatch or MTLImmediate during binding or draw. */
+
+ /* Resolve Context Framebuffer state. */
+ MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer();
+
+ /* Update global pipeline descriptor. */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor();
+
+ pipeline_descriptor.num_color_attachments = 0;
+ for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) {
+ MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment);
+
+ if (color_attachment.used) {
+      /* If SRGB is disabled and format is SRGB, use color data directly with no conversions
+       * between linear and SRGB. */
+ MTLPixelFormat mtl_format = gpu_texture_format_to_metal(
+ color_attachment.texture->format_get());
+ if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) {
+ mtl_format = MTLPixelFormatRGBA8Unorm;
+ }
+ pipeline_descriptor.color_attachment_format[attachment] = mtl_format;
+ }
+ else {
+ pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid;
+ }
+
+ pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0;
+ }
+ MTLAttachment depth_attachment = framebuffer->get_depth_attachment();
+ MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment();
+ pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ?
+ gpu_texture_format_to_metal(
+ depth_attachment.texture->format_get()) :
+ MTLPixelFormatInvalid;
+ pipeline_descriptor.stencil_attachment_format =
+ (stencil_attachment.used) ?
+ gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) :
+ MTLPixelFormatInvalid;
+
+ /* Resolve Context Pipeline State (required by PSO). */
+ pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask;
+ pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled;
+ pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op;
+ pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op;
+ pipeline_descriptor.dest_alpha_blend_factor = ctx->pipeline_state.dest_alpha_blend_factor;
+ pipeline_descriptor.dest_rgb_blend_factor = ctx->pipeline_state.dest_rgb_blend_factor;
+ pipeline_descriptor.src_alpha_blend_factor = ctx->pipeline_state.src_alpha_blend_factor;
+ pipeline_descriptor.src_rgb_blend_factor = ctx->pipeline_state.src_rgb_blend_factor;
+ pipeline_descriptor.point_size = ctx->pipeline_state.point_size;
+
+ /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */
+ bool requires_specific_topology_class = uses_mtl_array_index_ ||
+ prim_type == MTLPrimitiveTopologyClassPoint;
+ pipeline_descriptor.vertex_descriptor.prim_topology_class =
+ (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;
+
+ /* Check if current PSO exists in the cache. */
+ MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
+ MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
+ if (pipeline_state != nullptr) {
+ return pipeline_state;
+ }
+
+ shader_debug_printf("Baking new pipeline variant for shader: %s\n", this->name);
+
+ /* Generate new Render Pipeline State Object (PSO). */
+ @autoreleasepool {
+ /* Prepare Render Pipeline Descriptor. */
+
+ /* Setup function specialisation constants, used to modify and optimise
+ * generated code based on current render pipeline configuration. */
+ MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
+
+ /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
+ MTLRenderPipelineStateDescriptor &current_state = pipeline_descriptor;
+ MTLRenderPipelineDescriptor *desc = pso_descriptor_;
+ [desc reset];
+ pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
+
+    /* Offset the bind index for Uniform buffers such that they begin after the VBO
+     * buffer bind slots. MTL_uniform_buffer_base_index is passed as a function
+     * specialisation constant, customised per unique pipeline state permutation.
+     *
+     * NOTE: For binding point compaction, we could use the number of VBOs present
+     * in the current PSO configuration (current_state.vertex_descriptor.num_vert_buffers).
+     * However, it is more efficient to simply offset the uniform buffer base index to the
+     * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
+     * will align and avoid the requirement for additional binding. */
+ int MTL_uniform_buffer_base_index = GPU_BATCH_VBO_MAX_LEN;
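+
+    /* Illustrative slot layout under this scheme (assuming the default path;
+     * actual indices shift when SSBO vertex fetch or a null buffer is used):
+     *   buffer(0 .. GPU_BATCH_VBO_MAX_LEN - 1) -> vertex buffers (stage-in).
+     *   buffer(MTL_uniform_buffer_base_index)  -> push-constant block (UBO index 0).
+     *   buffer(base index + 1 onwards)         -> remaining uniform buffers. */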
+
+    /* Null buffer index is used if an attribute is not found in the
+     * bound VBO's VertexFormat. */
+ int null_buffer_index = current_state.vertex_descriptor.num_vert_buffers;
+ bool using_null_buffer = false;
+
+ if (this->get_uses_ssbo_vertex_fetch()) {
+      /* If using SSBO Vertex fetch mode, no vertex descriptor is required
+       * as we won't be using stage-in. */
+ desc.vertexDescriptor = nil;
+ desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified;
+
+ /* We want to offset the uniform buffer base to allow for sufficient VBO binding slots - We
+ * also require +1 slot for the Index buffer. */
+ MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1;
+ }
+ else {
+ for (const uint i : IndexRange(current_state.vertex_descriptor.num_attributes)) {
+
+ /* Metal backend attribute descriptor state. */
+ MTLVertexAttributeDescriptorPSO &attribute_desc =
+ current_state.vertex_descriptor.attributes[i];
+
+        /* Flag format conversion.
+         * In some cases, Metal cannot implicitly convert between data types.
+         * In these instances, the fetch mode 'GPUVertFetchMode' as provided in the vertex format
+         * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15.
+         *
+         * It is then the responsibility of the vertex shader to perform any necessary type
+         * casting.
+         *
+         * See mtl_shader.hh for more information. Relevant Metal API documentation:
+         * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc */
+ if (attribute_desc.format == MTLVertexFormatInvalid) {
+ MTL_LOG_WARNING(
+ "MTLShader: baking pipeline state for '%s'- expected input attribute at "
+ "index '%d' but none was specified in the current vertex state\n",
+ mtl_interface->get_name(),
+ i);
+
+ /* Write out null conversion constant if attribute unused. */
+ int MTL_attribute_conversion_mode = 0;
+ [values setConstantValue:&MTL_attribute_conversion_mode
+ type:MTLDataTypeInt
+ withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
+ continue;
+ }
+
+ int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode;
+ [values setConstantValue:&MTL_attribute_conversion_mode
+ type:MTLDataTypeInt
+ withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
+ if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT ||
+ MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT) {
+ shader_debug_printf(
+ "TODO(Metal): Shader %s needs to support internal format conversion\n",
+ mtl_interface->name);
+ }
+
+        /* Copy metal backend attribute descriptor state into PSO descriptor.
+         * NOTE: need to copy each element due to direct assignment restrictions. */
+ MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i];
+
+ mtl_attribute.format = attribute_desc.format;
+ mtl_attribute.offset = attribute_desc.offset;
+ mtl_attribute.bufferIndex = attribute_desc.buffer_index;
+ }
+
+ for (const uint i : IndexRange(current_state.vertex_descriptor.num_vert_buffers)) {
+ /* Metal backend state buffer layout. */
+ const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
+ current_state.vertex_descriptor.buffer_layouts[i];
+        /* Copy metal backend buffer layout state into PSO descriptor.
+         * NOTE: need to copy each element due to copying from internal
+         * backend descriptor to Metal API descriptor. */
+ MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i];
+
+ mtl_buf_layout.stepFunction = buf_layout.step_function;
+ mtl_buf_layout.stepRate = buf_layout.step_rate;
+ mtl_buf_layout.stride = buf_layout.stride;
+ }
+
+ /* Mark empty attribute conversion. */
+ for (int i = current_state.vertex_descriptor.num_attributes; i < GPU_VERT_ATTR_MAX_LEN;
+ i++) {
+ int MTL_attribute_conversion_mode = 0;
+ [values setConstantValue:&MTL_attribute_conversion_mode
+ type:MTLDataTypeInt
+ withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]];
+ }
+
+ /* DEBUG: Missing/empty attributes. */
+      /* Attributes are normally mapped as part of the state setting based on the used
+       * GPUVertFormat, however, if attributes have not been set, we can sort them out here. */
+ for (const uint i : IndexRange(mtl_interface->get_total_attributes())) {
+ const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i);
+ MTLVertexAttributeDescriptor *current_attribute = desc.vertexDescriptor.attributes[i];
+
+ if (current_attribute.format == MTLVertexFormatInvalid) {
+#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
+ MTL_LOG_INFO("-> Filling in unbound attribute '%s' for shader PSO '%s' \n",
+ attribute.name,
+ mtl_interface->name);
+#endif
+ current_attribute.format = attribute.format;
+ current_attribute.offset = 0;
+ current_attribute.bufferIndex = null_buffer_index;
+
+ /* Add Null vert buffer binding for invalid attributes. */
+ if (!using_null_buffer) {
+ MTLVertexBufferLayoutDescriptor *null_buf_layout =
+ desc.vertexDescriptor.layouts[null_buffer_index];
+
+ /* Use constant step function such that null buffer can
+ * contain just a singular dummy attribute. */
+ null_buf_layout.stepFunction = MTLVertexStepFunctionConstant;
+ null_buf_layout.stepRate = 0;
+ null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size);
+
+ /* If we are using the maximum number of vertex buffers, or tight binding indices,
+ * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer
+ * index. */
+ if (null_buffer_index >= MTL_uniform_buffer_base_index) {
+ MTL_uniform_buffer_base_index = null_buffer_index + 1;
+ }
+ using_null_buffer = true;
+#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
+ MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d\n",
+ null_buffer_index);
+#endif
+ }
+ }
+ }
+
+ /* Primitive Topology */
+ desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class;
+ }
+
+ /* Update constant value for 'MTL_uniform_buffer_base_index' */
+ [values setConstantValue:&MTL_uniform_buffer_base_index
+ type:MTLDataTypeInt
+ withName:@"MTL_uniform_buffer_base_index"];
+
+ /* Transform feedback constant */
+ int MTL_transform_feedback_buffer_index = (this->transform_feedback_type_ !=
+ GPU_SHADER_TFB_NONE) ?
+ MTL_uniform_buffer_base_index +
+ mtl_interface->get_total_uniform_blocks() :
+ -1;
+ if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
+ [values setConstantValue:&MTL_transform_feedback_buffer_index
+ type:MTLDataTypeInt
+ withName:@"MTL_transform_feedback_buffer_index"];
+ }
+
+ /* gl_PointSize constant */
+ bool null_pointsize = true;
+ float MTL_pointsize = pipeline_descriptor.point_size;
+ if (pipeline_descriptor.vertex_descriptor.prim_topology_class ==
+ MTLPrimitiveTopologyClassPoint) {
+    /* If the point size is > 0.0, PROGRAM_POINT_SIZE is enabled and the gl_PointSize shader
+     * keyword overrides the value. Otherwise, if < 0.0, use the global constant point size. */
+ if (MTL_pointsize < 0.0) {
+ MTL_pointsize = fabsf(MTL_pointsize);
+ [values setConstantValue:&MTL_pointsize
+ type:MTLDataTypeFloat
+ withName:@"MTL_global_pointsize"];
+ null_pointsize = false;
+ }
+ }
+
+ if (null_pointsize) {
+ MTL_pointsize = 0.0f;
+ [values setConstantValue:&MTL_pointsize
+ type:MTLDataTypeFloat
+ withName:@"MTL_global_pointsize"];
+ }
+
+ /* Compile functions */
+ NSError *error = nullptr;
+ desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_
+ constantValues:values
+ error:&error];
+ if (error) {
+ NSLog(@"Compile Error - Metal Shader vertex function, error %@", error);
+
+ /* Only exit out if genuine error and not warning */
+ if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+ NSNotFound) {
+ BLI_assert(false);
+ return nullptr;
+ }
+ }
+
+    /* If transform feedback is used, the pipeline is a vertex-only stage; only compile a
+     * fragment function when transform feedback is disabled. */
+ if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
+ desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_
+ constantValues:values
+ error:&error];
+ if (error) {
+ NSLog(@"Compile Error - Metal Shader fragment function, error %@", error);
+
+ /* Only exit out if genuine error and not warning */
+ if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+ NSNotFound) {
+ BLI_assert(false);
+ return nullptr;
+ }
+ }
+ }
+ else {
+ desc.fragmentFunction = nil;
+ desc.rasterizationEnabled = false;
+ }
+
+ /* Setup pixel format state */
+ for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
+ color_attachment++) {
+ /* Fetch colour attachment pixel format in backend pipeline state. */
+ MTLPixelFormat pixel_format = current_state.color_attachment_format[color_attachment];
+ /* Populate MTL API PSO attachment descriptor. */
+ MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
+ desc.colorAttachments[color_attachment];
+
+ col_attachment.pixelFormat = pixel_format;
+ if (pixel_format != MTLPixelFormatInvalid) {
+ bool format_supports_blending = mtl_format_supports_blending(pixel_format);
+
+ col_attachment.writeMask = current_state.color_write_mask;
+ col_attachment.blendingEnabled = current_state.blending_enabled &&
+ format_supports_blending;
+ if (format_supports_blending && current_state.blending_enabled) {
+ col_attachment.alphaBlendOperation = current_state.alpha_blend_op;
+ col_attachment.rgbBlendOperation = current_state.rgb_blend_op;
+ col_attachment.destinationAlphaBlendFactor = current_state.dest_alpha_blend_factor;
+ col_attachment.destinationRGBBlendFactor = current_state.dest_rgb_blend_factor;
+ col_attachment.sourceAlphaBlendFactor = current_state.src_alpha_blend_factor;
+ col_attachment.sourceRGBBlendFactor = current_state.src_rgb_blend_factor;
+ }
+ else {
+ if (current_state.blending_enabled && !format_supports_blending) {
+ shader_debug_printf(
+ "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
+ "blending\n",
+ *((int *)&pixel_format));
+ }
+ }
+ }
+ }
+ desc.depthAttachmentPixelFormat = current_state.depth_attachment_format;
+ desc.stencilAttachmentPixelFormat = current_state.stencil_attachment_format;
+
+ /* Compile PSO */
+
+ MTLAutoreleasedRenderPipelineReflection reflection_data;
+ id<MTLRenderPipelineState> pso = [ctx->device
+ newRenderPipelineStateWithDescriptor:desc
+ options:MTLPipelineOptionBufferTypeInfo
+ reflection:&reflection_data
+ error:&error];
+ if (error) {
+ NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error);
+ BLI_assert(false);
+ return nullptr;
+ }
+ else if (!pso) {
+ NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name);
+ BLI_assert(false);
+ return nullptr;
+ }
+ else {
+ NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
+ }
+
+ /* Prepare pipeline state instance. */
+ MTLRenderPipelineStateInstance *pso_inst = new MTLRenderPipelineStateInstance();
+ pso_inst->vert = desc.vertexFunction;
+ pso_inst->frag = desc.fragmentFunction;
+ pso_inst->pso = pso;
+ pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
+ pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
+ pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
+ pso_inst->shader_pso_index = pso_cache_.size();
+
+ pso_inst->reflection_data_available = (reflection_data != nil);
+ if (reflection_data != nil) {
+
+    /* Extract shader reflection data for buffer bindings.
+     * This reflection data is used to compare the binding information
+     * we know about in the interface against the bindings in the finalized
+     * PSO. This accounts for bindings which have been stripped out during
+     * optimisation, and allows us to both avoid over-binding and also
+     * to verify size-correctness for bindings, ensuring that bound buffers
+     * are not smaller than the size of the expected data. */
+ NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments];
+
+ pso_inst->buffer_bindings_reflection_data_vert.clear();
+ int buffer_binding_max_ind = 0;
+
+ for (int i = 0; i < [vert_args count]; i++) {
+ MTLArgument *arg = [vert_args objectAtIndex:i];
+ if ([arg type] == MTLArgumentTypeBuffer) {
+ int buf_index = [arg index] - MTL_uniform_buffer_base_index;
+ if (buf_index >= 0) {
+ buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
+ }
+ }
+ }
+ pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1);
+ for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
+ pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false};
+ }
+
+ for (int i = 0; i < [vert_args count]; i++) {
+ MTLArgument *arg = [vert_args objectAtIndex:i];
+ if ([arg type] == MTLArgumentTypeBuffer) {
+ int buf_index = [arg index] - MTL_uniform_buffer_base_index;
+
+ if (buf_index >= 0) {
+ pso_inst->buffer_bindings_reflection_data_vert[buf_index] = {
+ (uint32_t)([arg index]),
+ (uint32_t)([arg bufferDataSize]),
+ (uint32_t)([arg bufferAlignment]),
+ ([arg isActive] == YES) ? true : false};
+ }
+ }
+ }
+
+ NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments];
+
+ pso_inst->buffer_bindings_reflection_data_frag.clear();
+ buffer_binding_max_ind = 0;
+
+ for (int i = 0; i < [frag_args count]; i++) {
+ MTLArgument *arg = [frag_args objectAtIndex:i];
+ if ([arg type] == MTLArgumentTypeBuffer) {
+ int buf_index = [arg index] - MTL_uniform_buffer_base_index;
+ if (buf_index >= 0) {
+ buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index);
+ }
+ }
+ }
+ pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1);
+ for (int i = 0; i < buffer_binding_max_ind + 1; i++) {
+ pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false};
+ }
+
+ for (int i = 0; i < [frag_args count]; i++) {
+ MTLArgument *arg = [frag_args objectAtIndex:i];
+ if ([arg type] == MTLArgumentTypeBuffer) {
+ int buf_index = [arg index] - MTL_uniform_buffer_base_index;
+ shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]);
+ if (buf_index >= 0) {
+ pso_inst->buffer_bindings_reflection_data_frag[buf_index] = {
+ (uint32_t)([arg index]),
+ (uint32_t)([arg bufferDataSize]),
+ (uint32_t)([arg bufferAlignment]),
+ ([arg isActive] == YES) ? true : false};
+ }
+ }
+ }
+ }
+
+ [pso_inst->vert retain];
+ [pso_inst->frag retain];
+ [pso_inst->pso retain];
+
+ /* Insert into pso cache. */
+ pso_cache_.add(pipeline_descriptor, pso_inst);
+ shader_debug_printf("PSO CACHE: Stored new variant in PSO cache for shader '%s'\n",
+ this->name);
+ return pso_inst;
+ }
+}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name SSBO-vertex-fetch-mode attribute control.
+ * \{ */
+
+int MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type)
+{
+ switch (attribute_type) {
+ case MTLVertexFormatFloat:
+ return GPU_SHADER_ATTR_TYPE_FLOAT;
+ case MTLVertexFormatInt:
+ return GPU_SHADER_ATTR_TYPE_INT;
+ case MTLVertexFormatUInt:
+ return GPU_SHADER_ATTR_TYPE_UINT;
+ case MTLVertexFormatShort:
+ return GPU_SHADER_ATTR_TYPE_SHORT;
+ case MTLVertexFormatUChar:
+ return GPU_SHADER_ATTR_TYPE_CHAR;
+ case MTLVertexFormatUChar2:
+ return GPU_SHADER_ATTR_TYPE_CHAR2;
+ case MTLVertexFormatUChar3:
+ return GPU_SHADER_ATTR_TYPE_CHAR3;
+ case MTLVertexFormatUChar4:
+ return GPU_SHADER_ATTR_TYPE_CHAR4;
+ case MTLVertexFormatFloat2:
+ return GPU_SHADER_ATTR_TYPE_VEC2;
+ case MTLVertexFormatFloat3:
+ return GPU_SHADER_ATTR_TYPE_VEC3;
+ case MTLVertexFormatFloat4:
+ return GPU_SHADER_ATTR_TYPE_VEC4;
+ case MTLVertexFormatUInt2:
+ return GPU_SHADER_ATTR_TYPE_UVEC2;
+ case MTLVertexFormatUInt3:
+ return GPU_SHADER_ATTR_TYPE_UVEC3;
+ case MTLVertexFormatUInt4:
+ return GPU_SHADER_ATTR_TYPE_UVEC4;
+ case MTLVertexFormatInt2:
+ return GPU_SHADER_ATTR_TYPE_IVEC2;
+ case MTLVertexFormatInt3:
+ return GPU_SHADER_ATTR_TYPE_IVEC3;
+ case MTLVertexFormatInt4:
+ return GPU_SHADER_ATTR_TYPE_IVEC4;
+ case MTLVertexFormatUCharNormalized:
+ return GPU_SHADER_ATTR_TYPE_UCHAR_NORM;
+ case MTLVertexFormatUChar2Normalized:
+ return GPU_SHADER_ATTR_TYPE_UCHAR2_NORM;
+ case MTLVertexFormatUChar3Normalized:
+ return GPU_SHADER_ATTR_TYPE_UCHAR3_NORM;
+ case MTLVertexFormatUChar4Normalized:
+ return GPU_SHADER_ATTR_TYPE_UCHAR4_NORM;
+ case MTLVertexFormatInt1010102Normalized:
+ return GPU_SHADER_ATTR_TYPE_INT1010102_NORM;
+ case MTLVertexFormatShort3Normalized:
+ return GPU_SHADER_ATTR_TYPE_SHORT3_NORM;
+ default:
+ BLI_assert_msg(false,
+ "Not yet supported attribute type for SSBO vertex fetch -- Add entry "
+ "GPU_SHADER_ATTR_TYPE_** to shader defines, and in this table");
+ return -1;
+ }
+ return -1;
+}
+
+void MTLShader::ssbo_vertex_fetch_bind_attributes_begin()
+{
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ ssbo_vertex_attribute_bind_active_ = true;
+ ssbo_vertex_attribute_bind_mask_ = (1 << mtl_interface->get_total_attributes()) - 1;
+
+ /* Reset tracking of actively used vbo bind slots for ssbo vertex fetch mode. */
+ for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+ ssbo_vbo_slot_used_[i] = false;
+ }
+}
+
+void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr)
+{
+ /* Fetch attribute. */
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
+ ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
+
+ /* Update bind-mask to verify this attribute has been used. */
+ BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
+ (1 << ssbo_attr.mtl_attribute_index) &&
+ "Attribute has already been bound");
+ ssbo_vertex_attribute_bind_mask_ &= ~(1 << ssbo_attr.mtl_attribute_index);
+
+ /* Fetch attribute uniform addresses from cache. */
+ ShaderSSBOAttributeBinding &cached_ssbo_attribute =
+ cached_ssbo_attribute_bindings_[ssbo_attr.mtl_attribute_index];
+ BLI_assert(cached_ssbo_attribute.attribute_index >= 0);
+
+ /* Write attribute descriptor properties to shader uniforms. */
+ this->uniform_int(cached_ssbo_attribute.uniform_offset, 1, 1, &ssbo_attr.attribute_offset);
+ this->uniform_int(cached_ssbo_attribute.uniform_stride, 1, 1, &ssbo_attr.per_vertex_stride);
+ int inst_val = (ssbo_attr.is_instance ? 1 : 0);
+ this->uniform_int(cached_ssbo_attribute.uniform_fetchmode, 1, 1, &inst_val);
+ this->uniform_int(cached_ssbo_attribute.uniform_vbo_id, 1, 1, &ssbo_attr.vbo_id);
+ BLI_assert(ssbo_attr.attribute_format >= 0);
+ this->uniform_int(cached_ssbo_attribute.uniform_attr_type, 1, 1, &ssbo_attr.attribute_format);
+ ssbo_vbo_slot_used_[ssbo_attr.vbo_id] = true;
+}
+
+void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder)
+{
+ ssbo_vertex_attribute_bind_active_ = false;
+
+ /* If our mask is non-zero, we have unassigned attributes. */
+ if (ssbo_vertex_attribute_bind_mask_ != 0) {
+ MTLShaderInterface *mtl_interface = this->get_interface();
+
+ /* Determine if there is a free slot we can bind the null buffer to -- We should have at
+ * least ONE free slot in this instance. */
+ int null_attr_buffer_slot = -1;
+ for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+ if (!ssbo_vbo_slot_used_[i]) {
+ null_attr_buffer_slot = i;
+ break;
+ }
+ }
+ BLI_assert_msg(null_attr_buffer_slot >= 0,
+ "No suitable bind location for a NULL buffer was found");
+
+ for (int i = 0; i < mtl_interface->get_total_attributes(); i++) {
+ if (ssbo_vertex_attribute_bind_mask_ & (1 << i)) {
+ const MTLShaderInputAttribute *mtl_shader_attribute = &mtl_interface->get_attribute(i);
+#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
+ MTL_LOG_WARNING(
+ "SSBO Vertex Fetch missing attribute with index: %d. Shader: %s, Attr "
+ "Name: "
+ "%s - Null buffer bound\n",
+ i,
+ this->name_get(),
+ mtl_shader_attribute->name);
+#endif
+ /* Bind Attribute with NULL buffer index and stride zero (for constant access). */
+ MTLSSBOAttribute ssbo_attr(
+ i, null_attr_buffer_slot, 0, 0, GPU_SHADER_ATTR_TYPE_FLOAT, false);
+ ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ MTL_LOG_WARNING(
+ "Unassigned Shader attribute: %s, Attr Name: %s -- Binding NULL BUFFER to "
+ "slot %d\n",
+ this->name_get(),
+ mtl_interface->get_name_at_offset(mtl_shader_attribute->name_offset),
+ null_attr_buffer_slot);
+ }
+ }
+
+ /* Bind NULL buffer to given VBO slot. */
+ MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+ id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
+ BLI_assert(null_buf);
+
+ MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();
+ rps.bind_vertex_buffer(null_buf, 0, null_attr_buffer_slot);
+ }
+}
+
+GPUVertBuf *MTLShader::get_transform_feedback_active_buffer()
+{
+ if (transform_feedback_type_ == GPU_SHADER_TFB_NONE || !transform_feedback_active_) {
+ return nullptr;
+ }
+ return transform_feedback_vertbuf_;
+}
+
+bool MTLShader::has_transform_feedback_varying(std::string str)
+{
+ if (this->transform_feedback_type_ == GPU_SHADER_TFB_NONE) {
+ return false;
+ }
+
+ return (std::find(tf_output_name_list_.begin(), tf_output_name_list_.end(), str) !=
+ tf_output_name_list_.end());
+}
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh
new file mode 100644
index 00000000000..c71504b84b7
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_generator.hh
@@ -0,0 +1,724 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "gpu_shader_create_info.hh"
+#include "gpu_shader_private.hh"
+
+/** -- Metal Shader Generator for GLSL -> MSL conversion --
+ *
+ * The Metal shader generator class is used as a conversion utility for generating
+ * a compatible MSL shader from a source GLSL shader. There are several steps
+ * involved in creating a shader, and structural changes which enable the source
+ * to function in the same way.
+ *
+ * 1) Extraction and conversion of the shader's inputs and outputs to their
+ *    Metal-compatible versions. This is a subtle data transformation from
+ *    GPUShaderCreateInfo, allowing for Metal-specific parameters.
+ *
+ * 2) Determine usage of shader features such as GL global variables, depth write output,
+ *    clip distances, multilayered rendering, barycentric coordinates, etc.
+ *
+ * 3) Generate MSL shader.
+ *
+ * 4) Populate MTLShaderInterface, describing input/output structure, bind-points, buffer
+ *    size and alignment, shader feature usage, etc.: everything required by the Metal
+ *    backend to successfully enable use of shaders and GPU backend features.
+ *
+ * For each shading stage, we generate an MSL shader following these steps:
+ *
+ * 1) Output custom shader defines describing modes e.g. whether we are using
+ * sampler bindings or argument buffers; at the top of the shader.
+ *
+ * 2) Inject common Metal headers.
+ * - mtl_shader_defines.msl is used to map GLSL functions to MSL.
+ * - mtl_shader_common.msl is added to ALL MSL shaders to provide
+ * common functionality required by the backend. This primarily
+ * contains function-constant hooks, used in PSO generation.
+ *
+ * 3) Create a class scope which wraps the GLSL shader. This is used to
+ *    create a global per-thread scope around the shader source, allowing
+ *    access to common shader members (GLSL globals, shader inputs/outputs, etc.).
+ *
+ * 4) Generate shader interface structs and populate local members where required for:
+ * - VertexInputs
+ * - VertexOutputs
+ * - Uniforms
+ * - Uniform Blocks
+ *    - Textures, etc.
+ *
+ * 5) Inject GLSL source.
+ *
+ * 6) Generate MSL shader entry point function. Every Metal shader must have a
+ * vertex/fragment/kernel entrypoint, which contains the function binding table.
+ * This is where bindings are specified and passed into the shader.
+ *
+ * For converted shaders, the MSL entry-point will also instantiate a shader
+ * class per thread, and pass over bound resource references into the class.
+ *
+ *    Finally, the shader's "main()" method will be called, and outputs are copied.
+ *
+ * Note: For position outputs, the default output position will be converted to
+ * the Metal coordinate space, which involves flipping the Y coordinate and
+ * re-mapping the depth range between 0 and 1, as with Vulkan.
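+ *
+ * A minimal sketch of that conversion (illustrative; the exact generated code may differ):
+ *
+ *   out.pos.y = -out.pos.y;                    // Flip Y for Metal's coordinate convention.
+ *   out.pos.z = (out.pos.z + out.pos.w) / 2.0; // Remap clip-space depth [-w,w] -> [0,w].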
+ *
+ *
+ * The final shader structure looks as follows:
+ *
+ * -- Shader defines --
+ * #define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 0
+ * ... etc ...;
+ *
+ * class MetalShaderVertexImp {
+ *
+ * -- Common shader interface structs --
+ * struct VertexIn {
+ * vec4 pos [[attribute(0)]]
+ * }
+ * struct VertexOut {...}
+ * struct PushConstantBlock {...}
+ * struct drw_Globals {...}
+ * ...
+ *
+ * -- GLSL source code --
+ * ...
+ * };
+ *
+ * vertex MetalShaderVertexImp::VertexOut vertex_function_entry(
+ * MetalShaderVertexImp::VertexIn v_in [[stage_in]],
+ * constant PushConstantBlock& globals [[buffer(MTL_uniform_buffer_base_index)]]) {
+ *
+ * MetalShaderVertexImp impl;
+ * -- Copy input members into impl instance --
+ * -- Execute GLSL main function --
+ * impl.main();
+ *
+ * -- Copy outputs and return --
+ *    MetalShaderVertexImp::VertexOut v_out;
+ *    v_out.pos = impl.pos;
+ *    -- transform position to Metal coordinate system --
+ *    return v_out;
+ * }
+ *
+ * -- SSBO-vertex-fetchmode --
+ *
+ * SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
+ * as buffers in the shader, rather than using the VertexDescriptor and [[stage_in]] vertex
+ * assembly.
+ *
+ * The purpose of this mode is to enable random-access reading of all vertex data. This is
+ * particularly useful for efficiently converting geometry shaders to Metal shading language,
+ * as these techniques are not supported natively in Metal.
+ *
+ * Geometry shaders can be re-created by firing off a vertex shader with the desired number of
+ * total output vertices. Each vertex can then read whichever input attributes it needs to
+ * achieve the output result.
+ * This manual reading is also used to provide support for GPU_provoking_vertex, wherein the
+ * output vertex for flat shading needs to change. In these cases, the manual vertex assembly
+ * can flip which vertices are read within the primitive.
+ *
+ * From an efficiency perspective, this is more GPU-friendly than geometry shading, due to
+ * improved parallelism throughout the whole pipeline; for Apple hardware specifically, there is
+ * no significant performance loss from manual vertex assembly vs under-the-hood assembly.
+ *
+ * This mode works by passing the required vertex descriptor information into the shader
+ * as uniform data, describing the type, stride, offset, stepmode and buffer index of each
+ * attribute, such that the shader ssbo-vertex-fetch utility functions know how to extract data.
+ *
+ * This also works with indexed rendering, by similarly binding the index buffer as a
+ * manually-fetched buffer.
+ *
+ * When this mode is used, the code generation and shader interface generation vary to
+ * accommodate the required features.
+ *
+ * This mode can be enabled in a shader with:
+ *
+ * `#pragma USE_SSBO_VERTEX_FETCH(TriangleList/LineList, output_vertex_count_per_input_primitive)`
+ *
+ * This mirrors the geometry shader interface `layout(triangle_strip, max_vertices = 3) out;`
+ */
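+
+/* E.g. (illustrative): a shader expanding each input line into a quad (two triangles)
+ * would declare `#pragma USE_SSBO_VERTEX_FETCH(TriangleList, 6)`, i.e. six output
+ * vertices per input primitive. */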
+
+/* SSBO vertex fetch attribute uniform parameter names.
+ * These uniforms are used to pass the information
+ * required to perform manual vertex assembly within
+ * the vertex shader.
+ * Each vertex attribute requires a number of properties
+ * in order to correctly extract data from the bound vertex
+ * buffers. */
+#ifndef NDEBUG
+/* Global. */
+# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "uniform_ssbo_uses_indexed_rendering"
+# define UNIFORM_SSBO_INDEX_MODE_U16_STR "uniform_ssbo_index_mode_u16"
+# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "uniform_ssbo_input_prim_type"
+# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "uniform_ssbo_input_vert_count"
+/* Per-attribute. */
+# define UNIFORM_SSBO_OFFSET_STR "uniform_ssbo_offset_"
+# define UNIFORM_SSBO_STRIDE_STR "uniform_ssbo_stride_"
+# define UNIFORM_SSBO_FETCHMODE_STR "uniform_ssbo_fetchmode_"
+# define UNIFORM_SSBO_VBO_ID_STR "uniform_ssbo_vbo_id_"
+# define UNIFORM_SSBO_TYPE_STR "uniform_ssbo_type_"
+#else
+/* Global. */
+# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "_ir"
+# define UNIFORM_SSBO_INDEX_MODE_U16_STR "_mu"
+# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "_pt"
+# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "_vc"
+/* Per-attribute. */
+# define UNIFORM_SSBO_OFFSET_STR "_so"
+# define UNIFORM_SSBO_STRIDE_STR "_ss"
+# define UNIFORM_SSBO_FETCHMODE_STR "_sf"
+# define UNIFORM_SSBO_VBO_ID_STR "_sv"
+# define UNIFORM_SSBO_TYPE_STR "_st"
+#endif
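+
+/* E.g. for a vertex attribute named `pos` (illustrative), the generated uniforms are
+ * `uniform_ssbo_offset_pos`, `uniform_ssbo_stride_pos`, `uniform_ssbo_fetchmode_pos`,
+ * `uniform_ssbo_vbo_id_pos` and `uniform_ssbo_type_pos` in debug builds, or the abbreviated
+ * `_so`/`_ss`/`_sf`/`_sv`/`_st` prefixed forms in release builds. */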
+
+namespace blender::gpu {
+
+struct MSLUniform {
+ shader::Type type;
+ std::string name;
+ bool is_array;
+ int array_elems;
+ ShaderStage stage;
+
+ MSLUniform(shader::Type uniform_type,
+ std::string uniform_name,
+ bool is_array_type,
+ uint32_t num_elems = 1)
+ : type(uniform_type), name(uniform_name), is_array(is_array_type), array_elems(num_elems)
+ {
+ }
+
+ bool operator==(const MSLUniform &right) const
+ {
+ return (type == right.type && name == right.name && is_array == right.is_array &&
+ array_elems == right.array_elems);
+ }
+};
+
+struct MSLUniformBlock {
+ std::string type_name;
+ std::string name;
+ ShaderStage stage;
+ bool is_array;
+
+ bool operator==(const MSLUniformBlock &right) const
+ {
+ return (type_name == right.type_name && name == right.name);
+ }
+};
+
+enum MSLTextureSamplerAccess {
+ TEXTURE_ACCESS_NONE = 0,
+ TEXTURE_ACCESS_SAMPLE,
+ TEXTURE_ACCESS_READ,
+ TEXTURE_ACCESS_WRITE,
+ TEXTURE_ACCESS_READWRITE,
+};
+
+struct MSLTextureSampler {
+ ShaderStage stage;
+ shader::ImageType type;
+ std::string name;
+ MSLTextureSamplerAccess access;
+ uint location;
+
+ eGPUTextureType get_texture_binding_type() const;
+
+ void resolve_binding_indices();
+
+ MSLTextureSampler(ShaderStage in_stage,
+ shader::ImageType in_sampler_type,
+ std::string in_sampler_name,
+ MSLTextureSamplerAccess in_access,
+ uint in_location)
+ : stage(in_stage),
+ type(in_sampler_type),
+ name(in_sampler_name),
+ access(in_access),
+ location(in_location)
+ {
+ }
+
+ bool operator==(const MSLTextureSampler &right) const
+ {
+ /* We do not compare stage as we want to avoid duplication of resources used across multiple
+ * stages. */
+ return (type == right.type && name == right.name && access == right.access);
+ }
+
+ std::string get_msl_access_str() const
+ {
+ switch (access) {
+ case TEXTURE_ACCESS_SAMPLE:
+ return "access::sample";
+ case TEXTURE_ACCESS_READ:
+ return "access::read";
+ case TEXTURE_ACCESS_WRITE:
+ return "access::write";
+ case TEXTURE_ACCESS_READWRITE:
+ return "access::read_write";
+ default:
+ BLI_assert(false);
+ return "";
+ }
+ return "";
+ }
+
+ /* Get typestring for wrapped texture class members.
+ * wrapper struct type contains combined texture and sampler, templated
+ * against the texture type.
+ * See `COMBINED_SAMPLER_TYPE` in `mtl_shader_defines.msl`. */
+ std::string get_msl_typestring_wrapper(bool is_addr) const
+ {
+ std::string str;
+ str = this->get_msl_wrapper_type_str() + "<" + this->get_msl_return_type_str() + "," +
+ this->get_msl_access_str() + ">" + ((is_addr) ? "* " : " ") + this->name;
+ return str;
+ }
+
+ /* Get raw texture typestring -- used in entry-point function argument table. */
+ std::string get_msl_typestring(bool is_addr) const
+ {
+ std::string str;
+ str = this->get_msl_texture_type_str() + "<" + this->get_msl_return_type_str() + "," +
+ this->get_msl_access_str() + ">" + ((is_addr) ? "* " : " ") + this->name;
+ return str;
+ }
+
+ std::string get_msl_return_type_str() const;
+ std::string get_msl_texture_type_str() const;
+ std::string get_msl_wrapper_type_str() const;
+};
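+
+/* E.g. (illustrative): for a 2D float sampler named `tex` with sample access,
+ * `get_msl_typestring(false)` yields a string of the form
+ * `texture2d<float,access::sample> tex`, matching Metal's argument-table syntax. */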
+
+struct MSLVertexInputAttribute {
+ /* layout_location of -1 means unspecified and will
+ * be populated manually. */
+ int layout_location;
+ shader::Type type;
+ std::string name;
+
+ bool operator==(const MSLVertexInputAttribute &right) const
+ {
+ return (layout_location == right.layout_location && type == right.type && name == right.name);
+ }
+};
+
+struct MSLVertexOutputAttribute {
+ std::string type;
+ std::string name;
+ /* Instance name specified if attributes belong to a struct. */
+ std::string instance_name;
+ /* Interpolation qualifier can be any of smooth (default), flat, no_perspective. */
+ std::string interpolation_qualifier;
+ bool is_array;
+ int array_elems;
+
+ bool operator==(const MSLVertexOutputAttribute &right) const
+ {
+ return (type == right.type && name == right.name &&
+ interpolation_qualifier == right.interpolation_qualifier &&
+ is_array == right.is_array && array_elems == right.array_elems);
+ }
+ std::string get_mtl_interpolation_qualifier() const
+ {
+ if (interpolation_qualifier == "" || interpolation_qualifier == "smooth") {
+ return "";
+ }
+ else if (interpolation_qualifier == "flat") {
+ return " [[flat]]";
+ }
+ else if (interpolation_qualifier == "noperspective") {
+ return " [[center_no_perspective]]";
+ }
+ return "";
+ }
+};
+
+struct MSLFragmentOutputAttribute {
+ /* Explicit output binding location N for [[color(N)]] -1 = unspecified. */
+ int layout_location;
+ /* Output index for dual source blending. -1 = unspecified. */
+ int layout_index;
+ shader::Type type;
+ std::string name;
+
+ bool operator==(const MSLFragmentOutputAttribute &right) const
+ {
+ return (layout_location == right.layout_location && type == right.type && name == right.name &&
+ layout_index == right.layout_index);
+ }
+};
+
+class MSLGeneratorInterface {
+ static char *msl_patch_default;
+
+ public:
+ /** Shader stage input/output binding information.
+ * Derived from shader source reflection or GPUShaderCreateInfo. */
+ blender::Vector<MSLUniformBlock> uniform_blocks;
+ blender::Vector<MSLUniform> uniforms;
+ blender::Vector<MSLTextureSampler> texture_samplers;
+ blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
+ blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings;
+ /* Should match vertex outputs, but defined separately as
+ * some shader permutations will not utilise all inputs/outputs.
+ * Final shader uses the intersection between the two sets. */
+ blender::Vector<MSLVertexOutputAttribute> fragment_input_varyings;
+ blender::Vector<MSLFragmentOutputAttribute> fragment_outputs;
+ /* Transform feedback interface. */
+ blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings_tf;
+ /* Clip Distances. */
+ blender::Vector<std::string> clip_distances;
+
+ /** GL Global usage. */
+ /* Whether GL position is used, or an alternative vertex output should be the default. */
+ bool uses_gl_Position;
+ /* Whether gl_FragColor is used, or whether an alternative fragment output
+ * should be the default. */
+ bool uses_gl_FragColor;
+ /* Whether gl_PointCoord is used in the fragment shader. If so,
+ * we define float2 gl_PointCoord [[point_coord]]. */
+ bool uses_gl_PointCoord;
+ /* Writes out to gl_PointSize in the vertex shader output. */
+ bool uses_gl_PointSize;
+ bool uses_gl_VertexID;
+ bool uses_gl_InstanceID;
+ bool uses_gl_BaseInstanceARB;
+ bool uses_gl_FrontFacing;
+  /* Whether gl_FragDepth is written to in the fragment shader. */
+  bool uses_gl_FragDepth;
+  /* Sets the output render-target array index when using multilayered rendering. */
+  bool uses_mtl_array_index_;
+ bool uses_transform_feedback;
+ bool uses_barycentrics;
+
+ /* Parameters. */
+ shader::DepthWrite depth_write;
+
+ /* Shader buffer bind indices for argument buffers. */
+ int sampler_argument_buffer_bind_index[2] = {-1, -1};
+
+ /*** SSBO Vertex fetch mode. ***/
+  /* Indicates whether to pass in vertex buffers as regular buffers, instead of using vertex
+   * assembly in the PSO descriptor. Enabled with a special pragma. */
+ bool uses_ssbo_vertex_fetch_mode;
+
+ private:
+ /* Parent shader instance. */
+ MTLShader &parent_shader_;
+
+ /* If prepared from Create info. */
+ const shader::ShaderCreateInfo *create_info_;
+
+ public:
+ MSLGeneratorInterface(MTLShader &shader) : parent_shader_(shader){};
+
+ /** Prepare MSLGeneratorInterface from create-info. **/
+ void prepare_from_createinfo(const shader::ShaderCreateInfo *info);
+
+  /* When SSBO Vertex Fetch mode is used, uniforms are used to pass on the required information
+   * about vertex attribute bindings, in order to perform manual vertex assembly and
+   * random-access vertex lookup throughout the bound VBOs.
+   *
+   * Some parameters are global to the shader; others change with the currently bound
+   * vertex buffers and their formats, as they do with regular GPUBatch's.
+   *
+   * (Where ##attr is the attribute's name.)
+   * uniform_ssbo_stride_##attr    -- The stride between elements of attribute (attr).
+   * uniform_ssbo_offset_##attr    -- The base offset of the attribute within the vertex.
+   * uniform_ssbo_fetchmode_##attr -- Whether using per-vertex (0) or per-instance (1) fetch.
+   * uniform_ssbo_vbo_id_##attr    -- Index of the vertex buffer containing this attribute's data.
+   * uniform_ssbo_type_##attr      -- The type of data in the currently bound buffer, which may
+   *                                  mismatch the officially reported type. */
+ void prepare_ssbo_vertex_fetch_uniforms();
+
+ /* Samplers. */
+ bool use_argument_buffer_for_samplers() const;
+ uint32_t num_samplers_for_stage(ShaderStage stage) const;
+
+ /* Returns the bind index, relative to MTL_uniform_buffer_base_index. */
+ uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage);
+
+ /* Code generation utility functions. */
+ std::string generate_msl_uniform_structs(ShaderStage shader_stage);
+ std::string generate_msl_vertex_in_struct();
+ std::string generate_msl_vertex_out_struct(ShaderStage shader_stage);
+ std::string generate_msl_vertex_transform_feedback_out_struct(ShaderStage shader_stage);
+ std::string generate_msl_fragment_out_struct();
+ std::string generate_msl_vertex_inputs_string();
+ std::string generate_msl_fragment_inputs_string();
+ std::string generate_msl_vertex_entry_stub();
+ std::string generate_msl_fragment_entry_stub();
+ std::string generate_msl_global_uniform_population(ShaderStage stage);
+ std::string generate_ubo_block_macro_chain(MSLUniformBlock block);
+ std::string generate_msl_uniform_block_population(ShaderStage stage);
+ std::string generate_msl_vertex_attribute_input_population();
+ std::string generate_msl_vertex_output_population();
+ std::string generate_msl_vertex_output_tf_population();
+ std::string generate_msl_fragment_input_population();
+ std::string generate_msl_fragment_output_population();
+ std::string generate_msl_uniform_undefs(ShaderStage stage);
+ std::string generate_ubo_block_undef_chain(ShaderStage stage);
+ std::string generate_msl_texture_vars(ShaderStage shader_stage);
+ void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage);
+ void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage);
+
+ /* Location is not always specified, so this will resolve outstanding locations. */
+ void resolve_input_attribute_locations();
+ void resolve_fragment_output_locations();
+
+ /* Create shader interface for converted GLSL shader. */
+ MTLShaderInterface *bake_shader_interface(const char *name);
+
+ /* Fetch combined shader source header. */
+ char *msl_patch_default_get();
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MSLGeneratorInterface");
+};
+
+inline std::string get_stage_class_name(ShaderStage stage)
+{
+ switch (stage) {
+ case ShaderStage::VERTEX:
+ return "MTLShaderVertexImpl";
+ case ShaderStage::FRAGMENT:
+ return "MTLShaderFragmentImpl";
+ default:
+ BLI_assert_unreachable();
+ return "";
+ }
+ return "";
+}
+
+inline bool is_builtin_type(std::string type)
+{
+ /* Add Types as needed. */
+  /* TODO(Metal): Consider replacing this with a constexpr hash and switch,
+   * though the most efficient and maintainable approach is yet to be determined. */
+ static std::map<std::string, eMTLDataType> glsl_builtin_types = {
+ {"float", MTL_DATATYPE_FLOAT},
+ {"vec2", MTL_DATATYPE_FLOAT2},
+ {"vec3", MTL_DATATYPE_FLOAT3},
+ {"vec4", MTL_DATATYPE_FLOAT4},
+ {"int", MTL_DATATYPE_INT},
+ {"ivec2", MTL_DATATYPE_INT2},
+ {"ivec3", MTL_DATATYPE_INT3},
+ {"ivec4", MTL_DATATYPE_INT4},
+ {"uint32_t", MTL_DATATYPE_UINT},
+ {"uvec2", MTL_DATATYPE_UINT2},
+ {"uvec3", MTL_DATATYPE_UINT3},
+ {"uvec4", MTL_DATATYPE_UINT4},
+ {"mat3", MTL_DATATYPE_FLOAT3x3},
+ {"mat4", MTL_DATATYPE_FLOAT4x4},
+ {"bool", MTL_DATATYPE_INT},
+ {"uchar", MTL_DATATYPE_UCHAR},
+ {"uchar2", MTL_DATATYPE_UCHAR2},
+ {"uchar2", MTL_DATATYPE_UCHAR3},
+ {"uchar4", MTL_DATATYPE_UCHAR4},
+ {"vec3_1010102_Unorm", MTL_DATATYPE_UINT1010102_NORM},
+ {"vec3_1010102_Inorm", MTL_DATATYPE_INT1010102_NORM},
+ };
+ return (glsl_builtin_types.find(type) != glsl_builtin_types.end());
+}
+
+inline bool is_matrix_type(const std::string &type)
+{
+ /* Matrix type support. Add types as necessary. */
+ return (type == "mat4");
+}
+
+inline bool is_matrix_type(const shader::Type &type)
+{
+ /* Matrix type support. Add types as necessary. */
+ return (type == shader::Type::MAT4 || type == shader::Type::MAT3);
+}
+
+inline int get_matrix_location_count(const std::string &type)
+{
+ /* Matrix type support. Add types as necessary. */
+ if (type == "mat4") {
+ return 4;
+ }
+ if (type == "mat3") {
+ return 3;
+ }
+ return 1;
+}
+
+inline int get_matrix_location_count(const shader::Type &type)
+{
+ /* Matrix type support. Add types as necessary. */
+ if (type == shader::Type::MAT4) {
+ return 4;
+ }
+ else if (type == shader::Type::MAT3) {
+ return 3;
+ }
+ return 1;
+}
+
+inline std::string get_matrix_subtype(const std::string &type)
+{
+ if (type == "mat4") {
+ return "vec4";
+ }
+ return type;
+}
+
+inline shader::Type get_matrix_subtype(const shader::Type &type)
+{
+ if (type == shader::Type::MAT4) {
+ return shader::Type::VEC4;
+ }
+ if (type == shader::Type::MAT3) {
+ return shader::Type::VEC3;
+ }
+ return type;
+}
+
+inline std::string get_attribute_conversion_function(bool *uses_conversion,
+ const shader::Type &type)
+{
+ /* NOTE(Metal): Add more attribute types as required. */
+ if (type == shader::Type::FLOAT) {
+ *uses_conversion = true;
+ return "internal_vertex_attribute_convert_read_float";
+ }
+ else if (type == shader::Type::VEC2) {
+ *uses_conversion = true;
+ return "internal_vertex_attribute_convert_read_float2";
+ }
+ else if (type == shader::Type::VEC3) {
+ *uses_conversion = true;
+ return "internal_vertex_attribute_convert_read_float3";
+ }
+ else if (type == shader::Type::VEC4) {
+ *uses_conversion = true;
+ return "internal_vertex_attribute_convert_read_float4";
+ }
+ *uses_conversion = false;
+ return "";
+}
+
+inline const char *to_string(const shader::PrimitiveOut &layout)
+{
+ switch (layout) {
+ case shader::PrimitiveOut::POINTS:
+ return "points";
+ case shader::PrimitiveOut::LINE_STRIP:
+ return "line_strip";
+ case shader::PrimitiveOut::TRIANGLE_STRIP:
+ return "triangle_strip";
+ default:
+ BLI_assert(false);
+ return "unknown";
+ }
+}
+
+inline const char *to_string(const shader::PrimitiveIn &layout)
+{
+ switch (layout) {
+ case shader::PrimitiveIn::POINTS:
+ return "points";
+ case shader::PrimitiveIn::LINES:
+ return "lines";
+ case shader::PrimitiveIn::LINES_ADJACENCY:
+ return "lines_adjacency";
+ case shader::PrimitiveIn::TRIANGLES:
+ return "triangles";
+ case shader::PrimitiveIn::TRIANGLES_ADJACENCY:
+ return "triangles_adjacency";
+ default:
+ BLI_assert(false);
+ return "unknown";
+ }
+}
+
+inline const char *to_string(const shader::Interpolation &interp)
+{
+ switch (interp) {
+ case shader::Interpolation::SMOOTH:
+ return "smooth";
+ case shader::Interpolation::FLAT:
+ return "flat";
+ case shader::Interpolation::NO_PERSPECTIVE:
+ return "noperspective";
+ default:
+ BLI_assert(false);
+ return "unkown";
+ }
+}
+
+inline const char *to_string_msl(const shader::Interpolation &interp)
+{
+ switch (interp) {
+ case shader::Interpolation::SMOOTH:
+ return "[[smooth]]";
+ case shader::Interpolation::FLAT:
+ return "[[flat]]";
+ case shader::Interpolation::NO_PERSPECTIVE:
+ return "[[center_no_perspective]]";
+ default:
+ return "";
+ }
+}
+
+inline const char *to_string(const shader::Type &type)
+{
+ switch (type) {
+ case shader::Type::FLOAT:
+ return "float";
+ case shader::Type::VEC2:
+ return "vec2";
+ case shader::Type::VEC3:
+ return "vec3";
+ case shader::Type::VEC3_101010I2:
+ return "vec3_1010102_Inorm";
+ case shader::Type::VEC4:
+ return "vec4";
+ case shader::Type::MAT3:
+ return "mat3";
+ case shader::Type::MAT4:
+ return "mat4";
+ case shader::Type::UINT:
+ return "uint32_t";
+ case shader::Type::UVEC2:
+ return "uvec2";
+ case shader::Type::UVEC3:
+ return "uvec3";
+ case shader::Type::UVEC4:
+ return "uvec4";
+ case shader::Type::INT:
+ return "int";
+ case shader::Type::IVEC2:
+ return "ivec2";
+ case shader::Type::IVEC3:
+ return "ivec3";
+ case shader::Type::IVEC4:
+ return "ivec4";
+ case shader::Type::BOOL:
+ return "bool";
+ case shader::Type::UCHAR:
+ return "uchar";
+ case shader::Type::UCHAR2:
+ return "uchar2";
+ case shader::Type::UCHAR3:
+ return "uchar3";
+ case shader::Type::UCHAR4:
+ return "uchar4";
+ case shader::Type::CHAR:
+ return "char";
+ case shader::Type::CHAR2:
+ return "char2";
+ case shader::Type::CHAR3:
+ return "char3";
+ case shader::Type::CHAR4:
+ return "char4";
+ default:
+ BLI_assert(false);
+ return "unkown";
+ }
+}
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
new file mode 100644
index 00000000000..37c1ddd6e7a
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -0,0 +1,2976 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "BKE_global.h"
+
+#include "BLI_string.h"
+
+#include "BLI_string.h"
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <string>
+
+#include <cstring>
+
+#include "GPU_platform.h"
+#include "GPU_vertex_format.h"
+
+#include "gpu_shader_dependency_private.h"
+
+#include "mtl_common.hh"
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_shader.hh"
+#include "mtl_shader_generator.hh"
+#include "mtl_shader_interface.hh"
+#include "mtl_texture.hh"
+
+extern char datatoc_mtl_shader_defines_msl[];
+extern char datatoc_mtl_shader_shared_h[];
+
+using namespace blender;
+using namespace blender::gpu;
+using namespace blender::gpu::shader;
+
+namespace blender::gpu {
+
+char *MSLGeneratorInterface::msl_patch_default = nullptr;
+
+/* -------------------------------------------------------------------- */
+/** \name Shader Translation utility functions.
+ * \{ */
+
+static eMTLDataType to_mtl_type(Type type)
+{
+ switch (type) {
+ case Type::FLOAT:
+ return MTL_DATATYPE_FLOAT;
+ case Type::VEC2:
+ return MTL_DATATYPE_FLOAT2;
+ case Type::VEC3:
+ return MTL_DATATYPE_FLOAT3;
+ case Type::VEC4:
+ return MTL_DATATYPE_FLOAT4;
+ case Type::MAT3:
+ return MTL_DATATYPE_FLOAT3x3;
+ case Type::MAT4:
+ return MTL_DATATYPE_FLOAT4x4;
+ case Type::UINT:
+ return MTL_DATATYPE_UINT;
+ case Type::UVEC2:
+ return MTL_DATATYPE_UINT2;
+ case Type::UVEC3:
+ return MTL_DATATYPE_UINT3;
+ case Type::UVEC4:
+ return MTL_DATATYPE_UINT4;
+ case Type::INT:
+ return MTL_DATATYPE_INT;
+ case Type::IVEC2:
+ return MTL_DATATYPE_INT2;
+ case Type::IVEC3:
+ return MTL_DATATYPE_INT3;
+ case Type::IVEC4:
+ return MTL_DATATYPE_INT4;
+ case Type::VEC3_101010I2:
+ return MTL_DATATYPE_INT1010102_NORM;
+ case Type::BOOL:
+ return MTL_DATATYPE_BOOL;
+ case Type::UCHAR:
+ return MTL_DATATYPE_UCHAR;
+ case Type::UCHAR2:
+ return MTL_DATATYPE_UCHAR2;
+ case Type::UCHAR3:
+ return MTL_DATATYPE_UCHAR3;
+ case Type::UCHAR4:
+ return MTL_DATATYPE_UCHAR4;
+ case Type::CHAR:
+ return MTL_DATATYPE_CHAR;
+ case Type::CHAR2:
+ return MTL_DATATYPE_CHAR2;
+ case Type::CHAR3:
+ return MTL_DATATYPE_CHAR3;
+ case Type::CHAR4:
+ return MTL_DATATYPE_CHAR4;
+ default: {
+ BLI_assert_msg(false, "Unexpected data type");
+ }
+ }
+ return MTL_DATATYPE_FLOAT;
+}
+
+static std::regex remove_non_numeric_characters("[^0-9]");
+
+#ifndef NDEBUG
+static void remove_multiline_comments_func(std::string &str)
+{
+ char *current_str_begin = &*str.begin();
+ char *current_str_end = &*str.end();
+
+ bool is_inside_comment = false;
+ for (char *c = current_str_begin; c < current_str_end; c++) {
+ if (is_inside_comment) {
+ if ((*c == '*') && (c < current_str_end - 1) && (*(c + 1) == '/')) {
+ is_inside_comment = false;
+ *c = ' ';
+ *(c + 1) = ' ';
+ }
+ else {
+ *c = ' ';
+ }
+ }
+ else {
+ if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '*')) {
+ is_inside_comment = true;
+ *c = ' ';
+ }
+ }
+ }
+}
+
+static void remove_singleline_comments_func(std::string &str)
+{
+ char *current_str_begin = &*str.begin();
+ char *current_str_end = &*str.end();
+
+ bool is_inside_comment = false;
+ for (char *c = current_str_begin; c < current_str_end; c++) {
+ if (is_inside_comment) {
+ if (*c == '\n') {
+ is_inside_comment = false;
+ }
+ else {
+ *c = ' ';
+ }
+ }
+ else {
+ if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '/')) {
+ is_inside_comment = true;
+ *c = ' ';
+ }
+ }
+ }
+}
+#endif
+
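+/* Scan a program word (identifier) starting at `chr`: letters, underscores, and, beyond the
+ * first character, digits. The length of the run is written to `len`. */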
+static bool is_program_word(const char *chr, int *len)
+{
+ int numchars = 0;
+ for (const char *c = chr; *c != '\0'; c++) {
+ char ch = *c;
+ if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (numchars > 0 && ch >= '0' && ch <= '9') || ch == '_') {
+ numchars++;
+ }
+ else {
+ *len = numchars;
+ return (numchars > 0);
+ }
+ }
+ *len = numchars;
+ return true;
+}
+
+/* Replace function parameter patterns containing:
+ * `out vec3 somevar` with `THD vec3&somevar`.
+ * which enables pass by reference via resolved macro:
+ * thread vec3& somevar. */
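+/* E.g. (illustrative): `void project(inout vec4 pos)` is rewritten in place to
+ * `void project(THD   vec4&pos)` (blanked characters remain as padding spaces); the `in`
+ * prefix of `inout` is consumed by the same rewrite, and array parameters receive `*`
+ * instead of `&`. */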
+static void replace_outvars(std::string &str)
+{
+ char *current_str_begin = &*str.begin();
+ char *current_str_end = &*str.end();
+
+ for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) {
+ char *start = c;
+ if (strncmp(c, "out ", 4) == 0) {
+ if (strncmp(c - 2, "in", 2) == 0) {
+ start = c - 2;
+ }
+
+ /* Check that the following are words. */
+ int len1, len2;
+ char *word_base1 = c + 4;
+ char *word_base2 = word_base1;
+
+ if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) {
+ word_base2 = word_base1 + len1 + 1;
+ if (is_program_word(word_base2, &len2)) {
+ /* Match found. */
+ bool is_array = (*(word_base2 + len2) == '[');
+
+ /* Generate outvar pattern of form 'THD type&var' from original 'out vec4 var'. */
+ *start = 'T';
+ *(start + 1) = 'H';
+ *(start + 2) = 'D';
+ for (char *clear = start + 3; clear < c + 4; clear++) {
+ *clear = ' ';
+ }
+ *(word_base2 - 1) = is_array ? '*' : '&';
+ }
+ }
+ }
+ }
+}
+
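+/* Rewrite GLSL array-constructor initialisers into MSL braced initialiser lists.
+ * E.g. (illustrative): `float weights[2] = float[2](0.5, 0.5);` becomes
+ * `float weights[2] =          {0.5, 0.5};` -- the `float[2]` constructor token is blanked
+ * out and its parentheses are replaced with braces. */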
+static void replace_array_initializers_func(std::string &str)
+{
+ char *current_str_begin = &*str.begin();
+ char *current_str_end = &*str.end();
+
+ for (char *c = current_str_begin; c < current_str_end - 6; c++) {
+ char *base_scan = c;
+ int typelen = 0;
+
+ if (is_program_word(c, &typelen) && *(c + typelen) == '[') {
+
+ char *array_len_start = c + typelen + 1;
+ c = array_len_start;
+ char *closing_square_brace = strchr(c, ']');
+ if (closing_square_brace != nullptr) {
+ c = closing_square_brace;
+ char *first_bracket = c + 1;
+ if (*first_bracket == '(') {
+ c += 1;
+ char *semi_colon = strchr(c, ';');
+ if (semi_colon != nullptr && *(semi_colon - 1) == ')') {
+ char *closing_bracket = semi_colon - 1;
+
+ /* Resolve to MSL-compatible array formatting. */
+ *first_bracket = '{';
+ *closing_bracket = '}';
+ for (char *clear = base_scan; clear <= closing_square_brace; clear++) {
+ *clear = ' ';
+ }
+ }
+ }
+ }
+ else {
+ return;
+ }
+ }
+ }
+}
+
+#ifndef NDEBUG
+
+static bool balanced_braces(char *current_str_begin, char *current_str_end)
+{
+ int nested_bracket_depth = 0;
+ for (char *c = current_str_begin; c < current_str_end; c++) {
+ /* Track whether we are in global scope. */
+ if (*c == '{' || *c == '[' || *c == '(') {
+ nested_bracket_depth++;
+ continue;
+ }
+ if (*c == '}' || *c == ']' || *c == ')') {
+ nested_bracket_depth--;
+ continue;
+ }
+ }
+ return (nested_bracket_depth == 0);
+}
+
+/* Certain constants (such as arrays, or pointer types) declared in global scope
+ * end up being initialised per shader thread, resulting in high
+ * register pressure within the shader.
+ * Here we flag occurrences of these constants such that
+ * they can be moved to a place where this is not a problem.
+ *
+ * Constants declared within function scope do not exhibit this problem. */
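+/* E.g. (illustrative): a file-scope `const vec2 offsets[4] = ...;` would be flagged by the
+ * scan below, while the same declaration inside a function body is ignored, as is anything
+ * already qualified as `const constant`. */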
+static void extract_global_scope_constants(std::string &str, std::stringstream &global_scope_out)
+{
+ char *current_str_begin = &*str.begin();
+ char *current_str_end = &*str.end();
+
+ int nested_bracket_depth = 0;
+ for (char *c = current_str_begin; c < current_str_end - 6; c++) {
+ /* Track whether we are in global scope. */
+ if (*c == '{' || *c == '[' || *c == '(') {
+ nested_bracket_depth++;
+ continue;
+ }
+ if (*c == '}' || *c == ']' || *c == ')') {
+ nested_bracket_depth--;
+ BLI_assert(nested_bracket_depth >= 0);
+ continue;
+ }
+
+ /* Check For global const declarations */
+ if (nested_bracket_depth == 0 && strncmp(c, "const ", 6) == 0 &&
+ strncmp(c, "const constant ", 15) != 0) {
+ char *c_expr_end = strstr(c, ";");
+ if (c_expr_end != nullptr && balanced_braces(c, c_expr_end)) {
+ MTL_LOG_INFO(
+ "[PERFORMANCE WARNING] Global scope constant expression found - These get allocated "
+ "per-thread in METAL - Best to use Macro's or uniforms to avoid overhead: '%.*s'\n",
+ (int)(c_expr_end + 1 - c),
+ c);
+
+ /* Jump ptr forward as we know we remain in global scope. */
+ c = c_expr_end - 1;
+ continue;
+ }
+ }
+ }
+}
+#endif
+
+static bool extract_ssbo_pragma_info(const MTLShader *shader,
+ const MSLGeneratorInterface &,
+ const std::string &in_vertex_src,
+                                     MTLPrimitiveType &out_prim_type,
+ uint32_t &out_num_output_verts)
+{
+ /* SSBO Vertex-fetch parameter extraction. */
+ static std::regex use_ssbo_fetch_mode_find(
+ "#pragma "
+ "USE_SSBO_VERTEX_FETCH\\(\\s*(TriangleList|LineList|\\w+)\\s*,\\s*([0-9]+)\\s*\\)");
+
+ /* Perform regex search if pragma string found. */
+ std::smatch vertex_shader_ssbo_flags;
+ bool uses_ssbo_fetch = false;
+ if (in_vertex_src.find("#pragma USE_SSBO_VERTEX_FETCH") != std::string::npos) {
+ uses_ssbo_fetch = std::regex_search(
+ in_vertex_src, vertex_shader_ssbo_flags, use_ssbo_fetch_mode_find);
+ }
+ if (uses_ssbo_fetch) {
+ /* Extract Expected output primitive type:
+ * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive)
+ *
+ * Supported Primitive Types (Others can be added if needed, but List types for efficiency):
+ * - TriangleList
+ * - LineList
+ *
+ * Output vertex count is determined by calculating the number of input primitives, and
+ * multiplying that by the number of output vertices specified. */
+ std::string str_output_primitive_type = vertex_shader_ssbo_flags[1].str();
+ std::string str_output_prim_count_per_vertex = vertex_shader_ssbo_flags[2].str();
+
+ /* Ensure output primitive type is valid. */
+ if (str_output_primitive_type == "TriangleList") {
+      out_prim_type = MTLPrimitiveTypeTriangle;
+ }
+ else if (str_output_primitive_type == "LineList") {
+      out_prim_type = MTLPrimitiveTypeLine;
+ }
+ else {
+ MTL_LOG_ERROR("Unsupported output primitive type for SSBO VERTEX FETCH MODE. Shader: %s",
+ shader->name_get());
+ return false;
+ }
+
+ /* Assign output num vertices per primitive. */
+ out_num_output_verts = std::stoi(
+ std::regex_replace(str_output_prim_count_per_vertex, remove_non_numeric_characters, ""));
+ BLI_assert(out_num_output_verts > 0);
+ return true;
+ }
+
+ /* SSBO Vertex fetchmode not used. */
+ return false;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name MTLShader builtin shader generation utilities.
+ * \{ */
+
+static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res)
+{
+ switch (res.bind_type) {
+ case ShaderCreateInfo::Resource::BindType::SAMPLER:
+ break;
+ case ShaderCreateInfo::Resource::BindType::IMAGE:
+ break;
+ case ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
+ int64_t array_offset = res.uniformbuf.name.find_first_of("[");
+ if (array_offset == -1) {
+ /* Create local class member as constant pointer reference to bound UBO buffer.
+ * Given usage within a shader follows ubo_name.ubo_element syntax, we can
+ * dereference the pointer as the compiler will optimise this data fetch.
+ * To do this, we also give the ubo name a postfix of `_local` to avoid
+ * macro accessor collisions. */
+ os << "constant " << res.uniformbuf.type_name << " *" << res.uniformbuf.name
+ << "_local;\n";
+ os << "#define " << res.uniformbuf.name << " (*" << res.uniformbuf.name << "_local)\n";
+ }
+ else {
+ /* For arrays, we can directly provide the constant access pointer, as the array
+ * syntax will de-reference this at the correct fetch index. */
+ StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset);
+ os << "constant " << res.uniformbuf.type_name << " *" << name_no_array << ";\n";
+ }
+ break;
+ }
+ case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER:
+ break;
+ }
+}
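+
+/* E.g. (illustrative): for a create-info declaring `.uniform_buf(0, "SceneData", "scene")`,
+ * the UNIFORM_BUFFER case above emits:
+ *
+ *   constant SceneData *scene_local;
+ *   #define scene (*scene_local)
+ */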
+
+std::string MTLShader::resources_declare(const ShaderCreateInfo &info) const
+{
+ /* NOTE(Metal): We only use the upfront preparation functions to populate members which
+ * would exist in the original non-create-info variant.
+ *
+ * This function is only used to generate resource structs.
+ * Global-scope handles for Uniforms, UBOs, textures and samplers
+ * are generated during class-wrapper construction in `generate_msl_from_glsl`. */
+ std::stringstream ss;
+
+ /* Generate resource stubs for UBOs and textures. */
+ ss << "\n/* Pass Resources. */\n";
+ for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
+ print_resource(ss, res);
+ }
+ ss << "\n/* Batch Resources. */\n";
+ for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
+ print_resource(ss, res);
+ }
+  /* NOTE: Push constant uniform data is generated during `generate_msl_from_glsl`,
+   * as the generated output is needed for all paths. This includes generation
+   * of the push constant data structure (struct PushConstantBlock), which all
+   * shader generation paths require. */
+ return ss.str();
+}
+
+std::string MTLShader::vertex_interface_declare(const shader::ShaderCreateInfo &info) const
+{
+ /* NOTE(Metal): We only use the upfront preparation functions to populate members which
+ * would exist in the original non-create-info variant.
+ *
+ * Here we generate the variables within class wrapper scope to allow reading of
+ * input attributes by the main code. */
+ std::stringstream ss;
+ ss << "\n/* Vertex Inputs. */\n";
+ for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) {
+ ss << to_string(attr.type) << " " << attr.name << ";\n";
+ }
+ return ss.str();
+}
+
+std::string MTLShader::fragment_interface_declare(const shader::ShaderCreateInfo &info) const
+{
+ /* For shaders generated from MSL, the fragment-output struct is generated as part of the entry
+ * stub during glsl->MSL conversion in `generate_msl_from_glsl`.
+ * Here, we can instead generate the global-scope variables which will be populated during
+ * execution.
+ *
+ * NOTE: The output declaration for location and blend index are generated in the entry-point
+ * struct. This is simply a mirror class member which stores the value during main shader body
+ * execution. */
+ std::stringstream ss;
+ ss << "\n/* Fragment Outputs. */\n";
+ for (const ShaderCreateInfo::FragOut &output : info.fragment_outputs_) {
+ ss << to_string(output.type) << " " << output.name << ";\n";
+ }
+ ss << "\n";
+
+ return ss.str();
+}
+
+std::string MTLShader::geometry_interface_declare(const shader::ShaderCreateInfo &info) const
+{
+ BLI_assert_msg(false, "Geometry shading unsupported by Metal");
+ return "";
+}
+
+std::string MTLShader::geometry_layout_declare(const shader::ShaderCreateInfo &info) const
+{
+ BLI_assert_msg(false, "Geometry shading unsupported by Metal");
+ return "";
+}
+
+std::string MTLShader::compute_layout_declare(const ShaderCreateInfo &info) const
+{
+ /* TODO(Metal): Metal compute layout pending compute support. */
+ BLI_assert_msg(false, "Compute shaders unsupported by Metal");
+ return "";
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Shader Translation.
+ * \{ */
+
+char *MSLGeneratorInterface::msl_patch_default_get()
+{
+ if (msl_patch_default != nullptr) {
+ return msl_patch_default;
+ }
+
+ std::stringstream ss_patch;
+ ss_patch << datatoc_mtl_shader_shared_h << std::endl;
+ ss_patch << datatoc_mtl_shader_defines_msl << std::endl;
+  std::string patch_str = ss_patch.str();
+  size_t len = patch_str.size();
+
+  /* Allocate one extra byte for the null terminator copied by strcpy. */
+  msl_patch_default = (char *)malloc((len + 1) * sizeof(char));
+  strcpy(msl_patch_default, patch_str.c_str());
+ return msl_patch_default;
+}
+
+bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
+{
+ /* Verify if create-info is available.
+ * NOTE(Metal): For now, only support creation from CreateInfo.
+ * If needed, we can perform source translation without this using
+ * manual reflection. */
+ bool uses_create_info = info != nullptr;
+ if (!uses_create_info) {
+ MTL_LOG_WARNING("Unable to compile shader %p '%s' as no create-info was provided!\n",
+ this,
+ this->name_get());
+ valid_ = false;
+ return false;
+ }
+
+ /* MSLGeneratorInterface is a class populated to describe all parameters, resources, bindings
+ * and features used by the source GLSL shader. This information is then used to generate the
+ * appropriate Metal entry points and perform any required source translation. */
+ MSLGeneratorInterface msl_iface(*this);
+ BLI_assert(shd_builder_ != nullptr);
+
+  /* Populate MSLGeneratorInterface from Create-Info.
+   * NOTE: this is a separate path, as MSLGeneratorInterface can also be manually populated
+   * from parsing, if support for shaders without create-info is required. */
+ msl_iface.prepare_from_createinfo(info);
+
+  /** Determine use of Transform Feedback. **/
+  msl_iface.uses_transform_feedback = false;
+  if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
+    /* Ensure TransformFeedback is configured correctly. */
+    BLI_assert(tf_output_name_list_.size() > 0);
+    msl_iface.uses_transform_feedback = true;
+  }
+
+  /* Verify source sizes are greater than zero. This is checked after transform-feedback
+   * detection, as `uses_transform_feedback` must be initialized before it is read, and
+   * vertex-only (transform feedback) shaders have no fragment source. */
+  BLI_assert(shd_builder_->glsl_vertex_source_.size() > 0);
+  if (!msl_iface.uses_transform_feedback) {
+    BLI_assert(shd_builder_->glsl_fragment_source_.size() > 0);
+  }
+
+  /* Concatenate msl_shader_defines to provide functionality mapping
+   * from GLSL to MSL. Also include additional GPU defines for
+   * optional high-level feature support. */
+ const std::string msl_defines_string =
+ "#define GPU_ARB_texture_cube_map_array 1\n\
+ #define GPU_ARB_shader_draw_parameters 1\n\
+ #define GPU_ARB_texture_gather 1\n";
+
+ shd_builder_->glsl_vertex_source_ = msl_defines_string + shd_builder_->glsl_vertex_source_;
+ if (!msl_iface.uses_transform_feedback) {
+ shd_builder_->glsl_fragment_source_ = msl_defines_string + shd_builder_->glsl_fragment_source_;
+ }
+
+ /* Extract SSBO usage information from shader pragma:
+ *
+ * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive)
+ *
+   * This will determine whether SSBO-vertex-fetch mode is used for this shader.
+   * Returns true if used, and populates the output reference values with the
+   * output prim type and the number of output vertices. */
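+  /* For example, a shader source containing (primitive-type token illustrative):
+   *
+   *   #pragma USE_SSBO_VERTEX_FETCH(TriangleList, 6)
+   *
+   * requests triangle output with six vertices emitted per input primitive. */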
+ MTLPrimitiveType vertex_fetch_ssbo_output_prim_type = MTLPrimitiveTypeTriangle;
+ uint32_t vertex_fetch_ssbo_num_output_verts = 0;
+ msl_iface.uses_ssbo_vertex_fetch_mode = extract_ssbo_pragma_info(
+ this,
+ msl_iface,
+ shd_builder_->glsl_vertex_source_,
+ vertex_fetch_ssbo_output_prim_type,
+ vertex_fetch_ssbo_num_output_verts);
+
+ if (msl_iface.uses_ssbo_vertex_fetch_mode) {
+    shader_debug_printf(
+        "[Shader] SSBO VERTEX FETCH enabled for shader '%s' with output primitive type: %d, "
+        "vertex count: %u\n",
+        this->name_get(),
+        int(vertex_fetch_ssbo_output_prim_type),
+        vertex_fetch_ssbo_num_output_verts);
+ }
+
+ /*** Regex Commands ***/
+ /* Source cleanup and syntax replacement. */
+ static std::regex remove_excess_newlines("\\n+");
+ static std::regex replace_mat3("mat3\\s*\\(");
+
+ /* Special condition - mat3 and array constructor replacement.
+ * Also replace excessive new lines to ensure cases are not missed.
+ * NOTE(Metal): May be able to skip excess-newline removal. */
+ shd_builder_->glsl_vertex_source_ = std::regex_replace(
+ shd_builder_->glsl_vertex_source_, remove_excess_newlines, "\n");
+ shd_builder_->glsl_vertex_source_ = std::regex_replace(
+ shd_builder_->glsl_vertex_source_, replace_mat3, "MAT3(");
+ replace_array_initializers_func(shd_builder_->glsl_vertex_source_);
+
+ if (!msl_iface.uses_transform_feedback) {
+ shd_builder_->glsl_fragment_source_ = std::regex_replace(
+ shd_builder_->glsl_fragment_source_, remove_excess_newlines, "\n");
+ shd_builder_->glsl_fragment_source_ = std::regex_replace(
+ shd_builder_->glsl_fragment_source_, replace_mat3, "MAT3(");
+ replace_array_initializers_func(shd_builder_->glsl_fragment_source_);
+ }
+
+ /**** Extract usage of GL globals. ****/
+  /* NOTE(METAL): Currently still performing a fallback string scan, as info->builtins_ does
+   * not always contain the usage flag. This can be removed once all appropriate create-infos
+   * have been updated. In some cases, this may incur a false positive if access is guarded
+   * behind a macro. Though in these cases, unused code paths and parameters will be
+   * optimised out by the Metal shader compiler. */
+
+ /** Identify usage of vertex-shader builtins. */
+ msl_iface.uses_gl_VertexID = bool(info->builtins_ & BuiltinBits::VERTEX_ID) ||
+ shd_builder_->glsl_vertex_source_.find("gl_VertexID") !=
+ std::string::npos;
+ msl_iface.uses_gl_InstanceID = bool(info->builtins_ & BuiltinBits::INSTANCE_ID) ||
+ shd_builder_->glsl_vertex_source_.find("gl_InstanceID") !=
+ std::string::npos ||
+ shd_builder_->glsl_vertex_source_.find("gpu_InstanceIndex") !=
+ std::string::npos ||
+ msl_iface.uses_ssbo_vertex_fetch_mode;
+
+  /* In GL, the instance ID is in the range [0, instance_count], whereas in Metal it is
+   * [base_instance, base_instance + instance_count]. We therefore offset the instance ID
+   * by the base instance in Metal, and expose the [[base_instance]] attribute whenever
+   * the instance ID is used at all. */
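+  /* i.e. the generated entry stub computes: gl_InstanceID = instance_id - base_instance. */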
+ msl_iface.uses_gl_BaseInstanceARB = msl_iface.uses_gl_InstanceID ||
+ shd_builder_->glsl_vertex_source_.find(
+ "gl_BaseInstanceARB") != std::string::npos ||
+ shd_builder_->glsl_vertex_source_.find("gpu_BaseInstance") !=
+ std::string::npos;
+ msl_iface.uses_gl_Position = shd_builder_->glsl_vertex_source_.find("gl_Position") !=
+ std::string::npos;
+ msl_iface.uses_gl_PointSize = shd_builder_->glsl_vertex_source_.find("gl_PointSize") !=
+ std::string::npos;
+ msl_iface.uses_mtl_array_index_ = shd_builder_->glsl_vertex_source_.find(
+ "MTLRenderTargetArrayIndex") != std::string::npos;
+
+ /** Identify usage of fragment-shader builtins. */
+ if (!msl_iface.uses_transform_feedback) {
+ std::smatch gl_special_cases;
+ msl_iface.uses_gl_PointCoord = bool(info->builtins_ & BuiltinBits::POINT_COORD) ||
+ shd_builder_->glsl_fragment_source_.find("gl_PointCoord") !=
+ std::string::npos;
+ msl_iface.uses_barycentrics = bool(info->builtins_ & BuiltinBits::BARYCENTRIC_COORD);
+ msl_iface.uses_gl_FrontFacing = bool(info->builtins_ & BuiltinBits::FRONT_FACING) ||
+ shd_builder_->glsl_fragment_source_.find("gl_FrontFacing") !=
+ std::string::npos;
+
+ /* NOTE(Metal): If FragColor is not used, then we treat the first fragment output attachment
+ * as the primary output. */
+ msl_iface.uses_gl_FragColor = shd_builder_->glsl_fragment_source_.find("gl_FragColor") !=
+ std::string::npos;
+
+ /* NOTE(Metal): FragDepth output mode specified in create-info 'DepthWrite depth_write_'.
+ * If parsing without create-info, manual extraction will be required. */
+ msl_iface.uses_gl_FragDepth = shd_builder_->glsl_fragment_source_.find("gl_FragDepth") !=
+ std::string::npos;
+ msl_iface.depth_write = info->depth_write_;
+ }
+
+ /* Generate SSBO vertex fetch mode uniform data hooks. */
+ if (msl_iface.uses_ssbo_vertex_fetch_mode) {
+ msl_iface.prepare_ssbo_vertex_fetch_uniforms();
+ }
+
+ /* Extract gl_ClipDistances. */
+ static std::regex gl_clipdistance_find("gl_ClipDistance\\[([0-9])\\]");
+
+ std::string clip_search_str = shd_builder_->glsl_vertex_source_;
+ std::smatch vertex_clip_distances;
+
+ while (std::regex_search(clip_search_str, vertex_clip_distances, gl_clipdistance_find)) {
+ shader_debug_printf("VERTEX CLIP DISTANCES FOUND: str: %s\n",
+ vertex_clip_distances[1].str().c_str());
+ auto found = std::find(msl_iface.clip_distances.begin(),
+ msl_iface.clip_distances.end(),
+ vertex_clip_distances[1].str());
+ if (found == msl_iface.clip_distances.end()) {
+ msl_iface.clip_distances.append(vertex_clip_distances[1].str());
+ }
+ clip_search_str = vertex_clip_distances.suffix();
+ }
+ shd_builder_->glsl_vertex_source_ = std::regex_replace(
+ shd_builder_->glsl_vertex_source_, gl_clipdistance_find, "gl_ClipDistance_$1");
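+  /* e.g. `gl_ClipDistance[0]` in the GLSL source is rewritten to `gl_ClipDistance_0`,
+   * matching the per-plane globals declared inside the shader class below. */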
+
+ /* Replace 'out' attribute on function parameters with pass-by-reference. */
+ replace_outvars(shd_builder_->glsl_vertex_source_);
+ if (!msl_iface.uses_transform_feedback) {
+ replace_outvars(shd_builder_->glsl_fragment_source_);
+ }
+
+ /**** METAL Shader source generation. ****/
+  /* Set up stringstreams for populating the generated MSL vertex/fragment shaders. */
+ std::stringstream ss_vertex;
+ std::stringstream ss_fragment;
+
+ /*** Generate VERTEX Stage ***/
+ /* Conditional defines. */
+ if (msl_iface.use_argument_buffer_for_samplers()) {
+ ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
+ ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
+ << msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl;
+ }
+ if (msl_iface.uses_ssbo_vertex_fetch_mode) {
+ ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
+ ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
+ << std::endl;
+ ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
+ << std::endl;
+ for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
+ ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
+ }
+
+    /* Macros. */
+ ss_vertex << "#define "
+ "UNIFORM_SSBO_USES_INDEXED_RENDERING_STR " UNIFORM_SSBO_USES_INDEXED_RENDERING_STR
+ "\n"
+ "#define UNIFORM_SSBO_INDEX_MODE_U16_STR " UNIFORM_SSBO_INDEX_MODE_U16_STR
+ "\n"
+ "#define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR " UNIFORM_SSBO_INPUT_PRIM_TYPE_STR
+ "\n"
+ "#define UNIFORM_SSBO_INPUT_VERT_COUNT_STR " UNIFORM_SSBO_INPUT_VERT_COUNT_STR
+ "\n"
+ "#define UNIFORM_SSBO_OFFSET_STR " UNIFORM_SSBO_OFFSET_STR
+ "\n"
+ "#define UNIFORM_SSBO_STRIDE_STR " UNIFORM_SSBO_STRIDE_STR
+ "\n"
+ "#define UNIFORM_SSBO_FETCHMODE_STR " UNIFORM_SSBO_FETCHMODE_STR
+ "\n"
+ "#define UNIFORM_SSBO_VBO_ID_STR " UNIFORM_SSBO_VBO_ID_STR
+ "\n"
+ "#define UNIFORM_SSBO_TYPE_STR " UNIFORM_SSBO_TYPE_STR "\n";
+ }
+
+ /* Inject common Metal header. */
+ ss_vertex << msl_iface.msl_patch_default_get() << std::endl << std::endl;
+
+#ifndef NDEBUG
+ /* Performance warning: Extract global-scope expressions.
+ * Note: This is dependent on stripping out comments
+ * to remove false positives. */
+ remove_multiline_comments_func(shd_builder_->glsl_vertex_source_);
+ remove_singleline_comments_func(shd_builder_->glsl_vertex_source_);
+ extract_global_scope_constants(shd_builder_->glsl_vertex_source_, ss_vertex);
+#endif
+
+ /* Generate additional shader interface struct members from create-info. */
+ for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) {
+
+    /* Only generate struct for ones with instance names. */
+ if (!iface->instance_name.is_empty()) {
+ ss_vertex << "struct " << iface->name << " {" << std::endl;
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+ ss_vertex << to_string(inout.type) << " " << inout.name << " "
+ << to_string_msl(inout.interp) << ";" << std::endl;
+ }
+ ss_vertex << "};" << std::endl;
+ }
+ }
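+  /* This emits struct declarations of the form (illustrative names; the exact
+   * interpolation qualifier comes from `to_string_msl`):
+   *
+   *   struct MyInterface {
+   *     float4 color <interp-qualifier>;
+   *   };
+   */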
+
+ /* Wrap entire GLSL source inside class to create
+ * a scope within the class to enable use of global variables.
+ * e.g. global access to attributes, uniforms, UBOs, textures etc; */
+ ss_vertex << "class " << get_stage_class_name(ShaderStage::VERTEX) << " {" << std::endl;
+ ss_vertex << "public:" << std::endl;
+
+ /* Generate additional shader interface struct members from create-info. */
+ for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) {
+
+ bool is_inside_struct = false;
+ if (!iface->instance_name.is_empty()) {
+      /* If the shader stage interface has an instance name, then it
+       * is using a struct format, and as such we only need a local
+       * class member for the struct, not one per element. */
+ ss_vertex << iface->name << " " << iface->instance_name << ";" << std::endl;
+ is_inside_struct = true;
+ }
+
+ /* Generate local variables, populate elems for vertex out struct gen. */
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+
+ /* Only output individual elements if they are not part of an interface struct instance. */
+ if (!is_inside_struct) {
+ ss_vertex << to_string(inout.type) << " " << inout.name << ";" << std::endl;
+ }
+
+ const char *arraystart = strstr(inout.name.c_str(), "[");
+ bool is_array = (arraystart != nullptr);
+ int array_len = (is_array) ? std::stoi(std::regex_replace(
+ arraystart, remove_non_numeric_characters, "")) :
+ 0;
+
+ /* Remove array from string name. */
+ std::string out_name = inout.name.c_str();
+ std::size_t pos = out_name.find('[');
+ if (is_array && pos != std::string::npos) {
+ out_name.resize(pos);
+ }
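+      /* e.g. an interface element named `pos[2]` yields out_name "pos" and array_len 2. */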
+
+ /* Add to vertex-output interface. */
+ msl_iface.vertex_output_varyings.append(
+ {to_string(inout.type),
+ out_name.c_str(),
+ ((is_inside_struct) ? iface->instance_name.c_str() : ""),
+ to_string(inout.interp),
+ is_array,
+ array_len});
+
+      /* Add to fragment-input interface. */
+ msl_iface.fragment_input_varyings.append(
+ {to_string(inout.type),
+ out_name.c_str(),
+ ((is_inside_struct) ? iface->instance_name.c_str() : ""),
+ to_string(inout.interp),
+ is_array,
+ array_len});
+ }
+ }
+
+ /** Generate structs from MSL Interface. **/
+ /* Generate VertexIn struct. */
+ if (!msl_iface.uses_ssbo_vertex_fetch_mode) {
+ ss_vertex << msl_iface.generate_msl_vertex_in_struct();
+ }
+  /* Generate Uniform data structs. */
+ ss_vertex << msl_iface.generate_msl_uniform_structs(ShaderStage::VERTEX);
+
+ /* Conditionally use global GL variables. */
+ if (msl_iface.uses_gl_Position) {
+ ss_vertex << "float4 gl_Position;" << std::endl;
+ }
+ if (msl_iface.uses_gl_PointSize) {
+ ss_vertex << "float gl_PointSize = 1.0;" << std::endl;
+ }
+ if (msl_iface.uses_gl_VertexID) {
+ ss_vertex << "int gl_VertexID;" << std::endl;
+ }
+ if (msl_iface.uses_gl_InstanceID) {
+ ss_vertex << "int gl_InstanceID;" << std::endl;
+ }
+ if (msl_iface.uses_gl_BaseInstanceARB) {
+ ss_vertex << "int gl_BaseInstanceARB;" << std::endl;
+ }
+ for (const int cd : IndexRange(msl_iface.clip_distances.size())) {
+ ss_vertex << "float gl_ClipDistance_" << cd << ";" << std::endl;
+ }
+
+ /* Render target array index if using multilayered rendering. */
+ if (msl_iface.uses_mtl_array_index_) {
+ ss_vertex << "int MTLRenderTargetArrayIndex = 0;" << std::endl;
+ }
+
+ /* Global vertex data pointers when using SSBO vertex fetch mode.
+ * Bound vertex buffers passed in via the entry point function
+ * are assigned to these pointers to be globally accessible
+ * from any function within the GLSL source shader. */
+ if (msl_iface.uses_ssbo_vertex_fetch_mode) {
+ ss_vertex << "constant uchar** MTL_VERTEX_DATA;" << std::endl;
+ ss_vertex << "constant ushort* MTL_INDEX_DATA_U16 = nullptr;" << std::endl;
+ ss_vertex << "constant uint32_t* MTL_INDEX_DATA_U32 = nullptr;" << std::endl;
+ }
+
+ /* Add Texture members.
+ * These members pack both a texture and a sampler into a single
+ * struct, as both are needed within texture functions.
+ * e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
+ * The exact typename is generated inside `get_msl_typestring_wrapper()`. */
+ for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
+ if (bool(tex.stage & ShaderStage::VERTEX)) {
+ ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
+ }
+ }
+ ss_vertex << std::endl;
+
+ /* Inject main GLSL source into output stream. */
+ ss_vertex << shd_builder_->glsl_vertex_source_ << std::endl;
+
+ /* Generate VertexOut and TransformFeedbackOutput structs. */
+ ss_vertex << msl_iface.generate_msl_vertex_out_struct(ShaderStage::VERTEX);
+ if (msl_iface.uses_transform_feedback) {
+ ss_vertex << msl_iface.generate_msl_vertex_transform_feedback_out_struct(ShaderStage::VERTEX);
+ }
+
+ /* Class Closing Bracket to end shader global scope. */
+ ss_vertex << "};" << std::endl;
+
+ /* Generate Vertex shader entrypoint function containing resource bindings. */
+ ss_vertex << msl_iface.generate_msl_vertex_entry_stub();
+
+ /*** Generate FRAGMENT Stage. ***/
+ if (!msl_iface.uses_transform_feedback) {
+
+ /* Conditional defines. */
+ if (msl_iface.use_argument_buffer_for_samplers()) {
+ ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
+ ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
+ << msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl;
+ }
+
+ /* Inject common Metal header. */
+ ss_fragment << msl_iface.msl_patch_default_get() << std::endl << std::endl;
+
+#ifndef NDEBUG
+ /* Performance warning: Identify global-scope expressions.
+ * These cause excessive register pressure due to global
+     * arrays being instantiated per-thread.
+ * Note: This is dependent on stripping out comments
+ * to remove false positives. */
+ remove_multiline_comments_func(shd_builder_->glsl_fragment_source_);
+ remove_singleline_comments_func(shd_builder_->glsl_fragment_source_);
+ extract_global_scope_constants(shd_builder_->glsl_fragment_source_, ss_fragment);
+#endif
+
+ /* Generate additional shader interface struct members from create-info. */
+ for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) {
+
+ /* Only generate struct for ones with instance names. */
+ if (!iface->instance_name.is_empty()) {
+ ss_fragment << "struct " << iface->name << " {" << std::endl;
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+            ss_fragment << to_string(inout.type) << " " << inout.name << " "
+                        << to_string_msl(inout.interp) << ";" << std::endl;
+ }
+ ss_fragment << "};" << std::endl;
+ }
+ }
+
+ /* Wrap entire GLSL source inside class to create
+ * a scope within the class to enable use of global variables. */
+ ss_fragment << "class " << get_stage_class_name(ShaderStage::FRAGMENT) << " {" << std::endl;
+ ss_fragment << "public:" << std::endl;
+
+ /* In/out interface values */
+ /* Generate additional shader interface struct members from create-info. */
+ for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) {
+ bool is_inside_struct = false;
+ if (!iface->instance_name.is_empty()) {
+ /* Struct local variable. */
+ ss_fragment << iface->name << " " << iface->instance_name << ";" << std::endl;
+ is_inside_struct = true;
+ }
+
+ /* Generate local variables, populate elems for vertex out struct gen. */
+ for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
+        /* Only output individual elements if they are not part of an
+         * interface struct instance. */
+ if (!is_inside_struct) {
+ ss_fragment << to_string(inout.type) << " " << inout.name << ";" << std::endl;
+ }
+ }
+ }
+
+ /* Generate global structs */
+ ss_fragment << msl_iface.generate_msl_vertex_out_struct(ShaderStage::FRAGMENT);
+ ss_fragment << msl_iface.generate_msl_fragment_out_struct();
+ ss_fragment << msl_iface.generate_msl_uniform_structs(ShaderStage::FRAGMENT);
+
+ /** GL globals. */
+ /* gl_FragCoord will always be assigned to the output position from vertex shading. */
+ ss_fragment << "float4 gl_FragCoord;" << std::endl;
+ if (msl_iface.uses_gl_FragColor) {
+ ss_fragment << "float4 gl_FragColor;" << std::endl;
+ }
+ if (msl_iface.uses_gl_FragDepth) {
+ ss_fragment << "float gl_FragDepth;" << std::endl;
+ }
+ if (msl_iface.uses_gl_PointCoord) {
+ ss_fragment << "float2 gl_PointCoord;" << std::endl;
+ }
+ if (msl_iface.uses_gl_FrontFacing) {
+ ss_fragment << "MTLBOOL gl_FrontFacing;" << std::endl;
+ }
+
+ /* Add Texture members. */
+ for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
+ if (bool(tex.stage & ShaderStage::FRAGMENT)) {
+ ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
+ }
+ }
+
+ /* Inject Main GLSL Fragment Source into output stream. */
+ ss_fragment << shd_builder_->glsl_fragment_source_ << std::endl;
+
+ /* Class Closing Bracket to end shader global scope. */
+ ss_fragment << "};" << std::endl;
+
+ /* Generate Fragment entrypoint function. */
+ ss_fragment << msl_iface.generate_msl_fragment_entry_stub();
+ }
+
+ /* DEBUG: Export source to file for manual verification. */
+#if MTL_SHADER_DEBUG_EXPORT_SOURCE
+ NSFileManager *sharedFM = [NSFileManager defaultManager];
+ NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL];
+ NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent]
+ URLByAppendingPathComponent:@"Shaders/"
+ isDirectory:YES];
+ [sharedFM createDirectoryAtURL:shader_dir
+ withIntermediateDirectories:YES
+ attributes:nil
+ error:nil];
+ const char *path_cstr = [shader_dir fileSystemRepresentation];
+
+ std::ofstream vertex_fs;
+ vertex_fs.open(
+ (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedVertexShader.msl")
+ .c_str());
+ vertex_fs << ss_vertex.str();
+ vertex_fs.close();
+
+ if (!msl_iface.uses_transform_feedback) {
+ std::ofstream fragment_fs;
+ fragment_fs.open(
+ (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedFragmentShader.msl")
+ .c_str());
+ fragment_fs << ss_fragment.str();
+ fragment_fs.close();
+ }
+
+  shader_debug_printf(
+      "Vertex Shader Saved to: %s\n",
+      (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedVertexShader.msl")
+          .c_str());
+#endif
+
+ /* Set MSL source NSString's. Required by Metal API. */
+ NSString *msl_final_vert = [NSString stringWithCString:ss_vertex.str().c_str()
+ encoding:[NSString defaultCStringEncoding]];
+ NSString *msl_final_frag = (msl_iface.uses_transform_feedback) ?
+ (@"") :
+ ([NSString stringWithCString:ss_fragment.str().c_str()
+ encoding:[NSString defaultCStringEncoding]]);
+
+ this->shader_source_from_msl(msl_final_vert, msl_final_frag);
+ shader_debug_printf("[METAL] BSL Converted into MSL\n");
+
+#ifndef NDEBUG
+ /* In debug mode, we inject the name of the shader into the entrypoint function
+ * name, as these are what show up in the Xcode GPU debugger. */
+ this->set_vertex_function_name(
+ [[NSString stringWithFormat:@"vertex_function_entry_%s", this->name] retain]);
+ this->set_fragment_function_name(
+ [[NSString stringWithFormat:@"fragment_function_entry_%s", this->name] retain]);
+#else
+ this->set_vertex_function_name(@"vertex_function_entry");
+ this->set_fragment_function_name(@"fragment_function_entry");
+#endif
+
+ /* Bake shader interface. */
+ this->set_interface(msl_iface.bake_shader_interface(this->name));
+
+ /* Update other shader properties. */
+ uses_mtl_array_index_ = msl_iface.uses_mtl_array_index_;
+ use_ssbo_vertex_fetch_mode_ = msl_iface.uses_ssbo_vertex_fetch_mode;
+ if (msl_iface.uses_ssbo_vertex_fetch_mode) {
+ ssbo_vertex_fetch_output_prim_type_ = vertex_fetch_ssbo_output_prim_type;
+ ssbo_vertex_fetch_output_num_verts_ = vertex_fetch_ssbo_num_output_verts;
+ this->prepare_ssbo_vertex_fetch_metadata();
+ }
+
+ /* Successfully completed GLSL to MSL translation. */
+ return true;
+}
+
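+/* Compile-time string length for literal prefixes, e.g. `const_strlen("abc")` evaluates
+ * to 3 in a constexpr context. Used below to size per-attribute uniform-name buffers. */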
+constexpr size_t const_strlen(const char *str)
+{
+ return (*str == '\0') ? 0 : const_strlen(str + 1) + 1;
+}
+
+void MTLShader::prepare_ssbo_vertex_fetch_metadata()
+{
+ BLI_assert(use_ssbo_vertex_fetch_mode_);
+
+ /* Cache global SSBO-vertex-fetch uniforms locations. */
+ const ShaderInput *inp_prim_type = interface->uniform_get(UNIFORM_SSBO_INPUT_PRIM_TYPE_STR);
+ const ShaderInput *inp_vert_count = interface->uniform_get(UNIFORM_SSBO_INPUT_VERT_COUNT_STR);
+ const ShaderInput *inp_uses_indexed_rendering = interface->uniform_get(
+ UNIFORM_SSBO_USES_INDEXED_RENDERING_STR);
+ const ShaderInput *inp_uses_index_mode_u16 = interface->uniform_get(
+ UNIFORM_SSBO_INDEX_MODE_U16_STR);
+
+ this->uni_ssbo_input_prim_type_loc = (inp_prim_type != nullptr) ? inp_prim_type->location : -1;
+ this->uni_ssbo_input_vert_count_loc = (inp_vert_count != nullptr) ? inp_vert_count->location :
+ -1;
+ this->uni_ssbo_uses_indexed_rendering = (inp_uses_indexed_rendering != nullptr) ?
+ inp_uses_indexed_rendering->location :
+ -1;
+ this->uni_ssbo_uses_index_mode_u16 = (inp_uses_index_mode_u16 != nullptr) ?
+ inp_uses_index_mode_u16->location :
+ -1;
+
+ BLI_assert_msg(this->uni_ssbo_input_prim_type_loc != -1,
+ "uni_ssbo_input_prim_type_loc uniform location invalid!");
+ BLI_assert_msg(this->uni_ssbo_input_vert_count_loc != -1,
+ "uni_ssbo_input_vert_count_loc uniform location invalid!");
+ BLI_assert_msg(this->uni_ssbo_uses_indexed_rendering != -1,
+ "uni_ssbo_uses_indexed_rendering uniform location invalid!");
+ BLI_assert_msg(this->uni_ssbo_uses_index_mode_u16 != -1,
+ "uni_ssbo_uses_index_mode_u16 uniform location invalid!");
+
+ /* Prepare SSBO-vertex-fetch attribute uniform location cache. */
+ MTLShaderInterface *mtl_interface = this->get_interface();
+ for (int i = 0; i < mtl_interface->get_total_attributes(); i++) {
+ const MTLShaderInputAttribute &mtl_shader_attribute = mtl_interface->get_attribute(i);
+ const char *attr_name = mtl_interface->get_name_at_offset(mtl_shader_attribute.name_offset);
+
+    /* SSBO-vertex-fetch attribute data is passed via uniforms. Here we extract the
+     * uniform location for each attribute and cache it for later use. */
+ ShaderSSBOAttributeBinding &cached_ssbo_attr = cached_ssbo_attribute_bindings_[i];
+ cached_ssbo_attr.attribute_index = i;
+
+ constexpr int len_UNIFORM_SSBO_STRIDE_STR = const_strlen(UNIFORM_SSBO_STRIDE_STR);
+ constexpr int len_UNIFORM_SSBO_OFFSET_STR = const_strlen(UNIFORM_SSBO_OFFSET_STR);
+ constexpr int len_UNIFORM_SSBO_FETCHMODE_STR = const_strlen(UNIFORM_SSBO_FETCHMODE_STR);
+ constexpr int len_UNIFORM_SSBO_VBO_ID_STR = const_strlen(UNIFORM_SSBO_VBO_ID_STR);
+ constexpr int len_UNIFORM_SSBO_TYPE_STR = const_strlen(UNIFORM_SSBO_TYPE_STR);
+
+ char strattr_buf_stride[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_STRIDE_STR + 1] =
+ UNIFORM_SSBO_STRIDE_STR;
+ char strattr_buf_offset[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_OFFSET_STR + 1] =
+ UNIFORM_SSBO_OFFSET_STR;
+ char strattr_buf_fetchmode[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_FETCHMODE_STR + 1] =
+ UNIFORM_SSBO_FETCHMODE_STR;
+ char strattr_buf_vbo_id[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_VBO_ID_STR + 1] =
+ UNIFORM_SSBO_VBO_ID_STR;
+ char strattr_buf_type[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_TYPE_STR + 1] =
+ UNIFORM_SSBO_TYPE_STR;
+
+ strcpy(&strattr_buf_stride[len_UNIFORM_SSBO_STRIDE_STR], attr_name);
+ strcpy(&strattr_buf_offset[len_UNIFORM_SSBO_OFFSET_STR], attr_name);
+ strcpy(&strattr_buf_fetchmode[len_UNIFORM_SSBO_FETCHMODE_STR], attr_name);
+ strcpy(&strattr_buf_vbo_id[len_UNIFORM_SSBO_VBO_ID_STR], attr_name);
+ strcpy(&strattr_buf_type[len_UNIFORM_SSBO_TYPE_STR], attr_name);
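+    /* Each buffer now holds a prefixed uniform name, e.g. for an attribute `pos`
+     * (illustrative), strattr_buf_stride contains UNIFORM_SSBO_STRIDE_STR "pos". */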
+
+ /* Fetch uniform locations and cache for fast access. */
+ const ShaderInput *inp_unf_stride = mtl_interface->uniform_get(strattr_buf_stride);
+ const ShaderInput *inp_unf_offset = mtl_interface->uniform_get(strattr_buf_offset);
+ const ShaderInput *inp_unf_fetchmode = mtl_interface->uniform_get(strattr_buf_fetchmode);
+ const ShaderInput *inp_unf_vbo_id = mtl_interface->uniform_get(strattr_buf_vbo_id);
+ const ShaderInput *inp_unf_attr_type = mtl_interface->uniform_get(strattr_buf_type);
+
+ BLI_assert(inp_unf_stride != nullptr);
+ BLI_assert(inp_unf_offset != nullptr);
+ BLI_assert(inp_unf_fetchmode != nullptr);
+ BLI_assert(inp_unf_vbo_id != nullptr);
+ BLI_assert(inp_unf_attr_type != nullptr);
+
+ cached_ssbo_attr.uniform_stride = (inp_unf_stride != nullptr) ? inp_unf_stride->location : -1;
+ cached_ssbo_attr.uniform_offset = (inp_unf_offset != nullptr) ? inp_unf_offset->location : -1;
+ cached_ssbo_attr.uniform_fetchmode = (inp_unf_fetchmode != nullptr) ?
+ inp_unf_fetchmode->location :
+ -1;
+ cached_ssbo_attr.uniform_vbo_id = (inp_unf_vbo_id != nullptr) ? inp_unf_vbo_id->location : -1;
+ cached_ssbo_attr.uniform_attr_type = (inp_unf_attr_type != nullptr) ?
+ inp_unf_attr_type->location :
+ -1;
+
+ BLI_assert(cached_ssbo_attr.uniform_offset != -1);
+ BLI_assert(cached_ssbo_attr.uniform_stride != -1);
+ BLI_assert(cached_ssbo_attr.uniform_fetchmode != -1);
+ BLI_assert(cached_ssbo_attr.uniform_vbo_id != -1);
+ BLI_assert(cached_ssbo_attr.uniform_attr_type != -1);
+ }
+}
+
+void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateInfo *info)
+{
+ /** Assign info. */
+ create_info_ = info;
+
+ /** Prepare Uniforms. */
+ for (const shader::ShaderCreateInfo::PushConst &push_constant : create_info_->push_constants_) {
+ MSLUniform uniform(push_constant.type,
+ push_constant.name,
+ bool(push_constant.array_size > 1),
+ push_constant.array_size);
+ uniforms.append(uniform);
+ }
+
+ /** Prepare textures and uniform blocks.
+ * Perform across both resource categories and extract both
+ * texture samplers and image types. */
+ for (int i = 0; i < 2; i++) {
+ const Vector<ShaderCreateInfo::Resource> &resources = (i == 0) ? info->pass_resources_ :
+ info->batch_resources_;
+ for (const ShaderCreateInfo::Resource &res : resources) {
+ /* TODO(Metal): Consider adding stage flags to textures in create info. */
+ /* Handle sampler types. */
+ switch (res.bind_type) {
+ case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: {
+
+          /* Samplers use access::sample by default. */
+ MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE;
+ /* TextureBuffers must have read/write/read-write access pattern. */
+ if (res.sampler.type == ImageType::FLOAT_BUFFER ||
+ res.sampler.type == ImageType::INT_BUFFER ||
+ res.sampler.type == ImageType::UINT_BUFFER) {
+ access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
+ }
+ BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
+ MSLTextureSampler msl_tex(
+ ShaderStage::BOTH, res.sampler.type, res.sampler.name, access, res.slot);
+ texture_samplers.append(msl_tex);
+ } break;
+
+ case shader::ShaderCreateInfo::Resource::BindType::IMAGE: {
+ /* Flatten qualifier flags into final access state. */
+ MSLTextureSamplerAccess access;
+ if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) {
+ access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READWRITE;
+ }
+ else if (bool(res.image.qualifiers & Qualifier::WRITE)) {
+ access = MSLTextureSamplerAccess::TEXTURE_ACCESS_WRITE;
+ }
+ else {
+ access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
+ }
+ BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
+ MSLTextureSampler msl_tex(
+ ShaderStage::BOTH, res.image.type, res.image.name, access, res.slot);
+ texture_samplers.append(msl_tex);
+ } break;
+
+ case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
+ MSLUniformBlock ubo;
+ BLI_assert(res.uniformbuf.type_name.size() > 0);
+ BLI_assert(res.uniformbuf.name.size() > 0);
+ int64_t array_offset = res.uniformbuf.name.find_first_of("[");
+
+ ubo.type_name = res.uniformbuf.type_name;
+ ubo.is_array = (array_offset > -1);
+ if (ubo.is_array) {
+            /* If the UBO is an array, strip the array tag from the name. */
+ StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset);
+ ubo.name = name_no_array;
+ }
+ else {
+ ubo.name = res.uniformbuf.name;
+ }
+ ubo.stage = ShaderStage::VERTEX | ShaderStage::FRAGMENT;
+ uniform_blocks.append(ubo);
+ } break;
+
+ case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
+ /* TODO(Metal): Support shader storage buffer in Metal.
+ * Pending compute support. */
+ } break;
+ }
+ }
+ }
+
+ /** Vertex Inputs. */
+ bool all_attr_location_assigned = true;
+ for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) {
+
+ /* Validate input. */
+ BLI_assert(attr.name.size() > 0);
+
+    /* NOTE(Metal): Input attributes may not have a location specified.
+     * Unset locations are resolved during `resolve_input_attribute_locations`. */
+ MSLVertexInputAttribute msl_attr;
+ bool attr_location_assigned = (attr.index >= 0);
+ all_attr_location_assigned = all_attr_location_assigned && attr_location_assigned;
+ msl_attr.layout_location = attr_location_assigned ? attr.index : -1;
+ msl_attr.type = attr.type;
+ msl_attr.name = attr.name;
+ vertex_input_attributes.append(msl_attr);
+ }
+
+ /* Ensure all attributes are assigned a location. */
+ if (!all_attr_location_assigned) {
+ this->resolve_input_attribute_locations();
+ }
+
+ /** Fragment outputs. */
+ for (const shader::ShaderCreateInfo::FragOut &frag_out : create_info_->fragment_outputs_) {
+
+ /* Validate input. */
+ BLI_assert(frag_out.name.size() > 0);
+ BLI_assert(frag_out.index >= 0);
+
+ /* Populate MSLGenerator attribute. */
+ MSLFragmentOutputAttribute mtl_frag_out;
+ mtl_frag_out.layout_location = frag_out.index;
+ mtl_frag_out.layout_index = (frag_out.blend != DualBlend::NONE) ?
+ ((frag_out.blend == DualBlend::SRC_0) ? 0 : 1) :
+ -1;
+ mtl_frag_out.type = frag_out.type;
+ mtl_frag_out.name = frag_out.name;
+
+ fragment_outputs.append(mtl_frag_out);
+ }
+}
+
+bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const
+{
+  /* Argument buffers are only used when the sampler count reaches the static limit
+   * of 16 AND the device can support additional samplers via an argument buffer. */
+ return texture_samplers.size() >= 16 && GPU_max_samplers() > 16;
+}
+
+uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
+{
+  /* NOTE: Sampler bindings and the argument buffer are shared across stages,
+   * in case stages share texture/sampler bindings. */
+ return texture_samplers.size();
+}
+
+uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage)
+{
+ BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT);
+ if (sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] >= 0) {
+ return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
+ }
+ sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] =
+ (this->uniform_blocks.size() + 1);
+ return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
+}
+
+void MSLGeneratorInterface::prepare_ssbo_vertex_fetch_uniforms()
+{
+ BLI_assert(this->uses_ssbo_vertex_fetch_mode);
+
+ /* Add Special Uniforms for SSBO vertex fetch mode. */
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_PRIM_TYPE_STR, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_VERT_COUNT_STR, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_USES_INDEXED_RENDERING_STR, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INDEX_MODE_U16_STR, false));
+
+ for (const MSLVertexInputAttribute &attr : this->vertex_input_attributes) {
+ const std::string &uname = attr.name;
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_STRIDE_STR + uname, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_OFFSET_STR + uname, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_FETCHMODE_STR + uname, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_VBO_ID_STR + uname, false));
+ this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_TYPE_STR + uname, false));
+ }
+}
+
+std::string MSLGeneratorInterface::generate_msl_vertex_entry_stub()
+{
+ std::stringstream out;
+ out << std::endl << "/*** AUTO-GENERATED MSL VERETX SHADER STUB. ***/" << std::endl;
+
+ /* Undef texture defines from main source - avoid conflict with MSL texture. */
+ out << "#undef texture" << std::endl;
+ out << "#undef textureLod" << std::endl;
+
+ /* Disable special case for booleans being treated as ints in GLSL. */
+ out << "#undef bool" << std::endl;
+
+ /* Undef uniform mappings to avoid name collisions. */
+ out << generate_msl_uniform_undefs(ShaderStage::VERTEX);
+
+ /* Generate function entry point signature w/ resource bindings and inputs. */
+ out << "vertex ";
+ if (this->uses_transform_feedback) {
+ out << "void ";
+ }
+ else {
+ out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexOut ";
+ }
+#ifndef NDEBUG
+ out << "vertex_function_entry_" << parent_shader_.name_get() << "(\n\t";
+#else
+ out << "vertex_function_entry(\n\t";
+#endif
+
+ out << this->generate_msl_vertex_inputs_string();
+ out << ") {" << std::endl << std::endl;
+ out << "\tMTLShaderVertexImpl::VertexOut output;" << std::endl
+ << "\tMTLShaderVertexImpl vertex_shader_instance;" << std::endl;
+
+ /* Copy Vertex Globals. */
+ if (this->uses_gl_VertexID) {
+ out << "vertex_shader_instance.gl_VertexID = gl_VertexID;" << std::endl;
+ }
+ if (this->uses_gl_InstanceID) {
+ out << "vertex_shader_instance.gl_InstanceID = gl_InstanceID-gl_BaseInstanceARB;" << std::endl;
+ }
+ if (this->uses_gl_BaseInstanceARB) {
+ out << "vertex_shader_instance.gl_BaseInstanceARB = gl_BaseInstanceARB;" << std::endl;
+ }
+
+ /* Copy vertex attributes into local variables. */
+ out << this->generate_msl_vertex_attribute_input_population();
+
+ /* Populate Uniforms and uniform blocks. */
+ out << this->generate_msl_texture_vars(ShaderStage::VERTEX);
+ out << this->generate_msl_global_uniform_population(ShaderStage::VERTEX);
+ out << this->generate_msl_uniform_block_population(ShaderStage::VERTEX);
+
+ /* Execute original 'main' function within class scope. */
+ out << "\t/* Execute Vertex main function */\t" << std::endl
+ << "\tvertex_shader_instance.main();" << std::endl
+ << std::endl;
+
+ /* Populate Output values. */
+ out << this->generate_msl_vertex_output_population();
+
+  /* Final point size.
+   * This is only compiled if the MTL_global_pointsize function constant is
+   * specified in the PSO, which is restricted to point primitive types. */
+ out << "if(is_function_constant_defined(MTL_global_pointsize)){ output.pointsize = "
+ "(MTL_global_pointsize > 0.0)?MTL_global_pointsize:output.pointsize; }"
+ << std::endl;
+
+ /* Populate transform feedback buffer. */
+ if (this->uses_transform_feedback) {
+ out << this->generate_msl_vertex_output_tf_population();
+ }
+ else {
+ out << "\treturn output;" << std::endl;
+ }
+ out << "}";
+ return out.str();
+}
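+
+/* Overall shape of the generated vertex entry stub (illustrative sketch, abbreviated):
+ *
+ *   vertex MTLShaderVertexImpl::VertexOut vertex_function_entry_<name>(
+ *       MTLShaderVertexImpl::VertexIn v_in [[stage_in]],
+ *       constant MTLShaderVertexImpl::PushConstantBlock* uniforms
+ *           [[buffer(MTL_uniform_buffer_base_index)]],
+ *       ...)
+ *   {
+ *     MTLShaderVertexImpl::VertexOut output;
+ *     MTLShaderVertexImpl vertex_shader_instance;
+ *     ...copy attributes, uniforms and textures into the instance...
+ *     vertex_shader_instance.main();
+ *     ...populate `output` from instance state...
+ *     return output;
+ *   }
+ */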
+
+std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub()
+{
+ std::stringstream out;
+ out << std::endl << "/*** AUTO-GENERATED MSL FRAGMENT SHADER STUB. ***/" << std::endl;
+
+  /* Undef texture defines from main source - avoid conflict with MSL texture. */
+ out << "#undef texture" << std::endl;
+ out << "#undef textureLod" << std::endl;
+
+ /* Disable special case for booleans being treated as ints in GLSL. */
+ out << "#undef bool" << std::endl;
+
+ /* Undef uniform mappings to avoid name collisions. */
+ out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT);
+
+ /* Generate function entry point signature w/ resource bindings and inputs. */
+#ifndef NDEBUG
+ out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT)
+ << "::FragmentOut fragment_function_entry_" << parent_shader_.name_get() << "(\n\t";
+#else
+ out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT)
+ << "::FragmentOut fragment_function_entry(\n\t";
+#endif
+ out << this->generate_msl_fragment_inputs_string();
+ out << ") {" << std::endl << std::endl;
+ out << "\tMTLShaderFragmentImpl::FragmentOut output;" << std::endl
+ << "\tMTLShaderFragmentImpl fragment_shader_instance;" << std::endl;
+
+ /* Copy Fragment Globals. */
+ if (this->uses_gl_PointCoord) {
+ out << "fragment_shader_instance.gl_PointCoord = gl_PointCoord;" << std::endl;
+ }
+ if (this->uses_gl_FrontFacing) {
+ out << "fragment_shader_instance.gl_FrontFacing = gl_FrontFacing;" << std::endl;
+ }
+
+  /* Copy fragment stage inputs into local variables. */
+ out << this->generate_msl_fragment_input_population();
+
+ /* Barycentrics. */
+ if (this->uses_barycentrics) {
+
+ /* Main barycentrics. */
+ out << "fragment_shader_instance.gpu_BaryCoord = mtl_barycentric_coord.xyz;";
+
+    /* barycentricDist represents the world-space distance from the current fragment's
+     * world-space position to the edge opposite each vertex. */
+ out << "float3 worldPos = fragment_shader_instance.worldPosition.xyz;" << std::endl;
+ out << "float3 wpChange = (length(dfdx(worldPos))+length(dfdy(worldPos)));" << std::endl;
+ out << "float3 bcChange = "
+ "(length(dfdx(mtl_barycentric_coord))+length(dfdy(mtl_barycentric_coord)));"
+ << std::endl;
+ out << "float3 rateOfChange = wpChange/bcChange;" << std::endl;
+
+    /* Distance to each edge using the inverse barycentric value: rather than the length
+     * of the current contribution (e.g. 0.7), we want the distance to the opposite edge,
+     * hence the (1.0 - coord) factor below. */
+ out << "fragment_shader_instance.gpu_BarycentricDist.x = length(rateOfChange * "
+ "(1.0-mtl_barycentric_coord.x));"
+ << std::endl;
+ out << "fragment_shader_instance.gpu_BarycentricDist.y = length(rateOfChange * "
+ "(1.0-mtl_barycentric_coord.y));"
+ << std::endl;
+ out << "fragment_shader_instance.gpu_BarycentricDist.z = length(rateOfChange * "
+ "(1.0-mtl_barycentric_coord.z));"
+ << std::endl;
+ }
+
+ /* Populate Uniforms and uniform blocks. */
+ out << this->generate_msl_texture_vars(ShaderStage::FRAGMENT);
+ out << this->generate_msl_global_uniform_population(ShaderStage::FRAGMENT);
+ out << this->generate_msl_uniform_block_population(ShaderStage::FRAGMENT);
+
+ /* Execute original 'main' function within class scope. */
+ out << "\t/* Execute Fragment main function */\t" << std::endl
+ << "\tfragment_shader_instance.main();" << std::endl
+ << std::endl;
+
+ /* Populate Output values. */
+ out << this->generate_msl_fragment_output_population();
+ out << " return output;" << std::endl << "}";
+
+ return out.str();
+}
+
+void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out,
+ ShaderStage stage)
+{
+ BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT);
+ /* Generate texture signatures. */
+ BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
+ for (const MSLTextureSampler &tex : this->texture_samplers) {
+ if (bool(tex.stage & stage)) {
+ out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]";
+ }
+ }
+
+ /* Generate sampler signatures. */
+ /* Note: Currently textures and samplers share indices across shading stages, so the limit is
+ * shared.
+ * If we exceed the hardware-supported limit, then follow a bindless model using argument
+ * buffers. */
+ if (this->use_argument_buffer_for_samplers()) {
+ out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
+ << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]";
+ }
+ else {
+ /* Maximum Limit of samplers defined in the function argument table is
+ * MTL_MAX_DEFAULT_SAMPLERS=16. */
+ BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
+ for (const MSLTextureSampler &tex : this->texture_samplers) {
+ if (bool(tex.stage & stage)) {
+ out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]";
+ }
+ }
+
+ /* Fallback. */
+    if (this->texture_samplers.size() > MTL_MAX_DEFAULT_SAMPLERS) {
+ shader_debug_printf(
+ "[Metal] Warning: Shader exceeds limit of %u samplers on current hardware\n",
+ MTL_MAX_DEFAULT_SAMPLERS);
+ }
+ }
+}
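+
+/* For a 2D float sampler bound at slot 0, the fragments appended above look like
+ * (sketch; the exact type string comes from `get_msl_typestring`):
+ *
+ *   , texture2d<float, access::sample> tex [[texture(0)]]
+ *   , sampler tex_sampler [[sampler(0)]]
+ */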
+
+void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
+ ShaderStage stage)
+{
+ int ubo_index = 0;
+ for (const MSLUniformBlock &ubo : this->uniform_blocks) {
+ if (bool(ubo.stage & stage)) {
+ /* For literal/existing global types, we do not need the class namespace accessor. */
+ out << ",\n\tconstant ";
+ if (!is_builtin_type(ubo.type_name)) {
+ out << get_stage_class_name(stage) << "::";
+ }
+ /* UniformBuffer bind indices start at MTL_uniform_buffer_base_index+1, as
+ * MTL_uniform_buffer_base_index is reserved for the PushConstantBlock (push constants).
+ * MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs
+ * bound for the current PSO specialisation. */
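+      /* e.g. the first UBO is bound at [[buffer(MTL_uniform_buffer_base_index+1)]],
+       * the second at [[buffer(MTL_uniform_buffer_base_index+2)]], and so on. */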
+ out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+"
+ << (ubo_index + 1) << ")]]";
+ }
+ ubo_index++;
+ }
+}
+
+std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
+{
+ std::stringstream out;
+
+ if (this->uses_ssbo_vertex_fetch_mode) {
+ /* Vertex Buffers bound as raw buffers. */
+ for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+ out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n";
+ }
+ out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],";
+ }
+ else {
+ if (this->vertex_input_attributes.size() > 0) {
+ /* Vertex Buffers use input assembly. */
+ out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],";
+ }
+ }
+ out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX)
+ << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+
+ this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX);
+
+ /* Transform feedback buffer binding. */
+ if (this->uses_transform_feedback) {
+ out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX)
+ << "::VertexOut_TF* "
+ "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]";
+ }
+
+ /* Generate texture signatures. */
+ this->generate_msl_textures_input_string(out, ShaderStage::VERTEX);
+
+ /* Entry point parameters for gl Globals. */
+ if (this->uses_gl_VertexID) {
+ out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]";
+ }
+ if (this->uses_gl_InstanceID) {
+ out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]";
+ }
+ if (this->uses_gl_BaseInstanceARB) {
+ out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
+ }
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string()
+{
+ std::stringstream out;
+ out << get_stage_class_name(ShaderStage::FRAGMENT)
+ << "::VertexOut v_in [[stage_in]],\n\tconstant "
+ << get_stage_class_name(ShaderStage::FRAGMENT)
+ << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+
+ this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT);
+
+ /* Generate texture signatures. */
+ this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT);
+
+ if (this->uses_gl_PointCoord) {
+ out << ",\n\tconst float2 gl_PointCoord [[point_coord]]";
+ }
+ if (this->uses_gl_FrontFacing) {
+ out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
+ }
+
+ /* Barycentrics. */
+ if (this->uses_barycentrics) {
+ out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
+ }
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage)
+{
+ BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
+ std::stringstream out;
+
+ /* Common Uniforms. */
+ out << "typedef struct {" << std::endl;
+
+ for (const MSLUniform &uniform : this->uniforms) {
+ if (uniform.is_array) {
+ out << "\t" << to_string(uniform.type) << " " << uniform.name << "[" << uniform.array_elems
+ << "];" << std::endl;
+ }
+ else {
+ out << "\t" << to_string(uniform.type) << " " << uniform.name << ";" << std::endl;
+ }
+ }
+ out << "} PushConstantBlock;\n\n";
+
+ /* Member UBO block reference. */
+ out << std::endl << "const constant PushConstantBlock *global_uniforms;" << std::endl;
+
+ /* Macro define chain.
+ * To access uniforms, we generate a macro such that the uniform name can
+ * be used directly without using the struct's handle. */
+ for (const MSLUniform &uniform : this->uniforms) {
+ out << "#define " << uniform.name << " global_uniforms->" << uniform.name << std::endl;
+ }
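+  /* e.g. a push constant named `color` (illustrative) yields:
+   *   #define color global_uniforms->color */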
+ out << std::endl;
+ return out.str();
+}
+
+/* Note: Uniform macro definition vars can conflict with other parameters. */
+std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shader_stage)
+{
+ std::stringstream out;
+
+ /* Macro undef chain. */
+ for (const MSLUniform &uniform : this->uniforms) {
+ out << "#undef " << uniform.name << std::endl;
+ }
+ /* UBO block undef. */
+ for (const MSLUniformBlock &ubo : this->uniform_blocks) {
+ out << "#undef " << ubo.name << std::endl;
+ }
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_vertex_in_struct()
+{
+ std::stringstream out;
+
+ /* Skip struct if no vert attributes. */
+ if (this->vertex_input_attributes.size() == 0) {
+ return "";
+ }
+
+ /* Output */
+ out << "typedef struct {" << std::endl;
+ for (const MSLVertexInputAttribute &in_attr : this->vertex_input_attributes) {
+ /* Matrix and array attributes are not trivially supported and thus
+ * require each element to be passed as an individual attribute.
+ * This requires shader source generation of sequential elements.
+ * The matrix type is then re-packed into a Mat4 inside the entry function.
+ *
+ * e.g.
+ * float4 __internal_modelmatrix_0 [[attribute(0)]];
+ * float4 __internal_modelmatrix_1 [[attribute(1)]];
+ * float4 __internal_modelmatrix_2 [[attribute(2)]];
+ * float4 __internal_modelmatrix_3 [[attribute(3)]];
+ */
+ if (is_matrix_type(in_attr.type) && !this->uses_ssbo_vertex_fetch_mode) {
+ for (int elem = 0; elem < get_matrix_location_count(in_attr.type); elem++) {
+ out << "\t" << get_matrix_subtype(in_attr.type) << " __internal_" << in_attr.name << elem
+ << " [[attribute(" << (in_attr.layout_location + elem) << ")]];" << std::endl;
+ }
+ }
+ else {
+ out << "\t" << in_attr.type << " " << in_attr.name << " [[attribute("
+ << in_attr.layout_location << ")]];" << std::endl;
+ }
+ }
+
+ out << "} VertexIn;" << std::endl << std::endl;
+
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage shader_stage)
+{
+ BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
+ std::stringstream out;
+
+ /* Vertex output struct. */
+ out << "typedef struct {" << std::endl;
+
+ /* If we use GL position, our standard output variable will be mapped to '_default_position_'.
+ * Otherwise, we use the FIRST element in the output array.
+ * If transform feedback is enabled, we do not need to output position, unless it
+ * is explicitly specified as a tf output. */
+ bool first_attr_is_position = false;
+ if (this->uses_gl_Position) {
+ out << "\tfloat4 _default_position_ [[position]];" << std::endl;
+ }
+ else {
+ if (!this->uses_transform_feedback) {
+ /* Use first output element for position. */
+ BLI_assert(this->vertex_output_varyings.size() > 0);
+ BLI_assert(this->vertex_output_varyings[0].type == "vec4");
+ out << "\tfloat4 " << this->vertex_output_varyings[0].name << " [[position]];" << std::endl;
+ first_attr_is_position = true;
+ }
+ }
+
+ /* Generate other vertex output members. */
+ bool skip_first_index = first_attr_is_position;
+ for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) {
+
+ /* Skip first index if used for position. */
+ if (skip_first_index) {
+ skip_first_index = false;
+ continue;
+ }
+
+ if (v_out.is_array) {
+ /* Array types cannot be trivially passed between shading stages.
+ * Instead we pass each component individually. E.g. vec4 pos[2]
+ * will be converted to: `vec4 pos_0; vec4 pos_1;`
+ * The specified interpolation qualifier will be applied per element. */
+ /* TODO(Metal): Support array of matrix in-out types if required
+ * e.g. Mat4 out_matrices[3]. */
+ for (int i = 0; i < v_out.array_elems; i++) {
+ out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name << i
+ << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl;
+ }
+ }
+ else {
+ /* Matrix types need to be expressed as their vector subcomponents. */
+ if (is_matrix_type(v_out.type)) {
+ BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" &&
+ "Matrix varying types must have [[flat]] interpolation");
+ std::string subtype = get_matrix_subtype(v_out.type);
+ for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) {
+ out << "\t" << subtype << v_out.instance_name << " __matrix_" << v_out.name << elem
+ << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl;
+ }
+ }
+ else {
+ out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name
+ << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl;
+ }
+ }
+ }
+
+ /* Add gl_PointSize if written to. */
+ if (shader_stage == ShaderStage::VERTEX) {
+ if (this->uses_gl_PointSize) {
+ /* If gl_PointSize is explicitly written to,
+ * we will output the written value directly.
+       * This value can still be overridden by the
+       * global pointsize value. */
+ out << "\tfloat pointsize [[point_size]];" << std::endl;
+ }
+ else {
+ /* Otherwise, if pointsize is not written to inside the shader,
+ * then its usage is controlled by whether the MTL_global_pointsize
+ * function constant has been specified.
+       * This function constant is enabled for all point primitives being
+       * rendered. */
+ out << "\tfloat pointsize [[point_size, function_constant(MTL_global_pointsize)]];"
+ << std::endl;
+ }
+ }
+
+ /* Add gl_ClipDistance[n]. */
+ if (shader_stage == ShaderStage::VERTEX) {
+ out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl;
+ if (this->clip_distances.size() > 1) {
+ /* Output array of clip distances if specified. */
+ out << "\tfloat clipdistance [[clip_distance]] [" << this->clip_distances.size() << "];"
+ << std::endl;
+ }
+ else if (this->clip_distances.size() > 0) {
+ out << "\tfloat clipdistance [[clip_distance]];" << std::endl;
+ }
+ out << "#endif" << std::endl;
+ }
+
+ /* Add MTL render target array index for multilayered rendering support. */
+ if (uses_mtl_array_index_) {
+ out << "\tuint MTLRenderTargetArrayIndex [[render_target_array_index]];" << std::endl;
+ }
+
+ out << "} VertexOut;" << std::endl << std::endl;
+
+ return out.str();
+}
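+
+/* Illustrative shape of the generated output struct (member names are examples):
+ *
+ *   typedef struct {
+ *     float4 _default_position_ [[position]];
+ *     float4 uv_interp;  // interpolation qualifier may follow.
+ *     float pointsize [[point_size, function_constant(MTL_global_pointsize)]];
+ *   } VertexOut;
+ */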
+
+std::string MSLGeneratorInterface::generate_msl_vertex_transform_feedback_out_struct(
+ ShaderStage shader_stage)
+{
+ BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
+ std::stringstream out;
+ vertex_output_varyings_tf.clear();
+
+ out << "typedef struct {" << std::endl;
+
+ /* If we use GL position, our standard output variable will be mapped to '_default_position_'.
+ * Otherwise, we use the FIRST element in the output array -- If transform feedback is enabled,
+ * we do not need to output position */
+ bool first_attr_is_position = false;
+ if (this->uses_gl_Position) {
+
+ if (parent_shader_.has_transform_feedback_varying("gl_Position")) {
+ out << "\tfloat4 pos [[position]];" << std::endl;
+ vertex_output_varyings_tf.append({.type = "vec4",
+ .name = "gl_Position",
+ .interpolation_qualifier = "",
+ .is_array = false,
+ .array_elems = 1});
+ }
+ }
+ else {
+ if (!this->uses_transform_feedback) {
+ /* Use first output element for position */
+ BLI_assert(this->vertex_output_varyings.size() > 0);
+ BLI_assert(this->vertex_output_varyings[0].type == "vec4");
+ first_attr_is_position = true;
+ }
+ }
+
+ /* Generate other vertex outputs. */
+ bool skip_first_index = first_attr_is_position;
+ for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) {
+
+ /* Skip first index if used for position. */
+ if (skip_first_index) {
+ skip_first_index = false;
+ continue;
+ }
+
+ if (!parent_shader_.has_transform_feedback_varying(v_out.name)) {
+ continue;
+ }
+ vertex_output_varyings_tf.append(v_out);
+
+ if (v_out.is_array) {
+ /* TODO(Metal): Support array of matrix types if required. */
+ for (int i = 0; i < v_out.array_elems; i++) {
+ out << "\t" << v_out.type << " " << v_out.name << i
+ << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl;
+ }
+ }
+ else {
+ /* Matrix types need to be expressed as their vector subcomponents. */
+ if (is_matrix_type(v_out.type)) {
+ BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" &&
+ "Matrix varying types must have [[flat]] interpolation");
+ std::string subtype = get_matrix_subtype(v_out.type);
+ for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) {
+ out << "\t" << subtype << " __matrix_" << v_out.name << elem
+ << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl;
+ }
+ }
+ else {
+ out << "\t" << v_out.type << " " << v_out.name << v_out.get_mtl_interpolation_qualifier()
+ << ";" << std::endl;
+ }
+ }
+ }
+
+ out << "} VertexOut_TF;" << std::endl << std::endl;
+
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_fragment_out_struct()
+{
+ std::stringstream out;
+
+ /* Output. */
+ out << "typedef struct {" << std::endl;
+ for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) {
+ out << "\t" << to_string(this->fragment_outputs[f_output].type) << " "
+ << this->fragment_outputs[f_output].name << " [[color("
+ << this->fragment_outputs[f_output].layout_location << ")";
+ if (this->fragment_outputs[f_output].layout_index >= 0) {
+ out << ", index(" << this->fragment_outputs[f_output].layout_index << ")";
+ }
+ out << "]]"
+ << ";" << std::endl;
+ }
+ /* Add gl_FragDepth output if used. */
+ if (this->uses_gl_FragDepth) {
+ std::string out_depth_argument = ((this->depth_write == DepthWrite::GREATER) ?
+ "greater" :
+ ((this->depth_write == DepthWrite::LESS) ? "less" :
+ "any"));
+ out << "\tfloat fragdepth [[depth(" << out_depth_argument << ")]];" << std::endl;
+ }
+
+ out << "} FragmentOut;" << std::endl;
+ out << std::endl;
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage)
+{
+ /* Populate Global Uniforms. */
+ std::stringstream out;
+
+ /* Copy UBO block ref. */
+ out << "\t/* Copy Uniform block member reference */" << std::endl;
+ out << "\t"
+ << ((stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : "fragment_shader_instance.")
+ << "global_uniforms = uniforms;" << std::endl;
+
+ return out.str();
+}
+
+std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderStage stage)
+{
+ /* Populate Global Uniforms. */
+ std::stringstream out;
+ out << "\t/* Copy UBO block references into local class variables */" << std::endl;
+ for (const MSLUniformBlock &ubo : this->uniform_blocks) {
+
+ /* Only include blocks which are used within this stage. */
+ if (bool(ubo.stage & stage)) {
+ /* Generate UBO reference assignment.
+ * NOTE(Metal): We append `_local` postfix onto the class member name
+ * for the ubo to avoid name collision with the UBO accessor macro.
+ * We only need to add this postfix for the non-array access variant,
+ * as the array is indexed directly, rather than requiring a dereference. */
+ out << "\t"
+ << ((stage == ShaderStage::VERTEX) ? "vertex_shader_instance." :
+ "fragment_shader_instance.")
+ << ubo.name;
+ if (!ubo.is_array) {
+ out << "_local";
+ }
+ out << " = " << ubo.name << ";" << std::endl;
+ }
+ }
+ out << std::endl;
+ return out.str();
+}
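+
+/* e.g. for a non-array UBO named `my_block` (illustrative), this emits:
+ *   vertex_shader_instance.my_block_local = my_block; */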
+
+/* Copy input attributes from stage_in into class local variables. */
+std::string MSLGeneratorInterface::generate_msl_vertex_attribute_input_population()
+{
+
+ /* SSBO Vertex Fetch mode does not require local attribute population,
+ * we only need to pass over the buffer pointer references. */
+ if (this->uses_ssbo_vertex_fetch_mode) {
+ std::stringstream out;
+ out << "const constant uchar* GLOBAL_MTL_VERTEX_DATA[MTL_SSBO_VERTEX_FETCH_MAX_VBOS] = {"
+ << std::endl;
+ for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+ char delimiter = (i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS - 1) ? ',' : ' ';
+ out << "\t\tMTL_VERTEX_DATA_" << i << delimiter << std::endl;
+ }
+ out << "};" << std::endl;
+ out << "\tvertex_shader_instance.MTL_VERTEX_DATA = GLOBAL_MTL_VERTEX_DATA;" << std::endl;
+ out << "\tvertex_shader_instance.MTL_INDEX_DATA_U16 = MTL_INDEX_DATA;" << std::endl;
+ out << "\tvertex_shader_instance.MTL_INDEX_DATA_U32 = reinterpret_cast<constant "
+ "uint32_t*>(MTL_INDEX_DATA);"
+ << std::endl;
+ return out.str();
+ }
+
+ /* Populate local attribute variables. */
+ std::stringstream out;
+ out << "\t/* Copy Vertex Stage-in attributes into local variables */" << std::endl;
+ for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) {
+
+ if (is_matrix_type(this->vertex_input_attributes[attribute].type)) {
+ /* Reading into an internal matrix from split attributes: Should generate the following:
+ * vertex_shader_instance.mat_attribute_type =
+ * mat4(v_in.__internal_mat_attribute_type0,
+ * v_in.__internal_mat_attribute_type1,
+ * v_in.__internal_mat_attribute_type2,
+ * v_in.__internal_mat_attribute_type3). */
+ out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name << " = "
+ << this->vertex_input_attributes[attribute].type << "(v_in.__internal_"
+ << this->vertex_input_attributes[attribute].name << 0;
+ for (int elem = 1;
+ elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type);
+ elem++) {
+ out << ",\n"
+ << "v_in.__internal_" << this->vertex_input_attributes[attribute].name << elem;
+ }
+ out << ");";
+ }
+ else {
+ /* OpenGL uses the GPU_FETCH_* functions which can alter how an attribute value is
+ * interpreted. In Metal, we cannot support all implicit conversions within the vertex
+ * descriptor/vertex stage-in, so we need to perform value transformation on-read.
+ *
+ * This is handled by wrapping attribute reads to local shader registers in a
+ * suitable conversion function `attribute_conversion_func_name`.
+ * This conversion function performs a specific transformation on the source
+ * vertex data, depending on the specified GPU_FETCH_* mode for the current
+ * vertex format.
+ *
+       * The fetch_mode is specified per-attribute using specialisation constants
+       * on the PSO, wherein a unique set of constants is passed in per vertex
+       * buffer/format configuration, efficiently enabling pass-through reads
+       * when no special fetch is required. */
+ bool do_attribute_conversion_on_read = false;
+ std::string attribute_conversion_func_name = get_attribute_conversion_function(
+ &do_attribute_conversion_on_read, this->vertex_input_attributes[attribute].type);
+
+ if (do_attribute_conversion_on_read) {
+ out << "\t" << attribute_conversion_func_name << "(MTL_AttributeConvert" << attribute
+ << ", v_in." << this->vertex_input_attributes[attribute].name
+ << ", vertex_shader_instance." << this->vertex_input_attributes[attribute].name << ");"
+ << std::endl;
+ }
+ else {
+ out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name
+ << " = v_in." << this->vertex_input_attributes[attribute].name << ";" << std::endl;
+ }
+ }
+ }
+ out << std::endl;
+ return out.str();
+}
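+
+/* Sketch of the generated attribute copies, using hypothetical names: a mat4
+ * attribute `model_mat` is reassembled from its split vector inputs, while a
+ * fetch-converted attribute `pos` routes through its conversion helper (the
+ * helper name depends on the attribute type and fetch mode):
+ *
+ *   vertex_shader_instance.model_mat = mat4(v_in.__internal_model_mat0,
+ *   v_in.__internal_model_mat1,
+ *   v_in.__internal_model_mat2,
+ *   v_in.__internal_model_mat3);
+ *   <conversion_func>(MTL_AttributeConvert0, v_in.pos, vertex_shader_instance.pos);
+ */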
+
+/* Copy post-main, modified, local class variables into vertex-output struct. */
+std::string MSLGeneratorInterface::generate_msl_vertex_output_population()
+{
+
+ std::stringstream out;
+ out << "\t/* Copy Vertex Outputs into output struct */" << std::endl;
+
+ /* Output gl_Position with conversion to Metal coordinate-space. */
+ if (this->uses_gl_Position) {
+ out << "\toutput._default_position_ = vertex_shader_instance.gl_Position;" << std::endl;
+
+ /* Invert Y and rescale depth range.
+ * This is an alternative method to modifying all projection matrices. */
+ out << "\toutput._default_position_.y = -output._default_position_.y;" << std::endl;
+ out << "\toutput._default_position_.z = "
+ "(output._default_position_.z+output._default_position_.w)/2.0;"
+ << std::endl;
+ }
+
+ /* Output Pointsize. */
+ if (this->uses_gl_PointSize) {
+ out << "\toutput.pointsize = vertex_shader_instance.gl_PointSize;" << std::endl;
+ }
+
+ /* Output render target array Index. */
+ if (uses_mtl_array_index_) {
+ out << "\toutput.MTLRenderTargetArrayIndex = "
+ "vertex_shader_instance.MTLRenderTargetArrayIndex;"
+ << std::endl;
+ }
+
+ /* Output clipdistances. */
+ out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl;
+ if (this->clip_distances.size() > 1) {
+ for (int cd = 0; cd < this->clip_distances.size(); cd++) {
+ out << "\toutput.clipdistance[" << cd << "] = vertex_shader_instance.gl_ClipDistance_" << cd
+ << ";" << std::endl;
+ }
+ }
+ else if (this->clip_distances.size() > 0) {
+ out << "\toutput.clipdistance = vertex_shader_instance.gl_ClipDistance_0;" << std::endl;
+ }
+ out << "#endif" << std::endl;
+
+ /* Populate output vertex variables. */
+ int output_id = 0;
+ for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) {
+ if (v_out.is_array) {
+
+ for (int i = 0; i < v_out.array_elems; i++) {
+ out << "\toutput." << v_out.instance_name << "_" << v_out.name << i
+ << " = vertex_shader_instance.";
+
+ if (v_out.instance_name != "") {
+ out << v_out.instance_name << ".";
+ }
+
+ out << v_out.name << "[" << i << "]"
+ << ";" << std::endl;
+ }
+ }
+ else {
+ /* Matrix types are split into vectors and need to be reconstructed. */
+ if (is_matrix_type(v_out.type)) {
+ for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) {
+ out << "\toutput." << v_out.instance_name << "__matrix_" << v_out.name << elem
+ << " = vertex_shader_instance.";
+
+ if (v_out.instance_name != "") {
+ out << v_out.instance_name << ".";
+ }
+
+ out << v_out.name << "[" << elem << "];" << std::endl;
+ }
+ }
+ else {
+ /* If we are not using gl_Position, first vertex output is used for position.
+ * Ensure it is vec4. If transform feedback is enabled, we do not need position. */
+ if (!this->uses_gl_Position && output_id == 0 && !this->uses_transform_feedback) {
+
+ out << "\toutput." << v_out.instance_name << "_" << v_out.name
+ << " = to_vec4(vertex_shader_instance." << v_out.name << ");" << std::endl;
+
+            /* Invert Y. */
+            out << "\toutput." << v_out.instance_name << "_" << v_out.name << ".y = -output."
+                << v_out.instance_name << "_" << v_out.name << ".y;" << std::endl;
+ }
+ else {
+
+ /* Assign vertex output. */
+ out << "\toutput." << v_out.instance_name << "_" << v_out.name
+ << " = vertex_shader_instance.";
+
+ if (v_out.instance_name != "") {
+ out << v_out.instance_name << ".";
+ }
+
+ out << v_out.name << ";" << std::endl;
+ }
+ }
+ }
+ output_id++;
+ }
+ out << std::endl;
+ return out.str();
+}
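+
+/* Sketch of the emitted output population for a shader using gl_Position and
+ * one hypothetical varying `uv` (with no instance name):
+ *
+ *   output._default_position_ = vertex_shader_instance.gl_Position;
+ *   output._default_position_.y = -output._default_position_.y;
+ *   output._default_position_.z =
+ *       (output._default_position_.z+output._default_position_.w)/2.0;
+ *   output._uv = vertex_shader_instance.uv;
+ */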
+
+/* Copy desired output varyings into transform feedback structure */
+std::string MSLGeneratorInterface::generate_msl_vertex_output_tf_population()
+{
+
+ std::stringstream out;
+ out << "\t/* Copy Vertex TF Outputs into transform feedback buffer */" << std::endl;
+
+ /* Populate output vertex variables */
+ /* TODO(Metal): Currently do not need to support output matrix types etc; but may need to
+ * verify for other configurations if these occur in any cases. */
+ for (int v_output = 0; v_output < this->vertex_output_varyings_tf.size(); v_output++) {
+ out << "transform_feedback_results[gl_VertexID]."
+ << this->vertex_output_varyings_tf[v_output].name << " = vertex_shader_instance."
+ << this->vertex_output_varyings_tf[v_output].name << ";" << std::endl;
+ }
+ out << std::endl;
+ return out.str();
+}
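+
+/* E.g. a single hypothetical TF varying `tf_position` results in:
+ *
+ *   transform_feedback_results[gl_VertexID].tf_position =
+ *       vertex_shader_instance.tf_position;
+ */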
+
+/* Copy fragment stage inputs (Vertex Outputs) into local class variables. */
+std::string MSLGeneratorInterface::generate_msl_fragment_input_population()
+{
+
+ /* Populate local attribute variables. */
+ std::stringstream out;
+ out << "\t/* Copy Fragment input into local variables. */" << std::endl;
+
+ /* Special common case for gl_FragCoord, assigning to input position. */
+ if (this->uses_gl_Position) {
+ out << "\tfragment_shader_instance.gl_FragCoord = v_in._default_position_;" << std::endl;
+ }
+ else {
+ /* When gl_Position is not set, first VertexIn element is used for position. */
+ out << "\tfragment_shader_instance.gl_FragCoord = v_in."
+ << this->vertex_output_varyings[0].name << ";" << std::endl;
+ }
+
+ /* NOTE: We will only assign to the intersection of the vertex output and fragment input.
+ * Fragment input represents varying variables which are declared (but are not necessarily
+ * used). The Vertex out defines the set which is passed into the fragment shader, which
+ * contains out variables declared in the vertex shader, though these are not necessarily
+ * consumed by the fragment shader.
+ *
+ * In the cases where the fragment shader expects a variable, but it does not exist in the
+ * vertex shader, a warning will be provided. */
+ for (int f_input = (this->uses_gl_Position) ? 0 : 1;
+ f_input < this->fragment_input_varyings.size();
+ f_input++) {
+ bool exists_in_vertex_output = false;
+ for (int v_o = 0; v_o < this->vertex_output_varyings.size() && !exists_in_vertex_output;
+ v_o++) {
+ if (this->fragment_input_varyings[f_input].name == this->vertex_output_varyings[v_o].name) {
+ exists_in_vertex_output = true;
+ }
+ }
+ if (!exists_in_vertex_output) {
+ shader_debug_printf(
+ "[Warning] Fragment shader expects varying input '%s', but this is not passed from "
+ "the "
+ "vertex shader\n",
+ this->fragment_input_varyings[f_input].name.c_str());
+ continue;
+ }
+ if (this->fragment_input_varyings[f_input].is_array) {
+ for (int i = 0; i < this->fragment_input_varyings[f_input].array_elems; i++) {
+ out << "\tfragment_shader_instance.";
+
+ if (this->fragment_input_varyings[f_input].instance_name != "") {
+ out << this->fragment_input_varyings[f_input].instance_name << ".";
+ }
+
+ out << this->fragment_input_varyings[f_input].name << "[" << i << "] = v_in."
+ << this->fragment_input_varyings[f_input].instance_name << "_"
+ << this->fragment_input_varyings[f_input].name << i << ";" << std::endl;
+ }
+ }
+ else {
+ /* Matrix types are split into components and need to be regrouped into a matrix. */
+ if (is_matrix_type(this->fragment_input_varyings[f_input].type)) {
+ out << "\tfragment_shader_instance.";
+
+ if (this->fragment_input_varyings[f_input].instance_name != "") {
+ out << this->fragment_input_varyings[f_input].instance_name << ".";
+ }
+
+ out << this->fragment_input_varyings[f_input].name << " = "
+ << this->fragment_input_varyings[f_input].type;
+ int count = get_matrix_location_count(this->fragment_input_varyings[f_input].type);
+ for (int elem = 0; elem < count; elem++) {
+ out << ((elem == 0) ? "(" : "") << "v_in."
+ << this->fragment_input_varyings[f_input].instance_name << "__matrix_"
+ << this->fragment_input_varyings[f_input].name << elem
+ << ((elem < count - 1) ? ",\n" : "");
+ }
+ out << ");" << std::endl;
+ }
+ else {
+ out << "\tfragment_shader_instance.";
+
+ if (this->fragment_input_varyings[f_input].instance_name != "") {
+ out << this->fragment_input_varyings[f_input].instance_name << ".";
+ }
+
+ out << this->fragment_input_varyings[f_input].name << " = v_in."
+ << this->fragment_input_varyings[f_input].instance_name << "_"
+ << this->fragment_input_varyings[f_input].name << ";" << std::endl;
+ }
+ }
+ }
+ out << std::endl;
+ return out.str();
+}
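+
+/* Sketch of the matrix regrouping path for a hypothetical `mat3 normal_mat`
+ * varying (no instance name): the split vector components written by the
+ * vertex stage are recombined on read:
+ *
+ *   fragment_shader_instance.normal_mat = mat3(v_in.__matrix_normal_mat0,
+ *   v_in.__matrix_normal_mat1,
+ *   v_in.__matrix_normal_mat2);
+ */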
+
+/* Copy post-main, modified, local class variables into fragment-output struct. */
+std::string MSLGeneratorInterface::generate_msl_fragment_output_population()
+{
+
+ /* Populate output fragment variables. */
+ std::stringstream out;
+ out << "\t/* Copy Fragment Outputs into output struct. */" << std::endl;
+
+ /* Output gl_FragDepth. */
+ if (this->uses_gl_FragDepth) {
+ out << "\toutput.fragdepth = fragment_shader_instance.gl_FragDepth;" << std::endl;
+ }
+
+ /* Output attributes. */
+ for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) {
+
+ out << "\toutput." << this->fragment_outputs[f_output].name << " = fragment_shader_instance."
+ << this->fragment_outputs[f_output].name << ";" << std::endl;
+ }
+ out << std::endl;
+ return out.str();
+}
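+
+/* E.g. with a hypothetical `fragColor` output and gl_FragDepth in use:
+ *
+ *   output.fragdepth = fragment_shader_instance.gl_FragDepth;
+ *   output.fragColor = fragment_shader_instance.fragColor;
+ */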
+
+std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_stage)
+{
+ BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
+
+ std::stringstream out;
+ out << "\t/* Populate local texture and sampler members */" << std::endl;
+ for (int i = 0; i < this->texture_samplers.size(); i++) {
+ if (bool(this->texture_samplers[i].stage & shader_stage)) {
+
+ /* Assign texture reference. */
+ out << "\t"
+ << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." :
+ "fragment_shader_instance.")
+ << this->texture_samplers[i].name << ".texture = &" << this->texture_samplers[i].name
+ << ";" << std::endl;
+
+ /* Assign sampler reference. */
+ if (this->use_argument_buffer_for_samplers()) {
+ out << "\t"
+ << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." :
+ "fragment_shader_instance.")
+ << this->texture_samplers[i].name << ".samp = &samplers.sampler_args[" << i << "];"
+ << std::endl;
+ }
+ else {
+ out << "\t"
+ << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." :
+ "fragment_shader_instance.")
+ << this->texture_samplers[i].name << ".samp = &" << this->texture_samplers[i].name
+ << "_sampler;" << std::endl;
+ }
+ }
+ }
+ out << std::endl;
+ return out.str();
+}
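+
+/* For a hypothetical sampler `image` used in the fragment stage, without
+ * argument buffers, the above emits:
+ *
+ *   fragment_shader_instance.image.texture = &image;
+ *   fragment_shader_instance.image.samp = &image_sampler;
+ *
+ * With argument buffers, `samp` instead points into `samplers.sampler_args[i]`. */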
+
+void MSLGeneratorInterface::resolve_input_attribute_locations()
+{
+ /* Determine used-attribute-location mask. */
+ uint32_t used_locations = 0;
+ for (const MSLVertexInputAttribute &attr : vertex_input_attributes) {
+ if (attr.layout_location >= 0) {
+      /* Matrix and array types span multiple location slots. */
+      uint32_t location_element_count = get_matrix_location_count(attr.type);
+      for (uint32_t i = 0; i < location_element_count; i++) {
+        /* Ensure each location slot hasn't already been used. */
+        uint32_t location_mask = (1u << (attr.layout_location + i));
+        BLI_assert((used_locations & location_mask) == 0);
+        used_locations = used_locations | location_mask;
+      }
+ }
+ }
+
+ /* Assign unused location slots to other attributes. */
+ for (MSLVertexInputAttribute &attr : vertex_input_attributes) {
+ if (attr.layout_location == -1) {
+ /* Determine number of locations required. */
+ uint32_t required_attr_slot_count = get_matrix_location_count(attr.type);
+
+      /* Determine a free contiguous run of location slots.
+       * Scanning upward from slot 0 is slightly less efficient, however,
+       * given multi-sized attributes, an earlier slot may remain free, and
+       * given GPU_VERT_ATTR_MAX_LEN is small, this won't matter. */
+      for (int loc = 0; loc < GPU_VERT_ATTR_MAX_LEN - (required_attr_slot_count - 1); loc++) {
+
+        /* Generate sliding mask using location and required number of slots,
+         * to ensure contiguous slots are free.
+         * The slot mask is a number containing N binary 1's, where N is the
+         * number of slots needed, e.g. N=4 -> 0b1111, shifted up to `loc`. */
+        uint32_t location_slot_mask = (1 << required_attr_slot_count) - 1;
+        uint32_t sliding_location_slot_mask = location_slot_mask << loc;
+        if ((used_locations & sliding_location_slot_mask) == 0) {
+          /* Assign location and update mask. */
+          attr.layout_location = loc;
+          used_locations = used_locations | sliding_location_slot_mask;
+          break;
+        }
+      }
+
+      /* Error if we could not assign the attribute to a location. */
+      if (attr.layout_location == -1) {
+        MTL_LOG_ERROR("Could not assign attribute location to attribute %s for shader %s\n",
+                      attr.name.c_str(),
+                      this->parent_shader_.name_get());
+      }
+ }
+ }
+}
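+
+/* Worked example of the slot-mask arithmetic above (illustrative): a mat4
+ * attribute requires 4 contiguous slots, so location_slot_mask = (1 << 4) - 1
+ * = 0b1111. Testing loc = 2 shifts this to 0b00111100; if none of those bits
+ * are set in used_locations, the attribute is placed at location 2 and those
+ * four bits are marked as used. */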
+
+void MSLGeneratorInterface::resolve_fragment_output_locations()
+{
+ int running_location_ind = 0;
+
+ /* This code works under the assumption that either all layout_locations are set,
+ * or none are. */
+ for (int i = 0; i < this->fragment_outputs.size(); i++) {
+ BLI_assert_msg(
+ ((running_location_ind > 0) ? (this->fragment_outputs[i].layout_location == -1) : true),
+ "Error: Mismatched input attributes, some with location specified, some without");
+ if (this->fragment_outputs[i].layout_location == -1) {
+ this->fragment_outputs[i].layout_location = running_location_ind;
+ running_location_ind++;
+ }
+ }
+}
+
+/* Add string to name buffer. Utility function to be used in bake_shader_interface.
+ * Returns the offset of the inserted name. */
+static uint32_t name_buffer_copystr(char **name_buffer_ptr,
+ const char *str_to_copy,
+ uint32_t &name_buffer_size,
+ uint32_t &name_buffer_offset)
+{
+ /* Verify input is valid. */
+ BLI_assert(str_to_copy != nullptr);
+
+ /* Determine length of new string, and ensure name buffer is large enough. */
+ uint32_t ret_len = strlen(str_to_copy);
+ BLI_assert(ret_len > 0);
+
+  /* If the name buffer lacks space for the new string, grow it by at least 128 bytes. */
+  if (name_buffer_offset + ret_len + 1 > name_buffer_size) {
+    name_buffer_size = name_buffer_size + max_ii(128, ret_len + 1);
+    *name_buffer_ptr = (char *)MEM_reallocN(*name_buffer_ptr, name_buffer_size);
+  }
+
+ /* Copy string into name buffer. */
+ uint32_t insert_offset = name_buffer_offset;
+ char *current_offset = (*name_buffer_ptr) + insert_offset;
+ strcpy(current_offset, str_to_copy);
+
+ /* Adjust offset including null terminator. */
+ name_buffer_offset += ret_len + 1;
+
+ /* Return offset into name buffer for inserted string. */
+ return insert_offset;
+}
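+
+/* Usage sketch (hypothetical name): the returned offset is what the interface
+ * stores, while the buffer grows on demand:
+ *
+ *   uint32_t offset = name_buffer_copystr(
+ *       &interface->name_buffer_, "my_uniform", name_buffer_size, name_buffer_offset);
+ *   interface->add_uniform(offset, MTL_DATATYPE_FLOAT4, 1);
+ */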
+
+MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *name)
+{
+ MTLShaderInterface *interface = new MTLShaderInterface(name);
+ interface->init();
+
+  /* Initialise name buffer. */
+ uint32_t name_buffer_size = 256;
+ uint32_t name_buffer_offset = 0;
+ interface->name_buffer_ = (char *)MEM_mallocN(name_buffer_size, "name_buffer");
+
+ /* Prepare Interface Input Attributes. */
+ int c_offset = 0;
+ for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) {
+
+ /* We need a special case for handling matrix types, which splits the matrix into its vector
+ * components. */
+ if (is_matrix_type(this->vertex_input_attributes[attribute].type)) {
+
+ eMTLDataType mtl_type = to_mtl_type(
+ get_matrix_subtype(this->vertex_input_attributes[attribute].type));
+ int size = mtl_get_data_type_size(mtl_type);
+ for (int elem = 0;
+ elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type);
+ elem++) {
+ /* First attribute matches the core name -- subsequent attributes tagged with
+ * __internal_<name><index>. */
+ std::string _internal_name = (elem == 0) ?
+ this->vertex_input_attributes[attribute].name :
+ "__internal_" +
+ this->vertex_input_attributes[attribute].name +
+ std::to_string(elem);
+
+        /* If using SSBO Vertex Fetch, we do not need to expose other dummy attributes in the
+         * shader interface, only the first one for the whole matrix, as we can pass whatever data
+         * we want in this mode, and do not need to split attributes. */
+ if (elem == 0 || !this->uses_ssbo_vertex_fetch_mode) {
+ interface->add_input_attribute(
+ name_buffer_copystr(&interface->name_buffer_,
+ _internal_name.c_str(),
+ name_buffer_size,
+ name_buffer_offset),
+ this->vertex_input_attributes[attribute].layout_location + elem,
+ mtl_datatype_to_vertex_type(mtl_type),
+ 0,
+ size,
+ c_offset,
+ (elem == 0) ?
+ get_matrix_location_count(this->vertex_input_attributes[attribute].type) :
+ 0);
+ }
+ c_offset += size;
+ }
+ shader_debug_printf(
+ "[Note] Matrix Type '%s' added to shader interface as vertex attribute. (Elem Count: "
+ "%d)\n",
+ this->vertex_input_attributes[attribute].name.c_str(),
+ get_matrix_location_count(this->vertex_input_attributes[attribute].type));
+ }
+ else {
+
+ /* Normal attribute types. */
+ eMTLDataType mtl_type = to_mtl_type(this->vertex_input_attributes[attribute].type);
+ int size = mtl_get_data_type_size(mtl_type);
+ interface->add_input_attribute(
+ name_buffer_copystr(&interface->name_buffer_,
+ this->vertex_input_attributes[attribute].name.c_str(),
+ name_buffer_size,
+ name_buffer_offset),
+ this->vertex_input_attributes[attribute].layout_location,
+ mtl_datatype_to_vertex_type(mtl_type),
+ 0,
+ size,
+ c_offset);
+ c_offset += size;
+ }
+ }
+
+ /* Prepare Interface Default Uniform Block. */
+ interface->add_push_constant_block(name_buffer_copystr(
+ &interface->name_buffer_, "PushConstantBlock", name_buffer_size, name_buffer_offset));
+
+ for (int uniform = 0; uniform < this->uniforms.size(); uniform++) {
+ interface->add_uniform(
+ name_buffer_copystr(&interface->name_buffer_,
+ this->uniforms[uniform].name.c_str(),
+ name_buffer_size,
+ name_buffer_offset),
+ to_mtl_type(this->uniforms[uniform].type),
+ (this->uniforms[uniform].is_array) ? this->uniforms[uniform].array_elems : 1);
+ }
+
+ /* Prepare Interface Uniform Blocks. */
+ for (int uniform_block = 0; uniform_block < this->uniform_blocks.size(); uniform_block++) {
+ interface->add_uniform_block(
+ name_buffer_copystr(&interface->name_buffer_,
+ this->uniform_blocks[uniform_block].name.c_str(),
+ name_buffer_size,
+ name_buffer_offset),
+ uniform_block,
+ 0,
+ this->uniform_blocks[uniform_block].stage);
+ }
+
+ /* Texture/sampler bindings to interface. */
+ for (const MSLTextureSampler &texture_sampler : this->texture_samplers) {
+ interface->add_texture(name_buffer_copystr(&interface->name_buffer_,
+ texture_sampler.name.c_str(),
+ name_buffer_size,
+ name_buffer_offset),
+ texture_sampler.location,
+ texture_sampler.get_texture_binding_type(),
+ texture_sampler.stage);
+ }
+
+ /* Sampler Parameters. */
+ interface->set_sampler_properties(
+ this->use_argument_buffer_for_samplers(),
+ this->get_sampler_argument_buffer_bind_index(ShaderStage::VERTEX),
+ this->get_sampler_argument_buffer_bind_index(ShaderStage::FRAGMENT));
+
+ /* Map Metal bindings to standardised ShaderInput struct name/binding index. */
+ interface->prepare_common_shader_inputs();
+
+ /* Resize name buffer to save some memory. */
+ if (name_buffer_offset < name_buffer_size) {
+ interface->name_buffer_ = (char *)MEM_reallocN(interface->name_buffer_, name_buffer_offset);
+ }
+
+ return interface;
+}
+
+std::string MSLTextureSampler::get_msl_texture_type_str() const
+{
+ /* Add Types as needed. */
+ switch (this->type) {
+ case ImageType::FLOAT_1D: {
+ return "texture1d";
+ }
+ case ImageType::FLOAT_2D: {
+ return "texture2d";
+ }
+ case ImageType::FLOAT_3D: {
+ return "texture3d";
+ }
+ case ImageType::FLOAT_CUBE: {
+ return "texturecube";
+ }
+ case ImageType::FLOAT_1D_ARRAY: {
+ return "texture1d_array";
+ }
+ case ImageType::FLOAT_2D_ARRAY: {
+ return "texture2d_array";
+ }
+ case ImageType::FLOAT_CUBE_ARRAY: {
+ return "texturecube_array";
+ }
+ case ImageType::FLOAT_BUFFER: {
+ return "texture_buffer";
+ }
+ case ImageType::DEPTH_2D: {
+ return "depth2d";
+ }
+ case ImageType::SHADOW_2D: {
+ return "depth2d";
+ }
+ case ImageType::DEPTH_2D_ARRAY: {
+ return "depth2d_array";
+ }
+ case ImageType::SHADOW_2D_ARRAY: {
+ return "depth2d_array";
+ }
+ case ImageType::DEPTH_CUBE: {
+ return "depthcube";
+ }
+ case ImageType::SHADOW_CUBE: {
+ return "depthcube";
+ }
+ case ImageType::DEPTH_CUBE_ARRAY: {
+ return "depthcube_array";
+ }
+ case ImageType::SHADOW_CUBE_ARRAY: {
+ return "depthcube_array";
+ }
+ case ImageType::INT_1D: {
+ return "texture1d";
+ }
+ case ImageType::INT_2D: {
+ return "texture2d";
+ }
+ case ImageType::INT_3D: {
+ return "texture3d";
+ }
+ case ImageType::INT_CUBE: {
+ return "texturecube";
+ }
+ case ImageType::INT_1D_ARRAY: {
+ return "texture1d_array";
+ }
+ case ImageType::INT_2D_ARRAY: {
+ return "texture2d_array";
+ }
+ case ImageType::INT_CUBE_ARRAY: {
+ return "texturecube_array";
+ }
+ case ImageType::INT_BUFFER: {
+ return "texture_buffer";
+ }
+ case ImageType::UINT_1D: {
+ return "texture1d";
+ }
+ case ImageType::UINT_2D: {
+ return "texture2d";
+ }
+ case ImageType::UINT_3D: {
+ return "texture3d";
+ }
+ case ImageType::UINT_CUBE: {
+ return "texturecube";
+ }
+ case ImageType::UINT_1D_ARRAY: {
+ return "texture1d_array";
+ }
+ case ImageType::UINT_2D_ARRAY: {
+ return "texture2d_array";
+ }
+ case ImageType::UINT_CUBE_ARRAY: {
+ return "texturecube_array";
+ }
+ case ImageType::UINT_BUFFER: {
+ return "texture_buffer";
+ }
+ default: {
+ /* Unrecognised type. */
+ BLI_assert_unreachable();
+ return "ERROR";
+ }
+ };
+}
+
+std::string MSLTextureSampler::get_msl_wrapper_type_str() const
+{
+ /* Add Types as needed. */
+ switch (this->type) {
+ case ImageType::FLOAT_1D: {
+ return "_mtl_combined_image_sampler_1d";
+ }
+ case ImageType::FLOAT_2D: {
+ return "_mtl_combined_image_sampler_2d";
+ }
+ case ImageType::FLOAT_3D: {
+ return "_mtl_combined_image_sampler_3d";
+ }
+ case ImageType::FLOAT_CUBE: {
+ return "_mtl_combined_image_sampler_cube";
+ }
+ case ImageType::FLOAT_1D_ARRAY: {
+ return "_mtl_combined_image_sampler_1d_array";
+ }
+ case ImageType::FLOAT_2D_ARRAY: {
+ return "_mtl_combined_image_sampler_2d_array";
+ }
+ case ImageType::FLOAT_CUBE_ARRAY: {
+ return "_mtl_combined_image_sampler_cube_array";
+ }
+ case ImageType::FLOAT_BUFFER: {
+ return "_mtl_combined_image_sampler_buffer";
+ }
+ case ImageType::DEPTH_2D: {
+ return "_mtl_combined_image_sampler_depth_2d";
+ }
+ case ImageType::SHADOW_2D: {
+ return "_mtl_combined_image_sampler_depth_2d";
+ }
+ case ImageType::DEPTH_2D_ARRAY: {
+ return "_mtl_combined_image_sampler_depth_2d_array";
+ }
+ case ImageType::SHADOW_2D_ARRAY: {
+ return "_mtl_combined_image_sampler_depth_2d_array";
+ }
+ case ImageType::DEPTH_CUBE: {
+ return "_mtl_combined_image_sampler_depth_cube";
+ }
+ case ImageType::SHADOW_CUBE: {
+ return "_mtl_combined_image_sampler_depth_cube";
+ }
+ case ImageType::DEPTH_CUBE_ARRAY: {
+ return "_mtl_combined_image_sampler_depth_cube_array";
+ }
+ case ImageType::SHADOW_CUBE_ARRAY: {
+ return "_mtl_combined_image_sampler_depth_cube_array";
+ }
+ case ImageType::INT_1D: {
+ return "_mtl_combined_image_sampler_1d";
+ }
+ case ImageType::INT_2D: {
+ return "_mtl_combined_image_sampler_2d";
+ }
+ case ImageType::INT_3D: {
+ return "_mtl_combined_image_sampler_3d";
+ }
+ case ImageType::INT_CUBE: {
+ return "_mtl_combined_image_sampler_cube";
+ }
+ case ImageType::INT_1D_ARRAY: {
+ return "_mtl_combined_image_sampler_1d_array";
+ }
+ case ImageType::INT_2D_ARRAY: {
+ return "_mtl_combined_image_sampler_2d_array";
+ }
+ case ImageType::INT_CUBE_ARRAY: {
+ return "_mtl_combined_image_sampler_cube_array";
+ }
+ case ImageType::INT_BUFFER: {
+ return "_mtl_combined_image_sampler_buffer";
+ }
+ case ImageType::UINT_1D: {
+ return "_mtl_combined_image_sampler_1d";
+ }
+ case ImageType::UINT_2D: {
+ return "_mtl_combined_image_sampler_2d";
+ }
+ case ImageType::UINT_3D: {
+ return "_mtl_combined_image_sampler_3d";
+ }
+ case ImageType::UINT_CUBE: {
+ return "_mtl_combined_image_sampler_cube";
+ }
+ case ImageType::UINT_1D_ARRAY: {
+ return "_mtl_combined_image_sampler_1d_array";
+ }
+ case ImageType::UINT_2D_ARRAY: {
+ return "_mtl_combined_image_sampler_2d_array";
+ }
+ case ImageType::UINT_CUBE_ARRAY: {
+ return "_mtl_combined_image_sampler_cube_array";
+ }
+ case ImageType::UINT_BUFFER: {
+ return "_mtl_combined_image_sampler_buffer";
+ }
+ default: {
+ /* Unrecognised type. */
+ BLI_assert_unreachable();
+ return "ERROR";
+ }
+ };
+}
+
+std::string MSLTextureSampler::get_msl_return_type_str() const
+{
+ /* Add Types as needed */
+ switch (this->type) {
+ /* Floating point return. */
+ case ImageType::FLOAT_1D:
+ case ImageType::FLOAT_2D:
+ case ImageType::FLOAT_3D:
+ case ImageType::FLOAT_CUBE:
+ case ImageType::FLOAT_1D_ARRAY:
+ case ImageType::FLOAT_2D_ARRAY:
+ case ImageType::FLOAT_CUBE_ARRAY:
+ case ImageType::FLOAT_BUFFER:
+ case ImageType::DEPTH_2D:
+ case ImageType::SHADOW_2D:
+ case ImageType::DEPTH_2D_ARRAY:
+ case ImageType::SHADOW_2D_ARRAY:
+ case ImageType::DEPTH_CUBE:
+ case ImageType::SHADOW_CUBE:
+ case ImageType::DEPTH_CUBE_ARRAY:
+ case ImageType::SHADOW_CUBE_ARRAY: {
+ return "float";
+ }
+ /* Integer return. */
+ case ImageType::INT_1D:
+ case ImageType::INT_2D:
+ case ImageType::INT_3D:
+ case ImageType::INT_CUBE:
+ case ImageType::INT_1D_ARRAY:
+ case ImageType::INT_2D_ARRAY:
+ case ImageType::INT_CUBE_ARRAY:
+ case ImageType::INT_BUFFER: {
+ return "int";
+ }
+
+ /* Unsigned Integer return. */
+ case ImageType::UINT_1D:
+ case ImageType::UINT_2D:
+ case ImageType::UINT_3D:
+ case ImageType::UINT_CUBE:
+ case ImageType::UINT_1D_ARRAY:
+ case ImageType::UINT_2D_ARRAY:
+ case ImageType::UINT_CUBE_ARRAY:
+ case ImageType::UINT_BUFFER: {
+ return "uint32_t";
+ }
+
+ default: {
+ /* Unrecognised type. */
+ BLI_assert_unreachable();
+ return "ERROR";
+ }
+ };
+}
+
+eGPUTextureType MSLTextureSampler::get_texture_binding_type() const
+{
+ /* Add Types as needed */
+ switch (this->type) {
+ case ImageType::FLOAT_1D: {
+ return GPU_TEXTURE_1D;
+ }
+ case ImageType::FLOAT_2D: {
+ return GPU_TEXTURE_2D;
+ }
+ case ImageType::FLOAT_3D: {
+ return GPU_TEXTURE_3D;
+ }
+ case ImageType::FLOAT_CUBE: {
+ return GPU_TEXTURE_CUBE;
+ }
+ case ImageType::FLOAT_1D_ARRAY: {
+ return GPU_TEXTURE_1D_ARRAY;
+ }
+ case ImageType::FLOAT_2D_ARRAY: {
+ return GPU_TEXTURE_2D_ARRAY;
+ }
+ case ImageType::FLOAT_CUBE_ARRAY: {
+ return GPU_TEXTURE_CUBE_ARRAY;
+ }
+ case ImageType::FLOAT_BUFFER: {
+ return GPU_TEXTURE_BUFFER;
+ }
+ case ImageType::DEPTH_2D: {
+ return GPU_TEXTURE_2D;
+ }
+ case ImageType::SHADOW_2D: {
+ return GPU_TEXTURE_2D;
+ }
+ case ImageType::DEPTH_2D_ARRAY: {
+ return GPU_TEXTURE_2D_ARRAY;
+ }
+ case ImageType::SHADOW_2D_ARRAY: {
+ return GPU_TEXTURE_2D_ARRAY;
+ }
+ case ImageType::DEPTH_CUBE: {
+ return GPU_TEXTURE_CUBE;
+ }
+ case ImageType::SHADOW_CUBE: {
+ return GPU_TEXTURE_CUBE;
+ }
+ case ImageType::DEPTH_CUBE_ARRAY: {
+ return GPU_TEXTURE_CUBE_ARRAY;
+ }
+ case ImageType::SHADOW_CUBE_ARRAY: {
+ return GPU_TEXTURE_CUBE_ARRAY;
+ }
+ case ImageType::INT_1D: {
+ return GPU_TEXTURE_1D;
+ }
+ case ImageType::INT_2D: {
+ return GPU_TEXTURE_2D;
+ }
+ case ImageType::INT_3D: {
+ return GPU_TEXTURE_3D;
+ }
+ case ImageType::INT_CUBE: {
+ return GPU_TEXTURE_CUBE;
+ }
+ case ImageType::INT_1D_ARRAY: {
+ return GPU_TEXTURE_1D_ARRAY;
+ }
+ case ImageType::INT_2D_ARRAY: {
+ return GPU_TEXTURE_2D_ARRAY;
+ }
+ case ImageType::INT_CUBE_ARRAY: {
+ return GPU_TEXTURE_CUBE_ARRAY;
+ }
+ case ImageType::INT_BUFFER: {
+ return GPU_TEXTURE_BUFFER;
+ }
+ case ImageType::UINT_1D: {
+ return GPU_TEXTURE_1D;
+ }
+ case ImageType::UINT_2D: {
+ return GPU_TEXTURE_2D;
+ }
+ case ImageType::UINT_3D: {
+ return GPU_TEXTURE_3D;
+ }
+ case ImageType::UINT_CUBE: {
+ return GPU_TEXTURE_CUBE;
+ }
+ case ImageType::UINT_1D_ARRAY: {
+ return GPU_TEXTURE_1D_ARRAY;
+ }
+ case ImageType::UINT_2D_ARRAY: {
+ return GPU_TEXTURE_2D_ARRAY;
+ }
+ case ImageType::UINT_CUBE_ARRAY: {
+ return GPU_TEXTURE_CUBE_ARRAY;
+ }
+ case ImageType::UINT_BUFFER: {
+ return GPU_TEXTURE_BUFFER;
+ }
+ default: {
+ BLI_assert_unreachable();
+ return GPU_TEXTURE_2D;
+ }
+ };
+}
+
+/** \} */
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader_interface.hh b/source/blender/gpu/metal/mtl_shader_interface.hh
new file mode 100644
index 00000000000..0f04c04031d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_interface.hh
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_vector.hh"
+
+#include "gpu_shader_interface.hh"
+#include "mtl_capabilities.hh"
+#include "mtl_shader_interface_type.hh"
+
+#include "GPU_common.h"
+#include "GPU_common_types.h"
+#include "GPU_texture.h"
+#include "gpu_texture_private.hh"
+#include <Metal/Metal.h>
+#include <functional>
+
+namespace blender::gpu {
+
+/* MTLShaderInterface describes the layout and properties of a given shader,
+ * including input and output bindings, and any special properties or modes
+ * that the shader may require.
+ *
+ * -- Shader input/output bindings --
+ *
+ * We require custom data structures for the binding information in Metal.
+ * This is because certain bindings contain and require more information to
+ * be stored than can be tracked solely within the `ShaderInput` struct.
+ * e.g. data sizes and offsets.
+ *
+ * Upon interface completion, `prepare_common_shader_inputs` is used to
+ * populate the global ShaderInput* array to enable correct functionality
+ * of shader binding location lookups. These returned locations act as indices
+ * into the arrays stored here in the MTLShaderInterface, such that extraction
+ * of required information can be performed within the backend.
+ *
+ * e.g. `int loc = GPU_shader_get_uniform(...)`
+ * `loc` will match the index into the MTLShaderUniform uniforms_[] array
+ * to fetch the required Metal specific information.
+ *
+ *
+ *
+ * We can use ArgumentBuffers (AB's) in Metal to extend the resource bind limitations
+ * by providing bindless support.
+ *
+ * Argument Buffers are used for sampler bindings when the builtin
+ * sampler limit of 16 is exceeded, as in all cases for Blender,
+ * each individual texture is associated with a given sampler, and this
+ * lower limit would otherwise reduce the total availability of textures
+ * used in shaders.
+ *
+ * In future, argument buffers may be extended to support other resource
+ * types, if overall bind limits are ever increased within Blender.
+ *
+ * The ArgumentEncoder cache is used to store the generated ArgumentEncoders for a given
+ * shader permutation. The ArgumentEncoder is the resource used to write resource binding
+ * information to a specified buffer, and is unique to the shader's resource interface.
+ */
+
+enum class ShaderStage : uint32_t {
+ VERTEX = 1 << 0,
+ FRAGMENT = 1 << 1,
+ BOTH = (ShaderStage::VERTEX | ShaderStage::FRAGMENT),
+};
+ENUM_OPERATORS(ShaderStage, ShaderStage::BOTH);
+
+inline uint get_shader_stage_index(ShaderStage stage)
+{
+ switch (stage) {
+ case ShaderStage::VERTEX:
+ return 0;
+ case ShaderStage::FRAGMENT:
+ return 1;
+ default:
+ BLI_assert_unreachable();
+ return 0;
+ }
+ return 0;
+}
+
+/* Shader input/output binding information. */
+struct MTLShaderInputAttribute {
+ uint32_t name_offset;
+ MTLVertexFormat format;
+ uint32_t index;
+ uint32_t location;
+ uint32_t size;
+ uint32_t buffer_index;
+ uint32_t offset;
+ /* For attributes of Matrix/array types, we need to insert "fake" attributes for
+ * each element, as matrix types are not natively supported.
+ *
+ * > 1 if matrix/arrays are used, specifying number of elements.
+ * = 1 for non-matrix types
+ * = 0 if used as a dummy slot for "fake" matrix attributes. */
+ uint32_t matrix_element_count;
+};
+
+struct MTLShaderUniformBlock {
+ uint32_t name_offset;
+ uint32_t size = 0;
+  /* Buffer resource bind index in shader [[buffer(index)]]. */
+ uint32_t buffer_index;
+
+ /* Tracking for manual uniform addition. */
+ uint32_t current_offset;
+ ShaderStage stage_mask;
+};
+
+struct MTLShaderUniform {
+ uint32_t name_offset;
+  uint32_t size_in_bytes;
+ uint32_t byte_offset;
+ eMTLDataType type;
+ uint32_t array_len;
+};
+
+struct MTLShaderTexture {
+ bool used;
+ uint32_t name_offset;
+ /* Texture resource bind slot in shader [[texture(n)]]. */
+ int slot_index;
+ eGPUTextureType type;
+ ShaderStage stage_mask;
+};
+
+struct MTLShaderSampler {
+ uint32_t name_offset;
+ /* Sampler resource bind slot in shader [[sampler(n)]]. */
+ uint32_t slot_index = 0;
+};
+
+/* Utility Functions. */
+MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type);
+
+/**
+ * Implementation of Shader interface for Metal Backend.
+ **/
+class MTLShaderInterface : public ShaderInterface {
+
+ private:
+ /* Argument encoders caching.
+ * Static size is based on common input permutation variations. */
+ static const int ARGUMENT_ENCODERS_CACHE_SIZE = 3;
+ struct ArgumentEncoderCacheEntry {
+ id<MTLArgumentEncoder> encoder;
+ int buffer_index;
+ };
+ ArgumentEncoderCacheEntry arg_encoders_[ARGUMENT_ENCODERS_CACHE_SIZE] = {};
+
+  /* Vertex input attributes. */
+ uint32_t total_attributes_;
+ uint32_t total_vert_stride_;
+ MTLShaderInputAttribute attributes_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES];
+
+ /* Uniforms. */
+ uint32_t total_uniforms_;
+ MTLShaderUniform uniforms_[MTL_MAX_UNIFORMS_PER_BLOCK];
+
+ /* Uniform Blocks. */
+ uint32_t total_uniform_blocks_;
+ MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
+ MTLShaderUniformBlock push_constant_block_;
+
+ /* Textures. */
+ /* Textures support explicit binding indices, so some texture slots
+ * remain unused. */
+ uint32_t total_textures_;
+ int max_texture_index_;
+ MTLShaderTexture textures_[MTL_MAX_TEXTURE_SLOTS];
+
+ /* Whether argument buffers are used for sampler bindings. */
+ bool sampler_use_argument_buffer_;
+ int sampler_argument_buffer_bind_index_vert_;
+ int sampler_argument_buffer_bind_index_frag_;
+
+ /* Attribute Mask. */
+ uint32_t enabled_attribute_mask_;
+
+ /* Debug. */
+ char name[256];
+
+ public:
+ MTLShaderInterface(const char *name);
+ ~MTLShaderInterface();
+
+ void init();
+ void add_input_attribute(uint32_t name_offset,
+ uint32_t attribute_location,
+ MTLVertexFormat format,
+ uint32_t buffer_index,
+ uint32_t size,
+ uint32_t offset,
+ int matrix_element_count = 1);
+ uint32_t add_uniform_block(uint32_t name_offset,
+ uint32_t buffer_index,
+ uint32_t size,
+ ShaderStage stage_mask = ShaderStage::BOTH);
+ void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1);
+ void add_texture(uint32_t name_offset,
+ uint32_t texture_slot,
+ eGPUTextureType tex_binding_type,
+ ShaderStage stage_mask = ShaderStage::FRAGMENT);
+ void add_push_constant_block(uint32_t name_offset);
+
+ /* Resolve and cache locations of builtin uniforms and uniform blocks. */
+ void map_builtins();
+ void set_sampler_properties(bool use_argument_buffer,
+ uint32_t argument_buffer_bind_index_vert,
+ uint32_t argument_buffer_bind_index_frag);
+
+ /* Prepare ShaderInput interface for binding resolution. */
+ void prepare_common_shader_inputs();
+
+ /* Fetch Uniforms. */
+ const MTLShaderUniform &get_uniform(uint index) const;
+ uint32_t get_total_uniforms() const;
+
+ /* Fetch Uniform Blocks. */
+ const MTLShaderUniformBlock &get_uniform_block(uint index) const;
+ uint32_t get_total_uniform_blocks() const;
+ bool has_uniform_block(uint32_t block_index) const;
+ uint32_t get_uniform_block_size(uint32_t block_index) const;
+
+ /* Push constant uniform data block should always be available. */
+ const MTLShaderUniformBlock &get_push_constant_block() const;
+
+ /* Fetch textures. */
+ const MTLShaderTexture &get_texture(uint index) const;
+ uint32_t get_total_textures() const;
+ uint32_t get_max_texture_index() const;
+ bool get_use_argument_buffer_for_samplers(int *vertex_arg_buffer_bind_index,
+ int *fragment_arg_buffer_bind_index) const;
+
+ /* Fetch Attributes. */
+ const MTLShaderInputAttribute &get_attribute(uint index) const;
+ uint32_t get_total_attributes() const;
+ uint32_t get_total_vertex_stride() const;
+ uint32_t get_enabled_attribute_mask() const;
+
+ /* Name buffer fetching. */
+ const char *get_name_at_offset(uint32_t offset) const;
+
+ /* Interface name. */
+ const char *get_name() const
+ {
+ return this->name;
+ }
+
+ /* Argument buffer encoder management. */
+ id<MTLArgumentEncoder> find_argument_encoder(int buffer_index) const;
+
+ void insert_argument_encoder(int buffer_index, id encoder);
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLShaderInterface");
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm
new file mode 100644
index 00000000000..1adf1210496
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_interface.mm
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU shader interface (C --> MSL)
+ */
+
+#include "BLI_bitmap.h"
+
+#include "GPU_capabilities.h"
+
+#include "mtl_common.hh"
+#include "mtl_debug.hh"
+#include "mtl_shader_interface.hh"
+#include "mtl_shader_interface_type.hh"
+
+#include "BLI_blenlib.h"
+#include "BLI_math_base.h"
+#include "BLI_utildefines.h"
+#include "MEM_guardedalloc.h"
+
+namespace blender::gpu {
+
+MTLShaderInterface::MTLShaderInterface(const char *name)
+{
+ /* Shared ShaderInputs array is populated later on in `prepare_common_shader_inputs`
+ * after Metal Shader Interface preparation. */
+ inputs_ = nullptr;
+
+ if (name != nullptr) {
+ strcpy(this->name, name);
+ }
+
+ /* Ensure ShaderInterface parameters are cleared. */
+ this->init();
+}
+
+MTLShaderInterface::~MTLShaderInterface()
+{
+ for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) {
+ if (arg_encoders_[i].encoder != nil) {
+ id<MTLArgumentEncoder> enc = arg_encoders_[i].encoder;
+ [enc release];
+ }
+ }
+}
+
+const char *MTLShaderInterface::get_name_at_offset(uint32_t offset) const
+{
+ return name_buffer_ + offset;
+}
+
+void MTLShaderInterface::init()
+{
+ total_attributes_ = 0;
+ total_uniform_blocks_ = 0;
+ total_uniforms_ = 0;
+ total_textures_ = 0;
+ max_texture_index_ = -1;
+ enabled_attribute_mask_ = 0;
+ total_vert_stride_ = 0;
+ sampler_use_argument_buffer_ = false;
+ sampler_argument_buffer_bind_index_vert_ = -1;
+ sampler_argument_buffer_bind_index_frag_ = -1;
+
+ /* NULL initialise uniform location markers for builtins. */
+ for (const int u : IndexRange(GPU_NUM_UNIFORMS)) {
+ builtins_[u] = -1;
+ }
+ for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
+ builtin_blocks_[ubo] = -1;
+ }
+ for (const int tex : IndexRange(MTL_MAX_TEXTURE_SLOTS)) {
+ textures_[tex].used = false;
+ textures_[tex].slot_index = -1;
+ }
+
+ /* Null initialisation for argument encoders. */
+ for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) {
+ arg_encoders_[i].encoder = nil;
+ arg_encoders_[i].buffer_index = -1;
+ }
+}
+
+void MTLShaderInterface::add_input_attribute(uint32_t name_offset,
+ uint32_t attribute_location,
+ MTLVertexFormat format,
+ uint32_t buffer_index,
+ uint32_t size,
+ uint32_t offset,
+ int matrix_element_count)
+{
+ MTLShaderInputAttribute &input_attr = attributes_[total_attributes_];
+ input_attr.name_offset = name_offset;
+ input_attr.format = format;
+ input_attr.location = attribute_location;
+ input_attr.size = size;
+ input_attr.buffer_index = buffer_index;
+ input_attr.offset = offset;
+ input_attr.matrix_element_count = matrix_element_count;
+ input_attr.index = total_attributes_;
+ total_attributes_++;
+ total_vert_stride_ = max_ii(total_vert_stride_, offset + size);
+ enabled_attribute_mask_ |= (1 << attribute_location);
+}
+
+uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
+ uint32_t buffer_index,
+ uint32_t size,
+ ShaderStage stage_mask)
+{
+  /* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */
+ if ((size % 16) != 0) {
+ size += 16 - (size % 16);
+ }
+
+ MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
+ uni_block.name_offset = name_offset;
+  /* We offset the buffer binding index by one, as the first slot is reserved for push constant
+   * data. */
+ uni_block.buffer_index = buffer_index + 1;
+ uni_block.size = size;
+ uni_block.current_offset = 0;
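+  /* NOTE: The stage_mask parameter is currently unused; uniform blocks are
+   * always exposed to both the vertex and fragment stage. */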
+ uni_block.stage_mask = ShaderStage::BOTH;
+ return (total_uniform_blocks_++);
+}
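+
+/* Example of the rounding above (illustrative): a block whose members occupy
+ * 36 bytes is padded to 48 bytes, and a block created with buffer_index 2 is
+ * bound in the shader at [[buffer(3)]], as slot 0 holds push constant data. */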
+
+void MTLShaderInterface::add_push_constant_block(uint32_t name_offset)
+{
+ push_constant_block_.name_offset = name_offset;
+ /* Push constant data block is always uniform buffer index 0. */
+ push_constant_block_.buffer_index = 0;
+ /* Size starts at zero and grows as uniforms are added. */
+ push_constant_block_.size = 0;
+
+ push_constant_block_.current_offset = 0;
+ push_constant_block_.stage_mask = ShaderStage::BOTH;
+}
+
+void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, int array_len)
+{
+ BLI_assert(array_len > 0);
+ BLI_assert(total_uniforms_ < MTL_MAX_UNIFORMS_PER_BLOCK);
+ if (total_uniforms_ >= MTL_MAX_UNIFORMS_PER_BLOCK) {
+    MTL_LOG_WARNING(
+        "[Warning] Cannot add uniform '%s' to shader interface '%s' as the uniform limit of %d "
+        "has been reached.\n",
+        this->get_name_at_offset(name_offset),
+        this->name,
+        MTL_MAX_UNIFORMS_PER_BLOCK);
+ return;
+ }
+ MTLShaderUniform &uniform = uniforms_[total_uniforms_];
+ uniform.name_offset = name_offset;
+
+ /* Determine size and offset alignment -- C++ struct alignment rules: Base address of value must
+ * match alignment of type. GLSL follows minimum type alignment of 4. */
+ int data_type_size = mtl_get_data_type_size(type) * array_len;
+ int data_type_alignment = max_ii(mtl_get_data_type_alignment(type), 4);
+ int current_offset = push_constant_block_.current_offset;
+ if ((current_offset % data_type_alignment) != 0) {
+ current_offset += data_type_alignment - (current_offset % data_type_alignment);
+ }
+
+ uniform.size_in_bytes = data_type_size;
+ uniform.byte_offset = current_offset;
+ uniform.type = type;
+ uniform.array_len = array_len;
+ total_uniforms_++;
+
+ /* Update Push constant block-- update offset, re-size and re-align total memory requirement to
+ * be 16-byte aligned. Following GLSL std140. */
+ push_constant_block_.current_offset = current_offset + data_type_size;
+ if (push_constant_block_.current_offset > push_constant_block_.size) {
+ push_constant_block_.size = push_constant_block_.current_offset;
+ if ((push_constant_block_.size % 16) != 0) {
+ push_constant_block_.size += 16 - (push_constant_block_.size % 16);
+ }
+ }
+
+ /* Validate properties. */
+ BLI_assert(uniform.size_in_bytes > 0);
+ BLI_assert_msg(
+ current_offset + data_type_size <= push_constant_block_.size,
+ "Uniform size and offset sits outside the specified size range for the uniform block");
+}
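+
+/* Worked example of the offset arithmetic (assuming a float has size/alignment
+ * 4 and a float4 has size/alignment 16): adding a float places it at offset 0
+ * and moves current_offset to 4; a following float4 is aligned up to offset 16,
+ * giving a block size of 32, which is already 16-byte aligned. */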
+
+void MTLShaderInterface::add_texture(uint32_t name_offset,
+ uint32_t texture_slot,
+ eGPUTextureType tex_binding_type,
+ ShaderStage stage_mask)
+{
+  BLI_assert(texture_slot < (uint32_t)GPU_max_textures());
+  if (texture_slot < (uint32_t)GPU_max_textures()) {
+
+ MTLShaderTexture &tex = textures_[texture_slot];
+ BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding");
+ tex.name_offset = name_offset;
+ tex.slot_index = texture_slot;
+ tex.type = tex_binding_type;
+ tex.stage_mask = stage_mask;
+ tex.used = true;
+ total_textures_++;
+ max_texture_index_ = max_ii(max_texture_index_, texture_slot);
+ }
+ else {
+ BLI_assert_msg(false, "Exceeding maximum supported texture count.");
+ MTL_LOG_WARNING(
+ "Could not add additional texture with index %d to shader interface. Maximum "
+ "supported texture count is %d\n",
+ texture_slot,
+ GPU_max_textures());
+ }
+}
+
+void MTLShaderInterface::map_builtins()
+{
+ /* Clear builtin arrays to NULL locations. */
+ for (const int u : IndexRange(GPU_NUM_UNIFORMS)) {
+ builtins_[u] = -1;
+ }
+ for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
+ builtin_blocks_[ubo] = -1;
+ }
+
+  /* Resolve and cache uniform locations for builtin uniforms. */
+ for (const int u : IndexRange(GPU_NUM_UNIFORMS)) {
+ const ShaderInput *uni = this->uniform_get(builtin_uniform_name((GPUUniformBuiltin)u));
+ if (uni != nullptr) {
+ BLI_assert(uni->location >= 0);
+ if (uni->location >= 0) {
+ builtins_[u] = uni->location;
+ MTL_LOG_INFO("Mapped builtin uniform '%s' NB: '%s' to location: %d\n",
+ builtin_uniform_name((GPUUniformBuiltin)u),
+ get_name_at_offset(uni->name_offset),
+ uni->location);
+ }
+ }
+ }
+
+  /* Resolve and cache uniform locations for builtin uniform blocks. */
+ for (const int u : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
+ const ShaderInput *uni = this->ubo_get(builtin_uniform_block_name((GPUUniformBlockBuiltin)u));
+
+ if (uni != nullptr) {
+ BLI_assert(uni->location >= 0);
+ if (uni->location >= 0) {
+ builtin_blocks_[u] = uni->binding;
+ MTL_LOG_INFO("Mapped builtin uniform block '%s' to location %d\n",
+ builtin_uniform_block_name((GPUUniformBlockBuiltin)u),
+ uni->location);
+ }
+ }
+ }
+}
+
+/* Populate ShaderInput struct based on interface. */
+void MTLShaderInterface::prepare_common_shader_inputs()
+{
+ /* ShaderInput inputs_ maps a uniform name to an external
+ * uniform location, which is used as an array index to look-up
+ * information in the local MTLShaderInterface input structs.
+ *
+ * ShaderInput population follows the ordering rules in gpu_shader_interface. */
+
+ /* Populate ShaderInterface counts. */
+ attr_len_ = this->get_total_attributes();
+ ubo_len_ = this->get_total_uniform_blocks();
+ uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
+
+ /* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */
+ ssbo_len_ = 0;
+
+ /* Calculate total inputs and allocate ShaderInput array. */
+ /* NOTE: We use the existing name_buffer_ allocated for internal input structs. */
+ int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_;
+ inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__);
+ ShaderInput *current_input = inputs_;
+
+ /* Attributes. */
+ for (const int attr_index : IndexRange(total_attributes_)) {
+ MTLShaderInputAttribute &shd_attr = attributes_[attr_index];
+ current_input->name_offset = shd_attr.name_offset;
+ current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_attr.name_offset));
+ current_input->location = attr_index;
+ current_input->binding = attr_index;
+ current_input++;
+ }
+
+ /* UBOs. */
+ BLI_assert(&inputs_[attr_len_] >= current_input);
+ current_input = &inputs_[attr_len_];
+ for (const int ubo_index : IndexRange(total_uniform_blocks_)) {
+ MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index];
+ current_input->name_offset = shd_ubo.name_offset;
+ current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
+ /* Location refers to the index in the ubos_ array. */
+ current_input->location = ubo_index;
+ /* Final binding location refers to the buffer binding index within the shader (Relative to
+ * MTL_uniform_buffer_base_index). */
+ current_input->binding = shd_ubo.buffer_index;
+ current_input++;
+ }
+
+ /* Uniforms. */
+ BLI_assert(&inputs_[attr_len_ + ubo_len_] >= current_input);
+ current_input = &inputs_[attr_len_ + ubo_len_];
+ for (const int uniform_index : IndexRange(total_uniforms_)) {
+ MTLShaderUniform &shd_uni = uniforms_[uniform_index];
+ current_input->name_offset = shd_uni.name_offset;
+ current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_uni.name_offset));
+ current_input->location = uniform_index;
+ current_input->binding = uniform_index;
+ current_input++;
+ }
+
+ /* Textures.
+ * NOTE(Metal): Textures are externally treated as uniforms in gpu_shader_interface.
+   * The location for textures is resolved as the `binding` value. This
+   * is the index into the local MTLShaderTexture textures[] array.
+ *
+ * In MSL, we cannot trivially remap which texture slot a given texture
+ * handle points to, unlike in GLSL, where a uniform sampler/image can be updated
+ * and queried as both a texture and a uniform. */
+ for (int texture_index = 0; texture_index <= max_texture_index_; texture_index++) {
+ const MTLShaderTexture &shd_tex = textures_[texture_index];
+
+ /* Not all texture entries are used when explicit texture locations are specified. */
+ if (shd_tex.used) {
+ BLI_assert_msg(shd_tex.slot_index == texture_index,
+ "Texture binding slot should match array index for texture.");
+ current_input->name_offset = shd_tex.name_offset;
+ current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_tex.name_offset));
+
+ /* Location represents look-up address.
+ * For Metal, this location is a unique value offset by
+ * total_uniforms such that it does not overlap.
+ *
+ * This range offset allows a check in the uniform look-up
+ * to ensure texture handles are not treated as standard uniforms in Metal. */
+ current_input->location = texture_index + total_uniforms_;
+
+ /* Binding represents texture slot [[texture(n)]]. */
+ current_input->binding = shd_tex.slot_index;
+ current_input++;
+ }
+ }
+
+ /* SSBO bindings.
+ * TODO(Metal): Support SSBOs. Pending compute support. */
+ BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input);
+ current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_];
+
+ /* Map builtin uniform indices to uniform binding locations. */
+ this->map_builtins();
+}
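+
+/* Example of the resulting location mapping (illustrative): with 5 uniforms
+ * and a texture in slot 2, `GPU_shader_get_uniform` on the texture name
+ * returns location 7 (2 + total_uniforms_), while its `binding` remains the
+ * [[texture(2)]] slot. */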
+
+void MTLShaderInterface::set_sampler_properties(bool use_argument_buffer,
+ uint32_t argument_buffer_bind_index_vert,
+ uint32_t argument_buffer_bind_index_frag)
+{
+ sampler_use_argument_buffer_ = use_argument_buffer;
+ sampler_argument_buffer_bind_index_vert_ = argument_buffer_bind_index_vert;
+ sampler_argument_buffer_bind_index_frag_ = argument_buffer_bind_index_frag;
+}
+
+/* Attributes. */
+const MTLShaderInputAttribute &MTLShaderInterface::get_attribute(uint index) const
+{
+ BLI_assert(index < MTL_MAX_VERTEX_INPUT_ATTRIBUTES);
+ BLI_assert(index < get_total_attributes());
+ return attributes_[index];
+}
+
+uint32_t MTLShaderInterface::get_total_attributes() const
+{
+ return total_attributes_;
+}
+
+uint32_t MTLShaderInterface::get_total_vertex_stride() const
+{
+ return total_vert_stride_;
+}
+
+uint32_t MTLShaderInterface::get_enabled_attribute_mask() const
+{
+ return enabled_attribute_mask_;
+}
+
+/* Uniforms. */
+const MTLShaderUniform &MTLShaderInterface::get_uniform(uint index) const
+{
+ BLI_assert(index < MTL_MAX_UNIFORMS_PER_BLOCK);
+ BLI_assert(index < get_total_uniforms());
+ return uniforms_[index];
+}
+
+uint32_t MTLShaderInterface::get_total_uniforms() const
+{
+ return total_uniforms_;
+}
+
+/* Uniform Blocks. */
+const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const
+{
+ BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(index < get_total_uniform_blocks());
+ return ubos_[index];
+}
+
+const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const
+{
+ return push_constant_block_;
+}
+
+uint32_t MTLShaderInterface::get_total_uniform_blocks() const
+{
+ return total_uniform_blocks_;
+}
+
+bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const
+{
+ return (block_index < total_uniform_blocks_);
+}
+
+uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const
+{
+ return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0;
+}
+
+/* Textures. */
+const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const
+{
+ BLI_assert(index < MTL_MAX_TEXTURE_SLOTS);
+ BLI_assert(index <= get_max_texture_index());
+ return textures_[index];
+}
+
+uint32_t MTLShaderInterface::get_total_textures() const
+{
+ return total_textures_;
+}
+
+uint32_t MTLShaderInterface::get_max_texture_index() const
+{
+ return max_texture_index_;
+}
+
+bool MTLShaderInterface::get_use_argument_buffer_for_samplers(
+ int *vertex_arg_buffer_bind_index, int *fragment_arg_buffer_bind_index) const
+{
+ /* Returns argument buffer binding slot for each shader stage.
+ * The exact bind slot may be different, as each stage has different buffer inputs. */
+ *vertex_arg_buffer_bind_index = sampler_argument_buffer_bind_index_vert_;
+ *fragment_arg_buffer_bind_index = sampler_argument_buffer_bind_index_frag_;
+ return sampler_use_argument_buffer_;
+}
+
+id<MTLArgumentEncoder> MTLShaderInterface::find_argument_encoder(int buffer_index) const
+{
+ id encoder = nil;
+ for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) {
+ encoder = arg_encoders_[i].buffer_index == buffer_index ? arg_encoders_[i].encoder : encoder;
+ }
+ return encoder;
+}
+
+void MTLShaderInterface::insert_argument_encoder(int buffer_index, id encoder)
+{
+ for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) {
+ if (arg_encoders_[i].encoder == nil) {
+ arg_encoders_[i].encoder = encoder;
+ arg_encoders_[i].buffer_index = buffer_index;
+ return;
+ }
+ }
+ MTL_LOG_WARNING("could not insert encoder into cache!");
+}
+
+MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type)
+{
+ switch (type) {
+ case MTL_DATATYPE_CHAR:
+ return MTLVertexFormatChar;
+ case MTL_DATATYPE_UCHAR:
+ return MTLVertexFormatUChar;
+ case MTL_DATATYPE_BOOL:
+ return MTLVertexFormatUChar;
+ case MTL_DATATYPE_CHAR2:
+ return MTLVertexFormatChar2;
+ case MTL_DATATYPE_UCHAR2:
+ return MTLVertexFormatUChar2;
+ case MTL_DATATYPE_BOOL2:
+ return MTLVertexFormatUChar2;
+ case MTL_DATATYPE_SHORT:
+ return MTLVertexFormatShort;
+ case MTL_DATATYPE_USHORT:
+ return MTLVertexFormatUShort;
+ case MTL_DATATYPE_CHAR3:
+ return MTLVertexFormatChar3;
+ case MTL_DATATYPE_UCHAR3:
+ return MTLVertexFormatUChar3;
+ case MTL_DATATYPE_BOOL3:
+ return MTLVertexFormatUChar3;
+ case MTL_DATATYPE_CHAR4:
+ return MTLVertexFormatChar4;
+ case MTL_DATATYPE_UCHAR4:
+ return MTLVertexFormatUChar4;
+ case MTL_DATATYPE_INT:
+ return MTLVertexFormatInt;
+ case MTL_DATATYPE_UINT:
+ return MTLVertexFormatUInt;
+ case MTL_DATATYPE_BOOL4:
+ return MTLVertexFormatUChar4;
+ case MTL_DATATYPE_SHORT2:
+ return MTLVertexFormatShort2;
+ case MTL_DATATYPE_USHORT2:
+ return MTLVertexFormatUShort2;
+ case MTL_DATATYPE_FLOAT:
+ return MTLVertexFormatFloat;
+ case MTL_DATATYPE_HALF2x2:
+ case MTL_DATATYPE_HALF3x2:
+ case MTL_DATATYPE_HALF4x2:
+ BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender.");
+ return MTLVertexFormatInvalid;
+
+ case MTL_DATATYPE_SHORT3:
+ return MTLVertexFormatShort3;
+ case MTL_DATATYPE_USHORT3:
+ return MTLVertexFormatUShort3;
+ case MTL_DATATYPE_SHORT4:
+ return MTLVertexFormatShort4;
+ case MTL_DATATYPE_USHORT4:
+ return MTLVertexFormatUShort4;
+ case MTL_DATATYPE_INT2:
+ return MTLVertexFormatInt2;
+ case MTL_DATATYPE_UINT2:
+ return MTLVertexFormatUInt2;
+ case MTL_DATATYPE_FLOAT2:
+ return MTLVertexFormatFloat2;
+ case MTL_DATATYPE_LONG:
+ return MTLVertexFormatInt;
+ case MTL_DATATYPE_ULONG:
+ return MTLVertexFormatUInt;
+ case MTL_DATATYPE_HALF2x3:
+ case MTL_DATATYPE_HALF2x4:
+ case MTL_DATATYPE_HALF3x3:
+ case MTL_DATATYPE_HALF3x4:
+ case MTL_DATATYPE_HALF4x3:
+ case MTL_DATATYPE_HALF4x4:
+ case MTL_DATATYPE_FLOAT2x2:
+ case MTL_DATATYPE_FLOAT3x2:
+ case MTL_DATATYPE_FLOAT4x2:
+ BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender.");
+ return MTLVertexFormatInvalid;
+
+ case MTL_DATATYPE_INT3:
+ return MTLVertexFormatInt3;
+ case MTL_DATATYPE_INT4:
+ return MTLVertexFormatInt4;
+ case MTL_DATATYPE_UINT3:
+ return MTLVertexFormatUInt3;
+ case MTL_DATATYPE_UINT4:
+ return MTLVertexFormatUInt4;
+ case MTL_DATATYPE_FLOAT3:
+ return MTLVertexFormatFloat3;
+ case MTL_DATATYPE_FLOAT4:
+ return MTLVertexFormatFloat4;
+ case MTL_DATATYPE_LONG2:
+ return MTLVertexFormatInt2;
+ case MTL_DATATYPE_ULONG2:
+ return MTLVertexFormatUInt2;
+ case MTL_DATATYPE_FLOAT2x3:
+ case MTL_DATATYPE_FLOAT2x4:
+ case MTL_DATATYPE_FLOAT3x3:
+ case MTL_DATATYPE_FLOAT3x4:
+ case MTL_DATATYPE_FLOAT4x3:
+ case MTL_DATATYPE_FLOAT4x4:
+ BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender.");
+ return MTLVertexFormatInvalid;
+
+ case MTL_DATATYPE_LONG3:
+ return MTLVertexFormatInt3;
+ case MTL_DATATYPE_LONG4:
+ return MTLVertexFormatInt4;
+ case MTL_DATATYPE_ULONG3:
+ return MTLVertexFormatUInt3;
+ case MTL_DATATYPE_ULONG4:
+ return MTLVertexFormatUInt4;
+
+ /* Special Types */
+ case MTL_DATATYPE_UINT1010102_NORM:
+ return MTLVertexFormatUInt1010102Normalized;
+ case MTL_DATATYPE_INT1010102_NORM:
+ return MTLVertexFormatInt1010102Normalized;
+
+ default:
+ BLI_assert(false);
+ return MTLVertexFormatInvalid;
+ };
+}
+
+} // namespace blender::gpu
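
As an illustrative sketch (not the backend's actual code path), the mapping above plugs directly into a Metal vertex descriptor:

    MTLVertexDescriptor *desc = [MTLVertexDescriptor vertexDescriptor];
    desc.attributes[0].format = mtl_datatype_to_vertex_type(MTL_DATATYPE_FLOAT3); /* MTLVertexFormatFloat3. */
    desc.attributes[0].offset = 0;
    desc.attributes[0].bufferIndex = 0;
    desc.layouts[0].stride = 3 * sizeof(float); /* Tightly-packed float3 in the VBO. */
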
diff --git a/source/blender/gpu/metal/mtl_shader_interface_type.hh b/source/blender/gpu/metal/mtl_shader_interface_type.hh
new file mode 100644
index 00000000000..a8e651d8509
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_interface_type.hh
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+#pragma once
+
+#include "BLI_assert.h"
+
+enum eMTLDataType {
+ MTL_DATATYPE_CHAR,
+ MTL_DATATYPE_CHAR2,
+ MTL_DATATYPE_CHAR3,
+ MTL_DATATYPE_CHAR4,
+
+ MTL_DATATYPE_UCHAR,
+ MTL_DATATYPE_UCHAR2,
+ MTL_DATATYPE_UCHAR3,
+ MTL_DATATYPE_UCHAR4,
+
+ MTL_DATATYPE_BOOL,
+ MTL_DATATYPE_BOOL2,
+ MTL_DATATYPE_BOOL3,
+ MTL_DATATYPE_BOOL4,
+
+ MTL_DATATYPE_SHORT,
+ MTL_DATATYPE_SHORT2,
+ MTL_DATATYPE_SHORT3,
+ MTL_DATATYPE_SHORT4,
+
+ MTL_DATATYPE_USHORT,
+ MTL_DATATYPE_USHORT2,
+ MTL_DATATYPE_USHORT3,
+ MTL_DATATYPE_USHORT4,
+
+ MTL_DATATYPE_INT,
+ MTL_DATATYPE_INT2,
+ MTL_DATATYPE_INT3,
+ MTL_DATATYPE_INT4,
+
+ MTL_DATATYPE_UINT,
+ MTL_DATATYPE_UINT2,
+ MTL_DATATYPE_UINT3,
+ MTL_DATATYPE_UINT4,
+
+ MTL_DATATYPE_FLOAT,
+ MTL_DATATYPE_FLOAT2,
+ MTL_DATATYPE_FLOAT3,
+ MTL_DATATYPE_FLOAT4,
+
+ MTL_DATATYPE_LONG,
+ MTL_DATATYPE_LONG2,
+ MTL_DATATYPE_LONG3,
+ MTL_DATATYPE_LONG4,
+
+ MTL_DATATYPE_ULONG,
+ MTL_DATATYPE_ULONG2,
+ MTL_DATATYPE_ULONG3,
+ MTL_DATATYPE_ULONG4,
+
+ MTL_DATATYPE_HALF2x2,
+ MTL_DATATYPE_HALF2x3,
+ MTL_DATATYPE_HALF2x4,
+ MTL_DATATYPE_HALF3x2,
+ MTL_DATATYPE_HALF3x3,
+ MTL_DATATYPE_HALF3x4,
+ MTL_DATATYPE_HALF4x2,
+ MTL_DATATYPE_HALF4x3,
+ MTL_DATATYPE_HALF4x4,
+
+ MTL_DATATYPE_FLOAT2x2,
+ MTL_DATATYPE_FLOAT2x3,
+ MTL_DATATYPE_FLOAT2x4,
+ MTL_DATATYPE_FLOAT3x2,
+ MTL_DATATYPE_FLOAT3x3,
+ MTL_DATATYPE_FLOAT3x4,
+ MTL_DATATYPE_FLOAT4x2,
+ MTL_DATATYPE_FLOAT4x3,
+ MTL_DATATYPE_FLOAT4x4,
+
+ MTL_DATATYPE_UINT1010102_NORM,
+ MTL_DATATYPE_INT1010102_NORM
+};
+
+inline uint mtl_get_data_type_size(eMTLDataType type)
+{
+ switch (type) {
+ case MTL_DATATYPE_CHAR:
+ case MTL_DATATYPE_UCHAR:
+ case MTL_DATATYPE_BOOL:
+ return 1;
+ case MTL_DATATYPE_CHAR2:
+ case MTL_DATATYPE_UCHAR2:
+ case MTL_DATATYPE_BOOL2:
+ case MTL_DATATYPE_SHORT:
+ case MTL_DATATYPE_USHORT:
+ return 2;
+
+ case MTL_DATATYPE_CHAR3:
+ case MTL_DATATYPE_UCHAR3:
+ case MTL_DATATYPE_BOOL3:
+ return 3;
+ case MTL_DATATYPE_CHAR4:
+ case MTL_DATATYPE_UCHAR4:
+ case MTL_DATATYPE_INT:
+ case MTL_DATATYPE_UINT:
+ case MTL_DATATYPE_BOOL4:
+ case MTL_DATATYPE_SHORT2:
+ case MTL_DATATYPE_USHORT2:
+ case MTL_DATATYPE_FLOAT:
+ case MTL_DATATYPE_UINT1010102_NORM:
+ case MTL_DATATYPE_INT1010102_NORM:
+ return 4;
+
+ case MTL_DATATYPE_SHORT3:
+ case MTL_DATATYPE_USHORT3:
+ case MTL_DATATYPE_SHORT4:
+ case MTL_DATATYPE_USHORT4:
+ case MTL_DATATYPE_INT2:
+ case MTL_DATATYPE_UINT2:
+ case MTL_DATATYPE_FLOAT2:
+ case MTL_DATATYPE_LONG:
+ case MTL_DATATYPE_ULONG:
+ case MTL_DATATYPE_HALF2x2:
+ return 8;
+
+ case MTL_DATATYPE_HALF3x2:
+ return 12;
+
+ case MTL_DATATYPE_INT3:
+ case MTL_DATATYPE_INT4:
+ case MTL_DATATYPE_UINT3:
+ case MTL_DATATYPE_UINT4:
+ case MTL_DATATYPE_FLOAT3:
+ case MTL_DATATYPE_FLOAT4:
+ case MTL_DATATYPE_LONG2:
+ case MTL_DATATYPE_ULONG2:
+ case MTL_DATATYPE_HALF2x3:
+ case MTL_DATATYPE_HALF2x4:
+ case MTL_DATATYPE_HALF4x2:
+ return 16;
+
+ case MTL_DATATYPE_HALF3x3:
+ case MTL_DATATYPE_HALF3x4:
+ case MTL_DATATYPE_FLOAT3x2:
+ return 24;
+
+ case MTL_DATATYPE_LONG3:
+ case MTL_DATATYPE_LONG4:
+ case MTL_DATATYPE_ULONG3:
+ case MTL_DATATYPE_ULONG4:
+ case MTL_DATATYPE_HALF4x3:
+ case MTL_DATATYPE_HALF4x4:
+ case MTL_DATATYPE_FLOAT2x3:
+ case MTL_DATATYPE_FLOAT2x4:
+ case MTL_DATATYPE_FLOAT4x2:
+ return 32;
+
+ case MTL_DATATYPE_FLOAT3x3:
+ case MTL_DATATYPE_FLOAT3x4:
+ return 48;
+
+ case MTL_DATATYPE_FLOAT4x3:
+ case MTL_DATATYPE_FLOAT4x4:
+ return 64;
+ default:
+ BLI_assert(false);
+ return 0;
+ };
+}
+
+inline uint mtl_get_data_type_alignment(eMTLDataType type)
+{
+ switch (type) {
+ case MTL_DATATYPE_CHAR:
+ case MTL_DATATYPE_UCHAR:
+ case MTL_DATATYPE_BOOL:
+ return 1;
+ case MTL_DATATYPE_CHAR2:
+ case MTL_DATATYPE_UCHAR2:
+ case MTL_DATATYPE_BOOL2:
+ case MTL_DATATYPE_SHORT:
+ case MTL_DATATYPE_USHORT:
+ return 2;
+
+ case MTL_DATATYPE_CHAR3:
+ case MTL_DATATYPE_UCHAR3:
+ case MTL_DATATYPE_BOOL3:
+ return 3;
+ case MTL_DATATYPE_CHAR4:
+ case MTL_DATATYPE_UCHAR4:
+ case MTL_DATATYPE_INT:
+ case MTL_DATATYPE_UINT:
+ case MTL_DATATYPE_BOOL4:
+ case MTL_DATATYPE_SHORT2:
+ case MTL_DATATYPE_USHORT2:
+ case MTL_DATATYPE_FLOAT:
+ case MTL_DATATYPE_HALF2x2:
+ case MTL_DATATYPE_HALF3x2:
+ case MTL_DATATYPE_HALF4x2:
+ case MTL_DATATYPE_UINT1010102_NORM:
+ case MTL_DATATYPE_INT1010102_NORM:
+ return 4;
+
+ case MTL_DATATYPE_SHORT3:
+ case MTL_DATATYPE_USHORT3:
+ case MTL_DATATYPE_SHORT4:
+ case MTL_DATATYPE_USHORT4:
+ case MTL_DATATYPE_INT2:
+ case MTL_DATATYPE_UINT2:
+ case MTL_DATATYPE_FLOAT2:
+ case MTL_DATATYPE_LONG:
+ case MTL_DATATYPE_ULONG:
+ case MTL_DATATYPE_HALF2x3:
+ case MTL_DATATYPE_HALF2x4:
+ case MTL_DATATYPE_HALF3x3:
+ case MTL_DATATYPE_HALF3x4:
+ case MTL_DATATYPE_HALF4x3:
+ case MTL_DATATYPE_HALF4x4:
+ case MTL_DATATYPE_FLOAT2x2:
+ case MTL_DATATYPE_FLOAT3x2:
+ case MTL_DATATYPE_FLOAT4x2:
+ return 8;
+
+ case MTL_DATATYPE_INT3:
+ case MTL_DATATYPE_INT4:
+ case MTL_DATATYPE_UINT3:
+ case MTL_DATATYPE_UINT4:
+ case MTL_DATATYPE_FLOAT3:
+ case MTL_DATATYPE_FLOAT4:
+ case MTL_DATATYPE_LONG2:
+ case MTL_DATATYPE_ULONG2:
+ case MTL_DATATYPE_FLOAT2x3:
+ case MTL_DATATYPE_FLOAT2x4:
+ case MTL_DATATYPE_FLOAT3x3:
+ case MTL_DATATYPE_FLOAT3x4:
+ case MTL_DATATYPE_FLOAT4x3:
+ case MTL_DATATYPE_FLOAT4x4:
+ return 16;
+
+ case MTL_DATATYPE_LONG3:
+ case MTL_DATATYPE_LONG4:
+ case MTL_DATATYPE_ULONG3:
+ case MTL_DATATYPE_ULONG4:
+ return 32;
+
+ default:
+ BLI_assert_msg(false, "Unrecognised MTL datatype.");
+ return 0;
+ };
+}
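
A short sketch of how the two helpers above combine when packing uniform-block members (helper name hypothetical):

    /* Round `offset` up to the member's alignment, then advance past its size. */
    inline uint pack_member(uint offset, eMTLDataType type)
    {
      const uint align = mtl_get_data_type_alignment(type);
      offset = (offset + align - 1) & ~(align - 1);
      return offset + mtl_get_data_type_size(type);
    }
    /* e.g. a FLOAT at offset 0 ends at byte 4; a following FLOAT3 aligns to 16,
     * so it occupies bytes 16..31. */
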
diff --git a/source/blender/gpu/metal/mtl_shader_shared.h b/source/blender/gpu/metal/mtl_shader_shared.h
new file mode 100644
index 00000000000..f6fd9035001
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_shader_shared.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Global parameters. */
+#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS 6 /* buffer bind 0..5 */
+#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX MTL_SSBO_VERTEX_FETCH_MAX_VBOS
+
+/* Add types as needed (these also need to be added to mtl_shader.hh). */
+#define GPU_SHADER_ATTR_TYPE_FLOAT 0
+#define GPU_SHADER_ATTR_TYPE_INT 1
+#define GPU_SHADER_ATTR_TYPE_SHORT 2
+#define GPU_SHADER_ATTR_TYPE_CHAR 3
+#define GPU_SHADER_ATTR_TYPE_VEC2 4
+#define GPU_SHADER_ATTR_TYPE_VEC3 5
+#define GPU_SHADER_ATTR_TYPE_VEC4 6
+#define GPU_SHADER_ATTR_TYPE_UVEC2 7
+#define GPU_SHADER_ATTR_TYPE_UVEC3 8
+#define GPU_SHADER_ATTR_TYPE_UVEC4 9
+#define GPU_SHADER_ATTR_TYPE_IVEC2 10
+#define GPU_SHADER_ATTR_TYPE_IVEC3 11
+#define GPU_SHADER_ATTR_TYPE_IVEC4 12
+#define GPU_SHADER_ATTR_TYPE_MAT3 13
+#define GPU_SHADER_ATTR_TYPE_MAT4 14
+#define GPU_SHADER_ATTR_TYPE_UCHAR_NORM 15
+#define GPU_SHADER_ATTR_TYPE_UCHAR2_NORM 16
+#define GPU_SHADER_ATTR_TYPE_UCHAR3_NORM 17
+#define GPU_SHADER_ATTR_TYPE_UCHAR4_NORM 18
+#define GPU_SHADER_ATTR_TYPE_INT1010102_NORM 19
+#define GPU_SHADER_ATTR_TYPE_SHORT3_NORM 20
+#define GPU_SHADER_ATTR_TYPE_CHAR2 21
+#define GPU_SHADER_ATTR_TYPE_CHAR3 22
+#define GPU_SHADER_ATTR_TYPE_CHAR4 23
+#define GPU_SHADER_ATTR_TYPE_UINT 24
diff --git a/source/blender/gpu/metal/mtl_state.hh b/source/blender/gpu/metal/mtl_state.hh
index e6472491b35..1af56378c5a 100644
--- a/source/blender/gpu/metal/mtl_state.hh
+++ b/source/blender/gpu/metal/mtl_state.hh
@@ -3,6 +3,7 @@
/** \file
* \ingroup gpu
*/
+#pragma once
#include "MEM_guardedalloc.h"
@@ -11,6 +12,8 @@
#include "GPU_state.h"
#include "gpu_state_private.hh"
+#include "mtl_pso_descriptor_state.hh"
+
namespace blender::gpu {
/* Forward Declarations. */
@@ -21,7 +24,7 @@ class MTLContext;
* Metal Implementation.
**/
class MTLStateManager : public StateManager {
- public:
+
private:
/* Current state of the associated MTLContext.
* Avoids resetting the whole state for every change. */
@@ -29,6 +32,9 @@ class MTLStateManager : public StateManager {
GPUStateMutable current_mutable_;
MTLContext *context_;
+ /* Global pipeline descriptors. */
+ MTLRenderPipelineStateDescriptor pipeline_descriptor_;
+
public:
MTLStateManager(MTLContext *ctx);
@@ -47,6 +53,12 @@ class MTLStateManager : public StateManager {
void texture_unpack_row_length_set(uint len) override;
+ /* Global pipeline descriptors. */
+ MTLRenderPipelineStateDescriptor &get_pipeline_descriptor()
+ {
+ return pipeline_descriptor_;
+ }
+
private:
void set_write_mask(const eGPUWriteMask value);
void set_depth_test(const eGPUDepthTest value);
diff --git a/source/blender/gpu/metal/mtl_state.mm b/source/blender/gpu/metal/mtl_state.mm
index 0f2d4d7dc48..85080041246 100644
--- a/source/blender/gpu/metal/mtl_state.mm
+++ b/source/blender/gpu/metal/mtl_state.mm
@@ -11,6 +11,7 @@
#include "mtl_context.hh"
#include "mtl_framebuffer.hh"
+#include "mtl_shader_interface_type.hh"
#include "mtl_state.hh"
namespace blender::gpu {
diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh
index 82a7a20a310..be6f3a3a02b 100644
--- a/source/blender/gpu/metal/mtl_texture.hh
+++ b/source/blender/gpu/metal/mtl_texture.hh
@@ -363,20 +363,20 @@ class MTLTexture : public Texture {
};
id<MTLComputePipelineState> texture_update_1d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation);
+ TextureUpdateRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_update_1d_array_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation);
+ TextureUpdateRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_update_2d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation);
+ TextureUpdateRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_update_2d_array_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation);
+ TextureUpdateRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_update_3d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation);
+ TextureUpdateRoutineSpecialisation specialization);
id<MTLComputePipelineState> mtl_texture_update_impl(
- TextureUpdateRoutineSpecialisation specialisation_params,
+ TextureUpdateRoutineSpecialisation specialization_params,
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
- &specialisation_cache,
+ &specialization_cache,
eGPUTextureType texture_type);
/* Depth Update Utilities */
@@ -384,7 +384,7 @@ class MTLTexture : public Texture {
* use a compute shader to write to depth, so we must instead render to a depth target.
* These processes use vertex/fragment shaders to render texture data from an intermediate
* source, in order to prime the depth buffer*/
- GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialisation);
+ GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization);
void update_sub_depth_2d(
int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data);
@@ -397,20 +397,20 @@ class MTLTexture : public Texture {
};
id<MTLComputePipelineState> texture_read_1d_get_kernel(
- TextureReadRoutineSpecialisation specialisation);
+ TextureReadRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_read_1d_array_get_kernel(
- TextureReadRoutineSpecialisation specialisation);
+ TextureReadRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_read_2d_get_kernel(
- TextureReadRoutineSpecialisation specialisation);
+ TextureReadRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_read_2d_array_get_kernel(
- TextureReadRoutineSpecialisation specialisation);
+ TextureReadRoutineSpecialisation specialization);
id<MTLComputePipelineState> texture_read_3d_get_kernel(
- TextureReadRoutineSpecialisation specialisation);
+ TextureReadRoutineSpecialisation specialization);
id<MTLComputePipelineState> mtl_texture_read_impl(
- TextureReadRoutineSpecialisation specialisation_params,
+ TextureReadRoutineSpecialisation specialization_params,
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
- &specialisation_cache,
+ &specialization_cache,
eGPUTextureType texture_type);
/* fullscreen blit utilities. */
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 0cb38a3a2b7..2b7c2333bff 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -479,8 +479,8 @@ void gpu::MTLTexture::update_sub(
int expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format);
int destination_num_channels = get_mtl_format_num_components(destination_format);
- /* Prepare specialisation struct (For texture update routine). */
- TextureUpdateRoutineSpecialisation compute_specialisation_kernel = {
+ /* Prepare specialization struct (For texture update routine). */
+ TextureUpdateRoutineSpecialisation compute_specialization_kernel = {
tex_data_format_to_msl_type_str(type), /* INPUT DATA FORMAT */
tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */
num_channels,
@@ -620,7 +620,7 @@ void gpu::MTLTexture::update_sub(
/* Use Compute Based update. */
if (type_ == GPU_TEXTURE_1D) {
id<MTLComputePipelineState> pso = texture_update_1d_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureUpdateParams params = {mip,
{extent[0], 1, 1},
{offset[0], 0, 0},
@@ -637,7 +637,7 @@ void gpu::MTLTexture::update_sub(
}
else if (type_ == GPU_TEXTURE_1D_ARRAY) {
id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureUpdateParams params = {mip,
{extent[0], extent[1], 1},
{offset[0], offset[1], 0},
@@ -694,7 +694,7 @@ void gpu::MTLTexture::update_sub(
/* Use Compute texture update. */
if (type_ == GPU_TEXTURE_2D) {
id<MTLComputePipelineState> pso = texture_update_2d_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureUpdateParams params = {mip,
{extent[0], extent[1], 1},
{offset[0], offset[1], 0},
@@ -712,7 +712,7 @@ void gpu::MTLTexture::update_sub(
}
else if (type_ == GPU_TEXTURE_2D_ARRAY) {
id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureUpdateParams params = {mip,
{extent[0], extent[1], extent[2]},
{offset[0], offset[1], offset[2]},
@@ -752,7 +752,7 @@ void gpu::MTLTexture::update_sub(
}
else {
id<MTLComputePipelineState> pso = texture_update_3d_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureUpdateParams params = {mip,
{extent[0], extent[1], extent[2]},
{offset[0], offset[1], offset[2]},
@@ -1216,7 +1216,7 @@ void gpu::MTLTexture::read_internal(int mip,
destination_buffer_host_ptr = (void *)((uint8_t *)([destination_buffer contents]) +
destination_offset);
- /* Prepare specialisation struct (For non-trivial texture read routine). */
+ /* Prepare specialization struct (For non-trivial texture read routine). */
int depth_format_mode = 0;
if (is_depth_format) {
depth_format_mode = 1;
@@ -1236,7 +1236,7 @@ void gpu::MTLTexture::read_internal(int mip,
}
}
- TextureReadRoutineSpecialisation compute_specialisation_kernel = {
+ TextureReadRoutineSpecialisation compute_specialization_kernel = {
tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */
tex_data_format_to_msl_type_str(desired_output_format), /* OUTPUT DATA TYPE */
num_channels, /* TEXTURE COMPONENT COUNT */
@@ -1283,7 +1283,7 @@ void gpu::MTLTexture::read_internal(int mip,
id<MTLComputeCommandEncoder> compute_encoder =
ctx->main_command_buffer.ensure_begin_compute_encoder();
id<MTLComputePipelineState> pso = texture_read_2d_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureReadParams params = {
mip,
{width, height, 1},
@@ -1339,7 +1339,7 @@ void gpu::MTLTexture::read_internal(int mip,
id<MTLComputeCommandEncoder> compute_encoder =
ctx->main_command_buffer.ensure_begin_compute_encoder();
id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel(
- compute_specialisation_kernel);
+ compute_specialization_kernel);
TextureReadParams params = {
mip,
{width, height, depth},
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index e2f0b3c848e..25b30c6cb0e 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -305,13 +305,13 @@ bool mtl_format_supports_blending(MTLPixelFormat format)
* \{ */
id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
- TextureUpdateRoutineSpecialisation specialisation_params,
+ TextureUpdateRoutineSpecialisation specialization_params,
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
- &specialisation_cache,
+ &specialization_cache,
eGPUTextureType texture_type)
{
/* Check whether the Kernel exists. */
- id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params);
+ id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params);
if (result != nullptr) {
return *result;
}
@@ -332,18 +332,18 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
options.languageVersion = MTLLanguageVersion2_2;
options.preprocessorMacros = @{
@"INPUT_DATA_TYPE" :
- [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()],
+ [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()],
@"OUTPUT_DATA_TYPE" :
- [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()],
+ [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()],
@"COMPONENT_COUNT_INPUT" :
- [NSNumber numberWithInt:specialisation_params.component_count_input],
+ [NSNumber numberWithInt:specialization_params.component_count_input],
@"COMPONENT_COUNT_OUTPUT" :
- [NSNumber numberWithInt:specialisation_params.component_count_output],
+ [NSNumber numberWithInt:specialization_params.component_count_output],
@"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))]
};
/* Prepare shader library for conversion routine. */
- NSError *error = NULL;
+ NSError *error = nullptr;
id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src
options:options
error:&error] autorelease];
@@ -370,7 +370,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
/* Store PSO. */
[compute_pso retain];
- specialisation_cache.add_new(specialisation_params, compute_pso);
+ specialization_cache.add_new(specialization_params, compute_pso);
return_pso = compute_pso;
}
@@ -379,53 +379,53 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation)
+ TextureUpdateRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_update_impl(specialisation,
+ return mtl_texture_update_impl(specialization,
mtl_context->get_texture_utils().texture_1d_update_compute_psos,
GPU_TEXTURE_1D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_array_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation)
+ TextureUpdateRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(
- specialisation,
+ specialization,
mtl_context->get_texture_utils().texture_1d_array_update_compute_psos,
GPU_TEXTURE_1D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation)
+ TextureUpdateRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_update_impl(specialisation,
+ return mtl_texture_update_impl(specialization,
mtl_context->get_texture_utils().texture_2d_update_compute_psos,
GPU_TEXTURE_2D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_array_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation)
+ TextureUpdateRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
return mtl_texture_update_impl(
- specialisation,
+ specialization,
mtl_context->get_texture_utils().texture_2d_array_update_compute_psos,
GPU_TEXTURE_2D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel(
- TextureUpdateRoutineSpecialisation specialisation)
+ TextureUpdateRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_update_impl(specialisation,
+ return mtl_texture_update_impl(specialization,
mtl_context->get_texture_utils().texture_3d_update_compute_psos,
GPU_TEXTURE_3D);
}
@@ -434,7 +434,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel(
* Currently does not appear to be hit. */
GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
- DepthTextureUpdateRoutineSpecialisation specialisation)
+ DepthTextureUpdateRoutineSpecialisation specialization)
{
/* Check whether the Kernel exists. */
@@ -442,13 +442,13 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
BLI_assert(mtl_context != nullptr);
GPUShader **result = mtl_context->get_texture_utils().depth_2d_update_shaders.lookup_ptr(
- specialisation);
+ specialization);
if (result != nullptr) {
return *result;
}
const char *fragment_source = nullptr;
- switch (specialisation.data_mode) {
+ switch (specialization.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
fragment_source = datatoc_depth_2d_update_float_frag_glsl;
break;
@@ -469,7 +469,7 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
nullptr,
nullptr,
"depth_2d_update_sh_get");
- mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialisation, shader);
+ mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
return shader;
}
@@ -507,18 +507,18 @@ void gpu::MTLTexture::update_sub_depth_2d(
eGPUTextureFormat format = (is_float) ? GPU_R32F : GPU_R32I;
/* Shader key - Add parameters here for different configurations. */
- DepthTextureUpdateRoutineSpecialisation specialisation;
+ DepthTextureUpdateRoutineSpecialisation specialization;
switch (type) {
case GPU_DATA_FLOAT:
- specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT;
+ specialization.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT;
break;
case GPU_DATA_UINT_24_8:
- specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT24;
+ specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT24;
break;
case GPU_DATA_UINT:
- specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT32;
+ specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT32;
break;
default:
@@ -544,7 +544,7 @@ void gpu::MTLTexture::update_sub_depth_2d(
GPU_framebuffer_clear_stencil(depth_fb_temp, 0);
}
- GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialisation);
+ GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialization);
BLI_assert(depth_2d_update_sh != nullptr);
GPUBatch *quad = GPU_batch_preset_quad();
GPU_batch_set_shader(quad, depth_2d_update_sh);
@@ -591,13 +591,13 @@ void gpu::MTLTexture::update_sub_depth_2d(
* \{ */
id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
- TextureReadRoutineSpecialisation specialisation_params,
+ TextureReadRoutineSpecialisation specialization_params,
blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
- &specialisation_cache,
+ &specialization_cache,
eGPUTextureType texture_type)
{
/* Check whether the Kernel exists. */
- id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params);
+ id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params);
if (result != nullptr) {
return *result;
}
@@ -615,10 +615,10 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
/* Defensive Debug Checks. */
long long int depth_scale_factor = 1;
- if (specialisation_params.depth_format_mode > 0) {
- BLI_assert(specialisation_params.component_count_input == 1);
- BLI_assert(specialisation_params.component_count_output == 1);
- switch (specialisation_params.depth_format_mode) {
+ if (specialization_params.depth_format_mode > 0) {
+ BLI_assert(specialization_params.component_count_input == 1);
+ BLI_assert(specialization_params.component_count_output == 1);
+ switch (specialization_params.depth_format_mode) {
case 1:
/* FLOAT */
depth_scale_factor = 1;
@@ -642,24 +642,24 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
options.languageVersion = MTLLanguageVersion2_2;
options.preprocessorMacros = @{
@"INPUT_DATA_TYPE" :
- [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()],
+ [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()],
@"OUTPUT_DATA_TYPE" :
- [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()],
+ [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()],
@"COMPONENT_COUNT_INPUT" :
- [NSNumber numberWithInt:specialisation_params.component_count_input],
+ [NSNumber numberWithInt:specialization_params.component_count_input],
@"COMPONENT_COUNT_OUTPUT" :
- [NSNumber numberWithInt:specialisation_params.component_count_output],
+ [NSNumber numberWithInt:specialization_params.component_count_output],
@"WRITE_COMPONENT_COUNT" :
- [NSNumber numberWithInt:min_ii(specialisation_params.component_count_input,
- specialisation_params.component_count_output)],
+ [NSNumber numberWithInt:min_ii(specialization_params.component_count_input,
+ specialization_params.component_count_output)],
@"IS_DEPTH_FORMAT" :
- [NSNumber numberWithInt:((specialisation_params.depth_format_mode > 0) ? 1 : 0)],
+ [NSNumber numberWithInt:((specialization_params.depth_format_mode > 0) ? 1 : 0)],
@"DEPTH_SCALE_FACTOR" : [NSNumber numberWithLongLong:depth_scale_factor],
@"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))]
};
/* Prepare shader library for conversion routine. */
- NSError *error = NULL;
+ NSError *error = nullptr;
id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src
options:options
error:&error] autorelease];
@@ -687,7 +687,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
/* Store PSO. */
[compute_pso retain];
- specialisation_cache.add_new(specialisation_params, compute_pso);
+ specialization_cache.add_new(specialization_params, compute_pso);
return_pso = compute_pso;
}
@@ -696,51 +696,51 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_get_kernel(
- TextureReadRoutineSpecialisation specialisation)
+ TextureReadRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_read_impl(specialisation,
+ return mtl_texture_read_impl(specialization,
mtl_context->get_texture_utils().texture_2d_read_compute_psos,
GPU_TEXTURE_2D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_array_get_kernel(
- TextureReadRoutineSpecialisation specialisation)
+ TextureReadRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_read_impl(specialisation,
+ return mtl_texture_read_impl(specialization,
mtl_context->get_texture_utils().texture_2d_array_read_compute_psos,
GPU_TEXTURE_2D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_get_kernel(
- TextureReadRoutineSpecialisation specialisation)
+ TextureReadRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_read_impl(specialisation,
+ return mtl_texture_read_impl(specialization,
mtl_context->get_texture_utils().texture_1d_read_compute_psos,
GPU_TEXTURE_1D);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_array_get_kernel(
- TextureReadRoutineSpecialisation specialisation)
+ TextureReadRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_read_impl(specialisation,
+ return mtl_texture_read_impl(specialization,
mtl_context->get_texture_utils().texture_1d_array_read_compute_psos,
GPU_TEXTURE_1D_ARRAY);
}
id<MTLComputePipelineState> gpu::MTLTexture::texture_read_3d_get_kernel(
- TextureReadRoutineSpecialisation specialisation)
+ TextureReadRoutineSpecialisation specialization)
{
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
- return mtl_texture_read_impl(specialisation,
+ return mtl_texture_read_impl(specialization,
mtl_context->get_texture_utils().texture_3d_read_compute_psos,
GPU_TEXTURE_3D);
}
diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc
index 2375e78d9f1..4814a5ad71b 100644
--- a/source/blender/gpu/opengl/gl_backend.cc
+++ b/source/blender/gpu/opengl/gl_backend.cc
@@ -497,6 +497,7 @@ void GLBackend::capabilities_init()
glGetIntegerv(GL_NUM_EXTENSIONS, &GCaps.extensions_len);
GCaps.extension_get = gl_extension_get;
+ GCaps.max_samplers = GCaps.max_textures;
GCaps.mem_stats_support = epoxy_has_gl_extension("GL_NVX_gpu_memory_info") ||
epoxy_has_gl_extension("GL_ATI_meminfo");
GCaps.shader_image_load_store_support = epoxy_has_gl_extension("GL_ARB_shader_image_load_store");
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_common.msl b/source/blender/gpu/shaders/metal/mtl_shader_common.msl
new file mode 100644
index 00000000000..c504cdbacb1
--- /dev/null
+++ b/source/blender/gpu/shaders/metal/mtl_shader_common.msl
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Common Metal header, included in all compiled Metal shaders:
+ * both native MSL shaders and translated GLSL shaders. */
+
+using namespace metal;
+
+/* Should match GPUVertFetchMode. */
+typedef enum {
+ GPU_FETCH_FLOAT = 0,
+ GPU_FETCH_INT,
+ GPU_FETCH_INT_TO_FLOAT_UNIT,
+ GPU_FETCH_INT_TO_FLOAT,
+} GPUVertFetchMode;
+
+/* Constant to flag base binding index of uniform buffers. */
+constant int MTL_uniform_buffer_base_index [[function_constant(0)]];
+
+/* Default Point Size.
+ * Unused if function constant not set. */
+constant float MTL_global_pointsize [[function_constant(1)]];
+
+/* Attribute conversion flags (up to 16 attributes supported in Blender). */
+constant int MTL_AttributeConvert0 [[function_constant(2)]];
+constant int MTL_AttributeConvert1 [[function_constant(3)]];
+constant int MTL_AttributeConvert2 [[function_constant(4)]];
+constant int MTL_AttributeConvert3 [[function_constant(5)]];
+constant int MTL_AttributeConvert4 [[function_constant(6)]];
+constant int MTL_AttributeConvert5 [[function_constant(7)]];
+constant int MTL_AttributeConvert6 [[function_constant(8)]];
+constant int MTL_AttributeConvert7 [[function_constant(9)]];
+constant int MTL_AttributeConvert8 [[function_constant(10)]];
+constant int MTL_AttributeConvert9 [[function_constant(11)]];
+constant int MTL_AttributeConvert10 [[function_constant(12)]];
+constant int MTL_AttributeConvert11 [[function_constant(13)]];
+constant int MTL_AttributeConvert12 [[function_constant(14)]];
+constant int MTL_AttributeConvert13 [[function_constant(15)]];
+constant int MTL_AttributeConvert14 [[function_constant(16)]];
+constant int MTL_AttributeConvert15 [[function_constant(17)]];
+
+/* Constant to flag binding index of transform feedback buffer.
+ * Unused if function constant not set. */
+constant int MTL_transform_feedback_buffer_index [[function_constant(18)]];
+
+/** Internal attribute conversion functionality. */
+/* Following descriptions in mtl_shader.hh, Metal only supports some implicit
+ * attribute type conversions. These conversions occur when there is a difference
+ * between the type specified in the vertex descriptor (In the input vertex buffers),
+ * and the attribute type in the shader's VertexIn struct (ShaderInterface).
+ *
+ * The supported implicit conversions are described here:
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
+ *
+ * For unsupported conversions, the mtl_shader_generator will create an attribute reading function
+ * which performs this conversion manually upon read, depending on the requested fetch mode.
+ *
+ * These conversions use the function constants above, so any branching is optimized out during
+ * backend shader compilation (PSO creation).
+ *
+ * NOTE: Not all possibilities are covered here; additional conversion routines should be
+ * added as needed, and mtl_shader_generator should be updated with any newly required
+ * read functions.
+ *
+ * These paths are only needed for cases where implicit conversion will not happen, in which
+ * case the value will be read as the type in the shader.
+ */
+#define internal_vertex_attribute_convert_read_float(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float(as_type<int>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float(as_type<int>(v_in)) / float(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float2(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float2(as_type<int2>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float2(as_type<int2>(v_in)) / float2(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float3(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float3(as_type<int3>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float3(as_type<int3>(v_in)) / float3(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float4(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float4(as_type<int4>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float4(as_type<int4>(v_in)) / float4(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
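
A sketch of how generated vertex-shader code is expected to use these macros (attribute names hypothetical):

    /* Attribute `pos` is declared float3 in the shader but may arrive as int data. */
    float3 pos_converted;
    internal_vertex_attribute_convert_read_float3(MTL_AttributeConvert0, v_in.pos, pos_converted);
    /* MTL_AttributeConvert0 is a function constant, so the branch is resolved
     * (and dead code removed) at PSO compile time. */
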
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
new file mode 100644
index 00000000000..3b32783620d
--- /dev/null
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** Special header for mapping commonly defined tokens to API-specific variations.
+ * Where possible, this will adhere closely to base GLSL, where semantics are the same.
+ * However, host shader code may need modifying to support types where necessary variations
+ * exist between APIs but are not expressed through the source (e.g. the distinction between
+ * depth2d and texture2d types in Metal).
+ */
+
+/* Base instance with offsets. */
+#define gpu_BaseInstance gl_BaseInstanceARB
+#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)
+
+/* Derivative signs. */
+#define DFDX_SIGN 1.0
+#define DFDY_SIGN 1.0
+
+/* Type definitions. */
+#define vec2 float2
+#define vec3 float3
+#define vec4 float4
+#define mat2 float2x2
+#define mat2x2 float2x2
+#define mat3 float3x3
+#define mat4 float4x4
+#define ivec2 int2
+#define ivec3 int3
+#define ivec4 int4
+#define uvec2 uint2
+#define uvec3 uint3
+#define uvec4 uint4
+/* MTLBOOL is used for native booleans generated by the Metal backend, to avoid type-emulation
+ * for GLSL bools, which are treated as integers. */
+#define MTLBOOL bool
+#define bool int
+#define bvec2 bool2
+#define bvec3 bool3
+#define bvec4 bool4
+#define vec3_1010102_Unorm uint
+#define vec3_1010102_Inorm int
+
+/* Strip GLSL Decorators. */
+#define in
+#define flat
+#define smooth
+#define noperspective
+#define layout(std140) struct
+#define uniform
+
+/* Used to replace 'out' in function parameters with a thread-local reference,
+ * shortened to avoid expanding the GLSL source string. */
+#define THD thread
+
+/* Generate wrapper structs for combined texture and sampler type. */
+#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
+# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
+ template<typename T, access A = access::sample> struct STRUCT_NAME { \
+ thread TEX_TYPE<T, A> *texture; \
+ constant sampler *samp; \
+ }
+#else
+# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
+ template<typename T, access A = access::sample> struct STRUCT_NAME { \
+ thread TEX_TYPE<T, A> *texture; \
+ thread sampler *samp; \
+ }
+#endif
+
+/* Add any types as needed. */
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array);
+
+/* Sampler struct for argument buffer. */
+#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
+struct SStruct {
+ array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]];
+};
+#endif
+
+/* Samplers as function parameters. */
+#define sampler1D thread _mtl_combined_image_sampler_1d<float>
+#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float>
+#define sampler2D thread _mtl_combined_image_sampler_2d<float>
+#define depth2D thread _mtl_combined_image_sampler_depth_2d<float>
+#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float>
+#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
+#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
+#define sampler3D thread _mtl_combined_image_sampler_3d<float>
+#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read>
+#define samplerCube thread _mtl_combined_image_sampler_cube<float>
+#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float>
+
+#define usampler1D thread _mtl_combined_image_sampler_1d<uint>
+#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint>
+#define usampler2D thread _mtl_combined_image_sampler_2d<uint>
+#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint>
+#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint>
+#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
+#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
+#define usampler3D thread _mtl_combined_image_sampler_3d<uint>
+#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read>
+#define usamplerCube thread _mtl_combined_image_sampler_cube<uint>
+#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint>
+
+#define isampler1D thread _mtl_combined_image_sampler_1d<int>
+#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int>
+#define isampler2D thread _mtl_combined_image_sampler_2d<int>
+#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int>
+#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int>
+#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
+#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
+#define isampler3D thread _mtl_combined_image_sampler_3d<int>
+#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read>
+#define isamplerCube thread _mtl_combined_image_sampler_cube<int>
+#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int>
+
+/* Vector accessor aliases. */
+#define st xy
+
+/* Texture functions. */
+#define texelFetch _texelFetch_internal
+#define texelFetchOffset(__tex, __texel, __lod, __offset) \
+ _texelFetch_internal(__tex, __texel, __lod, __offset)
+#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
+#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
+#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
+#define textureLodOffset(__tex, __uv, __lod, __offset) \
+ _texture_internal_level(__tex, __uv, level(float(__lod)), __offset)
+#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0)
+#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp)
+#define textureGatherOffset(__tex, __offset, __uv, __comp) \
+ _texture_gather_internal(__tex, __uv, __comp, __offset)
+
+#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME
+#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__)
+#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__)
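
The `TEXURE_MACRO` indirection dispatches on argument count; expanded by hand:

    /* texture(t, uv)       -> TEXURE_MACRO(t, uv, texture3, texture2)(t, uv)          -> texture2(t, uv)
     * texture(t, uv, bias) -> TEXURE_MACRO(t, uv, bias, texture3, texture2)(t, uv, bias) -> texture3(t, uv, bias) */
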
+
+/* Texture-write functions. */
+#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value)
+
+/* Singular return values from texture functions of type DEPTH are often indexed with either .r or
+ * .x. This is a lightweight wrapper type for handling this syntax. */
+union _msl_return_float {
+ float r;
+ float x;
+ inline operator float() const
+ {
+ return r;
+ }
+};
+
+/* Add custom texture sampling/reading routines for each type to account for special return cases,
+ * e.g. returning a float with an 'r' parameter. NOTE: Cannot use template specialization for the
+ * input type, as return types are specific to the signature of 'tex'. */
+/* Texture Read. */
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel)
+{
+ float w = tex.texture->get_width();
+ if (texel >= 0 && texel < w) {
+ return tex.texture->read(uint(texel));
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T>
+inline vec<S, 4> _texelFetch_internal(
+ const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel)
+{
+ float w = tex.texture->get_width();
+ if (texel >= 0 && texel < w) {
+ return tex.texture->read(uint(texel));
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ T texel,
+ uint lod,
+ T offset = 0)
+{
+ float w = tex.texture->get_width();
+ if ((texel + offset) >= 0 && (texel + offset) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel + offset), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ vec<T, 1> texel,
+ uint lod,
+ vec<T, 1> offset = 0)
+{
+ float w = tex.texture->get_width();
+ if ((texel + offset) >= 0 && (texel + offset) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel + offset), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, int n, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ vec<T, n> texel,
+ uint lod,
+ vec<T, n> offset = vec<T, n>(0))
+{
+ float w = tex.texture->get_width();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel.x + offset.x), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0, 0))
+{
+
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ return tex.texture->read(uint2(texel + offset), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_depth() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint3(texel + offset), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename T, access A>
+inline _msl_return_float _texelFetch_internal(
+ thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
+ return fl;
+ }
+ else {
+ _msl_return_float fl = {0};
+ return fl;
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+/* Sample. */
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u)
+{
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua)
+{
+ return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
+}
+
+inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
+ float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline uint4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline float4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv)
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw)
+{
+ return tex.texture->sample(*tex.samp, uvw);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva)
+{
+ return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z));
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva)
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))};
+ return fl;
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac)
+{
+ _msl_return_float fl = {
+ tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs)
+{
+ return tex.texture->sample(*tex.samp, uvs.xyz);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a)
+{
+ return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w));
+}
+
+/* Sample Level. */
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
+ float u,
+ level options,
+ int offset = 0)
+{
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex,
+ float2 ua,
+ level options,
+ int offset = 0)
+{
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
+}
+
+inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline uint4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d<uint, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline float4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_3d<T, access::sample> tex,
+ float3 uvw,
+ level options = level(0),
+ int3 offset = int3(0))
+{
+ return tex.texture->sample(*tex.samp, uvw, options, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
+ float3 uva,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset);
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
+ float3 uva,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)};
+ return fl;
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
+ float4 uvac,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {
+ tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_cube<T, access::sample> tex,
+ float3 uvs,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uvs.xyz, options);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex,
+ float4 coord_a,
+ level options = level(0),
+ int3 offset = int3(0))
+{
+ return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options);
+}
+
+/* Sample Bias. */
+template<typename T>
+inline vec<T, 4> _texture_internal_bias(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
+ float u,
+ bias options = bias(0.0),
+ int offset = 0)
+{
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_bias(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
+ float2 uv,
+ bias options = bias(0.0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline _msl_return_float _texture_internal_bias(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ bias options = bias(0.0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
+ return fl;
+}
+
+/* Texture Gather.
+ * NOTE: `comp` is accepted for GLSL signature parity, but the overloads below
+ * currently gather only the default (x / depth) component. */
+component int_to_component(const int comp)
+{
+ switch (comp) {
+ default:
+ case 0:
+ return component::x;
+ case 1:
+ return component::y;
+ case 2:
+ return component::z;
+ case 3:
+ return component::w;
+ }
+ /* Unreachable; keeps compilers happy about a missing return. */
+ return component::x;
+}
+
+inline float4 _texture_gather_internal(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ return tex.texture->gather(*tex.samp, uv, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_gather_internal(
+ thread _mtl_combined_image_sampler_2d<T, access::sample> tex,
+ float2 uv,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ return tex.texture->gather(*tex.samp, uv, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_gather_internal(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
+ float3 uva,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ /* 2D array gathers require the array slice as a separate argument. */
+ return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset);
+}
+
+/* Texture write support. */
+template<typename S, typename T, access A>
+inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
+ T _coord,
+ vec<S, 4> value)
+{
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_height();
+ if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
+ tex.texture->write(value, uint2(_coord.xy));
+ }
+}
+
+template<typename S, typename T, access A>
+inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
+ T _coord,
+ vec<S, 4> value)
+{
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_height();
+ float d = tex.texture->get_depth();
+ if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
+ _coord.z < d) {
+ tex.texture->write(value, uint3(_coord.xyz));
+ }
+}
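+
+/* Illustrative note (an assumption based on naming, not part of the generated header):
+ * a GLSL-style `imageStore(img, ivec2(px), val)` is expected to lower onto the
+ * bounds-checked `_texture_write_internal` overloads above, where `img` and `px`
+ * are hypothetical names:
+ *
+ *   _texture_write_internal(img, int2(px), float4(1.0, 0.0, 0.0, 1.0));
+ *
+ * Out-of-bounds coordinates are silently discarded, avoiding invalid writes rather
+ * than relying on undefined behavior. */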
+
+/* SSBO Vertex Fetch Mode. */
+#ifdef MTL_SSBO_VERTEX_FETCH
+/* Enabled when geometry is passed via raw buffer bindings, rather than using
+ * vertex assembly in the vertex-descriptor.
+ *
+ * To describe the layout of input attribute data, we generate a set of uniforms
+ * (defaulting to 0) per unique input attribute with name `attr`:
+ *
+ * - uniform_ssbo_stride_##attr -- Stride between elements.
+ * - uniform_ssbo_offset_##attr -- Base offset within the vertex.
+ * - uniform_ssbo_fetchmode_##attr -- Whether fetching per-vertex (=0) or per-instance (=1).
+ * - uniform_ssbo_vbo_id_##attr -- Buffer binding index of the VBO holding this attribute's data.
+ * - uniform_ssbo_type_##attr -- The type of the data in the currently bound buffer.
+ *
+ * If uniform_ssbo_type_* does not match the desired type, it is the responsibility of the
+ * shader to perform the conversion: attributes should always be read using the raw attribute
+ * type and converted afterwards, e.g. when uniform_ssbo_type_* is `int` but the value should be
+ * normalized to a float. The implementation should query the attribute type using
+ * vertex_fetch_get_attr_type(attr_name):
+ *
+ * float fweight = 0.0;
+ * if (vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) {
+ *   int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int);
+ *   fweight = (float)iweight / (float)INT32_MAX;
+ * }
+ * else {
+ *   fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float);
+ * }
+ *
+ * NOTE: These uniforms are generated as part of the same data block used for regular uniforms.
+ * Attribute layout data is written prior to each draw call, depending on the configuration of
+ * the vertex descriptor for an MTLBatch or MTLImmediate call. */
+# define PPCAT_NX(A, B) A##B
+# define PPCAT(A, B) PPCAT_NX(A, B)
+
+# define RESOLVE_VERTEX(v_id) \
+ ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \
+ ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ? MTL_INDEX_DATA_U16[v_id] : \
+ MTL_INDEX_DATA_U32[v_id]) : \
+ v_id)
+# define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr)
+# define vertex_fetch_attribute_raw(n, attr, type) \
+ (reinterpret_cast<constant type *>( \
+ &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \
+ [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \
+ ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \
+ PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0]
+# define vertex_fetch_attribute(n, attr, type) \
+ vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type)
+# define vertex_id_from_index_id(n) RESOLVE_VERTEX(n)
+# define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR
+# define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR
+# define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr)
+
+/* Must mirror GPU_primitive.h. */
+# define GPU_PRIM_POINTS 0
+# define GPU_PRIM_LINES 1
+# define GPU_PRIM_TRIS 2
+# define GPU_PRIM_LINE_STRIP 3
+# define GPU_PRIM_LINE_LOOP 4
+# define GPU_PRIM_TRI_STRIP 5
+# define GPU_PRIM_TRI_FAN 6
+# define GPU_PRIM_LINES_ADJ 7
+# define GPU_PRIM_TRIS_ADJ 8
+# define GPU_PRIM_LINE_STRIP_ADJ 9
+#endif
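+
+/* Illustrative usage sketch (an assumption for documentation purposes; this is not
+ * emitted by the shader generator). Fetching a hypothetical `pos` attribute for the
+ * current vertex; `vertex_fetch_attribute` resolves through the index buffer
+ * automatically when indexed rendering is active:
+ *
+ *   float3 position = vertex_fetch_attribute(gl_VertexID, pos, float3);
+ *   if (vertex_fetch_get_input_prim_type() == GPU_PRIM_TRI_STRIP) {
+ *     // Handle strip-order vertex assembly.
+ *   }
+ */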
+
+/* Common Functions. */
+#define dFdx(x) dfdx(x)
+#define dFdy(x) dfdy(x)
+#define mod(x, y) _mtlmod(x, y)
+#define discard discard_fragment()
+#define inversesqrt rsqrt
+
+inline float radians(float deg)
+{
+ /* Constant factor: M_PI_F/180.0. */
+ return deg * 0.01745329251f;
+}
+
+inline float degrees(float rad)
+{
+ /* Constant factor: 180.0/M_PI_F. */
+ return rad * 57.2957795131f;
+}
+
+#define select(A, B, C) mix(A, B, C)
+
+/* Type conversions and type truncations. */
+inline float4 to_float4(float3 val)
+{
+ return float4(val, 1.0);
+}
+
+/* Type conversions and type truncations (Utility Functions). */
+inline float3x3 mat4_to_mat3(float4x4 matrix)
+{
+ return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz);
+}
+
+inline int floatBitsToInt(float f)
+{
+ return as_type<int>(f);
+}
+
+inline int2 floatBitsToInt(float2 f)
+{
+ return as_type<int2>(f);
+}
+
+inline int3 floatBitsToInt(float3 f)
+{
+ return as_type<int3>(f);
+}
+
+inline int4 floatBitsToInt(float4 f)
+{
+ return as_type<int4>(f);
+}
+
+inline uint floatBitsToUint(float f)
+{
+ return as_type<uint>(f);
+}
+
+inline uint2 floatBitsToUint(float2 f)
+{
+ return as_type<uint2>(f);
+}
+
+inline uint3 floatBitsToUint(float3 f)
+{
+ return as_type<uint3>(f);
+}
+
+inline uint4 floatBitsToUint(float4 f)
+{
+ return as_type<uint4>(f);
+}
+
+inline float intBitsToFloat(int f)
+{
+ return as_type<float>(f);
+}
+
+inline float2 intBitsToFloat(int2 f)
+{
+ return as_type<float2>(f);
+}
+
+inline float3 intBitsToFloat(int3 f)
+{
+ return as_type<float3>(f);
+}
+
+inline float4 intBitsToFloat(int4 f)
+{
+ return as_type<float4>(f);
+}
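+
+/* Example: these bit-cast helpers mirror the GLSL built-ins of the same name, and the
+ * round-trip is exact, e.g. floatBitsToInt(1.0f) == 0x3F800000 and
+ * intBitsToFloat(0x3F800000) == 1.0f. */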
+
+/* Texture size functions. Add texture types as needed. */
+template<typename T, access A>
+int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod)
+{
+ return int(image.texture->get_width());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(), image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_depth(lod));
+}
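+
+/* Example: mirroring the GLSL built-in, a 512x256 2D array texture with 8 layers
+ * yields textureSize(tex, 0) == int3(512, 256, 8). Note the array size is
+ * independent of `lod`. */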
+
+/* Equality and comparison functions. */
+#define lessThan(a, b) ((a) < (b))
+#define lessThanEqual(a, b) ((a) <= (b))
+#define greaterThan(a, b) ((a) > (b))
+#define greaterThanEqual(a, b) ((a) >= (b))
+#define equal(a, b) ((a) == (b))
+#define notEqual(a, b) ((a) != (b))
+
+template<typename T, int n> bool all(vec<T, n> x)
+{
+ bool _all = true;
+ for (int i = 0; i < n; i++) {
+ _all = _all && (x[i] > 0);
+ }
+ return _all;
+}
+
+template<typename T, int n> bool any(vec<T, n> x)
+{
+ bool _any = false;
+ for (int i = 0; i < n; i++) {
+ _any = _any || (x[i] > 0);
+ }
+ return _any;
+}
+
+/* Modulo functionality. */
+/* The integer overload uses truncated division (like C's `%`), whereas the vector
+ * overloads below follow GLSL's floored `mod()` semantics. */
+int _mtlmod(int a, int b)
+{
+ return a - b * (a / b);
+}
+
+template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y)
+{
+ return x - y * floor(x / y);
+}
+
+template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y)
+{
+ return x - vec<T, n>(y) * floor(x / vec<T, n>(y));
+}
+
+template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y)
+{
+ return vec<U, n>(x) - y * floor(vec<U, n>(x) / y);
+}
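+
+/* Worked example: GLSL `mod()` uses floored division, unlike Metal's `fmod()`, which
+ * truncates toward zero, so the result takes the sign of the divisor:
+ *
+ *   _mtlmod(float2(-0.5), 1.0) == float2(0.5)  (fmod(-0.5, 1.0) would yield -0.5). */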
+
+/* Mathematical functions. */
+template<typename T> T atan(T y, T x)
+{
+ return atan2(y, x);
+}
+
+/* Matrix Inverse. */
+float4x4 inverse(float4x4 a)
+{
+ float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0];
+ float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0];
+ float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0];
+ float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1];
+ float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1];
+ float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2];
+ float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0];
+ float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0];
+ float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0];
+ float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1];
+ float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1];
+ float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2];
+
+ float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
+
+ return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09,
+ a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09,
+ a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03,
+ a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03,
+ a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07,
+ a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07,
+ a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01,
+ a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01,
+ a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06,
+ a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06,
+ a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00,
+ a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00,
+ a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06,
+ a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06,
+ a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00,
+ a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) *
+ invdet;
+}
+
+float3x3 inverse(float3x3 m)
+{
+ float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) -
+ m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) +
+ m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]));
+
+ float3x3 inverse(0);
+ inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
+ inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
+ inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
+ inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
+ inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
+ inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
+ inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
+ inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
+ inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);
+ inverse = inverse * invdet;
+
+ return inverse;
+}
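+
+/* Sanity-check sketch (illustrative only): for a non-singular matrix `m`,
+ * `m * inverse(m)` should equal the identity up to rounding error:
+ *
+ *   float3x3 ident = m * inverse(m);  // ~= float3x3(1.0).
+ */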
+
+/* Additional overloads for builtin functions. */
+float distance(float x, float y)
+{
+ return abs(y - x);
+}
+
+/* Overload for mix(A, B, float ratio). */
+template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val)
+{
+ return mix(a, b, vec<T, Size>(val));
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T, int Size>
+vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask)
+{
+ vec<T, Size> result;
+ for (int i = 0; i < Size; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Using vec<bool, S> does not appear to work; cases are split out per vector size. */
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask)
+{
+ vec<T, 4> result;
+ for (int i = 0; i < 4; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask)
+{
+ vec<T, 3> result;
+ for (int i = 0; i < 3; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask)
+{
+ vec<T, 2> result;
+ for (int i = 0; i < 2; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> T mix(T a, T b, MTLBOOL mask)
+{
+ return (mask) ? b : a;
+}
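+
+/* Example: per-component selection with a boolean mask, mirroring GLSL's
+ * mix(genType, genType, genBType) overload:
+ *
+ *   mix(float3(0.0), float3(1.0), bvec3(true, false, true)) == float3(1.0, 0.0, 1.0). */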
+
+template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a)
+{
+ for (int i = 0; i < Size; i++) {
+ if (a[i] != T(0)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Matrix conversion fallback. */
+mat3 MAT3(vec3 a, vec3 b, vec3 c)
+{
+ return mat3(a, b, c);
+}
+mat3 MAT3(float f)
+{
+ return mat3(f);
+}
+mat3 MAT3(mat4 m)
+{
+ return mat4_to_mat3(m);
+} \ No newline at end of file
diff --git a/source/blender/python/gpu/gpu_py_shader_create_info.cc b/source/blender/python/gpu/gpu_py_shader_create_info.cc
index fbab39efe24..c9e49c5cc4b 100644
--- a/source/blender/python/gpu/gpu_py_shader_create_info.cc
+++ b/source/blender/python/gpu/gpu_py_shader_create_info.cc
@@ -673,6 +673,9 @@ static int constant_type_size(Type type)
case Type::FLOAT:
case Type::INT:
case Type::UINT:
+ case Type::UCHAR4:
+ case Type::CHAR4:
+ case Type::VEC3_101010I2:
return 4;
break;
case Type::VEC2:
@@ -695,6 +698,18 @@ static int constant_type_size(Type type)
case Type::MAT4:
return 64;
break;
+ case Type::UCHAR:
+ case Type::CHAR:
+ return 1;
+ break;
+ case Type::UCHAR2:
+ case Type::CHAR2:
+ return 2;
+ break;
+ case Type::UCHAR3:
+ case Type::CHAR3:
+ return 3;
+ break;
}
BLI_assert(false);
return -1;