git.blender.org/blender.git
author    Thomas Dinges <dingto> 2022-09-01 23:22:32 +0300
committer Clément Foucault <foucault.clem@gmail.com> 2022-09-01 23:28:40 +0300
commit    cc8ea6ac67a108fcb96e4a8373ac02faf9ccea3d (patch)
tree      351d23dc82e72ea34e6f2d630f9746ec76344af5 /source/blender/gpu/shaders
parent    ac07fb38a1b35fa156b2d0901eb35cd65ed73903 (diff)
Metal: MTLShader and MTLShaderGenerator implementation.
Full support for translation and compilation of shaders in Metal, using
GPUShaderCreateInfo. Includes render pipeline state creation and management,
enabling all standard GPU viewport rendering features in Metal.

Authored by Apple: Michael Parkin-White, Marco Giordano

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D15563
Diffstat (limited to 'source/blender/gpu/shaders')
-rw-r--r--  source/blender/gpu/shaders/metal/mtl_shader_common.msl    109
-rw-r--r--  source/blender/gpu/shaders/metal/mtl_shader_defines.msl  1065
2 files changed, 1174 insertions, 0 deletions
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_common.msl b/source/blender/gpu/shaders/metal/mtl_shader_common.msl
new file mode 100644
index 00000000000..c504cdbacb1
--- /dev/null
+++ b/source/blender/gpu/shaders/metal/mtl_shader_common.msl
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Common Metal header to be included in all compiled Metal shaders,
+ * both native MSL shaders and translated GLSL shaders. */
+
+using namespace metal;
+
+/* Should match GPUVertFetchMode. */
+typedef enum {
+ GPU_FETCH_FLOAT = 0,
+ GPU_FETCH_INT,
+ GPU_FETCH_INT_TO_FLOAT_UNIT,
+ GPU_FETCH_INT_TO_FLOAT,
+} GPUVertFetchMode;
+
+/* Constant to flag the base binding index of uniform buffers. */
+constant int MTL_uniform_buffer_base_index [[function_constant(0)]];
+
+/* Default point size.
+ * Unused if the function constant is not set. */
+constant float MTL_global_pointsize [[function_constant(1)]];
+
+/* Attribute conversion flags (up to 16 attributes supported in Blender). */
+constant int MTL_AttributeConvert0 [[function_constant(2)]];
+constant int MTL_AttributeConvert1 [[function_constant(3)]];
+constant int MTL_AttributeConvert2 [[function_constant(4)]];
+constant int MTL_AttributeConvert3 [[function_constant(5)]];
+constant int MTL_AttributeConvert4 [[function_constant(6)]];
+constant int MTL_AttributeConvert5 [[function_constant(7)]];
+constant int MTL_AttributeConvert6 [[function_constant(8)]];
+constant int MTL_AttributeConvert7 [[function_constant(9)]];
+constant int MTL_AttributeConvert8 [[function_constant(10)]];
+constant int MTL_AttributeConvert9 [[function_constant(11)]];
+constant int MTL_AttributeConvert10 [[function_constant(12)]];
+constant int MTL_AttributeConvert11 [[function_constant(13)]];
+constant int MTL_AttributeConvert12 [[function_constant(14)]];
+constant int MTL_AttributeConvert13 [[function_constant(15)]];
+constant int MTL_AttributeConvert14 [[function_constant(16)]];
+constant int MTL_AttributeConvert15 [[function_constant(17)]];
+
+/* Constant to flag the binding index of the transform feedback buffer.
+ * Unused if function constant not set. */
+constant int MTL_transform_feedback_buffer_index [[function_constant(18)]];
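
As context for the constants above: Metal function constants are specialization values supplied by the backend at pipeline-state creation, so any branching on them is folded away when the PSO is compiled. A minimal sketch of guarding on an optionally-set constant (the helper name is hypothetical; `is_function_constant_defined` is standard MSL):

inline float mtl_pointsize_or(float fallback)
{
  /* Hypothetical helper: use the global point size only when the host set it. */
  return is_function_constant_defined(MTL_global_pointsize) ? MTL_global_pointsize : fallback;
}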
+
+/** Internal attribute conversion functionality. */
+/* Following the descriptions in mtl_shader.hh, Metal only supports some implicit
+ * attribute type conversions. These conversions occur when there is a difference
+ * between the type specified in the vertex descriptor (in the input vertex buffers)
+ * and the attribute type in the shader's VertexIn struct (ShaderInterface).
+ *
+ * The supported implicit conversions are described here:
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
+ *
+ * For unsupported conversions, the mtl_shader_generator will create an attribute-reading
+ * function which performs the conversion manually upon read, depending on the requested
+ * fetch mode.
+ *
+ * These conversions use the function constants above, so any branching is optimized out during
+ * backend shader compilation (PSO creation).
+ *
+ * NOTE: Not all possibilities have been covered here, any additional conversion routines should
+ * be added as needed, and mtl_shader_generator should also be updated with any newly required
+ * read functions.
+ *
+ * These paths are only needed for cases where implicit conversion will not happen, in which
+ * case the value will be read as the type in the shader.
+ */
+#define internal_vertex_attribute_convert_read_float(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float(as_type<int>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float(as_type<int>(v_in)) / float(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float2(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float2(as_type<int2>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float2(as_type<int2>(v_in)) / float2(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float3(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float3(as_type<int3>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float3(as_type<int3>(v_in)) / float3(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
+
+#define internal_vertex_attribute_convert_read_float4(ATTR, v_in, v_out) \
+ if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \
+ v_out = float4(as_type<int4>(v_in)); \
+ } \
+ else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \
+ v_out = float4(as_type<int4>(v_in)) / float4(__INT_MAX__); \
+ } \
+ else { \
+ v_out = v_in; \
+ }
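
To make the macros concrete, here is a sketch of the kind of read function mtl_shader_generator might emit for a float attribute assigned to conversion slot 0 (the function name and slot assignment are hypothetical):

/* Hypothetical generated reader: converts the raw fetched value for attribute 0.
 * MTL_AttributeConvert0 is a function constant, so the macro's branches resolve
 * at PSO compilation time. */
inline float read_attr0_as_float(float raw_value)
{
  float converted;
  internal_vertex_attribute_convert_read_float(MTL_AttributeConvert0, raw_value, converted);
  return converted;
}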
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
new file mode 100644
index 00000000000..3b32783620d
--- /dev/null
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** Special header for mapping commonly defined tokens to API-specific variations.
+ * Where possible, this will adhere closely to base GLSL, where semantics are the same.
+ * However, host shader code may need modification to support types where necessary
+ * variations exist between APIs but are not expressed through the source (e.g. the
+ * distinction between depth2d and texture2d types in Metal).
+ */
+
+/* Base instance with offsets. */
+#define gpu_BaseInstance gl_BaseInstanceARB
+#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)
+
+/* Derivative signs. */
+#define DFDX_SIGN 1.0
+#define DFDY_SIGN 1.0
+
+/* Type definitions. */
+#define vec2 float2
+#define vec3 float3
+#define vec4 float4
+#define mat2 float2x2
+#define mat2x2 float2x2
+#define mat3 float3x3
+#define mat4 float4x4
+#define ivec2 int2
+#define ivec3 int3
+#define ivec4 int4
+#define uvec2 uint2
+#define uvec3 uint3
+#define uvec4 uint4
+/* MTLBOOL is used for native booleans generated by the Metal backend, to avoid the
+ * type emulation applied to GLSL bools, which are treated as integers. */
+#define MTLBOOL bool
+#define bool int
+#define bvec2 bool2
+#define bvec3 bool3
+#define bvec4 bool4
+#define vec3_1010102_Unorm uint
+#define vec3_1010102_Inorm int
+
+/* Strip GLSL Decorators. */
+#define in
+#define flat
+#define smooth
+#define noperspective
+#define layout(std140) struct
+#define uniform
+
+/* Used to replace 'out' in function parameters with a thread-address-space reference,
+ * shortened to THD to avoid expanding the GLSL source string. */
+#define THD thread
+
+/* Generate wrapper structs for combined texture and sampler type. */
+#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
+# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
+ template<typename T, access A = access::sample> struct STRUCT_NAME { \
+ thread TEX_TYPE<T, A> *texture; \
+ constant sampler *samp; \
+ }
+#else
+# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
+ template<typename T, access A = access::sample> struct STRUCT_NAME { \
+ thread TEX_TYPE<T, A> *texture; \
+ thread sampler *samp; \
+ }
+#endif
+
+/* Add any types as needed. */
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array);
+COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array);
+
+/* Sampler struct for argument buffer. */
+#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
+struct SStruct {
+ array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]];
+};
+#endif
+
+/* Samplers as function parameters. */
+#define sampler1D thread _mtl_combined_image_sampler_1d<float>
+#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float>
+#define sampler2D thread _mtl_combined_image_sampler_2d<float>
+#define depth2D thread _mtl_combined_image_sampler_depth_2d<float>
+#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float>
+#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
+#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
+#define sampler3D thread _mtl_combined_image_sampler_3d<float>
+#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read>
+#define samplerCube thread _mtl_combined_image_sampler_cube<float>
+#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float>
+
+#define usampler1D thread _mtl_combined_image_sampler_1d<uint>
+#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint>
+#define usampler2D thread _mtl_combined_image_sampler_2d<uint>
+#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint>
+#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint>
+#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
+#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
+#define usampler3D thread _mtl_combined_image_sampler_3d<uint>
+#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read>
+#define usamplerCube thread _mtl_combined_image_sampler_cube<uint>
+#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint>
+
+#define isampler1D thread _mtl_combined_image_sampler_1d<int>
+#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int>
+#define isampler2D thread _mtl_combined_image_sampler_2d<int>
+#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int>
+#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int>
+#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
+#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
+#define isampler3D thread _mtl_combined_image_sampler_3d<int>
+#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read>
+#define isamplerCube thread _mtl_combined_image_sampler_cube<int>
+#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int>
+
+/* Vector accessor aliases. */
+#define st xy
+
+/* Texture functions. */
+#define texelFetch _texelFetch_internal
+#define texelFetchOffset(__tex, __texel, __lod, __offset) \
+ _texelFetch_internal(__tex, __texel, __lod, __offset)
+#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
+#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
+#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
+#define textureLodOffset(__tex, __uv, __lod, __offset) \
+ _texture_internal_level(__tex, __uv, level(float(__lod)), __offset)
+#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0)
+#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp)
+#define textureGatherOffset(__tex, __offset, __uv, __comp) \
+ _texture_gather_internal(__tex, __uv, __comp, __offset)
+
+#define TEXTURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME
+#define texture(...) TEXTURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__)
+#define textureGather(...) TEXTURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__)
+
+/* Texture-write functions. */
+#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value)
+
+/* Singular return values from texture functions of type DEPTH are often indexed with either .r or
+ * .x. This is a lightweight wrapper type for handling this syntax. */
+union _msl_return_float {
+ float r;
+ float x;
+ inline operator float() const
+ {
+ return r;
+ }
+};
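
A small illustration of why the union exists; translated GLSL may spell the same depth scalar either way (the function is hypothetical):

inline float depth_value_example(_msl_return_float d)
{
  /* Both members alias the same float, matching GLSL's .r/.x access. */
  return (d.r + d.x) * 0.5;
}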
+
+/* Add custom texture sampling/reading routines for each type to account for special return
+ * cases, e.g. returning a float with an `r` member. NOTE: Cannot use template specialization
+ * for the input type, as return types are specific to the signature of 'tex'. */
+/* Texture Read. */
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel)
+{
+ float w = tex.texture->get_width();
+ if (texel >= 0 && texel < w) {
+ return tex.texture->read(uint(texel));
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T>
+inline vec<S, 4> _texelFetch_internal(
+ const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel)
+{
+ float w = tex.texture->get_width();
+ if (texel >= 0 && texel < w) {
+ return tex.texture->read(uint(texel));
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ T texel,
+ uint lod,
+ T offset = 0)
+{
+ float w = tex.texture->get_width();
+ if ((texel + offset) >= 0 && (texel + offset) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel + offset), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ vec<T, 1> texel,
+ uint lod,
+ vec<T, 1> offset = 0)
+{
+ float w = tex.texture->get_width();
+ if ((texel + offset) >= 0 && (texel + offset) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel + offset), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, int n, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
+ vec<T, n> texel,
+ uint lod,
+ vec<T, n> offset = vec<T, n>(0))
+{
+ float w = tex.texture->get_width();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel.x + offset.x), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0, 0))
+{
+
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ return tex.texture->read(uint2(texel + offset), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_depth() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint3(texel + offset), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+template<typename T, access A>
+inline _msl_return_float _texelFetch_internal(
+ thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
+ vec<T, 2> texel,
+ uint lod,
+ vec<T, 2> offset = vec<T, 2>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h) {
+ _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
+ return fl;
+ }
+ else {
+ _msl_return_float fl = {0};
+ return fl;
+ }
+}
+
+template<typename S, typename T, access A>
+inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
+ vec<T, 3> texel,
+ uint lod,
+ vec<T, 3> offset = vec<T, 3>(0))
+{
+
+ float w = tex.texture->get_width() >> lod;
+ float h = tex.texture->get_height() >> lod;
+ float d = tex.texture->get_array_size();
+ if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
+ (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) {
+ return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
+ }
+ else {
+ return vec<S, 4>(0);
+ }
+}
+
+/* Sample. */
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u)
+{
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua)
+{
+ return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
+}
+
+inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
+ float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline uint4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline float4 _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv)
+{
+ return tex.texture->sample(*tex.samp, uv);
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv)
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw)
+{
+ return tex.texture->sample(*tex.samp, uvw);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva)
+{
+ return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z));
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva)
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))};
+ return fl;
+}
+
+inline _msl_return_float _texture_internal_samp(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac)
+{
+ _msl_return_float fl = {
+ tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs)
+{
+ return tex.texture->sample(*tex.samp, uvs.xyz);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_samp(
+ thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a)
+{
+ return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w));
+}
+
+/* Sample Level. */
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
+ float u,
+ level options,
+ int offset = 0)
+{
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex,
+ float2 ua,
+ level options,
+ int offset = 0)
+{
+ /* LODs not supported for 1d textures. This must be zero. */
+ return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
+}
+
+inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline uint4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d<uint, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline float4 _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ level options,
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_3d<T, access::sample> tex,
+ float3 uvw,
+ level options = level(0),
+ int3 offset = int3(0))
+{
+ return tex.texture->sample(*tex.samp, uvw, options, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
+ float3 uva,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset);
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
+ float3 uva,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)};
+ return fl;
+}
+
+inline _msl_return_float _texture_internal_level(
+ thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
+ float4 uvac,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {
+ tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)};
+ return fl;
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_cube<T, access::sample> tex,
+ float3 uvs,
+ level options = level(0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uvs.xyz, options);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_internal_level(
+ thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex,
+ float4 coord_a,
+ level options = level(0),
+ int3 offset = int3(0))
+{
+ return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options);
+}
+
+/* Sample Bias. */
+template<typename T>
+inline vec<T, 4> _texture_internal_bias(
+ thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
+ float u,
+ bias options = bias(0.0),
+ int offset = 0)
+{
+ return tex.texture->sample(*tex.samp, u);
+}
+
+inline float4 _texture_internal_bias(
+ thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
+ float2 uv,
+ bias options = bias(0.0),
+ int2 offset = int2(0))
+{
+ return tex.texture->sample(*tex.samp, uv, options, offset);
+}
+
+inline _msl_return_float _texture_internal_bias(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ bias options = bias(0),
+ int2 offset = int2(0))
+{
+ _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
+ return fl;
+}
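
With the sample and bias overloads above in scope, both GLSL-style `texture()` call forms, dispatched through the argument-counting TEXTURE_MACRO earlier in this header, resolve as expected. A hypothetical example:

inline float4 sample_with_optional_bias(sampler2D tex, float2 uv, float lod_bias)
{
  /* Two arguments -> _texture_internal_samp(tex, uv). */
  float4 base = texture(tex, uv);
  /* Three arguments -> _texture_internal_bias(tex, uv, bias(float(lod_bias))). */
  float4 biased = texture(tex, uv, lod_bias);
  return (base + biased) * 0.5;
}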
+
+/* Texture Gather. */
+component int_to_component(const int comp)
+{
+ switch (comp) {
+ default:
+ case 0:
+ return component::x;
+ case 1:
+ return component::y;
+ case 2:
+ return component::z;
+ case 3:
+ return component::w;
+ }
+ return component::x;
+}
+
+inline float4 _texture_gather_internal(
+ thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
+ float2 uv,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ return tex.texture->gather(*tex.samp, uv, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_gather_internal(
+ thread _mtl_combined_image_sampler_2d<T, access::sample> tex,
+ float2 uv,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ return tex.texture->gather(*tex.samp, uv, offset);
+}
+
+template<typename T>
+inline vec<T, 4> _texture_gather_internal(
+ thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
+ float2 uv,
+ const int comp = 0,
+ int2 offset = int2(0))
+{
+ return tex.texture->gather(*tex.samp, uv, offset);
+}
+
+/* Texture write support. */
+template<typename S, typename T, access A>
+inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
+ T _coord,
+ vec<S, 4> value)
+{
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_height();
+ if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
+ tex.texture->write(value, uint2(_coord.xy));
+ }
+}
+
+template<typename S, typename T, access A>
+inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
+ T _coord,
+ vec<S, 4> value)
+{
+ float w = tex.texture->get_width();
+ float h = tex.texture->get_height();
+ float d = tex.texture->get_depth();
+ if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
+ _coord.z < d) {
+ tex.texture->write(value, uint3(_coord.xyz));
+ }
+}
+
+/* SSBO Vertex Fetch Mode. */
+#ifdef MTL_SSBO_VERTEX_FETCH
+/* Enabled when geometry is passed via raw buffer bindings, rather than using
+ * vertex assembly in the vertex-descriptor.
+ *
+ * To describe the layout of input attribute data, uniforms (defaulting to 0) are generated
+ * for each unique input attribute with name `attr`:
+ *
+ * - uniform_ssbo_stride_##attr - The stride between elements.
+ * - uniform_ssbo_offset_##attr - The base offset within the vertex.
+ * - uniform_ssbo_fetchmode_##attr - Whether using per-vertex (=0) or per-instance (=1) fetch.
+ * - uniform_ssbo_vbo_id_##attr - The buffer binding index of the VBO holding this attribute.
+ * - uniform_ssbo_type_##attr - The type of the data in the currently bound buffer.
+ *
+ * If uniform_ssbo_type_* does not match the desired type, it is the responsibility of the
+ * shader to perform the conversion. Values should always be read as the raw attribute type
+ * and then converted, e.g. when uniform_ssbo_type_* is `int` but the shader wants the value
+ * normalized to a float.
+ * The implementation should query the attribute type using vertex_fetch_get_attr_type(attr_name):
+ *
+ * float fweight = 0.0;
+ * if (vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) {
+ *   int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int);
+ *   fweight = (float)iweight / (float)INT32_MAX;
+ * } else {
+ *   fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float);
+ * }
+ *
+ * NOTE: These uniforms are generated as part of the same data block used for regular uniforms,
+ * and attribute data is written prior to each draw call, depending on the configuration of
+ * the vertex descriptor for an MTLBatch or MTLImmediate call. */
+# define PPCAT_NX(A, B) A##B
+# define PPCAT(A, B) PPCAT_NX(A, B)
+
+# define RESOLVE_VERTEX(v_id) \
+ ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \
+ ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ? MTL_INDEX_DATA_U16[v_id] : \
+ MTL_INDEX_DATA_U32[v_id]) : \
+ v_id)
+# define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr)
+# define vertex_fetch_attribute_raw(n, attr, type) \
+ (reinterpret_cast<constant type *>( \
+ &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \
+ [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \
+ ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \
+ PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0]
+# define vertex_fetch_attribute(n, attr, type) \
+ vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type)
+# define vertex_id_from_index_id(n) RESOLVE_VERTEX(n)
+# define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR
+# define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR
+# define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr)
+
+/* Must mirror GPU_primitive.h. */
+# define GPU_PRIM_POINTS 0
+# define GPU_PRIM_LINES 1
+# define GPU_PRIM_TRIS 2
+# define GPU_PRIM_LINE_STRIP 3
+# define GPU_PRIM_LINE_LOOP 4
+# define GPU_PRIM_TRI_STRIP 5
+# define GPU_PRIM_TRI_FAN 6
+# define GPU_PRIM_LINES_ADJ 7
+# define GPU_PRIM_TRIS_ADJ 8
+# define GPU_PRIM_LINE_STRIP_ADJ 9
+#endif
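
A sketch of how generated vertex-shader code might consume these macros, assuming the shader generator has patched in the UNIFORM_SSBO_* tokens and an attribute named `pos` (all names here are hypothetical):

/* Hypothetical usage inside a generated vertex function:
 *
 *   if (vertex_fetch_get_input_prim_type() == GPU_PRIM_TRIS) {
 *     // RESOLVE_VERTEX routes gl_VertexID through the bound index buffer (if any)
 *     // before the raw bytes at (stride * index) + offset are reinterpreted.
 *     float3 pos = vertex_fetch_attribute(gl_VertexID, pos, packed_float3);
 *   }
 */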
+
+/* Common Functions. */
+#define dFdx(x) dfdx(x)
+#define dFdy(x) dfdy(x)
+#define mod(x, y) _mtlmod(x, y)
+#define discard discard_fragment()
+#define inversesqrt rsqrt
+
+inline float radians(float deg)
+{
+ /* Constant factor: M_PI_F/180.0. */
+ return deg * 0.01745329251f;
+}
+
+inline float degrees(float rad)
+{
+ /* Constant factor: 180.0/M_PI_F. */
+ return rad * 57.2957795131f;
+}
+
+#define select(A, B, C) mix(A, B, C)
+
+/* Type conversions and type truncations. */
+inline float4 to_float4(float3 val)
+{
+ return float4(val, 1.0);
+}
+
+/* Type conversions and type truncations (Utility Functions). */
+inline float3x3 mat4_to_mat3(float4x4 matrix)
+{
+ return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz);
+}
+
+inline int floatBitsToInt(float f)
+{
+ return as_type<int>(f);
+}
+
+inline int2 floatBitsToInt(float2 f)
+{
+ return as_type<int2>(f);
+}
+
+inline int3 floatBitsToInt(float3 f)
+{
+ return as_type<int3>(f);
+}
+
+inline int4 floatBitsToInt(float4 f)
+{
+ return as_type<int4>(f);
+}
+
+inline uint floatBitsToUint(float f)
+{
+ return as_type<uint>(f);
+}
+
+inline uint2 floatBitsToUint(float2 f)
+{
+ return as_type<uint2>(f);
+}
+
+inline uint3 floatBitsToUint(float3 f)
+{
+ return as_type<uint3>(f);
+}
+
+inline uint4 floatBitsToUint(float4 f)
+{
+ return as_type<uint4>(f);
+}
+
+inline float intBitsToFloat(int f)
+{
+ return as_type<float>(f);
+}
+
+inline float2 intBitsToFloat(int2 f)
+{
+ return as_type<float2>(f);
+}
+
+inline float3 intBitsToFloat(int3 f)
+{
+ return as_type<float3>(f);
+}
+
+inline float4 intBitsToFloat(int4 f)
+{
+ return as_type<float4>(f);
+}
+
+/* Texture size functions. Add texture types as needed. */
+template<typename T, access A>
+int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod)
+{
+ return int(image.texture->get_width());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(), image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_array_size());
+}
+
+template<typename T, access A>
+int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod)
+{
+ return int2(image.texture->get_width(lod), image.texture->get_height(lod));
+}
+
+template<typename T, access A>
+int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod)
+{
+ return int3(image.texture->get_width(lod),
+ image.texture->get_height(lod),
+ image.texture->get_depth(lod));
+}
+
+/* Equality and comparison functions. */
+#define lessThan(a, b) ((a) < (b))
+#define lessThanEqual(a, b) ((a) <= (b))
+#define greaterThan(a, b) ((a) > (b))
+#define greaterThanEqual(a, b) ((a) >= (b))
+#define equal(a, b) ((a) == (b))
+#define notEqual(a, b) ((a) != (b))
+
+template<typename T, int n> bool all(vec<T, n> x)
+{
+ bool _all = true;
+ for (int i = 0; i < n; i++) {
+ _all = _all && (x[i] > 0);
+ }
+ return _all;
+}
+
+template<typename T, int n> bool any(vec<T, n> x)
+{
+ bool _any = false;
+ for (int i = 0; i < n; i++) {
+ _any = _any || (x[i] > 0);
+ }
+ return _any;
+}
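
Since `bool` is #defined to `int` earlier in this header, emulated GLSL bvec values are integer vectors, which is why the overloads above treat any component greater than zero as true. A hypothetical usage:

inline MTLBOOL has_negative_component(float3 v)
{
  /* The comparison macro yields a component-wise result consumed by any(). */
  return any(lessThan(v, float3(0.0)));
}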
+
+/* Modulo functionality. */
+int _mtlmod(int a, int b)
+{
+ return a - b * (a / b);
+}
+
+template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y)
+{
+ return x - y * floor(x / y);
+}
+
+template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y)
+{
+ return x - vec<T, n>(y) * floor(x / vec<T, n>(y));
+}
+
+template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y)
+{
+ return vec<U, n>(x) - y * floor(vec<U, n>(x) / y);
+}
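
Worth noting: for vectors these follow GLSL's floored-mod semantics, which differ from truncated `metal::fmod` for negative operands. A small check against the overloads above:

inline float2 mod_semantics_example()
{
  /* Floored mod: mod(-1.0, 3.0) == 2.0, whereas fmod(-1.0, 3.0) == -1.0. */
  return mod(float2(-1.0, 4.5), float2(3.0, 3.0)); /* == float2(2.0, 1.5) */
}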
+
+/* Mathematical functions. */
+template<typename T> T atan(T y, T x)
+{
+ return atan2(y, x);
+}
+
+/* Matrix Inverse. */
+float4x4 inverse(float4x4 a)
+{
+ float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0];
+ float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0];
+ float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0];
+ float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1];
+ float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1];
+ float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2];
+ float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0];
+ float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0];
+ float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0];
+ float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1];
+ float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1];
+ float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2];
+
+ float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
+
+ return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09,
+ a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09,
+ a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03,
+ a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03,
+ a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07,
+ a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07,
+ a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01,
+ a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01,
+ a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06,
+ a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06,
+ a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00,
+ a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00,
+ a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06,
+ a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06,
+ a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00,
+ a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) *
+ invdet;
+}
+
+float3x3 inverse(float3x3 m)
+{
+
+ float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) -
+ m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) +
+ m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]));
+
+ float3x3 inverse(0);
+ inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
+ inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
+ inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
+ inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
+ inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
+ inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
+ inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
+ inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
+ inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);
+ inverse = inverse * invdet;
+
+ return inverse;
+}
+
+/* Additional overloads for builtin functions. */
+float distance(float x, float y)
+{
+ return abs(y - x);
+}
+
+/* Overload for mix(A, B, float ratio). */
+template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val)
+{
+ return mix(a, b, vec<T, Size>(val));
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T, int Size>
+vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask)
+{
+ vec<T, Size> result;
+ for (int i = 0; i < Size; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Using vec<bool, S> does not appear to work, splitting cases. */
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask)
+{
+ vec<T, 4> result;
+ for (int i = 0; i < 4; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask)
+{
+ vec<T, 3> result;
+ for (int i = 0; i < 3; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bvec<N>). */
+template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask)
+{
+ vec<T, 2> result;
+ for (int i = 0; i < 2; i++) {
+ result[i] = mask[i] ? b[i] : a[i];
+ }
+ return result;
+}
+
+/* Overload for mix(A, B, bool). */
+template<typename T> T mix(T a, T b, MTLBOOL mask)
+{
+ return (mask) ? b : a;
+}
+
+template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a)
+{
+ for (int i = 0; i < Size; i++) {
+ if (a[i] != T(0)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Matrix conversion fallback. */
+mat3 MAT3(vec3 a, vec3 b, vec3 c)
+{
+ return mat3(a, b, c);
+}
+mat3 MAT3(float f)
+{
+ return mat3(f);
+}
+mat3 MAT3(mat4 m)
+{
+ return mat4_to_mat3(m);
+}
\ No newline at end of file