diff options
Diffstat (limited to 'source/blender/gpu/shaders')
-rw-r--r-- | source/blender/gpu/shaders/metal/mtl_shader_common.msl | 109 | ||||
-rw-r--r-- | source/blender/gpu/shaders/metal/mtl_shader_defines.msl | 1065 |
2 files changed, 1174 insertions, 0 deletions
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_common.msl b/source/blender/gpu/shaders/metal/mtl_shader_common.msl new file mode 100644 index 00000000000..c504cdbacb1 --- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_common.msl @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* Common Metal header to be included in all compiled Metal shaders. + * Both native MSL shaders and GLSL shaders. */ + +using namespace metal; + +/* Should match GPUVertFetchMode. */ +typedef enum { + GPU_FETCH_FLOAT = 0, + GPU_FETCH_INT, + GPU_FETCH_INT_TO_FLOAT_UNIT, + GPU_FETCH_INT_TO_FLOAT, +} GPUVertFetchMode; + +/* Consant to flag base binding index of uniform buffers. */ +constant int MTL_uniform_buffer_base_index [[function_constant(0)]]; + +/* Default Point Size. + * Unused if function constant not set. */ +constant float MTL_global_pointsize [[function_constant(1)]]; + +/* Attribute conversions flags (Up to 16 attributes supported in Blender). */ +constant int MTL_AttributeConvert0 [[function_constant(2)]]; +constant int MTL_AttributeConvert1 [[function_constant(3)]]; +constant int MTL_AttributeConvert2 [[function_constant(4)]]; +constant int MTL_AttributeConvert3 [[function_constant(5)]]; +constant int MTL_AttributeConvert4 [[function_constant(6)]]; +constant int MTL_AttributeConvert5 [[function_constant(7)]]; +constant int MTL_AttributeConvert6 [[function_constant(8)]]; +constant int MTL_AttributeConvert7 [[function_constant(9)]]; +constant int MTL_AttributeConvert8 [[function_constant(10)]]; +constant int MTL_AttributeConvert9 [[function_constant(11)]]; +constant int MTL_AttributeConvert10 [[function_constant(12)]]; +constant int MTL_AttributeConvert11 [[function_constant(13)]]; +constant int MTL_AttributeConvert12 [[function_constant(14)]]; +constant int MTL_AttributeConvert13 [[function_constant(15)]]; +constant int MTL_AttributeConvert14 [[function_constant(16)]]; +constant int MTL_AttributeConvert15 [[function_constant(17)]]; + +/* Consant to flag binding index of transform feedback buffer. + * Unused if function constant not set. */ +constant int MTL_transform_feedback_buffer_index [[function_constant(18)]]; + +/** Internal attribute conversion functionality. */ +/* Following descriptions in mtl_shader.hh, Metal only supports some implicit + * attribute type conversions. These conversions occur when there is a difference + * between the type specified in the vertex descriptor (In the input vertex buffers), + * and the attribute type in the shader's VertexIn struct (ShaderInterface). + * + * The supported implicit conversions are described here: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + * + * For unsupported conversions, the mtl_shader_generator will create an attribute reading function + * which performs this conversion manually upon read, depending on the requested fetchmode. + * + * These conversions use the function constants above, so any branching is optimized out during + * backend shader compilation (PSO creation). + * + * NOTE: Not all possibilities have been covered here, any additional conversion routines should + * be added as needed, and mtl_shader_generator should also be updated with any newly required + * read functions. + * + * These paths are only needed for cases where implicit conversion will not happen, in which + * case the value will be read as the type in the shader. + */ +#define internal_vertex_attribute_convert_read_float(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float(as_type<int>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float(as_type<int>(v_in)) / float(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float2(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float2(as_type<int2>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float2(as_type<int2>(v_in)) / float2(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float3(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float3(as_type<int3>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float3(as_type<int3>(v_in)) / float3(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float4(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float4(as_type<int4>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float4(as_type<int4>(v_in)) / float4(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl new file mode 100644 index 00000000000..3b32783620d --- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -0,0 +1,1065 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** Special header for mapping commonly defined tokens to API-specific variations. + * Where possible, this will adhere closely to base GLSL, where semantics are the same. + * However, host code shader code may need modifying to support types where necessary variations + * exist between APIs but are not expressed through the source. (e.g. distinctio between depth2d + * and texture2d types in metal). + */ + +/* Base instance with offsets. */ +#define gpu_BaseInstance gl_BaseInstanceARB +#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance) + +/* derivative signs. */ +#define DFDX_SIGN 1.0 +#define DFDY_SIGN 1.0 + +/* Type definitions. */ +#define vec2 float2 +#define vec3 float3 +#define vec4 float4 +#define mat2 float2x2 +#define mat2x2 float2x2 +#define mat3 float3x3 +#define mat4 float4x4 +#define ivec2 int2 +#define ivec3 int3 +#define ivec4 int4 +#define uvec2 uint2 +#define uvec3 uint3 +#define uvec4 uint4 +/* MTLBOOL is used for native boolean's generated by the Metal backend, to avoid type-emulation + * for GLSL bools, which are treated as integers. */ +#define MTLBOOL bool +#define bool int +#define bvec2 bool2 +#define bvec3 bool3 +#define bvec4 bool4 +#define vec3_1010102_Unorm uint +#define vec3_1010102_Inorm int + +/* Strip GLSL Decorators. */ +#define in +#define flat +#define smooth +#define noperspective +#define layout(std140) struct +#define uniform + +/* Used to replace 'out' in function parameters with threadlocal reference + * shortened to avoid expanding the glsl source string. */ +#define THD thread + +/* Generate wrapper structs for combined texture and sampler type. */ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + constant sampler *samp; \ + } +#else +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + thread sampler *samp; \ + } +#endif + +/* Add any types as needed. */ +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array); + +/* Sampler struct for argument buffer. */ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +struct SStruct { + array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]]; +}; +#endif + +/* Samplers as function parameters. */ +#define sampler1D thread _mtl_combined_image_sampler_1d<float> +#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float> +#define sampler2D thread _mtl_combined_image_sampler_2d<float> +#define depth2D thread _mtl_combined_image_sampler_depth_2d<float> +#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float> +#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define sampler3D thread _mtl_combined_image_sampler_3d<float> +#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read> +#define samplerCube thread _mtl_combined_image_sampler_cube<float> +#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float> + +#define usampler1D thread _mtl_combined_image_sampler_1d<uint> +#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint> +#define usampler2D thread _mtl_combined_image_sampler_2d<uint> +#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint> +#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint> +#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define usampler3D thread _mtl_combined_image_sampler_3d<uint> +#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read> +#define usamplerCube thread _mtl_combined_image_sampler_cube<uint> +#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint> + +#define isampler1D thread _mtl_combined_image_sampler_1d<int> +#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int> +#define isampler2D thread _mtl_combined_image_sampler_2d<int> +#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int> +#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int> +#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define isampler3D thread _mtl_combined_image_sampler_3d<int> +#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read> +#define isamplerCube thread _mtl_combined_image_sampler_cube<int> +#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int> + +/* Vector accessor aliases. */ +#define st xy + +/* Texture functions. */ +#define texelFetch _texelFetch_internal +#define texelFetchOffset(__tex, __texel, __lod, __offset) \ + _texelFetch_internal(__tex, __texel, __lod, __offset) +#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv) +#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias))) +#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod))) +#define textureLodOffset(__tex, __uv, __lod, __offset) \ + _texture_internal_level(__tex, __uv, level(float(__lod)), __offset) +#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0) +#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp) +#define textureGatherOffset(__tex, __offset, __uv, __comp) \ + _texture_gather_internal(__tex, __uv, __comp, __offset) + +#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME +#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__) +#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__) + +/* Texture-write functions. */ +#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value) + +/* Singular return values from texture functions of type DEPTH are often indexed with either .r or + * .x. This is a lightweight wrapper type for handling this syntax. */ +union _msl_return_float { + float r; + float x; + inline operator float() const + { + return r; + } +}; + +/* Add custom texture sampling/reading routines for each type to account for special return cases, + * e.g. returning a float with an r parameter Note: Cannot use template specialization for input + * type, as return types are specific to the signature of 'tex'. */ +/* Texture Read. */ +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T> +inline vec<S, 4> _texelFetch_internal( + const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + T texel, + uint lod, + T offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, 1> texel, + uint lod, + vec<T, 1> offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, int n, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, n> texel, + uint lod, + vec<T, n> offset = vec<T, n>(0)) +{ + float w = tex.texture->get_width(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0, 0)) +{ + + float w = tex.texture->get_width(); + float h = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + return tex.texture->read(uint2(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_depth() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint3(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename T, access A> +inline _msl_return_float _texelFetch_internal( + thread _mtl_combined_image_sampler_depth_2d<float, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; + return fl; + } + else { + _msl_return_float fl = {0}; + return fl; + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +/* Sample. */ +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua) +{ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline uint4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw) +{ + return tex.texture->sample(*tex.samp, uvw); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z)); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))}; + return fl; +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs) +{ + return tex.texture->sample(*tex.samp, uvs.xyz); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w)); +} + +/* Sample Level. */ +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, + float2 ua, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline uint4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, + float3 uvw, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, uvw, options, offset); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)}; + return fl; +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float4 uvac, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, + float3 uvs, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uvs.xyz, options); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, + float4 coord_a, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options); +} + +/* Sample Bias. */ +template<typename T> +inline vec<T, 4> _texture_internal_bias( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + bias options = bias(0.0), + int offset = 0) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_bias( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0.0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_bias( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +/* Texture Gather. */ +component int_to_component(const int comp) +{ + switch (comp) { + default: + case 0: + return component::x; + case 1: + return component::y; + case 2: + return component::z; + case 3: + return component::w; + } + return component::x; +} + +inline float4 _texture_gather_internal( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +/* Texture write support. */ +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) { + tex.texture->write(value, uint2(_coord.xy)); + } +} + +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + float d = tex.texture->get_depth(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && + _coord.z < d) { + tex.texture->write(value, uint3(_coord.xyz)); + } +} + +/* SSBO Vertex Fetch Mode. */ +#ifdef MTL_SSBO_VERTEX_FETCH +/* Enabled when geometry is passed via raw buffer bindings, rather than using + * vertex assembly in the vertex-descriptor. + * + * To describe the layout of input attribute data, we will generate uniforms (defaulting to 0) + * with the names per unique input attribute with name `attr`: + * + * - uniform_ssbo_stride_##attr -- Representing the stride between element. + * - uniform_ssbo_offset_##attr -- Representing the base offset within the vertex. + * - uniform_ssbo_fetchmode_##attr - Whether using per-vertex (=0) or per-instance fetch (=1). + * - uniform_ssbo_vbo_id_##attr - buffer binding index for VBO with data for this attribute. + * - uniform_ssbo_type_##attr - The type of data in the currently bound buffer. + * + * If the uniform_ssbo_type_* does not match with the desired type, then it is the responsibility + * of the shader to perform the conversion. Types should always be read as the raw attribute type, + * and then converted. e.g. If the uniform_ssbo_type_* is `int`, but we want to read it to be + * normalized to a float. + * The implementation should query the attribute type using vertex_fetch_get_attr_type(attr_name): + * + * float fweight = 0.0; + * if(vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) { + * int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int); + * fweight = (float)iweight/(float)INT32_MAX; + * } else { + * fweight = = vertex_fetch_attribute(gl_VertexID, in_weight, float); + * } + * + * Note: These uniforms are generated as part of the same data block used for regular uniforms + * and attribute data is written prior to each draw call, depending on the configuration of + * the vertex descriptor for an MTLBatch or MTLImmedaite call. */ +# define PPCAT_NX(A, B) A##B +# define PPCAT(A, B) PPCAT_NX(A, B) + +# define RESOLVE_VERTEX(v_id) \ + ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \ + ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ? MTL_INDEX_DATA_U16[v_id] : \ + MTL_INDEX_DATA_U32[v_id]) : \ + v_id) +# define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr) +# define vertex_fetch_attribute_raw(n, attr, type) \ + (reinterpret_cast<constant type *>( \ + &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \ + [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \ + ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \ + PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0] +# define vertex_fetch_attribute(n, attr, type) \ + vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type) +# define vertex_id_from_index_id(n) RESOLVE_VERTEX(n) +# define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR +# define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR +# define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr) + +/* Must mirror GPU_primitive.h. */ +# define GPU_PRIM_POINTS 0 +# define GPU_PRIM_LINES 1 +# define GPU_PRIM_TRIS 2 +# define GPU_PRIM_LINE_STRIP 3 +# define GPU_PRIM_LINE_LOOP 4 +# define GPU_PRIM_TRI_STRIP 5 +# define GPU_PRIM_TRI_FAN 6 +# define GPU_PRIM_LINES_ADJ 7 +# define GPU_PRIM_TRIS_ADJ 8 +# define GPU_PRIM_LINE_STRIP_ADJ 9 +#endif + +/* Common Functions. */ +#define dFdx(x) dfdx(x) +#define dFdy(x) dfdy(x) +#define mod(x, y) _mtlmod(x, y) +#define discard discard_fragment() +#define inversesqrt rsqrt + +inline float radians(float deg) +{ + /* Constant factor: M_PI_F/180.0. */ + return deg * 0.01745329251f; +} + +inline float degrees(float rad) +{ + /* Constant factor: 180.0/M_PI_F. */ + return rad * 57.2957795131; +} + +#define select(A, B, C) mix(A, B, C) + +/* Type conversions and type truncations. */ +inline float4 to_float4(float3 val) +{ + return float4(val, 1.0); +} + +/* Type conversions and type truncations (Utility Functions). */ +inline float3x3 mat4_to_mat3(float4x4 matrix) +{ + return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz); +} + +inline int floatBitsToInt(float f) +{ + return as_type<int>(f); +} + +inline int2 floatBitsToInt(float2 f) +{ + return as_type<int2>(f); +} + +inline int3 floatBitsToInt(float3 f) +{ + return as_type<int3>(f); +} + +inline int4 floatBitsToInt(float4 f) +{ + return as_type<int4>(f); +} + +inline uint floatBitsToUint(float f) +{ + return as_type<uint>(f); +} + +inline uint2 floatBitsToUint(float2 f) +{ + return as_type<uint2>(f); +} + +inline uint3 floatBitsToUint(float3 f) +{ + return as_type<uint3>(f); +} + +inline uint4 floatBitsToUint(float4 f) +{ + return as_type<uint4>(f); +} + +inline float intBitsToFloat(int f) +{ + return as_type<float>(f); +} + +inline float2 intBitsToFloat(int2 f) +{ + return as_type<float2>(f); +} + +inline float3 intBitsToFloat(int3 f) +{ + return as_type<float3>(f); +} + +inline float4 intBitsToFloat(int4 f) +{ + return as_type<float4>(f); +} + +/* Texture size functions. Add texture types as needed. */ +template<typename T, access A> +int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod) +{ + return int(image.texture->get_width()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod) +{ + return int2(image.texture->get_width(), image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_depth(lod)); +} + +/* Equality and comparison functions. */ +#define lessThan(a, b) ((a) < (b)) +#define lessThanEqual(a, b) ((a) <= (b)) +#define greaterThan(a, b) ((a) > (b)) +#define greaterThanEqual(a, b) ((a) >= (b)) +#define equal(a, b) ((a) == (b)) +#define notEqual(a, b) ((a) != (b)) + +template<typename T, int n> bool all(vec<T, n> x) +{ + bool _all = true; + for (int i = 0; i < n; i++) { + _all = _all && (x[i] > 0); + } + return _all; +} + +template<typename T, int n> bool any(vec<T, n> x) +{ + bool _any = false; + for (int i = 0; i < n; i++) { + _any = _any || (x[i] > 0); + } + return _any; +} + +/* Modulo functionality. */ +int _mtlmod(int a, int b) +{ + return a - b * (a / b); +} + +template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y) +{ + return x - y * floor(x / y); +} + +template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y) +{ + return x - vec<T, n>(y) * floor(x / vec<T, n>(y)); +} + +template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y) +{ + return vec<U, n>(x) - y * floor(vec<U, n>(x) / y); +} + +/* Mathematical functions. */ +template<typename T> T atan(T y, T x) +{ + return atan2(y, x); +} + +/* Matrix Inverse. */ +float4x4 inverse(float4x4 a) +{ + float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0]; + float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0]; + float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0]; + float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1]; + float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1]; + float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2]; + float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0]; + float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0]; + float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0]; + float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1]; + float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1]; + float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2]; + + float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); + + return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09, + a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09, + a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03, + a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03, + a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07, + a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07, + a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01, + a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01, + a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06, + a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06, + a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00, + a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00, + a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06, + a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06, + a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00, + a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) * + invdet; +} + +float3x3 inverse(float3x3 m) +{ + + float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) - + m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + + m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2])); + + float3x3 inverse(0); + inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]); + inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]); + inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]); + inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]); + inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]); + inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]); + inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]); + inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]); + inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]); + inverse = inverse * invdet; + + return inverse; +} + +/* Additional overloads for builtin functions. */ +float distance(float x, float y) +{ + return abs(y - x); +} + +/* Overload for mix(A, B, float ratio). */ +template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val) +{ + return mix(a, b, vec<T, Size>(val)); +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T, int Size> +vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask) +{ + vec<T, Size> result; + for (int i = 0; i < Size; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Using vec<bool, S> does not appear to work, splitting cases. */ +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask) +{ + vec<T, 4> result; + for (int i = 0; i < 4; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask) +{ + vec<T, 3> result; + for (int i = 0; i < 3; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask) +{ + vec<T, 2> result; + for (int i = 0; i < 2; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> T mix(T a, T b, MTLBOOL mask) +{ + return (mask) ? b : a; +} + +template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a) +{ + for (int i = 0; i < Size; i++) { + if (a[i] != T(0)) { + return false; + } + } + return true; +} + +/* Matrix conversion fallback. */ +mat3 MAT3(vec3 a, vec3 b, vec3 c) +{ + return mat3(a, b, c); +} +mat3 MAT3(float f) +{ + return mat3(f); +} +mat3 MAT3(mat4 m) +{ + return mat4_to_mat3(m); +}
\ No newline at end of file |