Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/KhronosGroup/SPIRV-Cross.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans-Kristian Arntzen <post@arntzen-software.no>2022-11-02 16:37:51 +0300
committerGitHub <noreply@github.com>2022-11-02 16:37:51 +0300
commitabc31207bffbc1bef4192746af44b3be1abcff17 (patch)
tree5237896ea791ea3fe3febc4ac83ec7728f738bc2
parent744279ec78ff9bd12656fffe79068ec03d9e9ed3 (diff)
parentb606e4f7525acf63e8f3eb041bf9a4863ac688b7 (diff)
Merge pull request #2052 from KhronosGroup/hlsl-mesh-shader-ext
Merge HLSL support for VK_EXT_mesh_shader
-rw-r--r--reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh90
-rw-r--r--reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh90
-rw-r--r--reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh63
-rw-r--r--reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh97
-rw-r--r--reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh87
-rw-r--r--reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk44
-rw-r--r--shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh150
-rw-r--r--shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh74
-rw-r--r--shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh64
-rw-r--r--shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh150
-rw-r--r--spirv_glsl.cpp49
-rw-r--r--spirv_glsl.hpp2
-rw-r--r--spirv_hlsl.cpp513
-rw-r--r--spirv_hlsl.hpp8
-rwxr-xr-xtest_shaders.py6
15 files changed, 1440 insertions, 47 deletions
diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..4819b14f
--- /dev/null
+++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
@@ -0,0 +1,90 @@
+struct BlockOut
+{
+ float4 a;
+ float4 b;
+};
+
+struct BlockOutPrim
+{
+ float4 a;
+ float4 b;
+};
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u);
+
+static uint3 gl_WorkGroupID;
+static uint3 gl_GlobalInvocationID;
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+ uint3 gl_WorkGroupID : SV_GroupID;
+ uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+ uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+struct gl_MeshPerVertexEXT
+{
+ float4 vOut : TEXCOORD0;
+ BlockOut outputs : TEXCOORD2;
+ float4 gl_Position : SV_Position;
+ float gl_ClipDistance[1] : SV_ClipDistance;
+ float2 gl_CullDistance : SV_CullDistance;
+};
+
+struct gl_MeshPerPrimitiveEXT
+{
+ float4 vPrim : TEXCOORD1;
+ BlockOutPrim prim_outputs : TEXCOORD4;
+ uint gl_PrimitiveID : SV_PrimitiveID;
+ uint gl_Layer : SV_RenderTargetArrayIndex;
+ uint gl_ViewportIndex : SV_ViewportArrayIndex;
+ uint gl_PrimitiveShadingRateEXT : SV_ShadingRate;
+ bool gl_CullPrimitiveEXT : SV_CullPrimitive;
+};
+
+groupshared float shared_float[16];
+
+void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22])
+{
+ SetMeshOutputCounts(24u, 22u);
+ float3 _173 = float3(gl_GlobalInvocationID);
+ float _174 = _173.x;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_174, _173.yz, 1.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_174, _173.yz, 2.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx;
+ GroupMemoryBarrierWithGroupSync();
+ if (gl_LocalInvocationIndex < 22u)
+ {
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx;
+ gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx;
+ int _229 = int(gl_GlobalInvocationID.x);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _229;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _229 + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _229 + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _229 + 3;
+ }
+}
+
+[outputtopology("line")]
+[numthreads(2, 3, 4)]
+void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22])
+{
+ gl_WorkGroupID = stage_input.gl_WorkGroupID;
+ gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+ gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+ mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT);
+}
diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..7436c463
--- /dev/null
+++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
@@ -0,0 +1,90 @@
+struct BlockOut
+{
+ float4 a;
+ float4 b;
+};
+
+struct BlockOutPrim
+{
+ float4 a;
+ float4 b;
+};
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u);
+
+static uint3 gl_WorkGroupID;
+static uint3 gl_GlobalInvocationID;
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+ uint3 gl_WorkGroupID : SV_GroupID;
+ uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+ uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+struct gl_MeshPerVertexEXT
+{
+ float4 vOut : TEXCOORD0;
+ BlockOut outputs : TEXCOORD2;
+ float4 gl_Position : SV_Position;
+ float gl_ClipDistance[1] : SV_ClipDistance;
+ float2 gl_CullDistance : SV_CullDistance;
+};
+
+struct gl_MeshPerPrimitiveEXT
+{
+ float4 vPrim : TEXCOORD1;
+ BlockOutPrim prim_outputs : TEXCOORD4;
+ uint gl_PrimitiveID : SV_PrimitiveID;
+ uint gl_Layer : SV_RenderTargetArrayIndex;
+ uint gl_ViewportIndex : SV_ViewportArrayIndex;
+ uint gl_PrimitiveShadingRateEXT : SV_ShadingRate;
+ bool gl_CullPrimitiveEXT : SV_CullPrimitive;
+};
+
+groupshared float shared_float[16];
+
+void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22])
+{
+ SetMeshOutputCounts(24u, 22u);
+ float3 _29 = float3(gl_GlobalInvocationID);
+ float _31 = _29.x;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx;
+ GroupMemoryBarrierWithGroupSync();
+ if (gl_LocalInvocationIndex < 22u)
+ {
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx;
+ gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx;
+ int _127 = int(gl_GlobalInvocationID.x);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3;
+ }
+}
+
+[outputtopology("triangle")]
+[numthreads(2, 3, 4)]
+void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22])
+{
+ gl_WorkGroupID = stage_input.gl_WorkGroupID;
+ gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+ gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+ mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT);
+}
diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
new file mode 100644
index 00000000..8fbd2915
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
@@ -0,0 +1,63 @@
+struct _12
+{
+ float _m0;
+};
+
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+ uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+struct gl_MeshPerVertexEXT
+{
+ float4 B : TEXCOORD1;
+ float4 gl_Position : SV_Position;
+};
+
+struct gl_MeshPerPrimitiveEXT
+{
+ float4 C : TEXCOORD3;
+ uint gl_PrimitiveID : SV_PrimitiveID;
+ uint gl_Layer : SV_RenderTargetArrayIndex;
+ bool gl_CullPrimitiveEXT : SV_CullPrimitive;
+};
+
+groupshared float _9[64];
+
+void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8])
+{
+ _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex);
+ GroupMemoryBarrierWithGroupSync();
+ SetMeshOutputCounts(24u, 8u);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex];
+ float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63;
+ if (gl_LocalInvocationIndex < 8u)
+ {
+ uint _71 = gl_LocalInvocationIndex * 3u;
+ gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex);
+ uint _81 = gl_LocalInvocationIndex ^ 2u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81];
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81];
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81];
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81];
+ }
+}
+
+[outputtopology("triangle")]
+[numthreads(2, 3, 4)]
+void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8])
+{
+ gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+ mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT);
+}
diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..dad35928
--- /dev/null
+++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
@@ -0,0 +1,97 @@
+struct BlockOut
+{
+ float4 a;
+ float4 b;
+};
+
+struct BlockOutPrim
+{
+ float4 a;
+ float4 b;
+};
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u);
+
+static uint3 gl_WorkGroupID;
+static uint3 gl_GlobalInvocationID;
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+ uint3 gl_WorkGroupID : SV_GroupID;
+ uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+ uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+struct gl_MeshPerVertexEXT
+{
+ float4 vOut : TEXCOORD0;
+ BlockOut outputs : TEXCOORD2;
+ float4 gl_Position : SV_Position;
+ float gl_ClipDistance[1] : SV_ClipDistance;
+ float2 gl_CullDistance : SV_CullDistance;
+};
+
+struct gl_MeshPerPrimitiveEXT
+{
+ float4 vPrim : TEXCOORD1;
+ BlockOutPrim prim_outputs : TEXCOORD4;
+ uint gl_PrimitiveID : SV_PrimitiveID;
+ uint gl_Layer : SV_RenderTargetArrayIndex;
+ uint gl_ViewportIndex : SV_ViewportArrayIndex;
+ uint gl_PrimitiveShadingRateEXT : SV_ShadingRate;
+ bool gl_CullPrimitiveEXT : SV_CullPrimitive;
+};
+
+groupshared float shared_float[16];
+
+void main3(inout uint2 gl_PrimitiveLineIndicesEXT[22], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22])
+{
+ gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
+}
+
+void main2(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22])
+{
+ SetMeshOutputCounts(24u, 22u);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx;
+ GroupMemoryBarrierWithGroupSync();
+ if (gl_LocalInvocationIndex < 22u)
+ {
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx;
+ main3(gl_PrimitiveLineIndicesEXT, gl_MeshPrimitivesEXT);
+ }
+}
+
+void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22])
+{
+ main2(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT);
+}
+
+[outputtopology("line")]
+[numthreads(2, 3, 4)]
+void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22])
+{
+ gl_WorkGroupID = stage_input.gl_WorkGroupID;
+ gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+ gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+ mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT);
+}
diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..e636453d
--- /dev/null
+++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
@@ -0,0 +1,87 @@
+struct BlockOut
+{
+ float4 a;
+ float4 b;
+};
+
+struct BlockOutPrim
+{
+ float4 a;
+ float4 b;
+};
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u);
+
+static uint3 gl_WorkGroupID;
+static uint3 gl_GlobalInvocationID;
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+ uint3 gl_WorkGroupID : SV_GroupID;
+ uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+ uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+struct gl_MeshPerVertexEXT
+{
+ float4 vOut : TEXCOORD0;
+ BlockOut outputs : TEXCOORD2;
+ float4 gl_Position : SV_Position;
+ float gl_ClipDistance[1] : SV_ClipDistance;
+ float2 gl_CullDistance : SV_CullDistance;
+};
+
+struct gl_MeshPerPrimitiveEXT
+{
+ float4 vPrim : TEXCOORD1;
+ BlockOutPrim prim_outputs : TEXCOORD4;
+ uint gl_PrimitiveID : SV_PrimitiveID;
+ uint gl_Layer : SV_RenderTargetArrayIndex;
+ uint gl_ViewportIndex : SV_ViewportArrayIndex;
+ uint gl_PrimitiveShadingRateEXT : SV_ShadingRate;
+ bool gl_CullPrimitiveEXT : SV_CullPrimitive;
+};
+
+groupshared float shared_float[16];
+
+void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22])
+{
+ SetMeshOutputCounts(24u, 22u);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx;
+ GroupMemoryBarrierWithGroupSync();
+ if (gl_LocalInvocationIndex < 22u)
+ {
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx;
+ gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
+ }
+}
+
+[outputtopology("triangle")]
+[numthreads(2, 3, 4)]
+void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22])
+{
+ gl_WorkGroupID = stage_input.gl_WorkGroupID;
+ gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+ gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+ mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT);
+}
diff --git a/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk
new file mode 100644
index 00000000..5040aa46
--- /dev/null
+++ b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk
@@ -0,0 +1,44 @@
+#version 450
+#extension GL_EXT_mesh_shader : require
+layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
+layout(max_vertices = 24, max_primitives = 8, triangles) out;
+
+struct _12
+{
+ float _m0;
+};
+
+layout(location = 1) out vec4 B[24];
+layout(location = 3) perprimitiveEXT out vec4 C[8];
+shared float _9[64];
+taskPayloadSharedEXT _12 _11;
+
+void main()
+{
+ _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex);
+ barrier();
+ SetMeshOutputsEXT(24u, 8u);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex];
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex];
+ float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u];
+ B[gl_LocalInvocationIndex].x = _63;
+ B[gl_LocalInvocationIndex].y = _63;
+ B[gl_LocalInvocationIndex].z = _63;
+ B[gl_LocalInvocationIndex].w = _63;
+ if (gl_LocalInvocationIndex < 8u)
+ {
+ uint _71 = gl_LocalInvocationIndex * 3u;
+ gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(_71, _71 + 1u, _71 + 2u);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex);
+ uint _81 = gl_LocalInvocationIndex ^ 2u;
+ C[gl_LocalInvocationIndex].x = _9[_81];
+ C[gl_LocalInvocationIndex].y = _9[_81];
+ C[gl_LocalInvocationIndex].z = _9[_81];
+ C[gl_LocalInvocationIndex].w = _9[_81];
+ }
+}
+
diff --git a/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
new file mode 100644
index 00000000..7b38001d
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
@@ -0,0 +1,150 @@
+; SPIR-V
+; Version: 1.4
+; Generator: Unknown(30017); 21022
+; Bound: 89
+; Schema: 0
+ OpCapability Shader
+ OpCapability Geometry
+ OpCapability ShaderViewportIndexLayerEXT
+ OpCapability MeshShadingEXT
+ OpExtension "SPV_EXT_mesh_shader"
+ OpExtension "SPV_EXT_shader_viewport_index_layer"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38
+ OpExecutionMode %main OutputVertices 24
+ OpExecutionMode %main OutputPrimitivesNV 8
+ OpExecutionMode %main OutputTrianglesNV
+ OpExecutionMode %main LocalSize 2 3 4
+ OpName %main "main"
+ OpName %SV_Position "SV_Position"
+ OpName %B "B"
+ OpName %SV_CullPrimitive "SV_CullPrimitive"
+ OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex"
+ OpName %SV_PrimitiveID "SV_PrimitiveID"
+ OpName %C "C"
+ OpName %indices "indices"
+ OpName %_ ""
+ OpDecorate %SV_Position BuiltIn Position
+ OpDecorate %B Location 1
+ OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT
+ OpDecorate %SV_CullPrimitive PerPrimitiveNV
+ OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer
+ OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV
+ OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId
+ OpDecorate %SV_PrimitiveID PerPrimitiveNV
+ OpDecorate %C Location 3
+ OpDecorate %C PerPrimitiveNV
+ OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT
+ OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+ %void = OpTypeVoid
+ %2 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %uint = OpTypeInt 32 0
+ %uint_24 = OpConstant %uint 24
+%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24
+%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24
+%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output
+ %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output
+ %bool = OpTypeBool
+ %uint_8 = OpConstant %uint 8
+%_arr_bool_uint_8 = OpTypeArray %bool %uint_8
+%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8
+%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output
+%_arr_uint_uint_8 = OpTypeArray %uint %uint_8
+%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8
+%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output
+%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output
+%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
+%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8
+ %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output
+ %v3uint = OpTypeVector %uint 3
+%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8
+%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8
+ %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output
+ %uint_64 = OpConstant %uint 64
+%_arr_float_uint_64 = OpTypeArray %float %uint_64
+%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64
+ %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+ %_ = OpTypeStruct %float
+%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_
+ %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+ %uint_2 = OpConstant %uint 2
+ %uint_264 = OpConstant %uint 264
+%_ptr_Output_float = OpTypePointer Output %float
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %uint_3 = OpConstant %uint 3
+%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float
+%_ptr_Output_v3uint = OpTypePointer Output %v3uint
+%_ptr_Output_bool = OpTypePointer Output %bool
+%_ptr_Output_uint = OpTypePointer Output %uint
+ %main = OpFunction %void None %2
+ %4 = OpLabel
+ OpBranch %85
+ %85 = OpLabel
+ %35 = OpLoad %uint %gl_LocalInvocationIndex
+ %39 = OpConvertUToF %float %35
+ %41 = OpAccessChain %_ptr_Workgroup_float %32 %35
+ OpStore %41 %39
+ OpControlBarrier %uint_2 %uint_2 %uint_264
+ OpSetMeshOutputsEXT %uint_24 %uint_8
+ %44 = OpLoad %float %41
+ %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0
+ OpStore %46 %44
+ %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1
+ OpStore %48 %44
+ %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2
+ OpStore %50 %44
+ %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3
+ OpStore %51 %44
+ %53 = OpBitwiseXor %uint %35 %uint_1
+ %54 = OpAccessChain %_ptr_Workgroup_float %32 %53
+ %55 = OpLoad %float %54
+ %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0
+ %58 = OpLoad %float %57
+ %59 = OpFAdd %float %58 %55
+ %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0
+ OpStore %60 %59
+ %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1
+ OpStore %61 %59
+ %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2
+ OpStore %62 %59
+ %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3
+ OpStore %63 %59
+ %64 = OpULessThan %bool %35 %uint_8
+ OpSelectionMerge %87 None
+ OpBranchConditional %64 %86 %87
+ %86 = OpLabel
+ %65 = OpIMul %uint %35 %uint_3
+ %66 = OpIAdd %uint %65 %uint_1
+ %67 = OpIAdd %uint %65 %uint_2
+ %68 = OpCompositeConstruct %v3uint %65 %66 %67
+ %70 = OpAccessChain %_ptr_Output_v3uint %indices %35
+ OpStore %70 %68
+ %71 = OpBitwiseAnd %uint %35 %uint_1
+ %72 = OpINotEqual %bool %71 %uint_0
+ %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35
+ OpStore %74 %72
+ %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35
+ OpStore %76 %35
+ %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35
+ OpStore %77 %35
+ %78 = OpBitwiseXor %uint %35 %uint_2
+ %79 = OpAccessChain %_ptr_Workgroup_float %32 %78
+ %80 = OpLoad %float %79
+ %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0
+ OpStore %81 %80
+ %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1
+ OpStore %82 %80
+ %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2
+ OpStore %83 %80
+ %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3
+ OpStore %84 %80
+ OpBranch %87
+ %87 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..4f9500fe
--- /dev/null
+++ b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh
@@ -0,0 +1,74 @@
+#version 450
+#extension GL_EXT_mesh_shader : require
+layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;
+layout(lines, max_vertices = 24, max_primitives = 22) out;
+
+out gl_MeshPerVertexEXT
+{
+ vec4 gl_Position;
+ float gl_PointSize;
+ float gl_ClipDistance[1];
+ float gl_CullDistance[2];
+} gl_MeshVerticesEXT[];
+
+layout(location = 0) out vec4 vOut[];
+layout(location = 1) perprimitiveEXT out vec4 vPrim[];
+
+layout(location = 2) out BlockOut
+{
+ vec4 a;
+ vec4 b;
+} outputs[];
+
+layout(location = 4) perprimitiveEXT out BlockOutPrim
+{
+ vec4 a;
+ vec4 b;
+} prim_outputs[];
+
+shared float shared_float[16];
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c;
+};
+
+taskPayloadSharedEXT TaskPayload payload;
+
+void main3()
+{
+ gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1);
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
+}
+
+void main2()
+{
+ SetMeshOutputsEXT(24, 22);
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0);
+ // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
+ vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0);
+ outputs[gl_LocalInvocationIndex].a = vec4(5.0);
+ outputs[gl_LocalInvocationIndex].b = vec4(6.0);
+ barrier();
+ if (gl_LocalInvocationIndex < 22)
+ {
+ vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0);
+ prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
+ prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
+ main3();
+ }
+}
+
+void main()
+{
+ main2();
+}
diff --git a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
new file mode 100644
index 00000000..4d8e3f64
--- /dev/null
+++ b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh
@@ -0,0 +1,64 @@
+#version 450
+#extension GL_EXT_mesh_shader : require
+layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; // 2 * 3 * 4 = 24 invocations per workgroup
+layout(triangles, max_vertices = 24, max_primitives = 22) out; // triangle topology; main() declares exactly these counts via SetMeshOutputsEXT
+
+out gl_MeshPerVertexEXT
+{
+ vec4 gl_Position;
+ float gl_PointSize; // declared but never written: its only assignment in main() is commented out
+ float gl_ClipDistance[1];
+ float gl_CullDistance[2];
+} gl_MeshVerticesEXT[];
+
+layout(location = 0) out vec4 vOut[]; // plain per-vertex output
+layout(location = 1) perprimitiveEXT out vec4 vPrim[]; // plain per-primitive output
+
+layout(location = 2) out BlockOut // per-vertex block; the two vec4 members occupy locations 2 and 3
+{
+ vec4 a;
+ vec4 b;
+} outputs[];
+
+layout(location = 4) perprimitiveEXT out BlockOutPrim // per-primitive block; the two vec4 members occupy locations 4 and 5
+{
+ vec4 a;
+ vec4 b;
+} prim_outputs[];
+
+shared float shared_float[16]; // not referenced anywhere else in this shader
+
+struct TaskPayload
+{
+ float a;
+ float b;
+ int c; // never read by main(); only a and b are used
+};
+
+taskPayloadSharedEXT TaskPayload payload;
+
+void main() // Writes per-vertex outputs for every invocation, then per-primitive outputs and triangle indices for the first 22.
+{
+ SetMeshOutputsEXT(24, 22); // (vertex count, primitive count), equal to max_vertices / max_primitives above
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0);
+ // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0;
+ gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0;
+ vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0);
+ outputs[gl_LocalInvocationIndex].a = vec4(5.0);
+ outputs[gl_LocalInvocationIndex].b = vec4(6.0);
+ barrier();
+ if (gl_LocalInvocationIndex < 22) // one invocation per primitive; 22 primitives were declared above
+ {
+ vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0);
+ prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a);
+ prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b);
+ gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; // scalar is added to every component: triangle i uses vertices (i, i + 1, i + 2)
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); // the next builtins reuse this base value with offsets +1, +2 and +3
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2;
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); // true when the global x id is odd
+ gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3;
+ }
+}
diff --git a/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
new file mode 100644
index 00000000..7b38001d
--- /dev/null
+++ b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh
@@ -0,0 +1,150 @@
+; SPIR-V
+; Version: 1.4
+; Generator: Unknown(30017); 21022
+; Bound: 89
+; Schema: 0
+ OpCapability Shader
+ OpCapability Geometry
+ OpCapability ShaderViewportIndexLayerEXT
+ OpCapability MeshShadingEXT
+ OpExtension "SPV_EXT_mesh_shader"
+ OpExtension "SPV_EXT_shader_viewport_index_layer"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38
+ OpExecutionMode %main OutputVertices 24
+ OpExecutionMode %main OutputPrimitivesNV 8
+ OpExecutionMode %main OutputTrianglesNV
+ OpExecutionMode %main LocalSize 2 3 4 ; 2 * 3 * 4 = 24 invocations per workgroup
+ OpName %main "main"
+ OpName %SV_Position "SV_Position"
+ OpName %B "B"
+ OpName %SV_CullPrimitive "SV_CullPrimitive"
+ OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex"
+ OpName %SV_PrimitiveID "SV_PrimitiveID"
+ OpName %C "C"
+ OpName %indices "indices"
+ OpName %_ "" ; the task payload struct type is deliberately given an empty name
+ OpDecorate %SV_Position BuiltIn Position ; every builtin here is a standalone variable; no Block decoration appears in this module
+ OpDecorate %B Location 1
+ OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT
+ OpDecorate %SV_CullPrimitive PerPrimitiveNV
+ OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer
+ OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV
+ OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId
+ OpDecorate %SV_PrimitiveID PerPrimitiveNV
+ OpDecorate %C Location 3
+ OpDecorate %C PerPrimitiveNV
+ OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT
+ OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+ %void = OpTypeVoid
+ %2 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %uint = OpTypeInt 32 0
+ %uint_24 = OpConstant %uint 24
+%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24
+%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24
+%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output ; per-vertex arrays hold 24 elements (OutputVertices)
+ %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output
+ %bool = OpTypeBool
+ %uint_8 = OpConstant %uint 8
+%_arr_bool_uint_8 = OpTypeArray %bool %uint_8
+%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8
+%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output ; per-primitive arrays hold 8 elements (OutputPrimitivesNV)
+%_arr_uint_uint_8 = OpTypeArray %uint %uint_8
+%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8
+%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output
+%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output
+%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
+%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8
+ %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output
+ %v3uint = OpTypeVector %uint 3
+%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8
+%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8
+ %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output
+ %uint_64 = OpConstant %uint 64
+%_arr_float_uint_64 = OpTypeArray %float %uint_64
+%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64
+ %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup ; unnamed workgroup-shared array of 64 floats
+%_ptr_Input_uint = OpTypePointer Input %uint
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+ %_ = OpTypeStruct %float
+%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_
+ %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT ; unnamed task payload variable: a struct with a single float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+ %uint_2 = OpConstant %uint 2
+ %uint_264 = OpConstant %uint 264 ; 0x108 = AcquireRelease (0x8) | WorkgroupMemory (0x100)
+%_ptr_Output_float = OpTypePointer Output %float
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %uint_3 = OpConstant %uint 3
+%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float
+%_ptr_Output_v3uint = OpTypePointer Output %v3uint
+%_ptr_Output_bool = OpTypePointer Output %bool
+%_ptr_Output_uint = OpTypePointer Output %uint
+ %main = OpFunction %void None %2
+ %4 = OpLabel
+ OpBranch %85
+ %85 = OpLabel
+ %35 = OpLoad %uint %gl_LocalInvocationIndex ; %35 = local invocation index, used as the array index for every access below
+ %39 = OpConvertUToF %float %35
+ %41 = OpAccessChain %_ptr_Workgroup_float %32 %35
+ OpStore %41 %39 ; shared[index] = float(index)
+ OpControlBarrier %uint_2 %uint_2 %uint_264 ; execution scope and memory scope are both Workgroup (2)
+ OpSetMeshOutputsEXT %uint_24 %uint_8 ; 24 vertices, 8 primitives
+ %44 = OpLoad %float %41
+ %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 ; the same scalar %44 is stored to all four components of SV_Position[index]
+ OpStore %46 %44
+ %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1
+ OpStore %48 %44
+ %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2
+ OpStore %50 %44
+ %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3
+ OpStore %51 %44
+ %53 = OpBitwiseXor %uint %35 %uint_1
+ %54 = OpAccessChain %_ptr_Workgroup_float %32 %53
+ %55 = OpLoad %float %54 ; shared[index ^ 1], a value written by a different invocation
+ %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0
+ %58 = OpLoad %float %57 ; the payload's only member
+ %59 = OpFAdd %float %58 %55
+ %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 ; %59 is stored to all four components of B[index]
+ OpStore %60 %59
+ %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1
+ OpStore %61 %59
+ %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2
+ OpStore %62 %59
+ %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3
+ OpStore %63 %59
+ %64 = OpULessThan %bool %35 %uint_8 ; per-primitive writes are limited to index < 8, the primitive count
+ OpSelectionMerge %87 None
+ OpBranchConditional %64 %86 %87
+ %86 = OpLabel
+ %65 = OpIMul %uint %35 %uint_3
+ %66 = OpIAdd %uint %65 %uint_1
+ %67 = OpIAdd %uint %65 %uint_2
+ %68 = OpCompositeConstruct %v3uint %65 %66 %67 ; triangle = (3 * index, 3 * index + 1, 3 * index + 2)
+ %70 = OpAccessChain %_ptr_Output_v3uint %indices %35
+ OpStore %70 %68
+ %71 = OpBitwiseAnd %uint %35 %uint_1
+ %72 = OpINotEqual %bool %71 %uint_0 ; true when index is odd
+ %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35
+ OpStore %74 %72
+ %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35
+ OpStore %76 %35 ; primitive id = index
+ %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35
+ OpStore %77 %35 ; layer = index
+ %78 = OpBitwiseXor %uint %35 %uint_2
+ %79 = OpAccessChain %_ptr_Workgroup_float %32 %78
+ %80 = OpLoad %float %79 ; shared[index ^ 2]
+ %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 ; %80 is stored to all four components of C[index]
+ OpStore %81 %80
+ %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1
+ OpStore %82 %80
+ %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2
+ OpStore %83 %80
+ %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3
+ OpStore %84 %80
+ OpBranch %87
+ %87 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index ddf1f76f..31af3b2d 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -3146,9 +3146,30 @@ void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model)
{
auto flags = get_buffer_block_flags(var.self);
if (flags.get(DecorationPerPrimitiveEXT))
+ {
set_name(var.self, "gl_MeshPrimitivesEXT");
+ set_name(type.self, "gl_MeshPerPrimitiveEXT");
+ }
else
+ {
set_name(var.self, "gl_MeshVerticesEXT");
+ set_name(type.self, "gl_MeshPerVertexEXT");
+ }
+ }
+ }
+
+ if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block)
+ {
+ auto *m = ir.find_meta(var.self);
+ if (m && m->decoration.builtin)
+ {
+ auto builtin_type = m->decoration.builtin_type;
+ if (builtin_type == BuiltInPrimitivePointIndicesEXT)
+ set_name(var.self, "gl_PrimitivePointIndicesEXT");
+ else if (builtin_type == BuiltInPrimitiveLineIndicesEXT)
+ set_name(var.self, "gl_PrimitiveLineIndicesEXT");
+ else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT)
+ set_name(var.self, "gl_PrimitiveTriangleIndicesEXT");
}
}
});
@@ -9323,6 +9344,14 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
break;
}
}
+ else if (backend.force_merged_mesh_block && i == 0 && var &&
+ !is_builtin_variable(*var) && var->storage == StorageClassOutput)
+ {
+ if (is_per_primitive_variable(*var))
+ expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr);
+ else
+ expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr);
+ }
else if (options.flatten_multidimensional_arrays && dimension_flatten)
{
// If we are flattening multidimensional arrays, do manual stride computation.
@@ -9372,7 +9401,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
- BuiltIn builtin;
+ BuiltIn builtin = BuiltInMax;
if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
{
if (access_chain_is_arrayed)
@@ -14120,7 +14149,7 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
res += "shared ";
- else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT)
+ else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied)
res += "taskPayloadSharedEXT ";
res += to_interpolation_qualifiers(flags);
@@ -17365,6 +17394,22 @@ bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var,
}
}
+bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const // True if var is decorated PerPrimitiveEXT itself, or is a Block whose members are all decorated PerPrimitiveEXT.
+{
+ if (has_decoration(var.self, DecorationPerPrimitiveEXT)) // decoration on the variable settles it immediately
+ return true;
+
+ auto &type = get<SPIRType>(var.basetype);
+ if (!has_decoration(type.self, DecorationBlock)) // non-block types have no member decorations to fall back on
+ return false;
+
+ for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++)
+ if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT))
+ return false; // a single undecorated member disqualifies the whole block
+
+ return true; // every member is decorated; a Block with zero members also reaches this line
+}
+
bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
{
return masked_output_locations.count({ location, component }) != 0;
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index f8d17259..2ccb36cb 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -602,6 +602,7 @@ protected:
bool allow_precision_qualifiers = false;
bool can_swizzle_scalar = false;
bool force_gl_in_out_block = false;
+ bool force_merged_mesh_block = false;
bool can_return_array = true;
bool allow_truncated_access_chain = false;
bool supports_extensions = false;
@@ -982,6 +983,7 @@ protected:
bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const;
bool is_stage_output_variable_masked(const SPIRVariable &var) const;
bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const;
+ bool is_per_primitive_variable(const SPIRVariable &var) const;
uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const;
uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const;
std::unordered_set<LocationComponentPair, InternalHasher> masked_output_locations;
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 65c9882b..48aabef4 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -603,36 +603,80 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
break;
case BuiltInClipDistance:
+ {
+ static const char *types[] = { "float", "float2", "float3", "float4" };
+
// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
- for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
+ if (execution.model == ExecutionModelMeshEXT)
{
- uint32_t to_declare = clip_distance_count - clip;
- if (to_declare > 4)
- to_declare = 4;
+ if (clip_distance_count > 4)
+ SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders.");
- uint32_t semantic_index = clip / 4;
+ if (clip_distance_count == 1)
+ {
+ // Avoids having to hack up access_chain code. Makes it trivially indexable.
+ statement("float gl_ClipDistance[1] : SV_ClipDistance;");
+ }
+ else
+ {
+ // Replace array with vector directly, avoids any weird fixup path.
+ statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;");
+ }
+ }
+ else
+ {
+ for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
+ {
+ uint32_t to_declare = clip_distance_count - clip;
+ if (to_declare > 4)
+ to_declare = 4;
- static const char *types[] = { "float", "float2", "float3", "float4" };
- statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
- " : SV_ClipDistance", semantic_index, ";");
+ uint32_t semantic_index = clip / 4;
+
+ statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
+ " : SV_ClipDistance", semantic_index, ";");
+ }
}
break;
+ }
case BuiltInCullDistance:
+ {
+ static const char *types[] = { "float", "float2", "float3", "float4" };
+
// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
- for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
+ if (execution.model == ExecutionModelMeshEXT)
{
- uint32_t to_declare = cull_distance_count - cull;
- if (to_declare > 4)
- to_declare = 4;
+ if (cull_distance_count > 4)
+ SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");
- uint32_t semantic_index = cull / 4;
+ if (cull_distance_count == 1)
+ {
+ // Avoids having to hack up access_chain code. Makes it trivially indexable.
+ statement("float gl_CullDistance[1] : SV_CullDistance;");
+ }
+ else
+ {
+ // Replace array with vector directly, avoids any weird fixup path.
+ statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
+ }
+ }
+ else
+ {
+ for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
+ {
+ uint32_t to_declare = cull_distance_count - cull;
+ if (to_declare > 4)
+ to_declare = 4;
- static const char *types[] = { "float", "float2", "float3", "float4" };
- statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
- " : SV_CullDistance", semantic_index, ";");
+ uint32_t semantic_index = cull / 4;
+
+ statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
+ " : SV_CullDistance", semantic_index, ";");
+ }
}
break;
+ }
case BuiltInPointSize:
// If point_size_compat is enabled, just ignore PointSize.
@@ -644,14 +688,69 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
case BuiltInLayer:
- if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelGeometry)
- SPIRV_CROSS_THROW("Render target array index output is only supported in GS 5.0 or higher.");
+ case BuiltInPrimitiveId:
+ case BuiltInViewportIndex:
+ case BuiltInPrimitiveShadingRateKHR:
+ case BuiltInCullPrimitiveEXT:
+ // per-primitive attributes handled separately
+ break;
+
+ case BuiltInPrimitivePointIndicesEXT:
+ case BuiltInPrimitiveLineIndicesEXT:
+ case BuiltInPrimitiveTriangleIndicesEXT:
+ // meshlet local-index buffer handled separately
+ break;
+
+ default:
+ SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
+ }
+
+ if (type && semantic)
+ statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
+ });
+}
+
+void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
+{
+ active_output_builtins.for_each_bit([&](uint32_t i) {
+ const char *type = nullptr;
+ const char *semantic = nullptr;
+ auto builtin = static_cast<BuiltIn>(i);
+ switch (builtin)
+ {
+ case BuiltInLayer:
+ {
+ const ExecutionModel model = get_entry_point().model;
+ if (hlsl_options.shader_model < 50 ||
+ (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT))
+ SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 5.0 or higher.");
type = "uint";
semantic = "SV_RenderTargetArrayIndex";
break;
+ }
+
+ case BuiltInPrimitiveId:
+ type = "uint";
+ semantic = "SV_PrimitiveID";
+ break;
+
+ case BuiltInViewportIndex:
+ type = "uint";
+ semantic = "SV_ViewportArrayIndex";
+ break;
+
+ case BuiltInPrimitiveShadingRateKHR:
+ type = "uint";
+ semantic = "SV_ShadingRate";
+ break;
+
+ case BuiltInCullPrimitiveEXT:
+ type = "bool";
+ semantic = "SV_CullPrimitive";
+ break;
default:
- SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
+ break;
}
if (type && semantic)
@@ -981,17 +1080,25 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord
}
else
{
- statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ",
+ auto decl_type = type;
+ if (execution.model == ExecutionModelMeshEXT)
+ {
+ decl_type.array.erase(decl_type.array.begin());
+ decl_type.array_size_literal.erase(decl_type.array_size_literal.begin());
+ }
+ statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ",
semantic, ";");
// Structs and arrays should consume more locations.
- uint32_t consumed_locations = type_to_consumed_locations(type);
+ uint32_t consumed_locations = type_to_consumed_locations(decl_type);
for (uint32_t i = 0; i < consumed_locations; i++)
active_locations.insert(location_number + i);
}
}
else
+ {
statement(variable_decl(type, name), " : ", binding, ";");
+ }
}
std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage)
@@ -1071,6 +1178,18 @@ void CompilerHLSL::emit_builtin_variables()
if (init_itr != builtin_to_initializer.end())
init_expr = join(" = ", to_expression(init_itr->second));
+ if (get_execution_model() == ExecutionModelMeshEXT)
+ {
+ if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
+ builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId ||
+ builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT ||
+ builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT ||
+ builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT)
+ {
+ return;
+ }
+ }
+
switch (builtin)
{
case BuiltInFragCoord:
@@ -1171,6 +1290,13 @@ void CompilerHLSL::emit_builtin_variables()
type = "uint";
break;
+ case BuiltInViewportIndex:
+ case BuiltInPrimitiveShadingRateKHR:
+ case BuiltInPrimitiveLineIndicesEXT:
+ case BuiltInCullPrimitiveEXT:
+ type = "uint";
+ break;
+
default:
SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
}
@@ -1365,12 +1491,12 @@ void CompilerHLSL::replace_illegal_names()
"double", "DomainShader", "dword",
"else", "export", "false", "float", "for", "fxgroup",
"GeometryShader", "groupshared", "half", "HullShader",
- "if", "in", "inline", "inout", "InputPatch", "int", "interface",
+ "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface",
"line", "lineadj", "linear", "LineStream",
"matrix", "min16float", "min10float", "min16int", "min16uint",
"namespace", "nointerpolation", "noperspective", "NULL",
"out", "OutputPatch",
- "packoffset", "pass", "pixelfragment", "PixelShader", "point",
+ "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point",
"PointStream", "precise", "RasterizerState", "RenderTargetView",
"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
@@ -1381,7 +1507,7 @@ void CompilerHLSL::replace_illegal_names()
"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
- "vector", "vertexfragment", "VertexShader", "void", "volatile", "while",
+ "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while",
};
CompilerGLSL::replace_illegal_names(keywords);
@@ -1415,6 +1541,19 @@ void CompilerHLSL::emit_resources()
replace_illegal_names();
+ switch (execution.model)
+ {
+ case ExecutionModelGeometry:
+ case ExecutionModelTessellationControl:
+ case ExecutionModelTessellationEvaluation:
+ case ExecutionModelMeshEXT:
+ fixup_implicit_builtin_block_names(execution.model);
+ break;
+
+ default:
+ break;
+ }
+
emit_specialization_constants_and_structs();
emit_composite_constants();
@@ -1487,18 +1626,21 @@ void CompilerHLSL::emit_resources()
// Emit builtin input and output variables here.
emit_builtin_variables();
- ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
- auto &type = this->get<SPIRType>(var.basetype);
+ if (execution.model != ExecutionModelMeshEXT)
+ {
+ ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
+ auto &type = this->get<SPIRType>(var.basetype);
- if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
- (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
- interface_variable_exists_in_entry_point(var.self))
- {
- // Builtin variables are handled separately.
- emit_interface_block_globally(var);
- emitted = true;
- }
- });
+ if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
+ (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
+ interface_variable_exists_in_entry_point(var.self))
+ {
+ // Builtin variables are handled separately.
+ emit_interface_block_globally(var);
+ emitted = true;
+ }
+ });
+ }
if (emitted)
statement("");
@@ -1612,23 +1754,48 @@ void CompilerHLSL::emit_resources()
statement("");
}
+ const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT;
if (!output_variables.empty() || !active_output_builtins.empty())
{
- require_output = true;
- statement("struct SPIRV_Cross_Output");
+ sort(output_variables.begin(), output_variables.end(), variable_compare);
+ require_output = !is_mesh_shader;
+ statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output");
begin_scope();
- sort(output_variables.begin(), output_variables.end(), variable_compare);
for (auto &var : output_variables)
{
- if (var.block)
+ if (is_per_primitive_variable(*var.var))
+ continue;
+ if (var.block && is_mesh_shader && var.block_member_index != 0)
+ continue;
+ if (var.block && !is_mesh_shader)
emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs);
else
emit_interface_block_in_struct(*var.var, active_outputs);
}
emit_builtin_outputs_in_struct();
+ if (!is_mesh_shader)
+ emit_builtin_primitive_outputs_in_struct();
end_scope_decl();
statement("");
+
+ if (is_mesh_shader)
+ {
+ statement("struct gl_MeshPerPrimitiveEXT");
+ begin_scope();
+ for (auto &var : output_variables)
+ {
+ if (!is_per_primitive_variable(*var.var))
+ continue;
+ if (var.block && var.block_member_index != 0)
+ continue;
+
+ emit_interface_block_in_struct(*var.var, active_outputs);
+ }
+ emit_builtin_primitive_outputs_in_struct();
+ end_scope_decl();
+ statement("");
+ }
}
// Global variables.
@@ -1638,7 +1805,8 @@ void CompilerHLSL::emit_resources()
if (is_hidden_variable(var, true))
continue;
- if (var.storage != StorageClassOutput)
+ if (var.storage != StorageClassOutput &&
+ var.storage != StorageClassTaskPayloadWorkgroupEXT)
{
if (!variable_is_lut(var))
{
@@ -2164,6 +2332,194 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char
}
}
+void CompilerHLSL::analyze_meshlet_writes() // Finds or synthesizes the per-vertex / per-primitive output block variables, then scans the call graph starting at the default entry point.
+{
+ uint32_t id_per_vertex = 0; // 0 means no such block variable is known yet (tested below before synthesizing)
+ uint32_t id_per_primitive = 0;
+ bool need_per_primitive = false;
+ bool need_per_vertex = false;
+
+ ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
+ auto &type = this->get<SPIRType>(var.basetype);
+ bool block = has_decoration(type.self, DecorationBlock);
+ if (var.storage == StorageClassOutput && block && is_builtin_variable(var))
+ { // a builtin output block already exists in the module: remember its variable ID
+ auto flags = get_buffer_block_flags(var.self);
+ if (flags.get(DecorationPerPrimitiveEXT))
+ id_per_primitive = var.self;
+ else
+ id_per_vertex = var.self;
+ }
+ else if (var.storage == StorageClassOutput)
+ { // any other output only records which kind of block will be needed
+ Bitset flags;
+ if (block)
+ flags = get_buffer_block_flags(var.self);
+ else
+ flags = get_decoration_bitset(var.self);
+
+ if (flags.get(DecorationPerPrimitiveEXT))
+ need_per_primitive = true;
+ else
+ need_per_vertex = true;
+ }
+ });
+
+ // If we have per-primitive outputs, and no per-primitive builtins,
+ // empty version of gl_MeshPerPrimitiveEXT will be emitted.
+ // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.
+
+ const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t { // creates struct type -> array type -> pointer type -> variable and returns the variable ID
+ auto &execution = get_entry_point();
+
+ uint32_t op_type = ir.increase_bound_by(4); // four new IDs, used consecutively for the struct type, array type, pointer type and variable
+ uint32_t op_arr = op_type + 1;
+ uint32_t op_ptr = op_type + 2;
+ uint32_t op_var = op_type + 3;
+
+ auto &type = set<SPIRType>(op_type);
+ type.basetype = SPIRType::Struct; // no members are added to the struct here
+ set_name(op_type, block_name);
+ set_decoration(op_type, DecorationBlock);
+ if (per_primitive)
+ set_decoration(op_type, DecorationPerPrimitiveEXT);
+
+ auto &arr = set<SPIRType>(op_arr, type);
+ arr.parent_type = type.self;
+ arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices); // array length comes from the entry point's output primitive / vertex count
+ arr.array_size_literal.push_back(true);
+
+ auto &ptr = set<SPIRType>(op_ptr, arr);
+ ptr.parent_type = arr.self;
+ ptr.pointer = true;
+ ptr.pointer_depth++;
+ ptr.storage = StorageClassOutput;
+ set_decoration(op_ptr, DecorationBlock);
+ set_name(op_ptr, block_name);
+
+ auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
+ if (per_primitive)
+ set_decoration(op_var, DecorationPerPrimitiveEXT);
+ set_name(op_var, instance_name);
+ execution.interface_variables.push_back(var.self); // register the new variable in the entry point's interface list
+
+ return op_var;
+ };
+
+ if (id_per_vertex == 0 && need_per_vertex) // synthesize only when such outputs exist and no builtin block of that kind was found
+ id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
+ if (id_per_primitive == 0 && need_per_primitive)
+ id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
+
+ unordered_set<uint32_t> processed_func_ids; // shared across the recursion so each function body is scanned once
+ analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
+}
+
+void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
+ std::unordered_set<uint32_t> &processed_func_ids)
+{ // Scans func_id's instructions and appends to its argument list every mesh output / task payload variable that it, or a function it calls, touches.
+ // Avoid processing a function more than once
+ if (processed_func_ids.find(func_id) != processed_func_ids.end())
+ return;
+ processed_func_ids.insert(func_id);
+
+ auto &func = get<SPIRFunction>(func_id);
+ // Recursively establish global args added to functions on which we depend.
+ for (auto& block : func.blocks)
+ {
+ auto &b = get<SPIRBlock>(block);
+ for (auto &i : b.ops)
+ {
+ auto ops = stream(i);
+ auto op = static_cast<Op>(i.op);
+
+ switch (op)
+ {
+ case OpFunctionCall:
+ {
+ // Then recurse into the function itself to extract globals used internally in the function
+ uint32_t inner_func_id = ops[2]; // OpFunctionCall operands: result type, result id, then the callee's function id
+ analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
+ auto &inner_func = get<SPIRFunction>(inner_func_id);
+ for (auto &iarg : inner_func.arguments) // runs even when the recursive call returned early, so every caller inherits the callee's arguments
+ {
+ if (!iarg.alias_global_variable) // only arguments flagged as aliasing a global are propagated upward
+ continue;
+
+ bool already_declared = false;
+ for (auto &arg : func.arguments)
+ {
+ if (arg.id == iarg.id)
+ {
+ already_declared = true;
+ break;
+ }
+ }
+
+ if (!already_declared)
+ {
+ // basetype is effectively ignored here since we declare the argument
+ // with explicit types. Just pass down a valid type.
+ func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
+ iarg.read_count, iarg.write_count, true });
+ }
+ }
+ break;
+ }
+
+ case OpStore:
+ case OpLoad:
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ case OpPtrAccessChain:
+ case OpInBoundsPtrAccessChain:
+ case OpArrayLength:
+ {
+ auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]); // OpStore has its pointer at operand 0; the other opcodes have result type and result id first, so the pointer/base is operand 2
+ if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
+ {
+ bool already_declared = false;
+ auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
+
+ uint32_t var_id = var->self; // task payload and primitive index builtins keep their own variable ID
+ if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
+ builtin_type != BuiltInPrimitivePointIndicesEXT &&
+ builtin_type != BuiltInPrimitiveLineIndicesEXT &&
+ builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
+ {
+ var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex; // every other output is redirected to the merged per-primitive or per-vertex block variable
+ }
+
+ for (auto &arg : func.arguments)
+ {
+ if (arg.id == var_id)
+ {
+ already_declared = true;
+ break;
+ }
+ }
+
+ if (!already_declared)
+ {
+ // basetype is effectively ignored here since we declare the argument
+ // with explicit types. Just pass down a valid type.
+ uint32_t type_id = expression_type_id(var_id);
+ if (var->storage == StorageClassTaskPayloadWorkgroupEXT)
+ func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); // same field order as the push_back above: read_count 1, write_count 0 for the payload
+ else
+ func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); // outputs are recorded with read_count 1 and write_count 1
+ }
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+}
+
string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
auto &flags = get_member_decoration_bitset(type.self, index);
@@ -2459,6 +2815,8 @@ string CompilerHLSL::get_inner_entry_point_name() const
return "frag_main";
else if (execution.model == ExecutionModelGLCompute)
return "comp_main";
+ else if (execution.model == ExecutionModelMeshEXT)
+ return "mesh_main";
else
SPIRV_CROSS_THROW("Unsupported execution model.");
}
@@ -2572,8 +2930,58 @@ void CompilerHLSL::emit_hlsl_entry_point()
switch (execution.model)
{
+ case ExecutionModelMeshEXT:
+ case ExecutionModelMeshNV:
case ExecutionModelGLCompute:
{
+ if (execution.model == ExecutionModelMeshEXT)
+ {
+ if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
+ statement("[outputtopology(\"triangle\")]");
+ else if (execution.flags.get(ExecutionModeOutputLinesEXT))
+ statement("[outputtopology(\"line\")]");
+ else if (execution.flags.get(ExecutionModeOutputPoints))
+ SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX");
+
+ auto &func = get<SPIRFunction>(ir.default_entry_point);
+ for (auto &arg : func.arguments)
+ {
+ auto &var = get<SPIRVariable>(arg.id);
+ auto &base_type = get<SPIRType>(var.basetype);
+ bool block = has_decoration(base_type.self, DecorationBlock);
+ if (var.storage == StorageClassTaskPayloadWorkgroupEXT)
+ {
+ arguments.push_back("in payload " + variable_decl(var));
+ }
+ else if (block)
+ {
+ auto flags = get_buffer_block_flags(var.self);
+ if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT))
+ {
+ arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" +
+ std::to_string(execution.output_primitives) + "]");
+ }
+ else
+ {
+ arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" +
+ std::to_string(execution.output_vertices) + "]");
+ }
+ }
+ else
+ {
+ if (execution.flags.get(ExecutionModeOutputTrianglesEXT))
+ {
+ arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" +
+ std::to_string(execution.output_primitives) + "]");
+ }
+ else
+ {
+ arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" +
+ std::to_string(execution.output_primitives) + "]");
+ }
+ }
+ }
+ }
SpecializationConstant wg_x, wg_y, wg_z;
get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
@@ -2795,9 +3203,18 @@ void CompilerHLSL::emit_hlsl_entry_point()
// Run the shader.
if (execution.model == ExecutionModelVertex ||
execution.model == ExecutionModelFragment ||
- execution.model == ExecutionModelGLCompute)
- {
- statement(get_inner_entry_point_name(), "();");
+ execution.model == ExecutionModelGLCompute ||
+ execution.model == ExecutionModelMeshEXT)
+ {
+ // For mesh shaders, we receive special arguments that we must pass down as function arguments.
+ // HLSL does not support proper reference types for passing these IO blocks,
+ // but DXC post-inlining seems to magically fix it up anyways *shrug*.
+ SmallVector<string> arglist;
+ auto &func = get<SPIRFunction>(ir.default_entry_point);
+ // The arguments are marked out, avoid detecting reads and emitting inout.
+ for (auto &arg : func.arguments)
+ arglist.push_back(to_expression(arg.id, false));
+ statement(get_inner_entry_point_name(), "(", merge(arglist), ");");
}
else
SPIRV_CROSS_THROW("Unsupported shader stage.");
@@ -5926,6 +6343,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false);
break;
}
+ case OpSetMeshOutputsEXT:
+ {
+ statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");");
+ break;
+ }
+
default:
CompilerGLSL::emit_instruction(instruction);
break;
@@ -6126,6 +6549,8 @@ string CompilerHLSL::compile()
backend.can_return_array = false;
backend.nonuniform_qualifier = "NonUniformResourceIndex";
backend.support_case_fallthrough = false;
+ backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT;
+ backend.force_gl_in_out_block = backend.force_merged_mesh_block;
// SM 4.1 does not support precise for some reason.
backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;
@@ -6138,6 +6563,8 @@ string CompilerHLSL::compile()
update_active_builtins();
analyze_image_and_sampler_usage();
analyze_interlocked_resource_usage();
+ if (get_execution_model() == ExecutionModelMeshEXT)
+ analyze_meshlet_writes();
// Subpass input needs SV_Position.
if (need_subpass_input)
diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp
index 41ce73bf..77382363 100644
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@@ -233,11 +233,11 @@ private:
void declare_undefined_values() override;
void emit_interface_block_globally(const SPIRVariable &type);
void emit_interface_block_in_struct(const SPIRVariable &var, std::unordered_set<uint32_t> &active_locations);
- void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index,
- uint32_t location,
+ void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, uint32_t location,
std::unordered_set<uint32_t> &active_locations);
void emit_builtin_inputs_in_struct();
void emit_builtin_outputs_in_struct();
+ void emit_builtin_primitive_outputs_in_struct();
void emit_texture_op(const Instruction &i, bool sparse) override;
void emit_instruction(const Instruction &instruction) override;
void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
@@ -355,6 +355,10 @@ private:
TypeUnpackUint64
};
+ void analyze_meshlet_writes();
+ void analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
+ std::unordered_set<uint32_t> &processed_func_ids);
+
BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0);
void emit_builtin_variables();
diff --git a/test_shaders.py b/test_shaders.py
index 31ec70a0..a9e2a2db 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -384,6 +384,10 @@ def shader_model_hlsl(shader):
return '-Tps_5_1'
elif '.comp' in shader:
return '-Tcs_5_1'
+ elif '.mesh' in shader:
+ return '-Tms_6_5'
+ elif '.task' in shader:
+ return '-Tas_6_5'
else:
return None
@@ -408,6 +412,8 @@ def validate_shader_hlsl(shader, force_no_external_validation, paths):
test_glslang = False
if '.fxconly.' in shader:
test_glslang = False
+ if '.task' in shader or '.mesh' in shader:
+ test_glslang = False
hlsl_args = [paths.glslang, '--amb', '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader]
if '.sm30.' in shader: