From 80146a20da26e729d424d92b05134089fc63694b Mon Sep 17 00:00:00 2001 From: Try Date: Tue, 11 Oct 2022 00:16:45 +0200 Subject: HLSL: Implement VK_EXT_mesh_shader --- checkout_glslang_spirv_tools.sh | 2 +- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 86 ++++ ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 86 ++++ .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 63 +++ ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 63 +++ spirv_glsl.cpp | 67 ++- spirv_glsl.hpp | 2 + spirv_hlsl.cpp | 467 +++++++++++++++++++-- spirv_hlsl.hpp | 8 +- test_shaders.py | 6 + 10 files changed, 804 insertions(+), 46 deletions(-) create mode 100644 reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh create mode 100644 reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh create mode 100644 shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh create mode 100644 shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index 2f491d7e..82017008 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -2,7 +2,7 @@ # Copyright 2016-2021 The Khronos Group Inc. 
# SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=1a8869e4d1bee138f2813208777e1a58bebb9735 +GLSLANG_REV=df7fec2cfa966919172ee83bfcae6c9c9b3f91b8 SPIRV_TOOLS_REV=fb27bbf3077f92cc1a8a55777bce2810a94079cf SPIRV_HEADERS_REV=87d5b782bec60822aa878941e6b13c0a9a954c9b PROTOCOL=https diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..8832ada1 --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,86 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float4 gl_ClipDistance[1] : SV_ClipDistance; + float4 gl_CullDistance[1] : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = 
float4(float3(gl_GlobalInvocationID), 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git 
a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..8728a58d --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,86 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float4 gl_ClipDistance[1] : SV_ClipDistance; + float4 gl_CullDistance[1] : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = 
float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..ea3350a5 --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh 
@@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = 
bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..c981a26e --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + 
gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index ddf1f76f..ffc884b3 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3146,9 +3146,32 @@ void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) { auto flags = get_buffer_block_flags(var.self); if (flags.get(DecorationPerPrimitiveEXT)) + { set_name(var.self, "gl_MeshPrimitivesEXT"); + set_name(type.self, "gl_MeshPerPrimitiveEXT"); + } else + { set_name(var.self, "gl_MeshVerticesEXT"); + set_name(type.self, "gl_MeshPerVertexEXT"); + } + } + } + + if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) + { + auto *m = ir.find_meta(var.self); + if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitivePointIndicesEXT) + { + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + } + else if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitiveLineIndicesEXT) + { + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + } + else if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + { + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); } } }); @@ -9193,6 +9216,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); bool pending_array_enclose = false; bool dimension_flatten = false; + bool clip_cull_fixup = false; const 
auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { AccessChainFlags mod_flags = flags; @@ -9323,6 +9347,18 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice break; } } + else if (backend.force_merged_mesh_block && i == 0 && var && !is_builtin_variable(*var) && + var->storage == StorageClassOutput) + { + if(is_per_primitive_variable(*var)) + { + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + } + else + { + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + } + } else if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. @@ -9347,6 +9383,14 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!pending_array_enclose) expr += "]"; } + else if (clip_cull_fixup) + { + string idx_expr = is_literal ? convert_to_string(index) : to_enclosed_unpacked_expression(index, register_expression_read); + + expr += "[" + idx_expr + " / 4]"; + expr += "[" + idx_expr + " % 4]"; + clip_cull_fixup = false; + } // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. 
else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) @@ -9372,7 +9416,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); - BuiltIn builtin; + BuiltIn builtin = BuiltInMax; if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) { if (access_chain_is_arrayed) @@ -9406,6 +9450,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice else physical_type = 0; + clip_cull_fixup = (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) && backend.force_merged_mesh_block; + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get<SPIRType>(type->member_types[index]); } @@ -14120,7 +14166,7 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id) if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; - else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT) + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) res += "taskPayloadSharedEXT "; res += to_interpolation_qualifiers(flags); @@ -17365,6 +17411,23 @@ bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, } } +bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const +{ + if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + return true; + + auto &type = get<SPIRType>(var.basetype); + if (!has_decoration(type.self, DecorationBlock)) + return false; + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + return false; + } + + return true; +} + bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const { return masked_output_locations.count({ location, component }) != 0; diff --git
a/spirv_glsl.hpp b/spirv_glsl.hpp index f8d17259..2ccb36cb 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -602,6 +602,7 @@ protected: bool allow_precision_qualifiers = false; bool can_swizzle_scalar = false; bool force_gl_in_out_block = false; + bool force_merged_mesh_block = false; bool can_return_array = true; bool allow_truncated_access_chain = false; bool supports_extensions = false; @@ -982,6 +983,7 @@ protected: bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const; bool is_stage_output_variable_masked(const SPIRVariable &var) const; bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const; + bool is_per_primitive_variable(const SPIRVariable &var) const; uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; std::unordered_set masked_output_locations; diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 65c9882b..d6d70680 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -604,33 +604,49 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() case BuiltInClipDistance: // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. 
- for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + const uint32_t clip = (clip_distance_count + 3) / 4; + statement("float4 gl_ClipDistance", "[", clip,"] : SV_ClipDistance;"); + } + else + { + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; - uint32_t semantic_index = clip / 4; + uint32_t semantic_index = clip / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_ClipDistance", semantic_index, ";"); + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } } break; case BuiltInCullDistance: // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. 
- for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = cull_distance_count - cull; - if (to_declare > 4) - to_declare = 4; + const uint32_t cull = (cull_distance_count + 3) / 4; + statement("float4 gl_CullDistance", "[", cull,"] : SV_CullDistance;"); + } + else + { + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; - uint32_t semantic_index = cull / 4; + uint32_t semantic_index = cull / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_CullDistance", semantic_index, ";"); + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } } break; @@ -644,14 +660,69 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); case BuiltInLayer: - if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelGeometry) - SPIRV_CROSS_THROW("Render target array index output is only supported in GS 5.0 or higher."); + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInCullPrimitiveEXT: + // per-primitive attributes handled separately + break; + + case BuiltInPrimitivePointIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitiveTriangleIndicesEXT: + // meshlet local-index buffer handled separately + break; + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void
CompilerHLSL::emit_builtin_primitive_outputs_in_struct() +{ + active_output_builtins.for_each_bit([&](uint32_t i){ + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast<BuiltIn>(i); + switch (builtin) + { + case BuiltInLayer: + { + const ExecutionModel model = get_entry_point().model; + if (hlsl_options.shader_model < 50 || + (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) + SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 5.0 or higher."); type = "uint"; semantic = "SV_RenderTargetArrayIndex"; break; + } + + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + + case BuiltInViewportIndex: + type = "uint"; + semantic = "SV_ViewportArrayIndex"; + break; + + case BuiltInPrimitiveShadingRateKHR: + type = "uint"; + semantic = "SV_ShadingRate"; + break; + + case BuiltInCullPrimitiveEXT: + type = "bool"; + semantic = "SV_CullPrimitive"; + break; default: - SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; } if (type && semantic) @@ -981,17 +1052,25 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord } else { - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + auto decl_type = type; + if (execution.model == ExecutionModelMeshEXT) + { + decl_type.array.erase(decl_type.array.begin()); + decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); + } + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", semantic, ";"); // Structs and arrays should consume more locations.
- uint32_t consumed_locations = type_to_consumed_locations(type); + uint32_t consumed_locations = type_to_consumed_locations(decl_type); for (uint32_t i = 0; i < consumed_locations; i++) active_locations.insert(location_number + i); } } else + { statement(variable_decl(type, name), " : ", binding, ";"); + } } std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) @@ -1071,6 +1150,18 @@ void CompilerHLSL::emit_builtin_variables() if (init_itr != builtin_to_initializer.end()) init_expr = join(" = ", to_expression(init_itr->second)); + if (get_execution_model() == ExecutionModelMeshEXT) + { + if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + return; + } + } + switch (builtin) { case BuiltInFragCoord: @@ -1171,6 +1262,13 @@ void CompilerHLSL::emit_builtin_variables() type = "uint"; break; + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInCullPrimitiveEXT: + type = "uint"; + break; + default: SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); } @@ -1365,12 +1463,12 @@ void CompilerHLSL::replace_illegal_names() "double", "DomainShader", "dword", "else", "export", "false", "float", "for", "fxgroup", "GeometryShader", "groupshared", "half", "HullShader", - "if", "in", "inline", "inout", "InputPatch", "int", "interface", + "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", "line", "lineadj", "linear", "LineStream", "matrix", "min16float", "min10float", "min16int", "min16uint", "namespace", "nointerpolation", 
"noperspective", "NULL", "out", "OutputPatch", - "packoffset", "pass", "pixelfragment", "PixelShader", "point", + "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", "PointStream", "precise", "RasterizerState", "RenderTargetView", "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", @@ -1381,7 +1479,7 @@ void CompilerHLSL::replace_illegal_names() "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", - "vector", "vertexfragment", "VertexShader", "void", "volatile", "while", + "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", }; CompilerGLSL::replace_illegal_names(keywords); @@ -1415,6 +1513,19 @@ void CompilerHLSL::emit_resources() replace_illegal_names(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + emit_specialization_constants_and_structs(); emit_composite_constants(); @@ -1487,18 +1598,21 @@ void CompilerHLSL::emit_resources() // Emit builtin input and output variables here. 
emit_builtin_variables(); - ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { - auto &type = this->get<SPIRType>(var.basetype); + if (execution.model != ExecutionModelMeshEXT) + { + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); - if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - // Builtin variables are handled separately. - emit_interface_block_globally(var); - emitted = true; - } - }); + if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + } if (emitted) statement(""); @@ -1612,23 +1726,50 @@ void CompilerHLSL::emit_resources() statement(""); } + const bool is_mesh_shader = (execution.model == ExecutionModelMeshEXT); if (!output_variables.empty() || !active_output_builtins.empty()) { - require_output = true; - statement("struct SPIRV_Cross_Output"); + sort(output_variables.begin(), output_variables.end(), variable_compare); + require_output = !is_mesh_shader; + statement(is_mesh_shader ? 
"struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); begin_scope(); - sort(output_variables.begin(), output_variables.end(), variable_compare); for (auto &var : output_variables) { - if (var.block) + if (is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index!=0) + continue; + if (var.block && !is_mesh_shader) emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); else emit_interface_block_in_struct(*var.var, active_outputs); } emit_builtin_outputs_in_struct(); + if (!is_mesh_shader) + emit_builtin_primitive_outputs_in_struct(); end_scope_decl(); statement(""); + + if (is_mesh_shader) + { + statement("struct gl_MeshPerPrimitiveEXT"); + begin_scope(); + for (auto &var : output_variables) + { + if (!is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index!=0) + continue; + if (var.block && !is_mesh_shader) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); + else + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + } } // Global variables. 
@@ -1642,6 +1783,9 @@ void CompilerHLSL::emit_resources() { if (!variable_is_lut(var)) { + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + continue; + add_resource_name(var.self); const char *storage = nullptr; @@ -2164,6 +2308,176 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char } } +void CompilerHLSL::analyze_meshlet_writes() +{ + if (get_execution_model() == ExecutionModelMeshEXT) + { + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; + + ir.for_each_typed_id([&](uint32_t id, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) + { + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + } + }); + + // If we have per-primitive outputs, and no per-primitive builtins, empty version of gl_MeshPerPrimitiveEXT will be emitted + if (id_per_primitive == 0 && need_per_primitive) + { + auto &execution = get_entry_point(); + + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; + + auto& type = set(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_decoration(op_type, DecorationBlock); + set_decoration(op_type, DecorationPerPrimitiveEXT); + + auto& arr = set(op_arr, type); + arr.parent_type = type.self; + arr.array.push_back(execution.output_primitives); + arr.array_size_literal.push_back(true); + + auto& ptr = set(op_ptr, arr); + ptr.parent_type = 
arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + + auto& var = set(op_var, op_ptr, StorageClassOutput); + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, "gl_MeshPrimitivesEXT"); + execution.interface_variables.push_back(var.self); + + id_per_primitive = op_var; + } + + unordered_set processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); + } +} + +void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, const uint32_t id_per_vertex, const uint32_t id_per_primitive, + std::unordered_set& processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + { + return; + } + processed_func_ids.insert(func_id); + + auto &func = get(func_id); + // Recursively establish global args added to functions on which we depend. 
+ for (auto& block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); + auto &inner_func = get(inner_func_id); + for (auto& iarg : inner_func.arguments) + { + if (!iarg.alias_global_variable) + continue; + bool already_declarated = false; + for (auto& arg : func.arguments) + if (arg.id==iarg.id) + { + already_declarated=true; + break; + } + if (!already_declarated) + { + func.arguments.push_back({ ops[0], iarg.id, iarg.read_count, iarg.write_count, true }); + } + } + break; + } + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpArrayLength: + { + auto &type = get(ops[0]); + if (type.storage==StorageClassOutput || type.storage==StorageClassTaskPayloadWorkgroupEXT) + { + bool already_declarated = false; + auto &var = get(ops[2]); + auto &base_type = get(var.basetype); + auto *m = ir.find_meta(var.self); + + uint32_t var_id = var.self; + if (m!=nullptr && var.storage != StorageClassTaskPayloadWorkgroupEXT && + m->decoration.builtin_type != BuiltInPrimitivePointIndicesEXT && + m->decoration.builtin_type != BuiltInPrimitiveLineIndicesEXT && + m->decoration.builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + { + bool block = has_decoration(base_type.self, DecorationBlock); + auto flags = block ? 
get_buffer_block_flags(var.self) : Bitset(); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(var_id, DecorationPerPrimitiveEXT)) + var_id = id_per_primitive; + else + var_id = id_per_vertex; + } + + for (auto& arg : func.arguments) + if (arg.id==var_id) + { + already_declarated=true; + break; + } + if (!already_declarated) + { + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ ops[0], var_id, 1u, 0u, true }); + else + func.arguments.push_back({ ops[0], var_id, 0u, 1u, true }); + } + } + break; + } + default: + break; + } + } + } +} + string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) { auto &flags = get_member_decoration_bitset(type.self, index); @@ -2459,6 +2773,8 @@ string CompilerHLSL::get_inner_entry_point_name() const return "frag_main"; else if (execution.model == ExecutionModelGLCompute) return "comp_main"; + else if (execution.model == ExecutionModelMeshEXT) + return "mesh_main"; else SPIRV_CROSS_THROW("Unsupported execution model."); } @@ -2572,8 +2888,57 @@ void CompilerHLSL::emit_hlsl_entry_point() switch (execution.model) { + case ExecutionModelMeshEXT: + case ExecutionModelMeshNV: case ExecutionModelGLCompute: { + if (execution.model == ExecutionModelMeshEXT) + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + statement("[outputtopology(\"triangle\")]"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + statement("[outputtopology(\"line\")]"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); + auto& fn = get(ir.default_entry_point); + for (auto& arg : fn.arguments) + { + auto &var = get(arg.id); + auto &base_type = get(var.basetype); + bool block = has_decoration(base_type.self, DecorationBlock); + if (var.storage==StorageClassTaskPayloadWorkgroupEXT) + { + arguments.push_back("in payload " + variable_decl(var)); + } + else if (block) + { + auto flags = 
get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) + { + arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + + std::to_string(execution.output_vertices) + "]"); + } + } + else + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + { + arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + } + } + } SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); @@ -2795,9 +3160,21 @@ void CompilerHLSL::emit_hlsl_entry_point() // Run the shader. if (execution.model == ExecutionModelVertex || execution.model == ExecutionModelFragment || - execution.model == ExecutionModelGLCompute) + execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelMeshEXT) { - statement(get_inner_entry_point_name(), "();"); + SmallVector arglist; + auto& fn = get(ir.default_entry_point); + for (auto& arg : fn.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. 
+ if (skip_argument(arg.type)) + continue; + + arglist.push_back(to_expression(arg.id,false)); + } + statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); } else SPIRV_CROSS_THROW("Unsupported shader stage."); @@ -5926,6 +6303,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); break; } + case OpSetMeshOutputsEXT: + { + statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; @@ -6126,6 +6509,7 @@ string CompilerHLSL::compile() backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; + backend.force_merged_mesh_block = (get_execution_model() == ExecutionModelMeshEXT); // SM 4.1 does not support precise for some reason. backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; @@ -6138,6 +6522,7 @@ string CompilerHLSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_interlocked_resource_usage(); + analyze_meshlet_writes(); // Subpass input needs SV_Position. 
if (need_subpass_input) diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 41ce73bf..4aabef3f 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -233,11 +233,11 @@ private: void declare_undefined_values() override; void emit_interface_block_globally(const SPIRVariable &type); void emit_interface_block_in_struct(const SPIRVariable &var, std::unordered_set &active_locations); - void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, - uint32_t location, + void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, uint32_t location, std::unordered_set &active_locations); void emit_builtin_inputs_in_struct(); void emit_builtin_outputs_in_struct(); + void emit_builtin_primitive_outputs_in_struct(); void emit_texture_op(const Instruction &i, bool sparse) override; void emit_instruction(const Instruction &instruction) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, @@ -355,6 +355,10 @@ private: TypeUnpackUint64 }; + void analyze_meshlet_writes(); + void analyze_meshlet_writes(uint32_t func_id, const uint32_t id_per_vertex, const uint32_t id_per_primitive, + std::unordered_set &processed_func_ids); + BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0); void emit_builtin_variables(); diff --git a/test_shaders.py b/test_shaders.py index 31ec70a0..a9e2a2db 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -384,6 +384,10 @@ def shader_model_hlsl(shader): return '-Tps_5_1' elif '.comp' in shader: return '-Tcs_5_1' + elif '.mesh' in shader: + return '-Tms_6_5' + elif '.task' in shader: + return '-Tas_6_5' else: return None @@ -408,6 +412,8 @@ def validate_shader_hlsl(shader, force_no_external_validation, paths): test_glslang = False if '.fxconly.' 
in shader: test_glslang = False + if '.task' in shader or '.mesh' in shader: + test_glslang = False hlsl_args = [paths.glslang, '--amb', '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader] if '.sm30.' in shader: -- cgit v1.2.3 From 322f1b9fb333d3757ca381cb191dc2d0f26fa371 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 11:50:03 +0100 Subject: Fixup glslang reference. --- checkout_glslang_spirv_tools.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index 82017008..2f491d7e 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -2,7 +2,7 @@ # Copyright 2016-2021 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=df7fec2cfa966919172ee83bfcae6c9c9b3f91b8 +GLSLANG_REV=1a8869e4d1bee138f2813208777e1a58bebb9735 SPIRV_TOOLS_REV=fb27bbf3077f92cc1a8a55777bce2810a94079cf SPIRV_HEADERS_REV=87d5b782bec60822aa878941e6b13c0a9a954c9b PROTOCOL=https -- cgit v1.2.3 From 10f55a11eb729edf13ae6dc5dd5b36dcff0dbf3a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 11:52:02 +0100 Subject: HLSL: Add missing reference for mesh shader test. 
--- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 89 ++++++++++++++++++++++ ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 89 ++++++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh create mode 100644 reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..9b9b3fa0 --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,89 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float4 gl_ClipDistance[1] : SV_ClipDistance; + float4 gl_CullDistance[1] : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 
gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _29 = float3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + int _126 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _126; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _126 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _126 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _126 + 3; + } +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + 
mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000..4d589bc9 --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,89 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float4 gl_ClipDistance[1] : SV_ClipDistance; + float4 gl_CullDistance[1] : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _29 = float3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 
4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + int _125 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _125; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _125 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _125 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _125 + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} -- cgit v1.2.3 From a4d5c0c4a53904784e749971d309dfa4faef0fc8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 12:13:42 +0100 Subject: HLSL: Fix-up some style issues from review. 
--- spirv_glsl.cpp | 33 +++++++++++--------------- spirv_hlsl.cpp | 74 ++++++++++++++++++++++++++-------------------------------- spirv_hlsl.hpp | 4 ++-- 3 files changed, 48 insertions(+), 63 deletions(-) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index ffc884b3..ce95729c 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3161,17 +3161,15 @@ void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) { auto *m = ir.find_meta(var.self); - if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitivePointIndicesEXT) + if (m && m->decoration.builtin) { - set_name(var.self, "gl_PrimitivePointIndicesEXT"); - } - else if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitiveLineIndicesEXT) - { - set_name(var.self, "gl_PrimitiveLineIndicesEXT"); - } - else if (m != nullptr && m->decoration.builtin_type == BuiltInPrimitiveTriangleIndicesEXT) - { - set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); + auto builtin_type = m->decoration.builtin_type; + if (builtin_type == BuiltInPrimitivePointIndicesEXT) + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); } } }); @@ -9347,17 +9345,13 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice break; } } - else if (backend.force_merged_mesh_block && i == 0 && var && !is_builtin_variable(*var) && - var->storage == StorageClassOutput) + else if (backend.force_merged_mesh_block && i == 0 && var && + !is_builtin_variable(*var) && var->storage == StorageClassOutput) { - if(is_per_primitive_variable(*var)) - { + if (is_per_primitive_variable(*var)) expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); - } else - { expr = 
join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); - } } else if (options.flatten_multidimensional_arrays && dimension_flatten) { @@ -17419,11 +17413,10 @@ bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const auto &type = get(var.basetype); if (!has_decoration(type.self, DecorationBlock)) return false; - for (uint32_t i = 0; i < type.member_types.size(); i++) - { + + for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++) if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) return false; - } return true; } diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index d6d70680..5564d479 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -606,7 +606,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. if (execution.model == ExecutionModelMeshEXT) { - const uint32_t clip = (clip_distance_count + 3) / 4; + uint32_t clip = (clip_distance_count + 3) / 4; statement("float4 gl_ClipDistance", "[", clip,"] : SV_ClipDistance;"); } else @@ -630,7 +630,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. 
if (execution.model == ExecutionModelMeshEXT) { - const uint32_t cull = (cull_distance_count + 3) / 4; + uint32_t cull = (cull_distance_count + 3) / 4; statement("float4 gl_CullDistance", "[", cull,"] : SV_CullDistance;"); } else @@ -684,7 +684,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() void CompilerHLSL::emit_builtin_primitive_outputs_in_struct() { - active_output_builtins.for_each_bit([&](uint32_t i){ + active_output_builtins.for_each_bit([&](uint32_t i) { const char *type = nullptr; const char *semantic = nullptr; auto builtin = static_cast(i); @@ -694,7 +694,7 @@ void CompilerHLSL::emit_builtin_primitive_outputs_in_struct() { const ExecutionModel model = get_entry_point().model; if (hlsl_options.shader_model < 50 || - (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) + (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 5.0 or higher."); type = "uint"; semantic = "SV_RenderTargetArrayIndex"; @@ -1153,10 +1153,10 @@ void CompilerHLSL::emit_builtin_variables() if (get_execution_model() == ExecutionModelMeshEXT) { if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || - builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || - builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || - builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || - builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) { return; } @@ -1726,7 
+1726,7 @@ void CompilerHLSL::emit_resources() statement(""); } - const bool is_mesh_shader = (execution.model == ExecutionModelMeshEXT); + const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT; if (!output_variables.empty() || !active_output_builtins.empty()) { sort(output_variables.begin(), output_variables.end(), variable_compare); @@ -1738,7 +1738,7 @@ void CompilerHLSL::emit_resources() { if (is_per_primitive_variable(*var.var)) continue; - if (var.block && is_mesh_shader && var.block_member_index!=0) + if (var.block && is_mesh_shader && var.block_member_index != 0) continue; if (var.block && !is_mesh_shader) emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); @@ -1759,12 +1759,10 @@ void CompilerHLSL::emit_resources() { if (!is_per_primitive_variable(*var.var)) continue; - if (var.block && is_mesh_shader && var.block_member_index!=0) + if (var.block && var.block_member_index != 0) continue; - if (var.block && !is_mesh_shader) - emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); - else - emit_interface_block_in_struct(*var.var, active_outputs); + + emit_interface_block_in_struct(*var.var, active_outputs); } emit_builtin_primitive_outputs_in_struct(); end_scope_decl(); @@ -1779,13 +1777,11 @@ void CompilerHLSL::emit_resources() if (is_hidden_variable(var, true)) continue; - if (var.storage != StorageClassOutput) + if (var.storage != StorageClassOutput && + var.storage != StorageClassTaskPayloadWorkgroupEXT) { if (!variable_is_lut(var)) { - if (var.storage == StorageClassTaskPayloadWorkgroupEXT) - continue; - add_resource_name(var.self); const char *storage = nullptr; @@ -2382,14 +2378,12 @@ void CompilerHLSL::analyze_meshlet_writes() } } -void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, const uint32_t id_per_vertex, const uint32_t id_per_primitive, - std::unordered_set& processed_func_ids) +void 
CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids) { // Avoid processing a function more than once if (processed_func_ids.find(func_id) != processed_func_ids.end()) - { return; - } processed_func_ids.insert(func_id); auto &func = get(func_id); @@ -2900,13 +2894,14 @@ void CompilerHLSL::emit_hlsl_entry_point() statement("[outputtopology(\"line\")]"); else if (execution.flags.get(ExecutionModeOutputPoints)) SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); - auto& fn = get(ir.default_entry_point); - for (auto& arg : fn.arguments) + + auto &func = get(ir.default_entry_point); + for (auto &arg : func.arguments) { auto &var = get(arg.id); auto &base_type = get(var.basetype); bool block = has_decoration(base_type.self, DecorationBlock); - if (var.storage==StorageClassTaskPayloadWorkgroupEXT) + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) { arguments.push_back("in payload " + variable_decl(var)); } @@ -2916,12 +2911,12 @@ void CompilerHLSL::emit_hlsl_entry_point() if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) { arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + - std::to_string(execution.output_primitives) + "]"); + std::to_string(execution.output_primitives) + "]"); } else { arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + - std::to_string(execution.output_vertices) + "]"); + std::to_string(execution.output_vertices) + "]"); } } else @@ -2929,12 +2924,12 @@ void CompilerHLSL::emit_hlsl_entry_point() if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) { arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + - std::to_string(execution.output_primitives) + "]"); + std::to_string(execution.output_primitives) + "]"); } else { arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + - 
std::to_string(execution.output_primitives) + "]"); + std::to_string(execution.output_primitives) + "]"); } } } @@ -3163,17 +3158,14 @@ void CompilerHLSL::emit_hlsl_entry_point() execution.model == ExecutionModelGLCompute || execution.model == ExecutionModelMeshEXT) { + // For mesh shaders, we receive special arguments that we must pass down as function arguments. + // HLSL does not support proper reference types for passing these IO blocks, + // but DXC post-inlining seems to magically fix it up anyways *shrug*. SmallVector arglist; - auto& fn = get(ir.default_entry_point); - for (auto& arg : fn.arguments) - { - // Do not pass in separate images or samplers if we're remapping - // to combined image samplers. - if (skip_argument(arg.type)) - continue; - - arglist.push_back(to_expression(arg.id,false)); - } + auto &func = get(ir.default_entry_point); + // The arguments are marked out, avoid detecting reads and emitting inout. + for (auto &arg : func.arguments) + arglist.push_back(to_expression(arg.id, false)); statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); } else @@ -6509,7 +6501,7 @@ string CompilerHLSL::compile() backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; - backend.force_merged_mesh_block = (get_execution_model() == ExecutionModelMeshEXT); + backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; // SM 4.1 does not support precise for some reason. 
backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 4aabef3f..77382363 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -356,8 +356,8 @@ private: }; void analyze_meshlet_writes(); - void analyze_meshlet_writes(uint32_t func_id, const uint32_t id_per_vertex, const uint32_t id_per_primitive, - std::unordered_set &processed_func_ids); + void analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids); BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0); -- cgit v1.2.3 From 892f65b3a6488690cb5917dff04ab6134fea2e20 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 12:14:31 +0100 Subject: HLSL: Fix some warnings from review. --- spirv_hlsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 5564d479..c2de3a8f 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2312,7 +2312,7 @@ void CompilerHLSL::analyze_meshlet_writes() uint32_t id_per_primitive = 0; bool need_per_primitive = false; - ir.for_each_typed_id([&](uint32_t id, SPIRVariable &var) { + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) @@ -2441,8 +2441,8 @@ void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vert m->decoration.builtin_type != BuiltInPrimitiveLineIndicesEXT && m->decoration.builtin_type != BuiltInPrimitiveTriangleIndicesEXT) { - bool block = has_decoration(base_type.self, DecorationBlock); - auto flags = block ? get_buffer_block_flags(var.self) : Bitset(); + bool is_block = has_decoration(base_type.self, DecorationBlock); + auto flags = is_block ? 
get_buffer_block_flags(var.self) : Bitset(); if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(var_id, DecorationPerPrimitiveEXT)) var_id = id_per_primitive; else -- cgit v1.2.3 From e418266d25975cdae15039741487bcb3b4b972a1 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 12:26:51 +0100 Subject: HLSL: Some cleanups and fixes in analyze_meshlet_writes. --- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 2 +- ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 2 +- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 2 +- ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 2 +- spirv_hlsl.cpp | 196 +++++++++++---------- 5 files changed, 107 insertions(+), 97 deletions(-) diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 9b9b3fa0..3ebb602a 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 gl_PrimitiveLineIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); float3 _29 = float3(gl_GlobalInvocationID); diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index 4d589bc9..f3e40fd8 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ 
b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); float3 _29 = float3(gl_GlobalInvocationID); diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 8832ada1..cfe09a2d 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 gl_PrimitiveLineIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index 8728a58d..0249cda0 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared 
float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index c2de3a8f..fe3ed058 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2306,76 +2306,74 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char void CompilerHLSL::analyze_meshlet_writes() { - if (get_execution_model() == ExecutionModelMeshEXT) - { - uint32_t id_per_vertex = 0; - uint32_t id_per_primitive = 0; - bool need_per_primitive = false; - - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) - { - auto flags = get_buffer_block_flags(var.self); - if (flags.get(DecorationPerPrimitiveEXT)) - id_per_primitive = var.self; - else - id_per_vertex = var.self; - } - else if (var.storage == StorageClassOutput) - { - Bitset flags; - if (block) - flags = get_buffer_block_flags(var.self); - else - flags = get_decoration_bitset(var.self); - - if (flags.get(DecorationPerPrimitiveEXT)) - need_per_primitive = true; - } - }); + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; - // If we have per-primitive outputs, and no per-primitive builtins, empty version of gl_MeshPerPrimitiveEXT will be emitted - if (id_per_primitive == 0 && need_per_primitive) + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + 
bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) { - auto &execution = get_entry_point(); + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + } + }); - uint32_t op_type = ir.increase_bound_by(4); - uint32_t op_arr = op_type + 1; - uint32_t op_ptr = op_type + 2; - uint32_t op_var = op_type + 3; + // If we have per-primitive outputs, and no per-primitive builtins, + // empty version of gl_MeshPerPrimitiveEXT will be emitted + if (id_per_primitive == 0 && need_per_primitive) + { + auto &execution = get_entry_point(); - auto& type = set(op_type); - type.basetype = SPIRType::Struct; - set_name(op_type, "gl_MeshPerPrimitiveEXT"); - set_decoration(op_type, DecorationBlock); - set_decoration(op_type, DecorationPerPrimitiveEXT); + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; - auto& arr = set(op_arr, type); - arr.parent_type = type.self; - arr.array.push_back(execution.output_primitives); - arr.array_size_literal.push_back(true); + auto& type = set(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_decoration(op_type, DecorationBlock); + set_decoration(op_type, DecorationPerPrimitiveEXT); - auto& ptr = set(op_ptr, arr); - ptr.parent_type = arr.self; - ptr.pointer = true; - ptr.pointer_depth++; - ptr.storage = StorageClassOutput; - set_decoration(op_ptr, DecorationBlock); - set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + auto& arr = set(op_arr, type); + arr.parent_type = type.self; + 
arr.array.push_back(execution.output_primitives); + arr.array_size_literal.push_back(true); - auto& var = set(op_var, op_ptr, StorageClassOutput); - set_decoration(op_var, DecorationPerPrimitiveEXT); - set_name(op_var, "gl_MeshPrimitivesEXT"); - execution.interface_variables.push_back(var.self); + auto& ptr = set(op_ptr, arr); + ptr.parent_type = arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); - id_per_primitive = op_var; - } + auto& var = set(op_var, op_ptr, StorageClassOutput); + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, "gl_MeshPrimitivesEXT"); + execution.interface_variables.push_back(var.self); - unordered_set processed_func_ids; - analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); + id_per_primitive = op_var; } + + unordered_set processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); } void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, @@ -2404,67 +2402,78 @@ void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vert uint32_t inner_func_id = ops[2]; analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); auto &inner_func = get(inner_func_id); - for (auto& iarg : inner_func.arguments) + for (auto &iarg : inner_func.arguments) { if (!iarg.alias_global_variable) continue; - bool already_declarated = false; - for (auto& arg : func.arguments) - if (arg.id==iarg.id) + + bool already_declared = false; + for (auto &arg : func.arguments) + { + if (arg.id == iarg.id) { - already_declarated=true; + already_declared = true; break; } - if (!already_declarated) + } + + if (!already_declared) { - func.arguments.push_back({ ops[0], iarg.id, iarg.read_count, iarg.write_count, true }); + 
// basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + func.arguments.push_back({ expression_type_id(iarg.id), iarg.id, + iarg.read_count, iarg.write_count, true }); } } break; } + + case OpStore: case OpLoad: case OpInBoundsAccessChain: case OpAccessChain: + case OpPtrAccessChain: + case OpInBoundsPtrAccessChain: case OpArrayLength: { - auto &type = get(ops[0]); - if (type.storage==StorageClassOutput || type.storage==StorageClassTaskPayloadWorkgroupEXT) + auto *var = maybe_get(ops[op == OpStore ? 0 : 2]); + if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT)) { - bool already_declarated = false; - auto &var = get(ops[2]); - auto &base_type = get(var.basetype); - auto *m = ir.find_meta(var.self); - - uint32_t var_id = var.self; - if (m!=nullptr && var.storage != StorageClassTaskPayloadWorkgroupEXT && - m->decoration.builtin_type != BuiltInPrimitivePointIndicesEXT && - m->decoration.builtin_type != BuiltInPrimitiveLineIndicesEXT && - m->decoration.builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + bool already_declared = false; + auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + uint32_t var_id = var->self; + if (var->storage != StorageClassTaskPayloadWorkgroupEXT && + builtin_type != BuiltInPrimitivePointIndicesEXT && + builtin_type != BuiltInPrimitiveLineIndicesEXT && + builtin_type != BuiltInPrimitiveTriangleIndicesEXT) { - bool is_block = has_decoration(base_type.self, DecorationBlock); - auto flags = is_block ? get_buffer_block_flags(var.self) : Bitset(); - if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(var_id, DecorationPerPrimitiveEXT)) - var_id = id_per_primitive; - else - var_id = id_per_vertex; + var_id = is_per_primitive_variable(*var) ? 
id_per_primitive : id_per_vertex; } - for (auto& arg : func.arguments) - if (arg.id==var_id) + for (auto &arg : func.arguments) + { + if (arg.id == var_id) { - already_declarated=true; + already_declared = true; break; } - if (!already_declarated) + } + + if (!already_declared) { - if (var.storage == StorageClassTaskPayloadWorkgroupEXT) - func.arguments.push_back({ ops[0], var_id, 1u, 0u, true }); + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + uint32_t type_id = expression_type_id(var_id); + if (var->storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); else - func.arguments.push_back({ ops[0], var_id, 0u, 1u, true }); + func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); } } break; } + default: break; } @@ -6514,7 +6523,8 @@ string CompilerHLSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_interlocked_resource_usage(); - analyze_meshlet_writes(); + if (get_execution_model() == ExecutionModelMeshEXT) + analyze_meshlet_writes(); // Subpass input needs SV_Position. if (need_subpass_input) -- cgit v1.2.3 From f5ef0a74fbf1b7b07192c554b8e30fe18f79c35c Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 12:56:10 +0100 Subject: HLSL: Make sure to test deeply nested functions in mesh shaders. 
--- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 18 ++++++++-------- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 24 +++++++++++++++------- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 24 +++++++++++++++------- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 3ebb602a..46a166c5 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -54,12 +54,12 @@ groupshared float shared_float[16]; void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); - float3 _29 = float3(gl_GlobalInvocationID); - float _31 = _29.x; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); + float3 _171 = float3(gl_GlobalInvocationID); + float _172 = _171.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_172, _171.yz, 1.0f); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_172, _171.yz, 2.0f); gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; GroupMemoryBarrierWithGroupSync(); @@ -69,12 +69,12 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = 
_payload.b.xxxx; gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; - int _126 = int(gl_GlobalInvocationID.x); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _126; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _126 + 1; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _126 + 2; + int _225 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _225; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _225 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _225 + 2; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _126 + 3; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _225 + 3; } } diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index cfe09a2d..1609cd5d 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -51,7 +51,17 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +void main3(inout uint2 gl_PrimitiveLineIndicesEXT[22], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22]) +{ + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); @@ -66,15 +76,15 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; - gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + main3(gl_PrimitiveLineIndicesEXT, gl_MeshPrimitivesEXT); } } +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + main2(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} + [outputtopology("line")] [numthreads(2, 3, 4)] 
void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index ea3350a5..70aac964 100644 --- a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -37,7 +37,17 @@ struct TaskPayload taskPayloadSharedEXT TaskPayload payload; -void main() +void main3() +{ + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2() { SetMeshOutputsEXT(24, 22); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); @@ -53,11 +63,11 @@ void main() vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); - gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = 
int(gl_GlobalInvocationID.x) + 2; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + main3(); } } + +void main() +{ + main2(); +} -- cgit v1.2.3 From 3a066cd73394b1cb2f869da2822bb6a80dafe8d7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 13:07:32 +0100 Subject: HLSL: Handle case where PerVertex block is not used. --- ...lain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 63 +++++++++ ...lain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 150 +++++++++++++++++++++ spirv_hlsl.cpp | 41 +++--- 3 files changed, 239 insertions(+), 15 deletions(-) create mode 100644 reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh create mode 100644 shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000..2f4e548d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,63 @@ +struct _12 +{ + float _m0; +}; + +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 B : TEXCOORD1; + float4 gl_Position : SV_Position; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 C : TEXCOORD3; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float _9[64]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT 
gl_MeshPrimitivesEXT[8]) +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(24u, 8u); + gl_Position[gl_LocalInvocationIndex].x = _9[gl_LocalInvocationIndex]; + gl_Position[gl_LocalInvocationIndex].y = _9[gl_LocalInvocationIndex]; + gl_Position[gl_LocalInvocationIndex].z = _9[gl_LocalInvocationIndex]; + gl_Position[gl_LocalInvocationIndex].w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u); + gl_CullPrimitiveEXT[gl_LocalInvocationIndex] = (gl_LocalInvocationIndex & 1u) != 0u; + gl_PrimitiveID[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); + gl_Layer[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81]; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT); +} diff --git a/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh 
b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000..7b38001d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 
+%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float 
+%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint 
%SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index fe3ed058..86dcbd1c 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2309,6 +2309,7 @@ void CompilerHLSL::analyze_meshlet_writes() uint32_t id_per_vertex = 0; uint32_t id_per_primitive = 0; bool need_per_primitive = false; + bool need_per_vertex = false; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); @@ -2331,13 +2332,16 @@ void CompilerHLSL::analyze_meshlet_writes() if (flags.get(DecorationPerPrimitiveEXT)) need_per_primitive = true; + else + need_per_vertex = true; } }); // If we have per-primitive outputs, and no per-primitive builtins, - // empty version of gl_MeshPerPrimitiveEXT will be emitted - if (id_per_primitive == 0 && need_per_primitive) - { + // empty version of gl_MeshPerPrimitiveEXT will be emitted. + // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. 
+ + const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t { auto &execution = get_entry_point(); uint32_t op_type = ir.increase_bound_by(4); @@ -2345,32 +2349,39 @@ void CompilerHLSL::analyze_meshlet_writes() uint32_t op_ptr = op_type + 2; uint32_t op_var = op_type + 3; - auto& type = set(op_type); + auto &type = set(op_type); type.basetype = SPIRType::Struct; - set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_name(op_type, block_name); set_decoration(op_type, DecorationBlock); - set_decoration(op_type, DecorationPerPrimitiveEXT); + if (per_primitive) + set_decoration(op_type, DecorationPerPrimitiveEXT); - auto& arr = set(op_arr, type); + auto &arr = set(op_arr, type); arr.parent_type = type.self; - arr.array.push_back(execution.output_primitives); + arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices); arr.array_size_literal.push_back(true); - auto& ptr = set(op_ptr, arr); + auto &ptr = set(op_ptr, arr); ptr.parent_type = arr.self; ptr.pointer = true; ptr.pointer_depth++; ptr.storage = StorageClassOutput; set_decoration(op_ptr, DecorationBlock); - set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + set_name(op_ptr, block_name); - auto& var = set(op_var, op_ptr, StorageClassOutput); - set_decoration(op_var, DecorationPerPrimitiveEXT); - set_name(op_var, "gl_MeshPrimitivesEXT"); + auto &var = set(op_var, op_ptr, StorageClassOutput); + if (per_primitive) + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, instance_name); execution.interface_variables.push_back(var.self); - id_per_primitive = op_var; - } + return op_var; + }; + + if (id_per_vertex == 0 && need_per_vertex) + id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false); + if (id_per_primitive == 0 && need_per_primitive) + id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true); unordered_set processed_func_ids; 
analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); -- cgit v1.2.3 From 94160e88909fe588a06415fb30c8f05fe6e42af0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 13:08:56 +0100 Subject: GLSL: Add test for mesh with non-block builtin IO. --- ...n-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk | 44 ++++++ ...lain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 150 +++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk create mode 100644 shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh diff --git a/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk new file mode 100644 index 00000000..5040aa46 --- /dev/null +++ b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk @@ -0,0 +1,44 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 8, triangles) out; + +struct _12 +{ + float _m0; +}; + +layout(location = 1) out vec4 B[24]; +layout(location = 3) perprimitiveEXT out vec4 C[8]; +shared float _9[64]; +taskPayloadSharedEXT _12 _11; + +void main() +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + barrier(); + SetMeshOutputsEXT(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + 
_9[gl_LocalInvocationIndex ^ 1u]; + B[gl_LocalInvocationIndex].x = _63; + B[gl_LocalInvocationIndex].y = _63; + B[gl_LocalInvocationIndex].z = _63; + B[gl_LocalInvocationIndex].w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(_71, _71 + 1u, _71 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + C[gl_LocalInvocationIndex].x = _9[_81]; + C[gl_LocalInvocationIndex].y = _9[_81]; + C[gl_LocalInvocationIndex].z = _9[_81]; + C[gl_LocalInvocationIndex].w = _9[_81]; + } +} + diff --git a/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000..7b38001d --- /dev/null +++ b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive 
"SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer 
Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 
+ %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd -- cgit v1.2.3 From ebf779dcfbeca9f6609fb825cda342621f0c7607 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 13:26:45 +0100 Subject: HLSL: Do not support ClipCull size > 4 in mesh shaders. The array mechanism breaks DXC which needs to observe that all components have been written. Uninitialized outputs will be undefined. Resort to simple vector instead. 
--- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 27 ++++++++------- ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 19 +++++----- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 9 ++--- ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 9 ++--- .../mesh-shader-basic-lines.spv14.vk.nocompat.mesh | 1 + ...sh-shader-basic-triangle.spv14.vk.nocompat.mesh | 1 + spirv_glsl.cpp | 11 ------ spirv_hlsl.cpp | 40 ++++++++++++++++++---- 8 files changed, 70 insertions(+), 47 deletions(-) diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 46a166c5..4819b14f 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -34,8 +34,8 @@ struct gl_MeshPerVertexEXT float4 vOut : TEXCOORD0; BlockOut outputs : TEXCOORD2; float4 gl_Position : SV_Position; - float4 gl_ClipDistance[1] : SV_ClipDistance; - float4 gl_CullDistance[1] : SV_CullDistance; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; }; struct gl_MeshPerPrimitiveEXT @@ -54,12 +54,13 @@ groupshared float shared_float[16]; void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); - float3 _171 = float3(gl_GlobalInvocationID); - float _172 = _171.x; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_172, _171.yz, 1.0f); - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_172, _171.yz, 2.0f); + float3 _173 = float3(gl_GlobalInvocationID); + float _174 = _173.x; + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_174, _173.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_174, _173.yz, 2.0f); gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; GroupMemoryBarrierWithGroupSync(); @@ -69,12 +70,12 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; - int _225 = int(gl_GlobalInvocationID.x); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _225; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _225 + 1; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _225 + 2; + int _229 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _229; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _229 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _229 + 2; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _225 + 3; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _229 + 3; } } diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index f3e40fd8..7436c463 100644 --- 
a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -34,8 +34,8 @@ struct gl_MeshPerVertexEXT float4 vOut : TEXCOORD0; BlockOut outputs : TEXCOORD2; float4 gl_Position : SV_Position; - float4 gl_ClipDistance[1] : SV_ClipDistance; - float4 gl_CullDistance[1] : SV_CullDistance; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; }; struct gl_MeshPerPrimitiveEXT @@ -57,8 +57,9 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe float3 _29 = float3(gl_GlobalInvocationID); float _31 = _29.x; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; @@ -69,12 +70,12 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; - int _125 = int(gl_GlobalInvocationID.x); - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _125; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _125 + 1; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _125 + 2; + 
int _127 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; - gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _125 + 3; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3; } } diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 1609cd5d..dad35928 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -34,8 +34,8 @@ struct gl_MeshPerVertexEXT float4 vOut : TEXCOORD0; BlockOut outputs : TEXCOORD2; float4 gl_Position : SV_Position; - float4 gl_ClipDistance[1] : SV_ClipDistance; - float4 gl_CullDistance[1] : SV_CullDistance; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; }; struct gl_MeshPerPrimitiveEXT @@ -65,8 +65,9 @@ void main2(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPri { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index 0249cda0..e636453d 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -34,8 +34,8 @@ struct gl_MeshPerVertexEXT float4 vOut : TEXCOORD0; BlockOut outputs : TEXCOORD2; float4 gl_Position : SV_Position; - float4 gl_ClipDistance[1] : SV_ClipDistance; - float4 gl_CullDistance[1] : SV_CullDistance; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; }; struct gl_MeshPerPrimitiveEXT @@ -55,8 +55,9 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPe { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0 / 4][0 % 4] = 4.0f; - gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1 / 4][1 % 4] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 70aac964..4f9500fe 100644 --- a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ 
b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -53,6 +53,7 @@ void main2() gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); outputs[gl_LocalInvocationIndex].a = vec4(5.0); diff --git a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index c981a26e..4d8e3f64 100644 --- a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -43,6 +43,7 @@ void main() gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0; gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); outputs[gl_LocalInvocationIndex].a = vec4(5.0); diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index ce95729c..31af3b2d 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -9214,7 +9214,6 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); bool pending_array_enclose = false; bool dimension_flatten = false; - bool clip_cull_fixup = false; const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { AccessChainFlags mod_flags = flags; @@ -9377,14 +9376,6 @@ string 
CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!pending_array_enclose) expr += "]"; } - else if (clip_cull_fixup) - { - string idx_expr = is_literal ? convert_to_string(index) : to_enclosed_unpacked_expression(index, register_expression_read); - - expr += "[" + idx_expr + " / 4]"; - expr += "[" + idx_expr + " % 4]"; - clip_cull_fixup = false; - } // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) @@ -9444,8 +9435,6 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice else physical_type = 0; - clip_cull_fixup = (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) && backend.force_merged_mesh_block; - row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get(type->member_types[index]); } diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 86dcbd1c..f06fb47e 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -603,11 +603,25 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() break; case BuiltInClipDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. if (execution.model == ExecutionModelMeshEXT) { - uint32_t clip = (clip_distance_count + 3) / 4; - statement("float4 gl_ClipDistance", "[", clip,"] : SV_ClipDistance;"); + if (clip_distance_count > 4) + SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders."); + + if (clip_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. 
+ statement("float gl_ClipDistance[1] : SV_ClipDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. + statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;"); + } } else { @@ -619,19 +633,33 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() uint32_t semantic_index = clip / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, " : SV_ClipDistance", semantic_index, ";"); } } break; + } case BuiltInCullDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. if (execution.model == ExecutionModelMeshEXT) { - uint32_t cull = (cull_distance_count + 3) / 4; - statement("float4 gl_CullDistance", "[", cull,"] : SV_CullDistance;"); + if (cull_distance_count > 4) + SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders."); + + if (cull_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_CullDistance[1] : SV_CullDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. + statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;"); + } } else { @@ -643,12 +671,12 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() uint32_t semantic_index = cull / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, " : SV_CullDistance", semantic_index, ";"); } } break; + } case BuiltInPointSize: // If point_size_compat is enabled, just ignore PointSize. 
-- cgit v1.2.3 From b606e4f7525acf63e8f3eb041bf9a4863ac688b7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 13:41:32 +0100 Subject: HLSL: Fix test for non-block per-primitive IO. Force gl_in_out path for HLSL as well when mesh shaders are used. --- ...shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 14 +++++++------- spirv_hlsl.cpp | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh index 2f4e548d..8fbd2915 100644 --- a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh +++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -30,10 +30,10 @@ void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); GroupMemoryBarrierWithGroupSync(); SetMeshOutputCounts(24u, 8u); - gl_Position[gl_LocalInvocationIndex].x = _9[gl_LocalInvocationIndex]; - gl_Position[gl_LocalInvocationIndex].y = _9[gl_LocalInvocationIndex]; - gl_Position[gl_LocalInvocationIndex].z = _9[gl_LocalInvocationIndex]; - gl_Position[gl_LocalInvocationIndex].w = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63; gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63; @@ -43,9 +43,9 @@ void mesh_main(inout gl_MeshPerVertexEXT 
gl_MeshVerticesEXT[24], _12 _11, inout { uint _71 = gl_LocalInvocationIndex * 3u; gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u); - gl_CullPrimitiveEXT[gl_LocalInvocationIndex] = (gl_LocalInvocationIndex & 1u) != 0u; - gl_PrimitiveID[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); - gl_Layer[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); uint _81 = gl_LocalInvocationIndex ^ 2u; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81]; gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81]; diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index f06fb47e..48aabef4 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -6550,6 +6550,7 @@ string CompilerHLSL::compile() backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; + backend.force_gl_in_out_block = backend.force_merged_mesh_block; // SM 4.1 does not support precise for some reason. backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; -- cgit v1.2.3