From 3a066cd73394b1cb2f869da2822bb6a80dafe8d7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 2 Nov 2022 13:07:32 +0100 Subject: HLSL: Handle case where PerVertex block is not used. --- ...lain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 63 +++++++++ ...lain-builtin-outputs.spv14.asm.vk.nocompat.mesh | 150 +++++++++++++++++++++ spirv_hlsl.cpp | 41 +++--- 3 files changed, 239 insertions(+), 15 deletions(-) create mode 100644 reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh create mode 100644 shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000..2f4e548d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,63 @@ +struct _12 +{ + float _m0; +}; + +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 B : TEXCOORD1; + float4 gl_Position : SV_Position; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 C : TEXCOORD3; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float _9[64]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(24u, 8u); + gl_Position[gl_LocalInvocationIndex].x = _9[gl_LocalInvocationIndex]; + gl_Position[gl_LocalInvocationIndex].y = _9[gl_LocalInvocationIndex]; + 
gl_Position[gl_LocalInvocationIndex].z = _9[gl_LocalInvocationIndex]; + gl_Position[gl_LocalInvocationIndex].w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u); + gl_CullPrimitiveEXT[gl_LocalInvocationIndex] = (gl_LocalInvocationIndex & 1u) != 0u; + gl_PrimitiveID[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); + gl_Layer[gl_LocalInvocationIndex] = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81]; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT); +} diff --git a/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000..7b38001d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: 
Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 
+%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = 
OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = 
OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index fe3ed058..86dcbd1c 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2309,6 +2309,7 @@ void CompilerHLSL::analyze_meshlet_writes() uint32_t id_per_vertex = 0; uint32_t id_per_primitive = 0; bool need_per_primitive = false; + bool need_per_vertex = false; ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { auto &type = this->get<SPIRType>(var.basetype); @@ -2331,13 +2332,16 @@ void CompilerHLSL::analyze_meshlet_writes() if (flags.get(DecorationPerPrimitiveEXT)) need_per_primitive = true; + else + need_per_vertex = true; } }); // If we have per-primitive outputs, and no per-primitive builtins, - // empty version of gl_MeshPerPrimitiveEXT will be emitted - if (id_per_primitive == 0 && need_per_primitive) - { + // empty version of gl_MeshPerPrimitiveEXT will be emitted. + // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. + + const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t { auto &execution = get_entry_point(); uint32_t op_type = ir.increase_bound_by(4); @@ -2345,32 +2349,39 @@ void CompilerHLSL::analyze_meshlet_writes() uint32_t op_ptr = op_type + 2; uint32_t op_var = op_type + 3; - auto& type = set<SPIRType>(op_type); + auto &type = set<SPIRType>(op_type); type.basetype = SPIRType::Struct; - set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_name(op_type, block_name); set_decoration(op_type, DecorationBlock); - set_decoration(op_type, DecorationPerPrimitiveEXT); + if (per_primitive) + set_decoration(op_type, DecorationPerPrimitiveEXT); - auto& arr = set<SPIRType>(op_arr, type); + auto &arr = set<SPIRType>(op_arr, type); arr.parent_type = type.self; - arr.array.push_back(execution.output_primitives); + arr.array.push_back(per_primitive ? 
execution.output_primitives : execution.output_vertices); arr.array_size_literal.push_back(true); - auto& ptr = set<SPIRType>(op_ptr, arr); + auto &ptr = set<SPIRType>(op_ptr, arr); ptr.parent_type = arr.self; ptr.pointer = true; ptr.pointer_depth++; ptr.storage = StorageClassOutput; set_decoration(op_ptr, DecorationBlock); - set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + set_name(op_ptr, block_name); - auto& var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput); - set_decoration(op_var, DecorationPerPrimitiveEXT); - set_name(op_var, "gl_MeshPrimitivesEXT"); + auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput); + if (per_primitive) + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, instance_name); execution.interface_variables.push_back(var.self); - id_per_primitive = op_var; - } + return op_var; + }; + + if (id_per_vertex == 0 && need_per_vertex) + id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false); + if (id_per_primitive == 0 && need_per_primitive) + id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true); unordered_set<uint32_t> processed_func_ids; analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); -- cgit v1.2.3