diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-11-02 14:26:51 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-11-02 14:56:04 +0300 |
commit | e418266d25975cdae15039741487bcb3b4b972a1 (patch) | |
tree | c749e6866885cefde2fcc635029085c220208696 | |
parent | 892f65b3a6488690cb5917dff04ab6134fea2e20 (diff) |
HLSL: Some cleanups and fixes in analyze_meshlet_writes.
5 files changed, 107 insertions, 97 deletions
diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 9b9b3fa0..3ebb602a 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 gl_PrimitiveLineIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); float3 _29 = float3(gl_GlobalInvocationID); diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index 4d589bc9..f3e40fd8 100644 --- a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); float3 _29 = float3(gl_GlobalInvocationID); diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh index 8832ada1..cfe09a2d 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint2 gl_PrimitiveLineIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh index 8728a58d..0249cda0 100644 --- a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -51,7 +51,7 @@ struct gl_MeshPerPrimitiveEXT groupshared float shared_float[16]; -void mesh_main(out gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, out uint3 gl_PrimitiveTriangleIndicesEXT[22]) +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) { SetMeshOutputCounts(24u, 22u); gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index c2de3a8f..fe3ed058 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2306,76 +2306,74 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char void CompilerHLSL::analyze_meshlet_writes() { - if (get_execution_model() == ExecutionModelMeshEXT) - { - uint32_t id_per_vertex = 0; - uint32_t id_per_primitive = 0; - bool need_per_primitive = false; - - ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { - auto &type = this->get<SPIRType>(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) - { - auto flags = get_buffer_block_flags(var.self); - if (flags.get(DecorationPerPrimitiveEXT)) - id_per_primitive = var.self; - else - id_per_vertex = var.self; - } - else if (var.storage == StorageClassOutput) - { - Bitset flags; - if (block) - flags = get_buffer_block_flags(var.self); - else - flags = get_decoration_bitset(var.self); - - if (flags.get(DecorationPerPrimitiveEXT)) - need_per_primitive = true; - } - }); + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; - // If we have per-primitive outputs, and no per-primitive builtins, empty version of gl_MeshPerPrimitiveEXT will be emitted - if (id_per_primitive == 0 && need_per_primitive) + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) { - auto &execution = get_entry_point(); + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + } + }); - uint32_t op_type = ir.increase_bound_by(4); - uint32_t op_arr = op_type + 1; - uint32_t op_ptr = op_type + 2; - uint32_t op_var = op_type + 3; + // If we have per-primitive outputs, and no per-primitive builtins, + // empty version of gl_MeshPerPrimitiveEXT will be emitted + if (id_per_primitive == 0 && need_per_primitive) + { + auto &execution = get_entry_point(); - auto& type = set<SPIRType>(op_type); - type.basetype = SPIRType::Struct; - set_name(op_type, "gl_MeshPerPrimitiveEXT"); - set_decoration(op_type, DecorationBlock); - set_decoration(op_type, DecorationPerPrimitiveEXT); + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; - auto& arr = set<SPIRType>(op_arr, type); - arr.parent_type = type.self; - arr.array.push_back(execution.output_primitives); - arr.array_size_literal.push_back(true); + auto& type = set<SPIRType>(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_decoration(op_type, DecorationBlock); + set_decoration(op_type, DecorationPerPrimitiveEXT); - auto& ptr = set<SPIRType>(op_ptr, arr); - ptr.parent_type = arr.self; - ptr.pointer = true; - ptr.pointer_depth++; - ptr.storage = StorageClassOutput; - set_decoration(op_ptr, DecorationBlock); - set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + auto& arr = set<SPIRType>(op_arr, type); + arr.parent_type = type.self; + arr.array.push_back(execution.output_primitives); + arr.array_size_literal.push_back(true); - auto& var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput); - set_decoration(op_var, DecorationPerPrimitiveEXT); - set_name(op_var, "gl_MeshPrimitivesEXT"); - execution.interface_variables.push_back(var.self); + auto& ptr = set<SPIRType>(op_ptr, arr); + ptr.parent_type = arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); - id_per_primitive = op_var; - } + auto& var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput); + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, "gl_MeshPrimitivesEXT"); + execution.interface_variables.push_back(var.self); - unordered_set<uint32_t> processed_func_ids; - analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); + id_per_primitive = op_var; } + + unordered_set<uint32_t> processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); } void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, @@ -2404,67 +2402,78 @@ void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vert uint32_t inner_func_id = ops[2]; analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); auto &inner_func = get<SPIRFunction>(inner_func_id); - for (auto& iarg : inner_func.arguments) + for (auto &iarg : inner_func.arguments) { if (!iarg.alias_global_variable) continue; - bool already_declarated = false; - for (auto& arg : func.arguments) - if (arg.id==iarg.id) + + bool already_declared = false; + for (auto &arg : func.arguments) + { + if (arg.id == iarg.id) { - already_declarated=true; + already_declared = true; break; } - if (!already_declarated) + } + + if (!already_declared) { - func.arguments.push_back({ ops[0], iarg.id, iarg.read_count, iarg.write_count, true }); + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + func.arguments.push_back({ expression_type_id(iarg.id), iarg.id, + iarg.read_count, iarg.write_count, true }); } } break; } + + case OpStore: case OpLoad: case OpInBoundsAccessChain: case OpAccessChain: + case OpPtrAccessChain: + case OpInBoundsPtrAccessChain: case OpArrayLength: { - auto &type = get<SPIRType>(ops[0]); - if (type.storage==StorageClassOutput || type.storage==StorageClassTaskPayloadWorkgroupEXT) + auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]); + if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT)) { - bool already_declarated = false; - auto &var = get<SPIRVariable>(ops[2]); - auto &base_type = get<SPIRType>(var.basetype); - auto *m = ir.find_meta(var.self); - - uint32_t var_id = var.self; - if (m!=nullptr && var.storage != StorageClassTaskPayloadWorkgroupEXT && - m->decoration.builtin_type != BuiltInPrimitivePointIndicesEXT && - m->decoration.builtin_type != BuiltInPrimitiveLineIndicesEXT && - m->decoration.builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + bool already_declared = false; + auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + uint32_t var_id = var->self; + if (var->storage != StorageClassTaskPayloadWorkgroupEXT && + builtin_type != BuiltInPrimitivePointIndicesEXT && + builtin_type != BuiltInPrimitiveLineIndicesEXT && + builtin_type != BuiltInPrimitiveTriangleIndicesEXT) { - bool is_block = has_decoration(base_type.self, DecorationBlock); - auto flags = is_block ? get_buffer_block_flags(var.self) : Bitset(); - if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(var_id, DecorationPerPrimitiveEXT)) - var_id = id_per_primitive; - else - var_id = id_per_vertex; + var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex; } - for (auto& arg : func.arguments) - if (arg.id==var_id) + for (auto &arg : func.arguments) + { + if (arg.id == var_id) { - already_declarated=true; + already_declared = true; break; } - if (!already_declarated) + } + + if (!already_declared) { - if (var.storage == StorageClassTaskPayloadWorkgroupEXT) - func.arguments.push_back({ ops[0], var_id, 1u, 0u, true }); + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + uint32_t type_id = expression_type_id(var_id); + if (var->storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); else - func.arguments.push_back({ ops[0], var_id, 0u, 1u, true }); + func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); } } break; } + default: break; } @@ -6514,7 +6523,8 @@ string CompilerHLSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_interlocked_resource_usage(); - analyze_meshlet_writes(); + if (get_execution_model() == ExecutionModelMeshEXT) + analyze_meshlet_writes(); // Subpass input needs SV_Position. if (need_subpass_input) |