diff options
Diffstat (limited to 'spirv_hlsl.cpp')
-rw-r--r-- | spirv_hlsl.cpp | 467 |
1 files changed, 426 insertions, 41 deletions
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 65c9882b..d6d70680 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -604,33 +604,49 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() case BuiltInClipDistance: // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. - for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + const uint32_t clip = (clip_distance_count + 3) / 4; + statement("float4 gl_ClipDistance", "[", clip,"] : SV_ClipDistance;"); + } + else + { + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; - uint32_t semantic_index = clip / 4; + uint32_t semantic_index = clip / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_ClipDistance", semantic_index, ";"); + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } } break; case BuiltInCullDistance: // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. 
- for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = cull_distance_count - cull; - if (to_declare > 4) - to_declare = 4; + const uint32_t cull = (cull_distance_count + 3) / 4; + statement("float4 gl_CullDistance", "[", cull,"] : SV_CullDistance;"); + } + else + { + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; - uint32_t semantic_index = cull / 4; + uint32_t semantic_index = cull / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_CullDistance", semantic_index, ";"); + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } } break; @@ -644,14 +660,69 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); case BuiltInLayer: - if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelGeometry) - SPIRV_CROSS_THROW("Render target array index output is only supported in GS 5.0 or higher."); + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInCullPrimitiveEXT: + // per-primitive attributes handled separately + break; + + case BuiltInPrimitivePointIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitiveTriangleIndicesEXT: + // meshlet local-index buffer handled separately + break; + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void 
CompilerHLSL::emit_builtin_primitive_outputs_in_struct() +{ + active_output_builtins.for_each_bit([&](uint32_t i){ + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast<BuiltIn>(i); + switch (builtin) + { + case BuiltInLayer: + { + const ExecutionModel model = get_entry_point().model; + if (hlsl_options.shader_model < 50 || + (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) + SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 5.0 or higher."); type = "uint"; semantic = "SV_RenderTargetArrayIndex"; break; + } + + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + + case BuiltInViewportIndex: + type = "uint"; + semantic = "SV_ViewportArrayIndex"; + break; + + case BuiltInPrimitiveShadingRateKHR: + type = "uint"; + semantic = "SV_ShadingRate"; + break; + + case BuiltInCullPrimitiveEXT: + type = "bool"; + semantic = "SV_CullPrimitive"; + break; default: - SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; } if (type && semantic) @@ -981,17 +1052,25 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord } else { - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + auto decl_type = type; + if (execution.model == ExecutionModelMeshEXT) + { + decl_type.array.erase(decl_type.array.begin()); + decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); + } + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", semantic, ";"); // Structs and arrays should consume more locations. 
- uint32_t consumed_locations = type_to_consumed_locations(type); + uint32_t consumed_locations = type_to_consumed_locations(decl_type); for (uint32_t i = 0; i < consumed_locations; i++) active_locations.insert(location_number + i); } } else + { statement(variable_decl(type, name), " : ", binding, ";"); + } } std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) @@ -1071,6 +1150,18 @@ void CompilerHLSL::emit_builtin_variables() if (init_itr != builtin_to_initializer.end()) init_expr = join(" = ", to_expression(init_itr->second)); + if (get_execution_model() == ExecutionModelMeshEXT) + { + if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + return; + } + } + switch (builtin) { case BuiltInFragCoord: @@ -1171,6 +1262,13 @@ void CompilerHLSL::emit_builtin_variables() type = "uint"; break; + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInCullPrimitiveEXT: + type = "uint"; + break; + default: SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); } @@ -1365,12 +1463,12 @@ void CompilerHLSL::replace_illegal_names() "double", "DomainShader", "dword", "else", "export", "false", "float", "for", "fxgroup", "GeometryShader", "groupshared", "half", "HullShader", - "if", "in", "inline", "inout", "InputPatch", "int", "interface", + "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", "line", "lineadj", "linear", "LineStream", "matrix", "min16float", "min10float", "min16int", "min16uint", "namespace", "nointerpolation", 
"noperspective", "NULL", "out", "OutputPatch", - "packoffset", "pass", "pixelfragment", "PixelShader", "point", + "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", "PointStream", "precise", "RasterizerState", "RenderTargetView", "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", @@ -1381,7 +1479,7 @@ void CompilerHLSL::replace_illegal_names() "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", - "vector", "vertexfragment", "VertexShader", "void", "volatile", "while", + "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", }; CompilerGLSL::replace_illegal_names(keywords); @@ -1415,6 +1513,19 @@ void CompilerHLSL::emit_resources() replace_illegal_names(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + emit_specialization_constants_and_structs(); emit_composite_constants(); @@ -1487,18 +1598,21 @@ void CompilerHLSL::emit_resources() // Emit builtin input and output variables here. 
emit_builtin_variables(); - ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { - auto &type = this->get<SPIRType>(var.basetype); + if (execution.model != ExecutionModelMeshEXT) + { + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); - if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - // Builtin variables are handled separately. - emit_interface_block_globally(var); - emitted = true; - } - }); + if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + } if (emitted) statement(""); @@ -1612,23 +1726,50 @@ void CompilerHLSL::emit_resources() statement(""); } + const bool is_mesh_shader = (execution.model == ExecutionModelMeshEXT); if (!output_variables.empty() || !active_output_builtins.empty()) { - require_output = true; - statement("struct SPIRV_Cross_Output"); + sort(output_variables.begin(), output_variables.end(), variable_compare); + require_output = !is_mesh_shader; + statement(is_mesh_shader ? 
"struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); begin_scope(); - sort(output_variables.begin(), output_variables.end(), variable_compare); for (auto &var : output_variables) { - if (var.block) + if (is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index!=0) + continue; + if (var.block && !is_mesh_shader) emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); else emit_interface_block_in_struct(*var.var, active_outputs); } emit_builtin_outputs_in_struct(); + if (!is_mesh_shader) + emit_builtin_primitive_outputs_in_struct(); end_scope_decl(); statement(""); + + if (is_mesh_shader) + { + statement("struct gl_MeshPerPrimitiveEXT"); + begin_scope(); + for (auto &var : output_variables) + { + if (!is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index!=0) + continue; + if (var.block && !is_mesh_shader) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); + else + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + } } // Global variables. 
@@ -1642,6 +1783,9 @@ void CompilerHLSL::emit_resources() { if (!variable_is_lut(var)) { + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + continue; + add_resource_name(var.self); const char *storage = nullptr; @@ -2164,6 +2308,176 @@ void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char } } +void CompilerHLSL::analyze_meshlet_writes() +{ + if (get_execution_model() == ExecutionModelMeshEXT) + { + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t id, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) + { + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + } + }); + + // If we have per-primitive outputs, and no per-primitive builtins, empty version of gl_MeshPerPrimitiveEXT will be emitted + if (id_per_primitive == 0 && need_per_primitive) + { + auto &execution = get_entry_point(); + + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; + + auto& type = set<SPIRType>(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, "gl_MeshPerPrimitiveEXT"); + set_decoration(op_type, DecorationBlock); + set_decoration(op_type, DecorationPerPrimitiveEXT); + + auto& arr = set<SPIRType>(op_arr, type); + arr.parent_type = type.self; + arr.array.push_back(execution.output_primitives); + arr.array_size_literal.push_back(true); + + auto& ptr = 
set<SPIRType>(op_ptr, arr); + ptr.parent_type = arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, "gl_MeshPerPrimitiveEXT"); + + auto& var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput); + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, "gl_MeshPrimitivesEXT"); + execution.interface_variables.push_back(var.self); + + id_per_primitive = op_var; + } + + unordered_set<uint32_t> processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); + } +} + +void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, const uint32_t id_per_vertex, const uint32_t id_per_primitive, + std::unordered_set<uint32_t>& processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + { + return; + } + processed_func_ids.insert(func_id); + + auto &func = get<SPIRFunction>(func_id); + // Recursively establish global args added to functions on which we depend. 
+ for (auto& block : func.blocks) + { + auto &b = get<SPIRBlock>(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + switch (op) + { + case OpFunctionCall: + { + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); + auto &inner_func = get<SPIRFunction>(inner_func_id); + for (auto& iarg : inner_func.arguments) + { + if (!iarg.alias_global_variable) + continue; + bool already_declarated = false; + for (auto& arg : func.arguments) + if (arg.id==iarg.id) + { + already_declarated=true; + break; + } + if (!already_declarated) + { + func.arguments.push_back({ ops[0], iarg.id, iarg.read_count, iarg.write_count, true }); + } + } + break; + } + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpArrayLength: + { + auto &type = get<SPIRType>(ops[0]); + if (type.storage==StorageClassOutput || type.storage==StorageClassTaskPayloadWorkgroupEXT) + { + bool already_declarated = false; + auto &var = get<SPIRVariable>(ops[2]); + auto &base_type = get<SPIRType>(var.basetype); + auto *m = ir.find_meta(var.self); + + uint32_t var_id = var.self; + if (m!=nullptr && var.storage != StorageClassTaskPayloadWorkgroupEXT && + m->decoration.builtin_type != BuiltInPrimitivePointIndicesEXT && + m->decoration.builtin_type != BuiltInPrimitiveLineIndicesEXT && + m->decoration.builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + { + bool block = has_decoration(base_type.self, DecorationBlock); + auto flags = block ? 
get_buffer_block_flags(var.self) : Bitset(); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(var_id, DecorationPerPrimitiveEXT)) + var_id = id_per_primitive; + else + var_id = id_per_vertex; + } + + for (auto& arg : func.arguments) + if (arg.id==var_id) + { + already_declarated=true; + break; + } + if (!already_declarated) + { + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ ops[0], var_id, 1u, 0u, true }); + else + func.arguments.push_back({ ops[0], var_id, 0u, 1u, true }); + } + } + break; + } + default: + break; + } + } + } +} + string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) { auto &flags = get_member_decoration_bitset(type.self, index); @@ -2459,6 +2773,8 @@ string CompilerHLSL::get_inner_entry_point_name() const return "frag_main"; else if (execution.model == ExecutionModelGLCompute) return "comp_main"; + else if (execution.model == ExecutionModelMeshEXT) + return "mesh_main"; else SPIRV_CROSS_THROW("Unsupported execution model."); } @@ -2572,8 +2888,57 @@ void CompilerHLSL::emit_hlsl_entry_point() switch (execution.model) { + case ExecutionModelMeshEXT: + case ExecutionModelMeshNV: case ExecutionModelGLCompute: { + if (execution.model == ExecutionModelMeshEXT) + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + statement("[outputtopology(\"triangle\")]"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + statement("[outputtopology(\"line\")]"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); + auto& fn = get<SPIRFunction>(ir.default_entry_point); + for (auto& arg : fn.arguments) + { + auto &var = get<SPIRVariable>(arg.id); + auto &base_type = get<SPIRType>(var.basetype); + bool block = has_decoration(base_type.self, DecorationBlock); + if (var.storage==StorageClassTaskPayloadWorkgroupEXT) + { + arguments.push_back("in payload " + variable_decl(var)); + } + else if 
(block) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) + { + arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + + std::to_string(execution.output_vertices) + "]"); + } + } + else + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + { + arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + } + } + } SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); @@ -2795,9 +3160,21 @@ void CompilerHLSL::emit_hlsl_entry_point() // Run the shader. if (execution.model == ExecutionModelVertex || execution.model == ExecutionModelFragment || - execution.model == ExecutionModelGLCompute) + execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelMeshEXT) { - statement(get_inner_entry_point_name(), "();"); + SmallVector<string> arglist; + auto& fn = get<SPIRFunction>(ir.default_entry_point); + for (auto& arg : fn.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. 
+ if (skip_argument(arg.type)) + continue; + + arglist.push_back(to_expression(arg.id,false)); + } + statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); } else SPIRV_CROSS_THROW("Unsupported shader stage."); @@ -5926,6 +6303,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); break; } + case OpSetMeshOutputsEXT: + { + statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; @@ -6126,6 +6509,7 @@ string CompilerHLSL::compile() backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; + backend.force_merged_mesh_block = (get_execution_model() == ExecutionModelMeshEXT); // SM 4.1 does not support precise for some reason. backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; @@ -6138,6 +6522,7 @@ string CompilerHLSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_interlocked_resource_usage(); + analyze_meshlet_writes(); // Subpass input needs SV_Position. if (need_subpass_input) |