diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-03-30 19:51:32 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-30 19:51:32 +0300 |
commit | 76b6e8a4e713124fd731710ccaa5e17f7bde5b7b (patch) | |
tree | 6f5302e05be16952247161715874ae933b804559 | |
parent | 53cf014c04cfa3d1509e3bc474fcce913b625f2e (diff) | |
parent | 5bbf6a4df3ddad372e8131c637cf2f38ddd256db (diff) |
Merge pull request #104 from HansKristian-Work/modern-cbuffer-layout
Implement modern CBV layout
88 files changed, 5252 insertions, 1339 deletions
diff --git a/build_dxc.sh b/build_dxc.sh index 411ddea..0db1b5b 100755 --- a/build_dxc.sh +++ b/build_dxc.sh @@ -13,6 +13,6 @@ fi echo "Building DXC." mkdir -p external/dxc-build cd external/dxc-build -cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -DCMAKE_INSTALL_PREFIX=output $(cat ../DirectXShaderCompiler/utils/cmake-predefined-config-params) -G Ninja -DSPIRV_WERROR=OFF -cmake --build . --config $PROFILE --target install ${NPROC} +cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -C ../DirectXShaderCompiler/cmake/caches/PredefinedParams.cmake -G Ninja -DSPIRV_WERROR=OFF +cmake --build . --config $PROFILE ${NPROC} diff --git a/cfg_structurizer.cpp b/cfg_structurizer.cpp index d7b0ee1..c7497ae 100644 --- a/cfg_structurizer.cpp +++ b/cfg_structurizer.cpp @@ -1311,7 +1311,9 @@ void CFGStructurizer::insert_phi(PHINode &node) merge_phi.type_id = module.get_builder().makeBoolType(); Operation *op = module.allocate_op(spv::OpSelect, module.allocate_id(), phi.type_id); - op->add_ids({ merge_phi.id, dominated_incoming->id, frontier_phi.id }); + op->add_id(merge_phi.id); + op->add_id(dominated_incoming->id); + op->add_id(frontier_phi.id); dominated_incoming->block->ir.operations.push_back(op); dominated_incoming->id = op->id; diff --git a/checkout_dxc.sh b/checkout_dxc.sh index 441c832..295e47e 100755 --- a/checkout_dxc.sh +++ b/checkout_dxc.sh @@ -1,13 +1,6 @@ #!/bin/bash -# Commit before GatherCmp regression -DXC_REV=19360a8fa63ee29925f59328c261c1c920402bfd - -if [ -z $PROTOCOL ]; then - PROTOCOL=git -fi - -echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0." +DXC_REV=2dc067b561f17d09d8012a1ded05bf0f6253fea5 if [ -d external/DirectXShaderCompiler ]; then echo "Updating DirectXShaderCompiler to revision $DXC_REV." @@ -19,7 +12,7 @@ else echo "Cloning DirectXShaderCompiler revision $DXC_REV." mkdir -p external cd external - git clone $PROTOCOL://github.com/Microsoft/DirectXShaderCompiler.git + git clone https://github.com/Microsoft/DirectXShaderCompiler.git cd DirectXShaderCompiler git checkout $DXC_REV git submodule update --init diff --git a/dxil_converter.cpp b/dxil_converter.cpp index b99b225..f36052a 100644 --- a/dxil_converter.cpp +++ b/dxil_converter.cpp @@ -280,17 +280,58 @@ Converter::Impl::create_bindless_heap_variable_alias_group(const BindlessInfo &b return decls; } +spv::Id Converter::Impl::create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name, + unsigned cbv_size) +{ + auto &builder = spirv_module.get_builder(); + + unsigned element_size = raw_width_to_bits(raw_decl.width) * raw_vecsize_to_vecsize(raw_decl.vecsize) / 8; + unsigned array_length = (cbv_size + element_size - 1) / element_size; + + // It seems like we will have to bitcast ourselves away from vec4 here after loading. + spv::Id size_id = builder.makeUintConstant(array_length, false); + spv::Id element_type = builder.makeFloatType(raw_width_to_bits(raw_decl.width)); + if (raw_decl.vecsize != RawVecSize::V1) + element_type = builder.makeVectorType(element_type, raw_vecsize_to_vecsize(raw_decl.vecsize)); + spv::Id member_array_type = builder.makeArrayType(element_type, size_id, element_size); + + builder.addDecoration(member_array_type, spv::DecorationArrayStride, element_size); + + spv::Id type_id = get_struct_type({ member_array_type }, name.c_str()); + builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0); + builder.addDecoration(type_id, spv::DecorationBlock); + + if (range_size != 1) + { + if (range_size == ~0u) + type_id = builder.makeRuntimeArray(type_id); + else + type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0); + } + + if (raw_decl.width == RawWidth::B16) + builder.addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess); + + return create_variable(spv::StorageClassUniform, + type_id, name.empty() ? nullptr : name.c_str()); +} + spv::Id Converter::Impl::create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name) { spv::Id type_id = build_ssbo_runtime_array_type(*this, raw_width_to_bits(raw_decl.width), raw_vecsize_to_vecsize(raw_decl.vecsize), range_size, "SSBO"); + + if (raw_decl.width == RawWidth::B16) + builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); + return create_variable(spv::StorageClassStorageBuffer, type_id, name.empty() ? nullptr : name.c_str()); } -Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_variable_alias_group( - const Vector<RawDeclaration> &raw_decls, uint32_t range_size, const String &name) +Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_raw_ssbo_variable_alias_group( + const Vector<RawDeclaration> &raw_decls, + uint32_t range_size, const String &name) { Vector<RawDeclarationVariable> group; group.reserve(raw_decls.size()); @@ -299,6 +340,17 @@ Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_variable return group; } +Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_ubo_variable_alias_group( + const Vector<RawDeclaration> &raw_decls, + uint32_t range_size, const String &name, unsigned cbv_size) +{ + Vector<RawDeclarationVariable> group; + group.reserve(raw_decls.size()); + for (auto &decl : raw_decls) + group.push_back({ decl, create_ubo_variable(decl, range_size, name, cbv_size) }); + return group; +} + spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) { auto itr = std::find_if(bindless_resources.begin(), bindless_resources.end(), [&](const BindlessResource &resource) { @@ -371,6 +423,8 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) type_id = build_ssbo_runtime_array_type(*this, bits, raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO"); storage = spv::StorageClassStorageBuffer; + if (bits == 16) + builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); } else { @@ -433,6 +487,8 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) type_id = build_ssbo_runtime_array_type(*this, bits, raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO"); storage = spv::StorageClassStorageBuffer; + if (bits == 16) + builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); } else { @@ -463,9 +519,29 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) case DXIL::ResourceType::CBV: { - type_id = builder().makeVectorType(builder().makeFloatType(32), 4); - type_id = builder().makeArrayType(type_id, builder().makeUintConstant(64 * 1024 / 16), 16); - builder().addDecoration(type_id, spv::DecorationArrayStride, 16); + unsigned bits; + if (info.component == DXIL::ComponentType::U16) + bits = 16; + else if (info.component == DXIL::ComponentType::U32) + bits = 32; + else if (info.component == DXIL::ComponentType::U64) + bits = 64; + else + { + LOGE("Invalid component type for UBO.\n"); + return 0; + } + + unsigned vecsize = raw_vecsize_to_vecsize(info.raw_vecsize); + type_id = builder().makeFloatType(bits); + if (vecsize > 1) + type_id = builder().makeVectorType(type_id, vecsize); + + unsigned element_size = (bits / 8) * vecsize; + unsigned num_elements = 0x10000 / element_size; + + type_id = builder().makeArrayType(type_id, builder().makeUintConstant(num_elements), element_size); + builder().addDecoration(type_id, spv::DecorationArrayStride, element_size); type_id = get_struct_type({ type_id }, "BindlessCBV"); builder().addDecoration(type_id, spv::DecorationBlock); if (options.bindless_cbv_ssbo_emulation) @@ -473,6 +549,14 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0); type_id = builder().makeRuntimeArray(type_id); storage = options.bindless_cbv_ssbo_emulation ? spv::StorageClassStorageBuffer : spv::StorageClassUniform; + + if (bits == 16) + { + if (options.bindless_cbv_ssbo_emulation) + builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); + else + builder().addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess); + } break; } @@ -740,33 +824,52 @@ bool Converter::Impl::analyze_aliased_access(const AccessTracking &tracking, if (raw_access_16bit && descriptor_type != VulkanDescriptorType::SSBO && + descriptor_type != VulkanDescriptorType::UBO && descriptor_type != VulkanDescriptorType::BufferDeviceAddress) { - LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO or BDA.\n"); + LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n"); return false; } if (raw_access_64bit && descriptor_type != VulkanDescriptorType::SSBO && + descriptor_type != VulkanDescriptorType::UBO && descriptor_type != VulkanDescriptorType::BufferDeviceAddress) { - LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO or BDA.\n"); + LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n"); return false; } - // Only SSBO can be reclared with different types. + // Only SSBO and UBO can be reclared with different types. // Typed descriptors are always scalar. - aliased_access.requires_alias_decoration = descriptor_type == VulkanDescriptorType::SSBO && + aliased_access.requires_alias_decoration = (descriptor_type == VulkanDescriptorType::SSBO || + descriptor_type == VulkanDescriptorType::UBO) && aliased_access.raw_declarations.size() > 1; - // If we only emit one 16-bit or 64-bit SSBO, we need to override the component type of that meta declaration. - aliased_access.override_primary_component_types = descriptor_type == VulkanDescriptorType::SSBO && + // If we only emit one 16-bit or 64-bit SSBO/UBO, we need to override the component type of that meta declaration. + aliased_access.override_primary_component_types = (descriptor_type == VulkanDescriptorType::SSBO || + descriptor_type == VulkanDescriptorType::UBO) && aliased_access.raw_declarations.size() == 1; // If the SSBO is never actually accessed (UAV counters for example), fudge the default type. if (descriptor_type == VulkanDescriptorType::SSBO && aliased_access.raw_declarations.empty()) aliased_access.raw_declarations.push_back({ RawWidth::B32, RawVecSize::V1 }); + // If the CBV is never actually accessed, fudge the default legacy CBV type. + if (descriptor_type == VulkanDescriptorType::UBO && aliased_access.raw_declarations.empty()) + aliased_access.raw_declarations.push_back({ RawWidth::B32, RawVecSize::V4 }); + + // Safeguard against unused variables where we never end up setting any primary component type. + if ((descriptor_type == VulkanDescriptorType::SSBO || + descriptor_type == VulkanDescriptorType::UBO) && + aliased_access.raw_declarations.size() == 1) + { + aliased_access.primary_component_type = + raw_width_to_component_type(aliased_access.raw_declarations.front().width); + aliased_access.primary_raw_vecsize = aliased_access.raw_declarations.front().vecsize; + aliased_access.override_primary_component_types = true; + } + return true; } @@ -1049,7 +1152,7 @@ bool Converter::Impl::emit_srvs(const llvm::MDNode *srvs) if (type_id) ref.var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str()); else if (aliased_access.requires_alias_decoration) - ref.var_alias_group = create_variable_alias_group(aliased_access.raw_declarations, range_size, name); + ref.var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name); else { assert(aliased_access.raw_declarations.size() == 1); @@ -1596,7 +1699,7 @@ bool Converter::Impl::emit_uavs(const llvm::MDNode *uavs) storage = spv::StorageClassStorageBuffer; if (aliased_access.requires_alias_decoration) - var_alias_group = create_variable_alias_group(aliased_access.raw_declarations, range_size, name); + var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name); else { assert(aliased_access.raw_declarations.size() == 1); @@ -1756,6 +1859,11 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) if (need_resource_remapping && resource_mapping_iface && !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding)) return false; + auto &access_meta = cbv_access_tracking[index]; + AliasedAccess aliased_access; + if (!analyze_aliased_access(access_meta, VulkanDescriptorType::UBO, aliased_access)) + return false; + cbv_index_to_reference.resize(std::max(cbv_index_to_reference.size(), size_t(index + 1))); if (range_size != 1) @@ -1777,6 +1885,8 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) bindless_info.kind = DXIL::ResourceKind::CBuffer; bindless_info.desc_set = vulkan_binding.buffer.descriptor_set; bindless_info.binding = vulkan_binding.buffer.binding; + bindless_info.component = aliased_access.primary_component_type; + bindless_info.raw_vecsize = aliased_access.primary_raw_vecsize; if (local_root_signature_entry >= 0) { @@ -1789,8 +1899,6 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) return false; } - spv::Id var_id = create_bindless_heap_variable(bindless_info); - uint32_t heap_offset = local_table_entry.offset_in_heap; heap_offset += bind_register - local_table_entry.register_index; @@ -1801,7 +1909,17 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) } auto &ref = cbv_index_to_reference[index]; - ref.var_id = var_id; + + if (aliased_access.requires_alias_decoration) + { + ref.var_alias_group = create_bindless_heap_variable_alias_group( + bindless_info, aliased_access.raw_declarations); + } + else + { + ref.var_id = create_bindless_heap_variable(bindless_info); + } + ref.base_offset = heap_offset; ref.base_resource_is_array = range_size != 1; ref.bindless = true; @@ -1851,8 +1969,6 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) } else if (vulkan_binding.buffer.bindless.use_heap) { - spv::Id var_id = create_bindless_heap_variable(bindless_info); - // DXIL already applies the t# register offset to any dynamic index, so counteract that here. // The exception is with lib_* where we access resources by variable, not through // createResource() >_____<. @@ -1861,7 +1977,17 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) heap_offset -= bind_register; auto &ref = cbv_index_to_reference[index]; - ref.var_id = var_id; + + if (aliased_access.requires_alias_decoration) + { + ref.var_alias_group = create_bindless_heap_variable_alias_group( + bindless_info, aliased_access.raw_declarations); + } + else + { + ref.var_id = create_bindless_heap_variable(bindless_info); + } + ref.push_constant_member = vulkan_binding.buffer.root_constant_index + root_descriptor_count; ref.base_offset = heap_offset; ref.base_resource_is_array = range_size != 1; @@ -1870,35 +1996,47 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs) } else { - unsigned vec4_length = (cbv_size + 15) / 16; - - // It seems like we will have to bitcast ourselves away from vec4 here after loading. - spv::Id member_array_type = builder.makeArrayType(builder.makeVectorType(builder.makeFloatType(32), 4), - builder.makeUintConstant(vec4_length, false), 16); - - builder.addDecoration(member_array_type, spv::DecorationArrayStride, 16); - - spv::Id type_id = get_struct_type({ member_array_type }, name.c_str()); - builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0); - builder.addDecoration(type_id, spv::DecorationBlock); + auto &ref = cbv_index_to_reference[index]; - if (range_size != 1) + if (aliased_access.requires_alias_decoration) { - if (range_size == ~0u) - type_id = builder.makeRuntimeArray(type_id); - else - type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0); + ref.var_alias_group = create_ubo_variable_alias_group( + aliased_access.raw_declarations, range_size, name, cbv_size); + } + else + { + assert(aliased_access.raw_declarations.size() == 1); + ref.var_id = create_ubo_variable(aliased_access.raw_declarations.front(), range_size, name, cbv_size); } - spv::Id var_id = create_variable(spv::StorageClassUniform, type_id, name.empty() ? nullptr : name.c_str()); - - builder.addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set); - builder.addDecoration(var_id, spv::DecorationBinding, vulkan_binding.buffer.binding); - - auto &ref = cbv_index_to_reference[index]; - ref.var_id = var_id; ref.base_resource_is_array = range_size != 1; ref.resource_kind = DXIL::ResourceKind::CBuffer; + + if (ref.var_id) + { + auto &meta = handle_to_resource_meta[ref.var_id]; + meta = {}; + meta.kind = ref.resource_kind; + meta.var_id = ref.var_id; + meta.storage = spv::StorageClassUniform; + meta.component_type = aliased_access.primary_component_type; + meta.raw_component_vecsize = aliased_access.primary_raw_vecsize; + builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set); + builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding); + } + + for (auto &var : ref.var_alias_group) + { + auto &meta = handle_to_resource_meta[var.var_id]; + meta = {}; + meta.kind = ref.resource_kind; + meta.var_id = var.var_id; + meta.storage = spv::StorageClassUniform; + meta.component_type = raw_width_to_component_type(var.declaration.width); + meta.raw_component_vecsize = var.declaration.vecsize; + builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set); + builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding); + } } } @@ -2194,9 +2332,10 @@ bool Converter::Impl::emit_shader_record_buffer() { case LocalRootSignatureType::Constants: { + spv::Id array_size_id = builder.makeUintConstant(elem.constants.num_words); + spv::Id u32_type = builder.makeUintType(32); spv::Id member_type_id = - builder.makeArrayType(builder.makeUintType(32), - builder.makeUintConstant(elem.constants.num_words), 4); + builder.makeArrayType(u32_type, array_size_id, 4); builder.addDecoration(member_type_id, spv::DecorationArrayStride, 4); member_types.push_back(member_type_id); offsets.push_back(current_offset); @@ -2441,7 +2580,8 @@ bool Converter::Impl::emit_global_heaps() auto actual_component_type = DXIL::ComponentType::U32; info.format = spv::ImageFormatUnknown; - if (annotation->resource_kind != DXIL::ResourceKind::RawBuffer && + if (annotation->resource_type != DXIL::ResourceType::CBV && + annotation->resource_kind != DXIL::ResourceKind::RawBuffer && annotation->resource_kind != DXIL::ResourceKind::StructuredBuffer) { actual_component_type = normalize_component_type(annotation->component_type); @@ -2529,6 +2669,7 @@ bool Converter::Impl::emit_global_heaps() return false; } vulkan_binding = vulkan_cbv_binding.buffer; + vulkan_binding.descriptor_type = VulkanDescriptorType::UBO; break; } @@ -3105,10 +3246,14 @@ spv::Id Converter::Impl::get_type_id(const llvm::Type *type) } case llvm::Type::TypeID::ArrayTyID: + { if (type->getArrayNumElements() == 0) return 0; - return builder.makeArrayType(get_type_id(type->getArrayElementType()), - builder.makeUintConstant(type->getArrayNumElements(), false), 0); + + spv::Id array_size_id = builder.makeUintConstant(type->getArrayNumElements()); + spv::Id element_type_id = get_type_id(type->getArrayElementType()); + return builder.makeArrayType(element_type_id, array_size_id, 0); + } case llvm::Type::TypeID::StructTyID: { diff --git a/dxil_converter.hpp b/dxil_converter.hpp index fa0e487..10210a4 100644 --- a/dxil_converter.hpp +++ b/dxil_converter.hpp @@ -89,7 +89,8 @@ enum class VulkanDescriptorType : unsigned Identity = 0, SSBO = 1, TexelBuffer = 2, - BufferDeviceAddress = 3 + BufferDeviceAddress = 3, + UBO = 4 }; struct VulkanBinding diff --git a/opcodes/converter_impl.hpp b/opcodes/converter_impl.hpp index 6008d9a..5d8b99c 100644 --- a/opcodes/converter_impl.hpp +++ b/opcodes/converter_impl.hpp @@ -191,8 +191,10 @@ struct Converter::Impl bool has_atomic_64bit = false; bool raw_access_buffer_declarations[unsigned(RawWidth::Count)][unsigned(RawVecSize::Count)] = {}; }; + UnorderedMap<uint32_t, AccessTracking> cbv_access_tracking; UnorderedMap<uint32_t, AccessTracking> srv_access_tracking; UnorderedMap<uint32_t, AccessTracking> uav_access_tracking; + UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_cbv_resource_index_map; UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_srv_resource_index_map; UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_uav_resource_index_map; UnorderedSet<const llvm::Value *> llvm_values_using_update_counter; @@ -514,10 +516,15 @@ struct Converter::Impl spv::Id create_bindless_heap_variable(const BindlessInfo &info); Vector<RawDeclarationVariable> create_bindless_heap_variable_alias_group( - const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls); - Vector<RawDeclarationVariable> create_variable_alias_group( - const Vector<RawDeclaration> &raw_decls, uint32_t range_size, const String &name); + const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls); + Vector<RawDeclarationVariable> create_raw_ssbo_variable_alias_group( + const Vector<RawDeclaration> &raw_decls, + uint32_t range_size, const String &name); + Vector<RawDeclarationVariable> create_ubo_variable_alias_group( + const Vector<RawDeclaration> &raw_decls, + uint32_t range_size, const String &name, unsigned cbv_size); spv::Id create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name); + spv::Id create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name, unsigned cbv_size); struct BindlessResource { diff --git a/opcodes/dxil/dxil_arithmetic.cpp b/opcodes/dxil/dxil_arithmetic.cpp index 33ab03d..0f9b057 100644 --- a/opcodes/dxil/dxil_arithmetic.cpp +++ b/opcodes/dxil/dxil_arithmetic.cpp @@ -27,12 +27,13 @@ bool emit_imad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct // FIXME: Do we need to deal with intermediate mul overflow here somehow? Operation *mul = impl.allocate(spv::OpIMul, impl.get_type_id(instruction->getType())); - mul->add_ids( - { impl.get_id_for_value(instruction->getOperand(1)), impl.get_id_for_value(instruction->getOperand(2)) }); + mul->add_id(impl.get_id_for_value(instruction->getOperand(1))); + mul->add_id(impl.get_id_for_value(instruction->getOperand(2))); impl.add(mul); Operation *add = impl.allocate(spv::OpIAdd, instruction); - add->add_ids({ mul->id, impl.get_id_for_value(instruction->getOperand(3)) }); + add->add_id(mul->id); + add->add_id(impl.get_id_for_value(instruction->getOperand(3))); impl.add(add); return true; } @@ -68,11 +69,8 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct Operation *op = impl.allocate(spv::OpExtInst, instruction); op->add_id(impl.glsl_std450_ext); op->add_literal(GLSLstd450Fma); - op->add_ids({ - impl.get_id_for_value(instruction->getOperand(1)), - impl.get_id_for_value(instruction->getOperand(2)), - impl.get_id_for_value(instruction->getOperand(3)), - }); + for (unsigned i = 1; i < 4; i++) + op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); } @@ -144,10 +142,8 @@ bool emit_dxil_std450_binary_instruction(GLSLstd450 opcode, Converter::Impl &imp Operation *op = impl.allocate(spv::OpExtInst, instruction); op->add_id(impl.glsl_std450_ext); op->add_literal(opcode); - op->add_ids({ - impl.get_id_for_value(instruction->getOperand(1)), - impl.get_id_for_value(instruction->getOperand(2)) - }); + for (unsigned i = 1; i < 3; i++) + op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); return true; @@ -162,9 +158,9 @@ bool emit_dxil_std450_trinary_instruction(GLSLstd450 opcode, Converter::Impl &im Operation *op = impl.allocate(spv::OpExtInst, instruction); op->add_id(impl.glsl_std450_ext); op->add_literal(opcode); - op->add_ids({ impl.get_id_for_value(instruction->getOperand(1)), - impl.get_id_for_value(instruction->getOperand(2)), - impl.get_id_for_value(instruction->getOperand(3)) }); + + for (unsigned i = 1; i < 4; i++) + op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); return true; @@ -283,11 +279,8 @@ static spv::Id clamp_bitfield_width(Converter::Impl &impl, spv::Id offset, spv:: static spv::Id mask_input(Converter::Impl &impl, const llvm::Value *value) { Operation *op = impl.allocate(spv::OpBitwiseAnd, impl.get_type_id(value->getType())); - op->add_ids({ - impl.get_id_for_value(value), - impl.builder().makeUintConstant(31), - }); - + op->add_id(impl.get_id_for_value(value)); + op->add_id(impl.builder().makeUintConstant(31)); impl.add(op); return op->id; } diff --git a/opcodes/dxil/dxil_buffer.cpp b/opcodes/dxil/dxil_buffer.cpp index 0a5b60a..cf0aa4a 100644 --- a/opcodes/dxil/dxil_buffer.cpp +++ b/opcodes/dxil/dxil_buffer.cpp @@ -24,153 +24,6 @@ namespace dxil_spv { -static spv::Id build_index_divider_fallback(Converter::Impl &impl, const llvm::Value *offset, unsigned addr_shift_log2) -{ - auto &builder = impl.builder(); - Operation *op = impl.allocate(spv::OpShiftRightLogical, builder.makeUintType(32)); - op->add_ids({ impl.get_id_for_value(offset), builder.makeUintConstant(addr_shift_log2) }); - impl.add(op); - return op->id; -} - -bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride, - uint32_t addr_shift_log2, unsigned vecsize, - RawBufferAccessSplit &split) -{ - unsigned element_size = (1u << addr_shift_log2) * vecsize; - - // Base case first, a constant value. - if (const auto *const_addr = llvm::dyn_cast<llvm::ConstantInt>(index)) - { - int64_t constant_offset = const_addr->getUniqueInteger().getSExtValue(); - constant_offset *= stride; - - // Always pass scalar constant dividers through. - // Building a fallback divider helps nothing. - if (vecsize == 1 || constant_offset % int(element_size) == 0) - { - split = {}; - split.bias = constant_offset / element_size; - return true; - } - else - return false; - } - - const llvm::ConstantInt *scale = nullptr; - const llvm::ConstantInt *bias = nullptr; - bool scale_log2 = false; - bool bias_is_add = false; - bool bias_negate = false; - - while (!scale && llvm::isa<llvm::BinaryOperator>(index)) - { - auto *binop = llvm::cast<llvm::BinaryOperator>(index); - auto *lhs = binop->getOperand(0); - auto *rhs = binop->getOperand(1); - if (!bias && (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add || - binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub || - binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Or || - binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Xor)) - { - if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs)) - { - bias = const_lhs; - index = rhs; - } - else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) - { - bias = const_rhs; - index = lhs; - } - else - break; - - // DXC tends to be emit shift + or in some cases. - // We can turn this back into mul + add in most cases. - bias_negate = binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub; - bias_is_add = - binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add || - bias_negate; - } - else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Shl) - { - if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) - { - scale = const_rhs; - index = lhs; - } - else - break; - - scale_log2 = true; - } - else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Mul) - { - if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs)) - { - scale = const_lhs; - index = rhs; - } - else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) - { - scale = const_rhs; - index = lhs; - } - else - break; - - scale_log2 = false; - } - else - break; - } - - if (!scale && !bias) - { - // We cannot split anything, but we might be able to vectorize if the stride alone carries us. - if (stride % element_size == 0) - { - split = {}; - split.scale = stride / element_size; - split.dynamic_index = index; - return true; - } - else - return false; - } - - uint64_t scale_factor = 1; - if (scale) - scale_factor = scale->getUniqueInteger().getZExtValue(); - if (scale_log2) - scale_factor = 1ull << scale_factor; - - int64_t bias_factor = 0; - if (bias) - bias_factor = bias->getUniqueInteger().getSExtValue(); - if (bias_negate) - bias_factor = -bias_factor; - - // If there is no bit overlap between scale_factor and bias_factor - // then the bitwise OR is equivalent to add. - if (!bias_is_add && (scale_factor & bias_factor) != 0) - return false; - - scale_factor *= stride; - bias_factor *= stride; - - if (scale_factor % element_size == 0 && bias_factor % element_size == 0 && index) - { - split.scale = scale_factor / element_size; - split.bias = bias_factor / int(element_size); - split.dynamic_index = index; - return true; - } - else - return false; -} - bool raw_access_byte_address_can_vectorize(Converter::Impl &impl, const llvm::Type *type, const llvm::Value *byte_offset, unsigned vecsize) @@ -280,61 +133,6 @@ static spv::Id build_accumulate_offsets(Converter::Impl &impl, const spv::Id *id return accumulated_id; } -static spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset, - unsigned addr_shift_log2, unsigned vecsize) -{ - auto &builder = impl.builder(); - // Attempt to do trivial constant folding to make output a little more sensible to read. - // Try to find an expression for offset which is "constant0 * offset + constant1", - // where constant0 and constant1 are aligned with addr_shift_log2. - - spv::Id index_id; - RawBufferAccessSplit split = {}; - - if (extract_raw_buffer_access_split(offset, 1, addr_shift_log2, vecsize, split)) - { - if (!split.dynamic_index) - return builder.makeUintConstant(split.bias); - - spv::Op bias_opcode = split.bias > 0 ? spv::OpIAdd : spv::OpISub; - if (bias_opcode == spv::OpISub) - split.bias = -split.bias; - - spv::Id scaled_id; - if (split.scale != 1) - { - Operation *scale_op = impl.allocate(spv::OpIMul, builder.makeUintType(32)); - scale_op->add_id(impl.get_id_for_value(split.dynamic_index)); - scale_op->add_id(builder.makeUintConstant(split.scale)); - impl.add(scale_op); - scaled_id = scale_op->id; - } - else - scaled_id = impl.get_id_for_value(split.dynamic_index); - - spv::Id bias_id; - if (split.bias != 0) - { - Operation *bias_op = impl.allocate(bias_opcode, builder.makeUintType(32)); - bias_op->add_id(scaled_id); - bias_op->add_id(builder.makeUintConstant(split.bias)); - impl.add(bias_op); - bias_id = bias_op->id; - } - else - bias_id = scaled_id; - - index_id = bias_id; - } - else - { - assert(vecsize == 1); - index_id = build_index_divider_fallback(impl, offset, addr_shift_log2); - } - - return index_id; -} - static spv::Id build_structured_index(Converter::Impl &impl, const llvm::Value *index, unsigned stride, const llvm::Value *byte_offset, @@ -410,20 +208,6 @@ static spv::Id build_structured_index(Converter::Impl &impl, const llvm::Value * } } -static bool type_is_16bit(const llvm::Type *data_type) -{ - return data_type->getTypeID() == llvm::Type::TypeID::HalfTyID || - (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID && - data_type->getIntegerBitWidth() == 16); -} - -static bool type_is_64bit(const llvm::Type *data_type) -{ - return data_type->getTypeID() == llvm::Type::TypeID::DoubleTyID || - (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID && - data_type->getIntegerBitWidth() == 64); -} - unsigned raw_buffer_data_type_to_addr_shift_log2(Converter::Impl &impl, const llvm::Type *data_type) { // A 16-bit raw load is only actually 16-bit if native 16-bit operations are enabled. @@ -591,30 +375,6 @@ static spv::Id build_physical_pointer_address_for_raw_load_store(Converter::Impl return emit_u32x2_u32_add(impl, ptr_id, byte_offset_id); } -static void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type, - spv::Id &physical_type_id, spv::Op &value_cast_op) -{ - if (type_is_16bit(element_type) && !impl.execution_mode_meta.native_16bit_operations && - impl.options.min_precision_prefer_native_16bit) - { - if (element_type->getTypeID() == llvm::Type::TypeID::HalfTyID) - { - physical_type_id = impl.get_type_id(DXIL::ComponentType::F32, 1, 1); - value_cast_op = spv::OpFConvert; - } - else - { - physical_type_id = impl.get_type_id(DXIL::ComponentType::U32, 1, 1); - value_cast_op = spv::OpUConvert; - } - } - else - { - physical_type_id = impl.get_type_id(element_type); - value_cast_op = spv::OpNop; - } -} - static bool emit_physical_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction, const Converter::Impl::PhysicalPointerMeta &ptr_meta, uint32_t mask = 0, uint32_t alignment = 0) @@ -702,21 +462,6 @@ static RawWidth get_buffer_access_bits_per_component( return RawWidth::B32; } -static spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta, - spv::Id default_id, RawWidth width, RawVecSize vecsize) -{ - for (auto &alias : meta.var_alias_group) - { - if (alias.declaration.width == width && alias.declaration.vecsize == vecsize) - { - default_id = alias.var_id; - break; - } - } - - return default_id; -} - bool emit_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction) { // Elide dead loads. @@ -1425,12 +1170,11 @@ bool emit_atomic_binop_instruction(Converter::Impl &impl, const llvm::CallInst * } Operation *op = impl.allocate(opcode, instruction, impl.get_type_id(component_type, 1, 1)); - op->add_ids({ - counter_ptr_op->id, - builder.makeUintConstant(spv::ScopeDevice), - builder.makeUintConstant(0), // Relaxed - impl.fixup_store_type_atomic(component_type, 1, impl.get_id_for_value(instruction->getOperand(6))), - }); + + op->add_id(counter_ptr_op->id); + op->add_id(builder.makeUintConstant(spv::ScopeDevice)); + op->add_id(builder.makeUintConstant(0)); + op->add_id(impl.fixup_store_type_atomic(component_type, 1, impl.get_id_for_value(instruction->getOperand(6)))); impl.add(op); impl.fixup_load_type_atomic(component_type, 1, instruction); @@ -1519,14 +1263,12 @@ bool emit_atomic_cmpxchg_instruction(Converter::Impl &impl, const llvm::CallInst comparison_id = impl.fixup_store_type_atomic(component_type, 1, comparison_id); new_value_id = impl.fixup_store_type_atomic(component_type, 1, new_value_id); - op->add_ids({ - counter_ptr_op->id, - builder.makeUintConstant(spv::ScopeDevice), - builder.makeUintConstant(0), // Relaxed - builder.makeUintConstant(0), // Relaxed - new_value_id, - comparison_id, - }); + op->add_id(counter_ptr_op->id); + op->add_id(builder.makeUintConstant(spv::ScopeDevice)); + op->add_id(builder.makeUintConstant(0)); + op->add_id(builder.makeUintConstant(0)); + op->add_id(new_value_id); + op->add_id(comparison_id); impl.add(op); impl.fixup_load_type_atomic(component_type, 1, instruction); @@ -1553,7 +1295,10 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C { counter_ptr_op = impl.allocate(spv::OpImageTexelPointer, builder.makePointer(spv::StorageClassImage, builder.makeUintType(32))); - counter_ptr_op->add_ids({ meta.counter_var_id, builder.makeUintConstant(0), builder.makeUintConstant(0) }); + + counter_ptr_op->add_id(meta.counter_var_id); + counter_ptr_op->add_id(builder.makeUintConstant(0)); + counter_ptr_op->add_id(builder.makeUintConstant(0)); if (meta.non_uniform) builder.addDecoration(counter_ptr_op->id, spv::DecorationNonUniformEXT); @@ -1562,9 +1307,11 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C impl.add(counter_ptr_op); Operation *op = impl.allocate(spv::OpAtomicIAdd, instruction); - op->add_ids({ counter_ptr_op->id, builder.makeUintConstant(spv::ScopeDevice), - builder.makeUintConstant(0), // Relaxed. - builder.makeUintConstant(direction) }); + + op->add_id(counter_ptr_op->id); + op->add_id(builder.makeUintConstant(spv::ScopeDevice)); + op->add_id(builder.makeUintConstant(0)); + op->add_id(builder.makeUintConstant(direction)); impl.add(op); diff --git a/opcodes/dxil/dxil_buffer.hpp b/opcodes/dxil/dxil_buffer.hpp index 5f8a090..0f8ea5d 100644 --- a/opcodes/dxil/dxil_buffer.hpp +++ b/opcodes/dxil/dxil_buffer.hpp @@ -37,16 +37,6 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C unsigned raw_buffer_data_type_to_addr_shift_log2(Converter::Impl &impl, const llvm::Type *data_type); -struct RawBufferAccessSplit -{ - uint64_t scale; - int64_t bias; - const llvm::Value *dynamic_index; -}; -bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride, - uint32_t addr_shift_log2, unsigned vecsize, - RawBufferAccessSplit &split); - bool raw_access_byte_address_can_vectorize(Converter::Impl &impl, const llvm::Type *type, const llvm::Value *byte_offset, unsigned vecsize); diff --git a/opcodes/dxil/dxil_common.cpp b/opcodes/dxil/dxil_common.cpp index c76d8da..a26f571 100644 --- a/opcodes/dxil/dxil_common.cpp +++ b/opcodes/dxil/dxil_common.cpp @@ -79,4 +79,289 @@ spv::Id emit_u32x2_u32_add(Converter::Impl &impl, spv::Id u32x2_value, spv::Id u spv::Id addr_vec = impl.build_vector(uint_type, addr_elems, 2); return addr_vec; } + +unsigned get_type_scalar_alignment(Converter::Impl &impl, const llvm::Type *type) +{ + unsigned scalar_alignment; + switch (type->getTypeID()) + { + case llvm::Type::TypeID::IntegerTyID: + scalar_alignment = type->getIntegerBitWidth() / 8; + break; + case llvm::Type::TypeID::HalfTyID: + scalar_alignment = 2; + break; + case llvm::Type::TypeID::FloatTyID: + scalar_alignment = 4; + break; + case llvm::Type::TypeID::DoubleTyID: + scalar_alignment = 8; + break; + default: + LOGE("Invalid type for scalar alignment query.\n"); + return 1; + } + + if (!impl.execution_mode_meta.native_16bit_operations && scalar_alignment == 2) + scalar_alignment = 4; + + return scalar_alignment; +} + +spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta, + spv::Id default_id, RawWidth width, RawVecSize vecsize) +{ + for (auto &alias : meta.var_alias_group) + { + if (alias.declaration.width == width && alias.declaration.vecsize == vecsize) + { + default_id = alias.var_id; + break; + } + } + + return default_id; +} + +bool type_is_16bit(const llvm::Type *data_type) +{ + return data_type->getTypeID() == llvm::Type::TypeID::HalfTyID || + (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID && + data_type->getIntegerBitWidth() == 16); +} + +bool type_is_64bit(const llvm::Type *data_type) +{ + return data_type->getTypeID() == llvm::Type::TypeID::DoubleTyID || + (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID && + data_type->getIntegerBitWidth() == 64); +} + +void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type, + spv::Id &physical_type_id, spv::Op &value_cast_op) +{ + if (type_is_16bit(element_type) && !impl.execution_mode_meta.native_16bit_operations && + impl.options.min_precision_prefer_native_16bit) + { + if (element_type->getTypeID() == llvm::Type::TypeID::HalfTyID) + { + physical_type_id = impl.get_type_id(DXIL::ComponentType::F32, 1, 1); + value_cast_op = spv::OpFConvert; + } + else + { + physical_type_id = impl.get_type_id(DXIL::ComponentType::U32, 1, 1); + value_cast_op = spv::OpUConvert; + } + } + else + { + physical_type_id = impl.get_type_id(element_type); + value_cast_op = spv::OpNop; + } +} + +static spv::Id build_index_divider_fallback(Converter::Impl &impl, const llvm::Value *offset, unsigned addr_shift_log2) +{ + auto &builder = impl.builder(); + Operation *op = impl.allocate(spv::OpShiftRightLogical, builder.makeUintType(32)); + op->add_id(impl.get_id_for_value(offset)); + op->add_id(builder.makeUintConstant(addr_shift_log2)); + impl.add(op); + return op->id; +} + +bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride, + uint32_t addr_shift_log2, unsigned vecsize, + RawBufferAccessSplit &split) +{ + unsigned element_size = (1u << addr_shift_log2) * vecsize; + + // Base case first, a constant value. + if (const auto *const_addr = llvm::dyn_cast<llvm::ConstantInt>(index)) + { + int64_t constant_offset = const_addr->getUniqueInteger().getSExtValue(); + constant_offset *= stride; + + // Always pass scalar constant dividers through. + // Building a fallback divider helps nothing. + if (vecsize == 1 || constant_offset % int(element_size) == 0) + { + split = {}; + split.bias = constant_offset / element_size; + return true; + } + else + return false; + } + + const llvm::ConstantInt *scale = nullptr; + const llvm::ConstantInt *bias = nullptr; + bool scale_log2 = false; + bool bias_is_add = false; + bool bias_negate = false; + + while (!scale && llvm::isa<llvm::BinaryOperator>(index)) + { + auto *binop = llvm::cast<llvm::BinaryOperator>(index); + auto *lhs = binop->getOperand(0); + auto *rhs = binop->getOperand(1); + if (!bias && (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add || + binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub || + binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Or || + binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Xor)) + { + if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs)) + { + bias = const_lhs; + index = rhs; + } + else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) + { + bias = const_rhs; + index = lhs; + } + else + break; + + // DXC tends to be emit shift + or in some cases. + // We can turn this back into mul + add in most cases. + bias_negate = binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub; + bias_is_add = + binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add || + bias_negate; + } + else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Shl) + { + if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) + { + scale = const_rhs; + index = lhs; + } + else + break; + + scale_log2 = true; + } + else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Mul) + { + if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs)) + { + scale = const_lhs; + index = rhs; + } + else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs)) + { + scale = const_rhs; + index = lhs; + } + else + break; + + scale_log2 = false; + } + else + break; + } + + if (!scale && !bias) + { + // We cannot split anything, but we might be able to vectorize if the stride alone carries us. + if (stride % element_size == 0) + { + split = {}; + split.scale = stride / element_size; + split.dynamic_index = index; + return true; + } + else + return false; + } + + uint64_t scale_factor = 1; + if (scale) + scale_factor = scale->getUniqueInteger().getZExtValue(); + if (scale_log2) + scale_factor = 1ull << scale_factor; + + int64_t bias_factor = 0; + if (bias) + bias_factor = bias->getUniqueInteger().getSExtValue(); + if (bias_negate) + bias_factor = -bias_factor; + + // If there is no bit overlap between scale_factor and bias_factor + // then the bitwise OR is equivalent to add. + if (!bias_is_add && (scale_factor & bias_factor) != 0) + return false; + + scale_factor *= stride; + bias_factor *= stride; + + if (scale_factor % element_size == 0 && bias_factor % element_size == 0 && index) + { + split.scale = scale_factor / element_size; + split.bias = bias_factor / int(element_size); + split.dynamic_index = index; + return true; + } + else + return false; +} + +spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset, + unsigned addr_shift_log2, unsigned vecsize) +{ + auto &builder = impl.builder(); + // Attempt to do trivial constant folding to make output a little more sensible to read. + // Try to find an expression for offset which is "constant0 * offset + constant1", + // where constant0 and constant1 are aligned with addr_shift_log2. + + spv::Id index_id; + RawBufferAccessSplit split = {}; + + if (extract_raw_buffer_access_split(offset, 1, addr_shift_log2, vecsize, split)) + { + if (!split.dynamic_index) + return builder.makeUintConstant(split.bias); + + spv::Op bias_opcode = split.bias > 0 ? spv::OpIAdd : spv::OpISub; + if (bias_opcode == spv::OpISub) + split.bias = -split.bias; + + spv::Id scaled_id; + if (split.scale != 1) + { + Operation *scale_op = impl.allocate(spv::OpIMul, builder.makeUintType(32)); + scale_op->add_id(impl.get_id_for_value(split.dynamic_index)); + scale_op->add_id(builder.makeUintConstant(split.scale)); + impl.add(scale_op); + scaled_id = scale_op->id; + } + else + scaled_id = impl.get_id_for_value(split.dynamic_index); + + spv::Id bias_id; + if (split.bias != 0) + { + Operation *bias_op = impl.allocate(bias_opcode, builder.makeUintType(32)); + bias_op->add_id(scaled_id); + bias_op->add_id(builder.makeUintConstant(split.bias)); + impl.add(bias_op); + bias_id = bias_op->id; + } + else + bias_id = scaled_id; + + index_id = bias_id; + } + else + { + assert(vecsize == 1); + index_id = build_index_divider_fallback(impl, offset, addr_shift_log2); + } + + return index_id; +} + } // namespace dxil_spv diff --git a/opcodes/dxil/dxil_common.hpp b/opcodes/dxil/dxil_common.hpp index c13f0f9..e50278b 100644 --- a/opcodes/dxil/dxil_common.hpp +++ b/opcodes/dxil/dxil_common.hpp @@ -19,9 +19,34 @@ #pragma once #include "SpvBuilder.h" #include "opcodes/opcodes.hpp" +#include "opcodes/converter_impl.hpp" namespace dxil_spv { bool get_constant_operand(const llvm::CallInst *value, unsigned index, uint32_t *operand); spv::Id emit_u32x2_u32_add(Converter::Impl &impl, spv::Id u32x2_value, spv::Id u32_value); +unsigned get_type_scalar_alignment(Converter::Impl &impl, const llvm::Type *type); + +spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta, + spv::Id default_id, RawWidth width, RawVecSize vecsize); + +bool type_is_16bit(const llvm::Type *data_type); +bool type_is_64bit(const llvm::Type *data_type); + +void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type, + spv::Id &physical_type_id, spv::Op &value_cast_op); + +struct RawBufferAccessSplit +{ + uint64_t scale; + int64_t bias; + const llvm::Value *dynamic_index; +}; + +bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride, + uint32_t addr_shift_log2, unsigned vecsize, + RawBufferAccessSplit &split); + +spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset, + unsigned addr_shift_log2, unsigned vecsize); } diff --git a/opcodes/dxil/dxil_resources.cpp b/opcodes/dxil/dxil_resources.cpp index 1bfdfc2..00275db 100644 --- a/opcodes/dxil/dxil_resources.cpp +++ b/opcodes/dxil/dxil_resources.cpp @@ -150,11 +150,9 @@ static void fixup_builtin_load(Converter::Impl &impl, spv::Id var_id, const llvm else if (builtin == spv::BuiltInFrontFacing) { Operation *cast_op = impl.allocate(spv::OpSelect, builder.makeUintType(32)); - cast_op->add_ids({ - impl.get_id_for_value(instruction), - builder.makeUintConstant(~0u), - builder.makeUintConstant(0), - }); + cast_op->add_id(impl.get_id_for_value(instruction)); + cast_op->add_id(builder.makeUintConstant(~0u)); + cast_op->add_id(builder.makeUintConstant(0)); impl.add(cast_op); impl.rewrite_value(instruction, cast_op->id); } @@ -272,7 +270,9 @@ static spv::Id build_attribute_offset(spv::Id id, Converter::Impl &impl) auto &builder = impl.builder(); { Operation *op = impl.allocate(spv::OpBitFieldSExtract, builder.makeUintType(32)); - op->add_ids({ id, builder.makeUintConstant(0), builder.makeUintConstant(4) }); + op->add_id(id); + op->add_id(builder.makeUintConstant(0)); + op->add_id(builder.makeUintConstant(4)); id = op->id; impl.add(op); } @@ -664,11 +664,15 @@ static bool build_load_resource_handle(Converter::Impl &impl, spv::Id base_resou auto storage = get_resource_storage_class(impl, base_resource_id); is_non_uniform = false; + // If we index based on SBT, we must assume non-uniform, even for resources + // which are not arrayed, since in theory, the dispatch can process different SBTs concurrently, + // perhaps even within same subgroup, so have to be defensive. + if (reference.local_root_signature_entry >= 0) + is_non_uniform = true; + if (reference.base_resource_is_array || reference.bindless) { - if (reference.base_resource_is_array) - is_non_uniform = instruction_is_non_uniform; - else if (reference.local_root_signature_entry >= 0) + if (reference.base_resource_is_array && instruction_offset_value && instruction_is_non_uniform) is_non_uniform = true; type_id = builder.getContainedTypeId(type_id); @@ -725,7 +729,10 @@ static bool build_load_resource_handle(Converter::Impl &impl, spv::Id base_resou { *value_id = resource_id; impl.rewrite_value(instruction, resource_id); - // Not technically needed, but to be safe against weird compilers ... + + // Generally, we want to add NonUniformEXT after access chain for UBO/SSBO, + // but there is a special case in non-uniform OpArrayLength, where we will use this pointer + // directly, so mark it as non-uniform here. if (is_non_uniform) builder.addDecoration(resource_id, spv::DecorationNonUniformEXT); } @@ -1244,8 +1251,13 @@ static bool emit_create_handle(Converter::Impl &impl, const llvm::CallInst *inst case DXIL::ResourceType::CBV: { auto &reference = get_resource_reference(impl, resource_type, instruction, resource_range); - spv::Id base_cbv_id = reference.var_id; - spv::Id type_id = builder.getDerefTypeId(base_cbv_id); + const LocalRootSignatureEntry *local_root_signature_entry = nullptr; + if (reference.local_root_signature_entry >= 0) + local_root_signature_entry = &impl.local_root_signature[reference.local_root_signature_entry]; + + // Special case root constants since these resources point directly to + // the push constant block or SBT and not to any concrete resource, + // so we cannot deduce storage classes properly. if (resource_is_physical_pointer(impl, reference)) { @@ -1258,93 +1270,82 @@ static bool emit_create_handle(Converter::Impl &impl, const llvm::CallInst *inst meta.kind = reference.resource_kind; impl.rewrite_value(instruction, ptr_id); } - else if (reference.base_resource_is_array || reference.bindless) + else if (reference.var_id != 0 && reference.var_id == impl.root_constant_id) + { + // Point directly to root constants. + impl.rewrite_value(instruction, reference.var_id); + unsigned member_offset = reference.push_constant_member; + impl.handle_to_root_member_offset[instruction] = member_offset; + } + else if (local_root_signature_entry && local_root_signature_entry->type == LocalRootSignatureType::Constants) { - if (reference.local_root_signature_entry >= 0) - non_uniform = true; - else if (!reference.base_resource_is_array) - non_uniform = false; + // Access chain into the desired member once. + spv::Id id = build_shader_record_access_chain(impl, reference.local_root_signature_entry); + + auto &meta = impl.handle_to_resource_meta[id]; + meta = {}; + meta.storage = spv::StorageClassShaderRecordBufferKHR; + meta.kind = DXIL::ResourceKind::CBuffer; + impl.handle_to_root_member_offset[instruction] = reference.local_root_signature_entry; + impl.rewrite_value(instruction, id); + } + else + { + bool is_non_uniform = false; bool ssbo = reference.bindless && impl.options.bindless_cbv_ssbo_emulation; auto storage = ssbo ? spv::StorageClassStorageBuffer : spv::StorageClassUniform; - auto desc_type = ssbo ? DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT : DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT; + auto descriptor_type = ssbo ? DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT : DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT; - type_id = builder.getContainedTypeId(type_id); - Operation *op = impl.allocate(spv::OpAccessChain, instruction, builder.makePointer(storage, type_id)); - op->add_id(base_cbv_id); + Vector<Converter::Impl::RawDeclarationVariable> raw_declarations; + spv::Id loaded_id = 0; + spv::Id resource_id = 0; + raw_declarations.reserve(reference.var_alias_group.size()); - if (reference.bindless) + if (reference.var_id) { - spv::Id offset_id = build_bindless_heap_offset(impl, reference, desc_type, - reference.base_resource_is_array ? instruction_offset : nullptr); - if (!offset_id) + resource_id = reference.var_id; + if (!build_load_resource_handle(impl, resource_id, reference, descriptor_type, instruction, + instruction_offset, non_uniform, is_non_uniform, + nullptr, &loaded_id, nullptr)) { - LOGE("Failed to load CBV bindless offset.\n"); + LOGE("Failed to load CBV resource handle.\n"); return false; } - op->add_id(offset_id); } - else + + for (auto &alias : reference.var_alias_group) { - op->add_id(impl.get_id_for_value(instruction_offset)); + resource_id = alias.var_id; + if (!build_load_resource_handle(impl, resource_id, reference, descriptor_type, + instruction, instruction_offset, non_uniform, is_non_uniform, + nullptr, &loaded_id, nullptr)) + { + LOGE("Failed to load CBV resource handle.\n"); + return false; + } + + raw_declarations.push_back({ alias.declaration, loaded_id }); } - impl.add(op); - impl.rewrite_value(instruction, op->id); + auto &incoming_meta = impl.handle_to_resource_meta[resource_id]; - auto &meta = impl.handle_to_resource_meta[op->id]; - meta = {}; - meta.non_uniform = non_uniform; + auto &meta = impl.handle_to_resource_meta[loaded_id]; + meta = incoming_meta; + meta.non_uniform = is_non_uniform; meta.storage = storage; + meta.var_alias_group = std::move(raw_declarations); meta.kind = DXIL::ResourceKind::CBuffer; - if (meta.non_uniform) + if (is_non_uniform) { if (ssbo) builder.addCapability(spv::CapabilityStorageBufferArrayNonUniformIndexingEXT); else builder.addCapability(spv::CapabilityUniformBufferArrayNonUniformIndexingEXT); - builder.addDecoration(op->id, spv::DecorationNonUniformEXT); builder.addExtension("SPV_EXT_descriptor_indexing"); } } - else if (reference.local_root_signature_entry >= 0) - { - // Either we have root constants or a physical storage pointer here. - // CBufferLoad functions will deal with that. If we have a physical storage pointer, we can load it here. - auto &local_entry = impl.local_root_signature[reference.local_root_signature_entry]; - - if (local_entry.type == LocalRootSignatureType::Descriptor) - { - spv::Id id = build_root_descriptor_load_physical_pointer(impl, reference); - auto &meta = impl.handle_to_resource_meta[id]; - meta = {}; - meta.storage = spv::StorageClassPhysicalStorageBuffer; - meta.kind = DXIL::ResourceKind::CBuffer; - impl.rewrite_value(instruction, id); - } - else - { - // Access chain into the desired member once. - spv::Id id = build_shader_record_access_chain(impl, reference.local_root_signature_entry); - - auto &meta = impl.handle_to_resource_meta[id]; - meta = {}; - meta.storage = spv::StorageClassShaderRecordBufferKHR; - meta.kind = DXIL::ResourceKind::CBuffer; - impl.handle_to_root_member_offset[instruction] = reference.local_root_signature_entry; - impl.rewrite_value(instruction, id); - } - } - else - { - impl.rewrite_value(instruction, base_cbv_id); - if (base_cbv_id == impl.root_constant_id) - { - unsigned member_offset = reference.push_constant_member; - impl.handle_to_root_member_offset[instruction] = member_offset; - } - } break; } @@ -1538,47 +1539,158 @@ bool emit_annotate_handle_instruction(Converter::Impl &impl, const llvm::CallIns meta.binding_index, meta.offset, meta.non_uniform); } -static bool emit_cbuffer_load_legacy_physical_pointer(Converter::Impl &impl, const llvm::CallInst *instruction) +static bool build_bitcast_32x4_to_16x8_composite(Converter::Impl &impl, const llvm::CallInst *instruction, + spv::Id loaded_id) +{ + auto &builder = impl.builder(); + + Vector<spv::Id> member_types(8); + spv::Id type_id = impl.get_type_id(instruction->getType()->getStructElementType(0)); + for (auto &type : member_types) + type = type_id; + + spv::Id vec2_type_id = builder.makeVectorType(type_id, 2); + + spv::Id u32_composites[4]; + for (unsigned i = 0; i < 4; i++) + { + auto *extract_op = impl.allocate(spv::OpCompositeExtract, builder.makeFloatType(32)); + extract_op->add_id(loaded_id); + extract_op->add_literal(i); + impl.add(extract_op); + u32_composites[i] = extract_op->id; + } + + spv::Id u16_composites[8]; + for (unsigned i = 0; i < 4; i++) + { + auto *bitcast_op = impl.allocate(spv::OpBitcast, vec2_type_id); + bitcast_op->add_id(u32_composites[i]); + impl.add(bitcast_op); + + for (unsigned j = 0; j < 2; j++) + { + auto *extract = impl.allocate(spv::OpCompositeExtract, type_id); + extract->add_id(bitcast_op->id); + extract->add_literal(j); + impl.add(extract); + u16_composites[2 * i + j] = extract->id; + } + } + + spv::Id struct_type_id = impl.get_struct_type(member_types, "CBVComposite16x8"); + auto *composite = impl.allocate(spv::OpCompositeConstruct, struct_type_id); + for (auto &comp : u16_composites) + composite->add_id(comp); + impl.add(composite); + impl.rewrite_value(instruction, composite->id); + return true; +} + +static bool emit_cbuffer_load_physical_pointer(Converter::Impl &impl, const llvm::CallInst *instruction) { auto &builder = impl.builder(); spv::Id member_index = impl.get_id_for_value(instruction->getOperand(2)); + bool scalar_load = instruction->getType()->getTypeID() != llvm::Type::TypeID::StructTyID; + unsigned scalar_alignment; + spv::Id byteaddr_id; + uint32_t alignment; - auto *mul_op = impl.allocate(spv::OpIMul, builder.makeUintType(32)); - mul_op->add_id(member_index); - mul_op->add_id(builder.makeUintConstant(16)); - impl.add(mul_op); + const llvm::Type *result_component_type; - spv::Id addr_vec = emit_u32x2_u32_add(impl, impl.get_id_for_value(instruction->getOperand(1)), mul_op->id); + if (!scalar_load) + { + auto *mul_op = impl.allocate(spv::OpIMul, builder.makeUintType(32)); + mul_op->add_id(member_index); + mul_op->add_id(builder.makeUintConstant(16)); + impl.add(mul_op); + byteaddr_id = mul_op->id; + result_component_type = instruction->getType()->getStructElementType(0); + scalar_alignment = get_type_scalar_alignment(impl, result_component_type); + alignment = 16; + } + else + { + byteaddr_id = member_index; + // DXIL emits the alignment, but we cannot trust it, DXC is completely buggy here and emits + // obviously bogus alignment values. + // Use scalar alignment. + result_component_type = instruction->getType(); + alignment = get_type_scalar_alignment(impl, instruction->getType()); + scalar_alignment = alignment; + } + + // Handle min16float where we want FP16 value, but FP32 physical. + spv::Op value_cast_op = spv::OpNop; + spv::Id physical_type_id = 0; + get_physical_load_store_cast_info(impl, result_component_type, physical_type_id, value_cast_op); + + spv::Id addr_vec = emit_u32x2_u32_add(impl, impl.get_id_for_value(instruction->getOperand(1)), byteaddr_id); auto *result_type = instruction->getType(); - spv::Id vec_type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4); + unsigned physical_vecsize; + spv::Id result_type_id; + + if (scalar_load) + { + result_type_id = impl.get_type_id(result_type); + physical_vecsize = 1; + } + else + { + if (scalar_alignment != 2) + { + physical_vecsize = 16 / scalar_alignment; + result_type_id = builder.makeVectorType(physical_type_id, physical_vecsize); + } + else + { + result_type_id = builder.makeVectorType(builder.makeFloatType(32), 4); + physical_vecsize = 4; + } + } + Converter::Impl::PhysicalPointerMeta ptr_meta = {}; ptr_meta.nonwritable = true; - spv::Id ptr_type_id = impl.get_physical_pointer_block_type(vec_type_id, ptr_meta); + spv::Id ptr_type_id = impl.get_physical_pointer_block_type(result_type_id, ptr_meta); auto *ptr_bitcast_op = impl.allocate(spv::OpBitcast, ptr_type_id); ptr_bitcast_op->add_id(addr_vec); impl.add(ptr_bitcast_op); - auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, vec_type_id)); + auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, result_type_id)); chain_op->add_id(ptr_bitcast_op->id); chain_op->add_id(builder.makeUintConstant(0)); impl.add(chain_op); - auto *load_op = impl.allocate(spv::OpLoad, instruction, vec_type_id); + auto *load_op = impl.allocate(spv::OpLoad, instruction, result_type_id); load_op->add_id(chain_op->id); load_op->add_literal(spv::MemoryAccessAlignedMask); - load_op->add_literal(16); + load_op->add_literal(alignment); impl.add(load_op); + // Handle f16x8 loads. + if (!scalar_load && scalar_alignment == 2) + return build_bitcast_32x4_to_16x8_composite(impl, instruction, load_op->id); + else if (value_cast_op != spv::OpNop) + { + spv::Id type_id = impl.get_type_id(result_component_type); + if (physical_vecsize != 1) + type_id = builder.makeVectorType(type_id, physical_vecsize); + auto *cast_op = impl.allocate(value_cast_op, type_id); + cast_op->add_id(impl.get_id_for_value(instruction)); + impl.add(cast_op); + impl.rewrite_value(instruction, cast_op->id); + } + return true; } -static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llvm::CallInst *instruction, - spv::Id base_ptr, - spv::StorageClass storage, - unsigned index_offset, unsigned num_elements) +static bool emit_cbuffer_load_from_uints(Converter::Impl &impl, const llvm::CallInst *instruction, + spv::Id base_ptr, + spv::StorageClass storage, + unsigned index_offset, unsigned num_elements) { auto &builder = impl.builder(); @@ -1589,18 +1701,56 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv return false; } - unsigned member_index = 4 * unsigned(constant_int->getUniqueInteger().getZExtValue()); + // CBufferLoad vs CBufferLoadLegacy + bool scalar_load = instruction->getType()->getTypeID() != llvm::Type::TypeID::StructTyID; + auto member_index = unsigned(constant_int->getUniqueInteger().getZExtValue()); + + // In scalar load, we index by byte offset. Ignore alignment, we read from registers. + if (scalar_load) + { + if (member_index % 4) + { + LOGE("Scalar CBufferLoad on root constant buffer is not aligned to 4 bytes.\n"); + return false; + } + member_index /= 4; + + if (get_type_scalar_alignment(impl, instruction->getType()) != 4) + { + LOGE("Attempting to use root constant buffer with non-32bit type.\n"); + return false; + } + } + else + { + // In legacy load, we index in terms of float4[]s. + member_index *= 4; + + if (get_type_scalar_alignment(impl, instruction->getType()->getStructElementType(0)) != 4) + { + LOGE("Attempting to use root constant buffer with non-32bit type.\n"); + return false; + } + } + member_index += index_offset; if (member_index >= num_elements) + { + LOGE("Root constant CBV is accessed out of bounds. (%u > %u).\n", member_index, num_elements); return false; + } - unsigned num_words = std::min(4u, num_elements - member_index); + unsigned num_words = std::min(scalar_load ? 1u : 4u, num_elements - member_index); auto *result_type = instruction->getType(); // Root constants are emitted as uints as they are typically used as indices. - bool need_bitcast = result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::IntegerTyID; + bool need_bitcast; + if (scalar_load) + need_bitcast = result_type->getTypeID() != llvm::Type::TypeID::IntegerTyID; + else + need_bitcast = result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::IntegerTyID; spv::Id elements[4]; for (unsigned i = 0; i < 4; i++) @@ -1625,10 +1775,21 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv elements[i] = builder.makeUintConstant(0); } - spv::Id id = impl.build_vector(builder.makeUintType(32), elements, 4); + spv::Id id; + + if (scalar_load) + id = elements[0]; + else + id = impl.build_vector(builder.makeUintType(32), elements, 4); + if (need_bitcast) { - spv::Id type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4); + spv::Id type_id; + if (scalar_load) + type_id = impl.get_type_id(result_type); + else + type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4); + auto *op = impl.allocate(spv::OpBitcast, instruction, type_id); op->add_id(id); impl.add(op); @@ -1641,23 +1802,121 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv return true; } -static bool emit_cbuffer_load_legacy_shader_record(Converter::Impl &impl, const llvm::CallInst *instruction, - unsigned local_root_signature_entry) +static bool emit_cbuffer_load_shader_record(Converter::Impl &impl, const llvm::CallInst *instruction, + unsigned local_root_signature_entry) { auto &entry = impl.local_root_signature[local_root_signature_entry]; - return emit_cbuffer_load_legacy_from_uints(impl, instruction, - impl.get_id_for_value(instruction->getOperand(1)), - spv::StorageClassShaderRecordBufferKHR, - 0, entry.constants.num_words); + return emit_cbuffer_load_from_uints(impl, instruction, + impl.get_id_for_value(instruction->getOperand(1)), + spv::StorageClassShaderRecordBufferKHR, + 0, entry.constants.num_words); +} + +static bool emit_cbuffer_load_root_constant(Converter::Impl &impl, const llvm::CallInst *instruction) +{ + return emit_cbuffer_load_from_uints(impl, instruction, + impl.root_constant_id, + spv::StorageClassPushConstant, + impl.handle_to_root_member_offset[instruction->getOperand(1)], + impl.root_constant_num_words + impl.root_descriptor_count); } -static bool emit_cbuffer_load_legacy_root_constant(Converter::Impl &impl, const llvm::CallInst *instruction) +bool emit_cbuffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction) { - return emit_cbuffer_load_legacy_from_uints(impl, instruction, - impl.root_constant_id, - spv::StorageClassPushConstant, - impl.handle_to_root_member_offset[instruction->getOperand(1)], - impl.root_constant_num_words + impl.root_descriptor_count); + auto &builder = impl.builder(); + + // This always returns a scalar. + spv::Id ptr_id = impl.get_id_for_value(instruction->getOperand(1)); + if (!ptr_id) + return false; + + if (ptr_id == impl.root_constant_id) + { + return emit_cbuffer_load_root_constant(impl, instruction); + } + else + { + auto &meta = impl.handle_to_resource_meta[ptr_id]; + + if (meta.storage == spv::StorageClassPhysicalStorageBuffer) + { + return emit_cbuffer_load_physical_pointer(impl, instruction); + } + else if (meta.storage == spv::StorageClassShaderRecordBufferKHR) + { + return emit_cbuffer_load_shader_record(impl, instruction, + impl.handle_to_root_member_offset[instruction->getOperand(1)]); + } + + // Handle min16float where we want FP16 value, but FP32 physical. + spv::Op value_cast_op = spv::OpNop; + spv::Id physical_type_id = 0; + get_physical_load_store_cast_info(impl, instruction->getType(), physical_type_id, value_cast_op); + + unsigned addr_shift; + RawWidth raw_width; + switch (get_type_scalar_alignment(impl, instruction->getType())) + { + case 2: + raw_width = RawWidth::B16; + addr_shift = 1; + break; + + case 4: + raw_width = RawWidth::B32; + addr_shift = 2; + break; + + case 8: + raw_width = RawWidth::B64; + addr_shift = 3; + break; + + default: + return false; + } + + unsigned raw_bits = raw_width_to_bits(raw_width); + ptr_id = get_buffer_alias_handle(impl, meta, ptr_id, raw_width, RawVecSize::V1); + + spv::Id array_index_id = build_index_divider(impl, instruction->getOperand(2), addr_shift, 1); + + Operation *access_chain_op = impl.allocate( + spv::OpAccessChain, builder.makePointer(meta.storage, builder.makeFloatType(raw_bits))); + access_chain_op->add_ids({ ptr_id, builder.makeUintConstant(0), array_index_id }); + impl.add(access_chain_op); + + if (meta.non_uniform) + builder.addDecoration(access_chain_op->id, spv::DecorationNonUniformEXT); + + bool need_bitcast = false; + auto *result_type = instruction->getType(); + if (result_type->getTypeID() == llvm::Type::TypeID::IntegerTyID) + need_bitcast = true; + + Operation *load_op = impl.allocate(spv::OpLoad, instruction, builder.makeFloatType(raw_bits)); + load_op->add_id(access_chain_op->id); + impl.add(load_op); + + if (need_bitcast) + { + Operation *op = impl.allocate(spv::OpBitcast, builder.makeUintType(raw_bits)); + op->add_id(load_op->id); + impl.add(op); + impl.rewrite_value(instruction, op->id); + } + + // Handle min16float4 value cast scenarios. + if (value_cast_op != spv::OpNop) + { + auto *cast_op = impl.allocate(value_cast_op, impl.get_type_id(instruction->getType())); + cast_op->add_id(impl.get_id_for_value(instruction)); + impl.add(cast_op); + impl.rewrite_value(instruction, cast_op->id); + } + + return true; + } } bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::CallInst *instruction) @@ -1672,64 +1931,101 @@ bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::Cal if (ptr_id == impl.root_constant_id) { - return emit_cbuffer_load_legacy_root_constant(impl, instruction); + return emit_cbuffer_load_root_constant(impl, instruction); } else { - auto itr = impl.handle_to_resource_meta.find(ptr_id); - bool non_uniform = false; - spv::StorageClass storage = spv::StorageClassUniform; + auto &meta = impl.handle_to_resource_meta[ptr_id]; + + auto *result_type = instruction->getType(); - if (itr != impl.handle_to_resource_meta.end()) + if (result_type->getTypeID() != llvm::Type::TypeID::StructTyID) { - non_uniform = itr->second.non_uniform; - storage = itr->second.storage; + LOGE("CBufferLoadLegacy: return type must be struct.\n"); + return false; } - if (storage == spv::StorageClassPhysicalStorageBuffer) + if (meta.storage == spv::StorageClassPhysicalStorageBuffer) { - return emit_cbuffer_load_legacy_physical_pointer(impl, instruction); + return emit_cbuffer_load_physical_pointer(impl, instruction); } - else if (storage == spv::StorageClassShaderRecordBufferKHR) + else if (meta.storage == spv::StorageClassShaderRecordBufferKHR) { - return emit_cbuffer_load_legacy_shader_record(impl, instruction, - impl.handle_to_root_member_offset[instruction->getOperand(1)]); + return emit_cbuffer_load_shader_record(impl, instruction, + impl.handle_to_root_member_offset[instruction->getOperand(1)]); } + // Handle min16float where we want FP16 value, but FP32 physical. + auto *result_component_type = result_type->getStructElementType(0); + spv::Op value_cast_op = spv::OpNop; + spv::Id physical_type_id = 0; + get_physical_load_store_cast_info(impl, result_component_type, physical_type_id, value_cast_op); + + RawVecSize alias_vecsize; + RawWidth alias_width; + unsigned scalar_alignment = get_type_scalar_alignment(impl, result_component_type); + unsigned bits, vecsize; + + if (scalar_alignment == 8) + { + alias_width = RawWidth::B64; + alias_vecsize = RawVecSize::V2; + } + else + { + alias_width = RawWidth::B32; + alias_vecsize = RawVecSize::V4; + } + + bits = raw_width_to_bits(alias_width); + vecsize = raw_vecsize_to_vecsize(alias_vecsize); + + ptr_id = get_buffer_alias_handle(impl, meta, ptr_id, alias_width, alias_vecsize); + spv::Id vec4_index = impl.get_id_for_value(instruction->getOperand(2)); - Operation *access_chain_op = impl.allocate( - spv::OpAccessChain, builder.makePointer(storage, builder.makeVectorType(builder.makeFloatType(32), 4))); + spv::Id vector_type_id = builder.makeVectorType(builder.makeFloatType(bits), vecsize); + Operation *access_chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(meta.storage, vector_type_id)); access_chain_op->add_ids({ ptr_id, builder.makeUintConstant(0), vec4_index }); impl.add(access_chain_op); - if (non_uniform) + if (meta.non_uniform) builder.addDecoration(access_chain_op->id, spv::DecorationNonUniformEXT); bool need_bitcast = false; - auto *result_type = instruction->getType(); - if (result_type->getTypeID() != llvm::Type::TypeID::StructTyID) - return false; - if (result_type->getStructNumElements() != 4) - return false; - if (result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::FloatTyID) + if (result_type->getStructElementType(0)->getTypeID() == llvm::Type::TypeID::IntegerTyID) need_bitcast = true; - Operation *load_op = - impl.allocate(spv::OpLoad, instruction, builder.makeVectorType(builder.makeFloatType(32), 4)); + Operation *load_op = impl.allocate(spv::OpLoad, instruction, vector_type_id); load_op->add_id(access_chain_op->id); impl.add(load_op); - if (need_bitcast) + if (scalar_alignment == 2) { - Operation *op = impl.allocate(spv::OpBitcast, builder.makeVectorType(builder.makeUintType(32), 4)); + // Special case, need to bitcast and build a struct with 8 elements instead. + if (!build_bitcast_32x4_to_16x8_composite(impl, instruction, load_op->id)) + return false; + } + else if (need_bitcast) + { + spv::Id uint_vector_type_id = builder.makeVectorType(builder.makeUintType(bits), vecsize); + Operation *op = impl.allocate(spv::OpBitcast, uint_vector_type_id); - assert(result_type->getStructElementType(0)->getTypeID() == llvm::Type::TypeID::IntegerTyID); op->add_id(load_op->id); impl.add(op); impl.rewrite_value(instruction, op->id); } - return true; + + // If we have min-precision loads, we might have to truncate here. + if (value_cast_op != spv::OpNop) + { + auto *cast_op = impl.allocate(value_cast_op, builder.makeVectorType(impl.get_type_id(result_component_type), vecsize)); + cast_op->add_id(impl.get_id_for_value(instruction)); + impl.add(cast_op); + impl.rewrite_value(instruction, cast_op->id); + } } + + return true; } } // namespace dxil_spv diff --git a/opcodes/dxil/dxil_resources.hpp b/opcodes/dxil/dxil_resources.hpp index dc8c842..d04784b 100644 --- a/opcodes/dxil/dxil_resources.hpp +++ b/opcodes/dxil/dxil_resources.hpp @@ -32,6 +32,7 @@ bool emit_create_handle_from_heap_instruction(Converter::Impl &impl, const llvm: bool emit_create_handle_from_binding_instruction(Converter::Impl &impl, const llvm::CallInst *instruction); bool emit_annotate_handle_instruction(Converter::Impl &impl, const llvm::CallInst *instruction); +bool emit_cbuffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction); bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::CallInst *instruction); template <GLSLstd450 opcode> diff --git a/opcodes/opcodes_dxil_builtins.cpp b/opcodes/opcodes_dxil_builtins.cpp index 74bf633..9099cc1 100644 --- a/opcodes/opcodes_dxil_builtins.cpp +++ b/opcodes/opcodes_dxil_builtins.cpp @@ -48,6 +48,7 @@ struct DXILDispatcher OP(CreateHandle) = emit_create_handle_instruction; OP(CreateHandleForLib) = emit_create_handle_for_lib_instruction; OP(CBufferLoadLegacy) = emit_cbuffer_load_legacy_instruction; + OP(CBufferLoad) = emit_cbuffer_load_instruction; OP(EvalSnapped) = emit_interpolate_dispatch<GLSLstd450InterpolateAtOffset>; OP(EvalSampleIndex) = emit_interpolate_dispatch<GLSLstd450InterpolateAtSample>; OP(EvalCentroid) = emit_interpolate_dispatch<GLSLstd450InterpolateAtCentroid>; @@ -356,7 +357,12 @@ bool emit_dxil_instruction(Converter::Impl &impl, const llvm::CallInst *instruct return false; } - return global_dispatcher.builder_lut[opcode](impl, instruction); + if (!global_dispatcher.builder_lut[opcode](impl, instruction)) + { + LOGE("Failed DXIL opcode %u.\n", opcode); + return false; + } + return true; } static void update_raw_access_tracking_from_vector_type(Converter::Impl::AccessTracking &tracking, @@ -463,6 +469,64 @@ get_resource_meta_from_buffer_op(Converter::Impl &impl, const llvm::CallInst *in return { DXIL::ResourceKind::Invalid, 0 }; } +static void analyze_dxil_cbuffer_load(Converter::Impl &impl, const llvm::CallInst *instruction) +{ + Converter::Impl::AccessTracking *tracking = nullptr; + auto itr = impl.llvm_value_to_cbv_resource_index_map.find(instruction->getOperand(1)); + if (itr != impl.llvm_value_to_cbv_resource_index_map.end()) + tracking = &impl.cbv_access_tracking[itr->second]; + + if (!tracking) + { + auto annotate_itr = impl.llvm_annotate_handle_uses.find(instruction->getOperand(1)); + if (annotate_itr != impl.llvm_annotate_handle_uses.end()) + tracking = &annotate_itr->second.tracking; + } + + if (tracking) + { + if (instruction->getType()->getTypeID() == llvm::Type::TypeID::StructTyID) + { + // Legacy float4 model. However, it seems like DXIL also supports f16x8, f32x4 and f64x2 ... :( + switch (get_type_scalar_alignment(impl, instruction->getType()->getStructElementType(0))) + { + case 2: + case 4: + // We'll bit-cast on-demand for f16x8. + tracking->raw_access_buffer_declarations[int(RawWidth::B32)][int(RawVecSize::V4)] = true; + break; + + case 8: + tracking->raw_access_buffer_declarations[int(RawWidth::B64)][int(RawVecSize::V2)] = true; + break; + + default: + break; + } + } + else + { + switch (get_type_scalar_alignment(impl, instruction->getType())) + { + case 2: + tracking->raw_access_buffer_declarations[int(RawWidth::B16)][int(RawVecSize::V1)] = true; + break; + + case 4: + tracking->raw_access_buffer_declarations[int(RawWidth::B32)][int(RawVecSize::V1)] = true; + break; + + case 8: + tracking->raw_access_buffer_declarations[int(RawWidth::B64)][int(RawVecSize::V1)] = true; + break; + + default: + break; + } + } + } +} + static void analyze_dxil_buffer_load(Converter::Impl &impl, const llvm::CallInst *instruction, DXIL::Op opcode) { Converter::Impl::AccessTracking *tracking = nullptr; @@ -653,6 +717,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn impl.llvm_value_to_uav_resource_index_map[instruction] = resource_range; else if (static_cast<DXIL::ResourceType>(resource_type_operand) == DXIL::ResourceType::SRV) impl.llvm_value_to_srv_resource_index_map[instruction] = resource_range; + else if (static_cast<DXIL::ResourceType>(resource_type_operand) == DXIL::ResourceType::CBV) + impl.llvm_value_to_cbv_resource_index_map[instruction] = resource_range; if (impl.options.descriptor_qa_enabled && impl.options.descriptor_qa_sink_handles) impl.resource_handle_to_block[instruction] = bb; @@ -669,6 +735,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn impl.llvm_value_to_uav_resource_index_map[instruction] = itr->second.meta_index; else if (itr->second.type == DXIL::ResourceType::SRV) impl.llvm_value_to_srv_resource_index_map[instruction] = itr->second.meta_index; + else if (itr->second.type == DXIL::ResourceType::CBV) + impl.llvm_value_to_cbv_resource_index_map[instruction] = itr->second.meta_index; impl.llvm_active_global_resource_variables.insert(itr->second.variable); @@ -726,7 +794,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn if (use.resource_kind == DXIL::ResourceKind::StructuredBuffer) use.stride = params; - else if (use.resource_kind != DXIL::ResourceKind::RawBuffer) + else if (use.resource_kind != DXIL::ResourceKind::RawBuffer && + use.resource_kind != DXIL::ResourceKind::CBuffer) use.component_type = DXIL::ComponentType(params & 0xff); } else if (meta.resource_op == DXIL::Op::CreateHandleFromBinding || @@ -736,6 +805,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn impl.llvm_value_to_uav_resource_index_map[instruction] = meta.binding_index; else if (meta.resource_type == DXIL::ResourceType::SRV) impl.llvm_value_to_srv_resource_index_map[instruction] = meta.binding_index; + else if (meta.resource_type == DXIL::ResourceType::CBV) + impl.llvm_value_to_cbv_resource_index_map[instruction] = meta.binding_index; } if (impl.options.descriptor_qa_enabled && impl.options.descriptor_qa_sink_handles) @@ -751,6 +822,11 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn analyze_dxil_buffer_store(impl, instruction, op); break; + case DXIL::Op::CBufferLoad: + case DXIL::Op::CBufferLoadLegacy: + analyze_dxil_cbuffer_load(impl, instruction); + break; + case DXIL::Op::BufferUpdateCounter: { impl.llvm_values_using_update_counter.insert(instruction->getOperand(1)); diff --git a/opcodes/opcodes_llvm_builtins.cpp b/opcodes/opcodes_llvm_builtins.cpp index 3d2b144..f62a50a 100644 --- a/opcodes/opcodes_llvm_builtins.cpp +++ b/opcodes/opcodes_llvm_builtins.cpp @@ -1092,11 +1092,8 @@ bool emit_select_instruction(Converter::Impl &impl, const llvm::SelectInst *inst { Operation *op = impl.allocate(spv::OpSelect, instruction); - op->add_ids({ - impl.get_id_for_value(instruction->getOperand(0)), - impl.get_id_for_value(instruction->getOperand(1)), - impl.get_id_for_value(instruction->getOperand(2)), - }); + for (unsigned i = 0; i < 3; i++) + op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); return true; @@ -1114,11 +1111,11 @@ bool emit_cmpxchg_instruction(Converter::Impl &impl, const llvm::AtomicCmpXchgIn atomic_op->add_id(impl.get_id_for_value(instruction->getPointerOperand())); - atomic_op->add_ids({ builder.makeUintConstant(spv::ScopeWorkgroup), - builder.makeUintConstant(0), // Relaxed - builder.makeUintConstant(0), // Relaxed - impl.get_id_for_value(instruction->getNewValOperand()), - impl.get_id_for_value(instruction->getCompareOperand()) }); + atomic_op->add_id(builder.makeUintConstant(spv::ScopeWorkgroup)); + atomic_op->add_id(builder.makeUintConstant(0)); + atomic_op->add_id(builder.makeUintConstant(0)); + atomic_op->add_id(impl.get_id_for_value(instruction->getNewValOperand())); + atomic_op->add_id(impl.get_id_for_value(instruction->getCompareOperand())); impl.add(atomic_op); @@ -1196,11 +1193,9 @@ bool emit_atomicrmw_instruction(Converter::Impl &impl, const llvm::AtomicRMWInst op->add_id(impl.get_id_for_value(instruction->getPointerOperand())); - op->add_ids({ - builder.makeUintConstant(spv::ScopeWorkgroup), - builder.makeUintConstant(0), - impl.get_id_for_value(instruction->getValOperand()), - }); + op->add_id(builder.makeUintConstant(spv::ScopeWorkgroup)); + op->add_id(builder.makeUintConstant(0)); + op->add_id(impl.get_id_for_value(instruction->getValOperand())); impl.add(op); return true; @@ -1209,7 +1204,9 @@ bool emit_atomicrmw_instruction(Converter::Impl &impl, const llvm::AtomicRMWInst bool emit_shufflevector_instruction(Converter::Impl &impl, const llvm::ShuffleVectorInst *inst) { Operation *op = impl.allocate(spv::OpVectorShuffle, inst); - op->add_ids({ impl.get_id_for_value(inst->getOperand(0)), impl.get_id_for_value(inst->getOperand(1)) }); + + for (unsigned i = 0; i < 2; i++) + op->add_id(impl.get_id_for_value(inst->getOperand(i))); unsigned num_outputs = inst->getType()->getVectorNumElements(); for (unsigned i = 0; i < num_outputs; i++) diff --git a/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil new file mode 100644 index 0000000..a9846a6 --- /dev/null +++ b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil @@ -0,0 +1,105 @@ +#version 460 +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 0, scalar) uniform BindlessCBV +{ + float _m0[16384]; +} _12[]; + +layout(set = 0, binding = 0, std140) uniform _16_19 +{ + vec4 _m0[4096]; +} _19[]; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + uint _34 = floatBitsToUint(_12[1u]._m0[1u]) >> 2u; + SV_Target.x = _12[0u]._m0[_34]; + SV_Target.y = _12[0u]._m0[_34]; + SV_Target.z = _12[0u]._m0[_34]; + SV_Target.w = _12[0u]._m0[_34]; +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 45 +; Schema: 0 +OpCapability Shader +OpCapability RuntimeDescriptorArray +OpExtension "SPV_EXT_descriptor_indexing" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %21 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %9 "BindlessCBV" +OpName %16 "BindlessCBV" +OpName %21 "SV_Target" +OpDecorate %8 ArrayStride 4 +OpDecorate %9 Block +OpMemberDecorate %9 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %15 ArrayStride 16 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 DescriptorSet 0 +OpDecorate %19 Binding 0 +OpDecorate %21 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeFloat 32 +%6 = OpTypeInt 32 0 +%7 = OpConstant %6 16384 +%8 = OpTypeArray %5 %7 +%9 = OpTypeStruct %8 +%10 = OpTypeRuntimeArray %9 +%11 = OpTypePointer Uniform %10 +%12 = OpVariable %11 Uniform +%13 = OpTypeVector %5 4 +%14 = OpConstant %6 4096 +%15 = OpTypeArray %13 %14 +%16 = OpTypeStruct %15 +%17 = OpTypeRuntimeArray %16 +%18 = OpTypePointer Uniform %17 +%19 = OpVariable %18 Uniform +%20 = OpTypePointer Output %13 +%21 = OpVariable %20 Output +%22 = OpTypePointer Uniform %9 +%24 = OpConstant %6 0 +%26 = OpConstant %6 1 +%27 = OpTypePointer Uniform %16 +%29 = OpConstant %6 2 +%30 = OpTypePointer Uniform %5 +%37 = OpTypePointer Output %5 +%42 = OpConstant %6 3 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %43 +%43 = OpLabel +%23 = OpAccessChain %22 %12 %24 +%25 = OpAccessChain %22 %12 %26 +%28 = OpAccessChain %27 %19 %29 +%31 = OpAccessChain %30 %25 %24 %26 +%32 = OpLoad %5 %31 +%33 = OpBitcast %6 %32 +%34 = OpShiftRightLogical %6 %33 %29 +%35 = OpAccessChain %30 %23 %24 %34 +%36 = OpLoad %5 %35 +%38 = OpAccessChain %37 %21 %24 +OpStore %38 %36 +%39 = OpAccessChain %37 %21 %26 +OpStore %39 %36 +%40 = OpAccessChain %37 %21 %29 +OpStore %40 %36 +%41 = OpAccessChain %37 %21 %42 +OpStore %41 %36 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil new file mode 100644 index 0000000..98b135b --- /dev/null +++ b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil @@ -0,0 +1,146 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 0, scalar) uniform BindlessCBV +{ + float _m0[16384]; +} _12[]; + +layout(set = 0, binding = 0, scalar) uniform _16_19 +{ + float16_t _m0[32768]; +} _19[]; + +layout(set = 0, binding = 0, scalar) uniform _23_26 +{ + double _m0[8192]; +} _26[]; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + float _53 = (_12[0u]._m0[1u] + float(_19[1u]._m0[1u])) + float(doubleBitsToUint64(_26[2u]._m0[1u])); + SV_Target.x = _53; + SV_Target.y = _53; + SV_Target.z = _53; + SV_Target.w = _53; +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 62 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability RuntimeDescriptorArray +OpExtension "SPV_EXT_descriptor_indexing" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %29 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %9 "BindlessCBV" +OpName %16 "BindlessCBV" +OpName %23 "BindlessCBV" +OpName %29 "SV_Target" +OpDecorate %8 ArrayStride 4 +OpDecorate %9 Block +OpMemberDecorate %9 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %15 ArrayStride 2 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 DescriptorSet 0 +OpDecorate %19 Binding 0 +OpDecorate %22 ArrayStride 8 +OpDecorate %23 Block +OpMemberDecorate %23 0 Offset 0 +OpDecorate %26 DescriptorSet 0 +OpDecorate %26 Binding 0 +OpDecorate %29 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeFloat 32 +%6 = OpTypeInt 32 0 +%7 = OpConstant %6 16384 +%8 = OpTypeArray %5 %7 +%9 = OpTypeStruct %8 +%10 = OpTypeRuntimeArray %9 +%11 = OpTypePointer Uniform %10 +%12 = OpVariable %11 Uniform +%13 = OpTypeFloat 16 +%14 = OpConstant %6 32768 +%15 = OpTypeArray %13 %14 +%16 = OpTypeStruct %15 +%17 = OpTypeRuntimeArray %16 +%18 = OpTypePointer Uniform %17 +%19 = OpVariable %18 Uniform +%20 = OpTypeFloat 64 +%21 = OpConstant %6 8192 +%22 = OpTypeArray %20 %21 +%23 = OpTypeStruct %22 +%24 = OpTypeRuntimeArray %23 +%25 = OpTypePointer Uniform %24 +%26 = OpVariable %25 Uniform +%27 = OpTypeVector %5 4 +%28 = OpTypePointer Output %27 +%29 = OpVariable %28 Output +%30 = OpTypePointer Uniform %9 +%32 = OpConstant %6 0 +%33 = OpTypePointer Uniform %16 +%35 = OpConstant %6 1 +%36 = OpTypePointer Uniform %23 +%38 = OpConstant %6 2 +%39 = OpTypePointer Uniform %5 +%42 = OpTypePointer Uniform %13 +%45 = OpTypeInt 64 0 +%46 = OpTypePointer Uniform %20 +%54 = OpTypePointer Output %5 +%59 = OpConstant %6 3 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %60 +%60 = OpLabel +%31 = OpAccessChain %30 %12 %32 +%34 = OpAccessChain %33 %19 %35 +%37 = OpAccessChain %36 %26 %38 +%40 = OpAccessChain %39 %31 %32 %35 +%41 = OpLoad %5 %40 +%43 = OpAccessChain %42 %34 %32 %35 +%44 = OpLoad %13 %43 +%47 = OpAccessChain %46 %37 %32 %35 +%48 = OpLoad %20 %47 +%49 = OpBitcast %45 %48 +%50 = OpFConvert %5 %44 +%51 = OpConvertUToF %5 %49 +%52 = OpFAdd %5 %41 %50 +%53 = OpFAdd %5 %52 %51 +%55 = OpAccessChain %54 %29 %32 +OpStore %55 %53 +%56 = OpAccessChain %54 %29 %35 +OpStore %56 %53 +%57 = OpAccessChain %54 %29 %38 +OpStore %57 %53 +%58 = OpAccessChain %54 %29 %59 +OpStore %58 %53 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag b/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag index 8c9b213..d99f257 100644 --- a/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag +++ b/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag @@ -44,8 +44,8 @@ bool discard_state; void descriptor_qa_report_fault(uint fault_type, uint heap_offset, uint cookie, uint heap_index, uint descriptor_type, uint actual_descriptor_type, uint instruction) { - uint _69 = atomicAdd(QAGlobalData.fault_atomic, 1u); - if (_69 == 0u) + uint _61 = atomicAdd(QAGlobalData.fault_atomic, 1u); + if (_61 == 0u) { QAGlobalData.failed_cookie = cookie; QAGlobalData.failed_offset = heap_offset; @@ -61,15 +61,15 @@ void descriptor_qa_report_fault(uint fault_type, uint heap_offset, uint cookie, uint descriptor_qa_check(uint heap_offset, uint descriptor_type_mask, uint instruction) { - uint _103 = QAHeapData.descriptor_count; - uint _105 = QAHeapData.heap_index; - uvec2 _107 = QAHeapData.cookies_descriptor_info[heap_offset]; - uint _115 = QAGlobalData.live_status_table[_107.x >> 5u]; - uint _126 = (uint(heap_offset >= _103) | (((_107.y & descriptor_type_mask) == descriptor_type_mask) ? 0u : 2u)) | (((_115 & (1u << (_107.x & 31u))) != 0u) ? 0u : 4u); - if (_126 != 0u) + uint _95 = QAHeapData.descriptor_count; + uint _97 = QAHeapData.heap_index; + uvec2 _99 = QAHeapData.cookies_descriptor_info[heap_offset]; + uint _107 = QAGlobalData.live_status_table[_99.x >> 5u]; + uint _118 = (uint(heap_offset >= _95) | (((_99.y & descriptor_type_mask) == descriptor_type_mask) ? 0u : 2u)) | (((_107 & (1u << (_99.x & 31u))) != 0u) ? 0u : 4u); + if (_118 != 0u) { - descriptor_qa_report_fault(_126, heap_offset, _107.x, _105, descriptor_type_mask, _107.y, instruction); - return _103; + descriptor_qa_report_fault(_118, heap_offset, _99.x, _97, descriptor_type_mask, _99.y, instruction); + return _95; } return heap_offset; } @@ -89,12 +89,12 @@ void main() { discard_state = true; } - uint _50 = descriptor_qa_check(registers._m0, 1u, 1u); - vec4 _141 = texture(sampler2D(_13[_50], _17[registers._m2]), vec2(UV.x, UV.y)); - SV_Target.x = _141.x; - SV_Target.y = _141.y; - SV_Target.z = _141.z; - SV_Target.w = _141.w; + uint _42 = descriptor_qa_check(registers._m0, 1u, 1u); + vec4 _133 = texture(sampler2D(_13[_42], _17[registers._m2]), vec2(UV.x, UV.y)); + SV_Target.x = _133.x; + SV_Target.y = _133.y; + SV_Target.z = _133.z; + SV_Target.w = _133.w; discard_exit(); } @@ -104,7 +104,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 164 +; Bound: 156 ; Schema: 0 OpCapability Shader OpCapability RuntimeDescriptorArray @@ -112,44 +112,44 @@ OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" OpExtension "SPV_KHR_physical_storage_buffer" OpMemoryModel PhysicalStorageBuffer64 GLSL450 -OpEntryPoint Fragment %3 "main" %20 %23 %27 +OpEntryPoint Fragment %3 "main" %20 %23 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %6 "RootConstants" OpName %8 "registers" OpName %20 "UV" OpName %23 "SV_Target" -OpName %43 "discard_state" -OpName %53 "DescriptorHeapGlobalQAData" -OpMemberName %53 0 "failed_shader_hash" -OpMemberName %53 1 "failed_offset" -OpMemberName %53 2 "failed_heap" -OpMemberName %53 3 "failed_cookie" -OpMemberName %53 4 "fault_atomic" -OpMemberName %53 5 "failed_instruction" -OpMemberName %53 6 "failed_descriptor_type_mask" -OpMemberName %53 7 "actual_descriptor_type_mask" -OpMemberName %53 8 "fault_type" -OpMemberName %53 9 "live_status_table" -OpName %55 "QAGlobalData" -OpName %64 "descriptor_qa_report_fault" -OpName %57 "fault_type" -OpName %58 "heap_offset" -OpName %59 "cookie" -OpName %60 "heap_index" -OpName %61 "descriptor_type" -OpName %62 "actual_descriptor_type" -OpName %63 "instruction" -OpName %93 "DescriptorHeapQAData" -OpMemberName %93 0 "descriptor_count" -OpMemberName %93 1 "heap_index" -OpMemberName %93 2 "cookies_descriptor_info" -OpName %95 "QAHeapData" -OpName %100 "descriptor_qa_check" -OpName %97 "heap_offset" -OpName %98 "descriptor_type_mask" -OpName %99 "instruction" -OpName %156 "discard_exit" +OpName %35 "discard_state" +OpName %45 "DescriptorHeapGlobalQAData" +OpMemberName %45 0 "failed_shader_hash" +OpMemberName %45 1 "failed_offset" +OpMemberName %45 2 "failed_heap" +OpMemberName %45 3 "failed_cookie" +OpMemberName %45 4 "fault_atomic" +OpMemberName %45 5 "failed_instruction" +OpMemberName %45 6 "failed_descriptor_type_mask" +OpMemberName %45 7 "actual_descriptor_type_mask" +OpMemberName %45 8 "fault_type" +OpMemberName %45 9 "live_status_table" +OpName %47 "QAGlobalData" +OpName %56 "descriptor_qa_report_fault" +OpName %49 "fault_type" +OpName %50 "heap_offset" +OpName %51 "cookie" +OpName %52 "heap_index" +OpName %53 "descriptor_type" +OpName %54 "actual_descriptor_type" +OpName %55 "instruction" +OpName %85 "DescriptorHeapQAData" +OpMemberName %85 0 "descriptor_count" +OpMemberName %85 1 "heap_index" +OpMemberName %85 2 "cookies_descriptor_info" +OpName %87 "QAHeapData" +OpName %92 "descriptor_qa_check" +OpName %89 "heap_offset" +OpName %90 "descriptor_type_mask" +OpName %91 "instruction" +OpName %148 "discard_exit" OpDecorate %6 Block OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 @@ -165,29 +165,28 @@ OpDecorate %17 DescriptorSet 2 OpDecorate %17 Binding 0 OpDecorate %20 Location 0 OpDecorate %23 Location 0 -OpDecorate %27 BuiltIn SampleMask -OpDecorate %52 ArrayStride 4 -OpMemberDecorate %53 0 Offset 0 -OpMemberDecorate %53 1 Offset 8 -OpMemberDecorate %53 2 Offset 12 -OpMemberDecorate %53 3 Offset 16 -OpMemberDecorate %53 4 Offset 20 -OpMemberDecorate %53 5 Offset 24 -OpMemberDecorate %53 6 Offset 28 -OpMemberDecorate %53 7 Offset 32 -OpMemberDecorate %53 8 Offset 36 -OpMemberDecorate %53 9 Offset 40 -OpDecorate %53 Block -OpDecorate %55 DescriptorSet 10 -OpDecorate %55 Binding 10 -OpDecorate %92 ArrayStride 8 -OpMemberDecorate %93 0 Offset 0 -OpMemberDecorate %93 1 Offset 4 -OpMemberDecorate %93 2 Offset 8 -OpDecorate %93 Block -OpDecorate %95 DescriptorSet 10 -OpDecorate %95 Binding 11 -OpDecorate %95 NonWritable +OpDecorate %44 ArrayStride 4 +OpMemberDecorate %45 0 Offset 0 +OpMemberDecorate %45 1 Offset 8 +OpMemberDecorate %45 2 Offset 12 +OpMemberDecorate %45 3 Offset 16 +OpMemberDecorate %45 4 Offset 20 +OpMemberDecorate %45 5 Offset 24 +OpMemberDecorate %45 6 Offset 28 +OpMemberDecorate %45 7 Offset 32 +OpMemberDecorate %45 8 Offset 36 +OpMemberDecorate %45 9 Offset 40 +OpDecorate %45 Block +OpDecorate %47 DescriptorSet 10 +OpDecorate %47 Binding 10 +OpDecorate %84 ArrayStride 8 +OpMemberDecorate %85 0 Offset 0 +OpMemberDecorate %85 1 Offset 4 +OpMemberDecorate %85 2 Offset 8 +OpDecorate %85 Block +OpDecorate %87 DescriptorSet 10 +OpDecorate %87 Binding 11 +OpDecorate %87 NonWritable %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -209,177 +208,169 @@ OpDecorate %95 NonWritable %21 = OpTypeVector %9 4 %22 = OpTypePointer Output %21 %23 = OpVariable %22 Output -%24 = OpConstant %5 1 -%25 = OpTypeArray %5 %24 -%26 = OpTypePointer Input %25 -%27 = OpVariable %26 Input -%28 = OpTypePointer Input %5 -%30 = OpConstant %5 0 -%32 = OpTypeBool -%35 = OpTypePointer Input %9 -%41 = OpConstant %9 0 -%42 = OpTypePointer Private %32 -%43 = OpVariable %42 Private -%44 = OpConstantFalse %32 -%45 = OpTypePointer UniformConstant %10 -%47 = OpTypePointer PushConstant %5 -%51 = OpTypeVector %5 2 -%52 = OpTypeRuntimeArray %5 -%53 = OpTypeStruct %51 %5 %5 %5 %5 %5 %5 %5 %5 %52 -%54 = OpTypePointer StorageBuffer %53 -%55 = OpVariable %54 StorageBuffer -%56 = OpTypeFunction %1 %5 %5 %5 %5 %5 %5 %5 -%66 = OpTypePointer StorageBuffer %5 -%68 = OpConstant %5 4 -%74 = OpConstant %5 3 -%77 = OpConstant %5 2 -%79 = OpConstant %5 6 -%81 = OpConstant %5 7 -%83 = OpConstant %5 5 -%84 = OpConstant %5 3735928559 -%85 = OpConstantComposite %51 %84 %30 -%86 = OpTypePointer StorageBuffer %51 -%88 = OpConstant %5 72 -%90 = OpConstant %5 8 -%92 = OpTypeRuntimeArray %51 -%93 = OpTypeStruct %5 %5 %92 -%94 = OpTypePointer StorageBuffer %93 -%95 = OpVariable %94 StorageBuffer -%96 = OpTypeFunction %5 %5 %5 %5 -%112 = OpConstant %5 31 -%114 = OpConstant %5 9 -%134 = OpTypePointer UniformConstant %14 -%139 = OpTypeSampledImage %10 -%147 = OpTypePointer Output %9 -%155 = OpConstantTrue %32 +%24 = OpTypePointer Input %9 +%26 = OpConstant %5 0 +%29 = OpConstant %5 1 +%31 = OpTypeBool +%33 = OpConstant %9 0 +%34 = OpTypePointer Private %31 +%35 = OpVariable %34 Private +%36 = OpConstantFalse %31 +%37 = OpTypePointer UniformConstant %10 +%39 = OpTypePointer PushConstant %5 +%43 = OpTypeVector %5 2 +%44 = OpTypeRuntimeArray %5 +%45 = OpTypeStruct %43 %5 %5 %5 %5 %5 %5 %5 %5 %44 +%46 = OpTypePointer StorageBuffer %45 +%47 = OpVariable %46 StorageBuffer +%48 = OpTypeFunction %1 %5 %5 %5 %5 %5 %5 %5 +%58 = OpTypePointer StorageBuffer %5 +%60 = OpConstant %5 4 +%66 = OpConstant %5 3 +%69 = OpConstant %5 2 +%71 = OpConstant %5 6 +%73 = OpConstant %5 7 +%75 = OpConstant %5 5 +%76 = OpConstant %5 3735928559 +%77 = OpConstantComposite %43 %76 %26 +%78 = OpTypePointer StorageBuffer %43 +%80 = OpConstant %5 72 +%82 = OpConstant %5 8 +%84 = OpTypeRuntimeArray %43 +%85 = OpTypeStruct %5 %5 %84 +%86 = OpTypePointer StorageBuffer %85 +%87 = OpVariable %86 StorageBuffer +%88 = OpTypeFunction %5 %5 %5 %5 +%104 = OpConstant %5 31 +%106 = OpConstant %5 9 +%126 = OpTypePointer UniformConstant %14 +%131 = OpTypeSampledImage %10 +%139 = OpTypePointer Output %9 +%147 = OpConstantTrue %31 %3 = OpFunction %1 None %2 %4 = OpLabel -OpStore %43 %44 -OpBranch %152 -%152 = OpLabel -%29 = OpAccessChain %28 %27 %30 -%31 = OpLoad %5 %29 -%33 = OpIEqual %32 %30 %31 -%34 = OpSelect %5 %33 %24 %30 -%36 = OpAccessChain %35 %20 %30 -%37 = OpLoad %9 %36 -%38 = OpAccessChain %35 %20 %24 -%39 = OpLoad %9 %38 -%40 = OpFOrdLessThan %32 %37 %41 -OpSelectionMerge %154 None -OpBranchConditional %40 %153 %154 -%153 = OpLabel -OpStore %43 %155 -OpBranch %154 -%154 = OpLabel -%48 = OpAccessChain %47 %8 %30 -%49 = OpLoad %5 %48 -%50 = OpFunctionCall %5 %100 %49 %24 %24 -%46 = OpAccessChain %45 %13 %50 -%133 = OpLoad %10 %46 -%136 = OpAccessChain %47 %8 %77 -%137 = OpLoad %5 %136 -%135 = OpAccessChain %134 %17 %137 -%138 = OpLoad %14 %135 -%140 = OpSampledImage %139 %133 %138 -%142 = OpCompositeConstruct %18 %37 %39 -%141 = OpImageSampleImplicitLod %21 %140 %142 None -%143 = OpCompositeExtract %9 %141 0 -%144 = OpCompositeExtract %9 %141 1 -%145 = OpCompositeExtract %9 %141 2 -%146 = OpCompositeExtract %9 %141 3 -%148 = OpAccessChain %147 %23 %30 -OpStore %148 %143 -%149 = OpAccessChain %147 %23 %24 -OpStore %149 %144 -%150 = OpAccessChain %147 %23 %77 -OpStore %150 %145 -%151 = OpAccessChain %147 %23 %74 -OpStore %151 %146 -%162 = OpFunctionCall %1 %156 +OpStore %35 %36 +OpBranch %144 +%144 = OpLabel +%25 = OpAccessChain %24 %20 %26 +%27 = OpLoad %9 %25 +%28 = OpAccessChain %24 %20 %29 +%30 = OpLoad %9 %28 +%32 = OpFOrdLessThan %31 %27 %33 +OpSelectionMerge %146 None +OpBranchConditional %32 %145 %146 +%145 = OpLabel +OpStore %35 %147 +OpBranch %146 +%146 = OpLabel +%40 = OpAccessChain %39 %8 %26 +%41 = OpLoad %5 %40 +%42 = OpFunctionCall %5 %92 %41 %29 %29 +%38 = OpAccessChain %37 %13 %42 +%125 = OpLoad %10 %38 +%128 = OpAccessChain %39 %8 %69 +%129 = OpLoad %5 %128 +%127 = OpAccessChain %126 %17 %129 +%130 = OpLoad %14 %127 +%132 = OpSampledImage %131 %125 %130 +%134 = OpCompositeConstruct %18 %27 %30 +%133 = OpImageSampleImplicitLod %21 %132 %134 None +%135 = OpCompositeExtract %9 %133 0 +%136 = OpCompositeExtract %9 %133 1 +%137 = OpCompositeExtract %9 %133 2 +%138 = OpCompositeExtract %9 %133 3 +%140 = OpAccessChain %139 %23 %26 +OpStore %140 %135 +%141 = OpAccessChain %139 %23 %29 +OpStore %141 %136 +%142 = OpAccessChain %139 %23 %69 +OpStore %142 %137 +%143 = OpAccessChain %139 %23 %66 +OpStore %143 %138 +%154 = OpFunctionCall %1 %148 OpReturn OpFunctionEnd -%64 = OpFunction %1 None %56 -%57 = OpFunctionParameter %5 -%58 = OpFunctionParameter %5 -%59 = OpFunctionParameter %5 -%60 = OpFunctionParameter %5 -%61 = OpFunctionParameter %5 -%62 = OpFunctionParameter %5 -%63 = OpFunctionParameter %5 -%65 = OpLabel -%67 = OpAccessChain %66 %55 %68 -%69 = OpAtomicIAdd %5 %67 %24 %30 %24 -%70 = OpIEqual %32 %69 %30 -OpSelectionMerge %72 None -OpBranchConditional %70 %71 %72 -%71 = OpLabel -%73 = OpAccessChain %66 %55 %74 -OpStore %73 %59 -%75 = OpAccessChain %66 %55 %24 -OpStore %75 %58 -%76 = OpAccessChain %66 %55 %77 -OpStore %76 %60 -%78 = OpAccessChain %66 %55 %79 -OpStore %78 %61 -%80 = OpAccessChain %66 %55 %81 -OpStore %80 %62 -%82 = OpAccessChain %66 %55 %83 -OpStore %82 %63 -%87 = OpAccessChain %86 %55 %30 -OpStore %87 %85 -OpMemoryBarrier %24 %88 -%89 = OpAccessChain %66 %55 %90 -OpStore %89 %57 -OpBranch %72 -%72 = OpLabel +%56 = OpFunction %1 None %48 +%49 = OpFunctionParameter %5 +%50 = OpFunctionParameter %5 +%51 = OpFunctionParameter %5 +%52 = OpFunctionParameter %5 +%53 = OpFunctionParameter %5 +%54 = OpFunctionParameter %5 +%55 = OpFunctionParameter %5 +%57 = OpLabel +%59 = OpAccessChain %58 %47 %60 +%61 = OpAtomicIAdd %5 %59 %29 %26 %29 +%62 = OpIEqual %31 %61 %26 +OpSelectionMerge %64 None +OpBranchConditional %62 %63 %64 +%63 = OpLabel +%65 = OpAccessChain %58 %47 %66 +OpStore %65 %51 +%67 = OpAccessChain %58 %47 %29 +OpStore %67 %50 +%68 = OpAccessChain %58 %47 %69 +OpStore %68 %52 +%70 = OpAccessChain %58 %47 %71 +OpStore %70 %53 +%72 = OpAccessChain %58 %47 %73 +OpStore %72 %54 +%74 = OpAccessChain %58 %47 %75 +OpStore %74 %55 +%79 = OpAccessChain %78 %47 %26 +OpStore %79 %77 +OpMemoryBarrier %29 %80 +%81 = OpAccessChain %58 %47 %82 +OpStore %81 %49 +OpBranch %64 +%64 = OpLabel OpReturn OpFunctionEnd -%100 = OpFunction %5 None %96 -%97 = OpFunctionParameter %5 -%98 = OpFunctionParameter %5 -%99 = OpFunctionParameter %5 -%101 = OpLabel -%102 = OpAccessChain %66 %95 %30 -%103 = OpLoad %5 %102 -%104 = OpAccessChain %66 %95 %24 -%105 = OpLoad %5 %104 -%106 = OpAccessChain %86 %95 %77 %97 -%107 = OpLoad %51 %106 -%108 = OpCompositeExtract %5 %107 0 -%110 = OpShiftRightLogical %5 %108 %83 -%111 = OpBitwiseAnd %5 %108 %112 -%109 = OpCompositeExtract %5 %107 1 -%113 = OpAccessChain %66 %55 %114 %110 -%115 = OpLoad %5 %113 -%116 = OpShiftLeftLogical %5 %24 %111 -%117 = OpBitwiseAnd %5 %115 %116 -%118 = OpINotEqual %32 %117 %30 -%119 = OpBitwiseAnd %5 %109 %98 -%120 = OpIEqual %32 %119 %98 -%121 = OpUGreaterThanEqual %32 %97 %103 -%122 = OpSelect %5 %121 %24 %30 -%123 = OpSelect %5 %120 %30 %77 -%124 = OpSelect %5 %118 %30 %68 -%125 = OpBitwiseOr %5 %122 %123 -%126 = OpBitwiseOr %5 %125 %124 -%127 = OpINotEqual %32 %126 %30 -OpSelectionMerge %129 None -OpBranchConditional %127 %128 %129 -%128 = OpLabel -%130 = OpFunctionCall %1 %64 %126 %97 %108 %105 %98 %109 %99 -OpReturnValue %103 -%129 = OpLabel -OpReturnValue %97 +%92 = OpFunction %5 None %88 +%89 = OpFunctionParameter %5 +%90 = OpFunctionParameter %5 +%91 = OpFunctionParameter %5 +%93 = OpLabel +%94 = OpAccessChain %58 %87 %26 +%95 = OpLoad %5 %94 +%96 = OpAccessChain %58 %87 %29 +%97 = OpLoad %5 %96 +%98 = OpAccessChain %78 %87 %69 %89 +%99 = OpLoad %43 %98 +%100 = OpCompositeExtract %5 %99 0 +%102 = OpShiftRightLogical %5 %100 %75 +%103 = OpBitwiseAnd %5 %100 %104 +%101 = OpCompositeExtract %5 %99 1 +%105 = OpAccessChain %58 %47 %106 %102 +%107 = OpLoad %5 %105 +%108 = OpShiftLeftLogical %5 %29 %103 +%109 = OpBitwiseAnd %5 %107 %108 +%110 = OpINotEqual %31 %109 %26 +%111 = OpBitwiseAnd %5 %101 %90 +%112 = OpIEqual %31 %111 %90 +%113 = OpUGreaterThanEqual %31 %89 %95 +%114 = OpSelect %5 %113 %29 %26 +%115 = OpSelect %5 %112 %26 %69 +%116 = OpSelect %5 %110 %26 %60 +%117 = OpBitwiseOr %5 %114 %115 +%118 = OpBitwiseOr %5 %117 %116 +%119 = OpINotEqual %31 %118 %26 +OpSelectionMerge %121 None +OpBranchConditional %119 %120 %121 +%120 = OpLabel +%122 = OpFunctionCall %1 %56 %118 %89 %100 %97 %90 %101 %91 +OpReturnValue %95 +%121 = OpLabel +OpReturnValue %89 OpFunctionEnd -%156 = OpFunction %1 None %2 -%157 = OpLabel -%160 = OpLoad %32 %43 -OpSelectionMerge %159 None -OpBranchConditional %160 %158 %159 -%158 = OpLabel +%148 = OpFunction %1 None %2 +%149 = OpLabel +%152 = OpLoad %31 %35 +OpSelectionMerge %151 None +OpBranchConditional %152 %150 %151 +%150 = OpLabel OpKill -%159 = OpLabel +%151 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag index 7697f58..a52b58b 100644 --- a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag @@ -3,9 +3,9 @@ layout(location = 0) in vec2 TEXCOORD; -void demote_cond(bool _37) +void demote_cond(bool _27) { - if (_37) + if (_27) { demote; } @@ -13,10 +13,8 @@ void demote_cond(bool _37) void main() { - bool _28 = (TEXCOORD.x + (-10.0)) < 0.0; - demote_cond(_28); - bool _33 = (TEXCOORD.y + (-20.0)) < 0.0; - demote_cond(_33); + demote_cond((TEXCOORD.x + (-10.0)) < 0.0); + demote_cond((TEXCOORD.y + (-20.0)) < 0.0); } @@ -25,69 +23,58 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 46 +; Bound: 36 ; Schema: 0 OpCapability Shader OpCapability DemoteToHelperInvocationEXT OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %8 %13 +OpEntryPoint Fragment %3 "main" %8 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %8 "TEXCOORD" -OpName %38 "demote_cond" +OpName %28 "demote_cond" OpDecorate %8 Location 0 -OpDecorate %13 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 %6 = OpTypeVector %5 2 %7 = OpTypePointer Input %6 %8 = OpVariable %7 Input -%9 = OpTypeInt 32 0 -%10 = OpConstant %9 1 -%11 = OpTypeArray %9 %10 -%12 = OpTypePointer Input %11 -%13 = OpVariable %12 Input -%14 = OpTypePointer Input %9 -%16 = OpConstant %9 0 -%18 = OpTypeBool -%21 = OpTypePointer Input %5 -%27 = OpConstant %5 -10 -%29 = OpConstant %5 0 -%32 = OpConstant %5 -20 -%36 = OpTypeFunction %1 %18 +%9 = OpTypePointer Input %5 +%11 = OpTypeInt 32 0 +%12 = OpConstant %11 0 +%15 = OpConstant %11 1 +%18 = OpConstant %5 -10 +%19 = OpTypeBool +%21 = OpConstant %5 0 +%23 = OpConstant %5 -20 +%26 = OpTypeFunction %1 %19 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %35 -%35 = OpLabel -%15 = OpAccessChain %14 %13 %16 -%17 = OpLoad %9 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %9 %19 %10 %16 -%22 = OpAccessChain %21 %8 %16 -%23 = OpLoad %5 %22 -%24 = OpAccessChain %21 %8 %10 -%25 = OpLoad %5 %24 -%26 = OpFAdd %5 %23 %27 -%28 = OpFOrdLessThan %18 %26 %29 -%30 = OpSelect %9 %28 %10 %16 -%43 = OpFunctionCall %1 %38 %28 -%31 = OpFAdd %5 %25 %32 -%33 = OpFOrdLessThan %18 %31 %29 -%34 = OpSelect %9 %33 %10 %16 -%44 = OpFunctionCall %1 %38 %33 +OpBranch %25 +%25 = OpLabel +%10 = OpAccessChain %9 %8 %12 +%13 = OpLoad %5 %10 +%14 = OpAccessChain %9 %8 %15 +%16 = OpLoad %5 %14 +%17 = OpFAdd %5 %13 %18 +%20 = OpFOrdLessThan %19 %17 %21 +%33 = OpFunctionCall %1 %28 %20 +%22 = OpFAdd %5 %16 %23 +%24 = OpFOrdLessThan %19 %22 %21 +%34 = OpFunctionCall %1 %28 %24 OpReturn OpFunctionEnd -%38 = OpFunction %1 None %36 -%37 = OpFunctionParameter %18 -%39 = OpLabel -OpSelectionMerge %41 None -OpBranchConditional %37 %40 %41 -%40 = OpLabel +%28 = OpFunction %1 None %26 +%27 = OpFunctionParameter %19 +%29 = OpLabel +OpSelectionMerge %31 None +OpBranchConditional %27 %30 %31 +%30 = OpLabel OpDemoteToHelperInvocationEXT -OpBranch %41 -%41 = OpLabel +OpBranch %31 +%31 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/clip.frag b/reference/shaders/dxil-builtin/clip.frag index 942e587..fb07ef5 100644 --- a/reference/shaders/dxil-builtin/clip.frag +++ b/reference/shaders/dxil-builtin/clip.frag @@ -3,9 +3,9 @@ layout(location = 0) in vec2 TEXCOORD; bool discard_state; -void discard_cond(bool _40) +void discard_cond(bool _30) { - if (_40) + if (_30) { discard_state = true; } @@ -22,10 +22,8 @@ void discard_exit() void main() { discard_state = false; - bool _28 = (TEXCOORD.x + (-10.0)) < 0.0; - discard_cond(_28); - bool _36 = (TEXCOORD.y + (-20.0)) < 0.0; - discard_cond(_36); + discard_cond((TEXCOORD.x + (-10.0)) < 0.0); + discard_cond((TEXCOORD.y + (-20.0)) < 0.0); discard_exit(); } @@ -35,85 +33,74 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 57 +; Bound: 47 ; Schema: 0 OpCapability Shader OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %8 %13 +OpEntryPoint Fragment %3 "main" %8 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %8 "TEXCOORD" -OpName %32 "discard_state" -OpName %41 "discard_cond" -OpName %49 "discard_exit" +OpName %23 "discard_state" +OpName %31 "discard_cond" +OpName %39 "discard_exit" OpDecorate %8 Location 0 -OpDecorate %13 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 %6 = OpTypeVector %5 2 %7 = OpTypePointer Input %6 %8 = OpVariable %7 Input -%9 = OpTypeInt 32 0 -%10 = OpConstant %9 1 -%11 = OpTypeArray %9 %10 -%12 = OpTypePointer Input %11 -%13 = OpVariable %12 Input -%14 = OpTypePointer Input %9 -%16 = OpConstant %9 0 -%18 = OpTypeBool -%21 = OpTypePointer Input %5 -%27 = OpConstant %5 -10 -%29 = OpConstant %5 0 -%31 = OpTypePointer Private %18 -%32 = OpVariable %31 Private -%33 = OpConstantFalse %18 -%35 = OpConstant %5 -20 -%39 = OpTypeFunction %1 %18 -%45 = OpConstantTrue %18 +%9 = OpTypePointer Input %5 +%11 = OpTypeInt 32 0 +%12 = OpConstant %11 0 +%15 = OpConstant %11 1 +%18 = OpConstant %5 -10 +%19 = OpTypeBool +%21 = OpConstant %5 0 +%22 = OpTypePointer Private %19 +%23 = OpVariable %22 Private +%24 = OpConstantFalse %19 +%26 = OpConstant %5 -20 +%29 = OpTypeFunction %1 %19 +%35 = OpConstantTrue %19 %3 = OpFunction %1 None %2 %4 = OpLabel -OpStore %32 %33 -OpBranch %38 -%38 = OpLabel -%15 = OpAccessChain %14 %13 %16 -%17 = OpLoad %9 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %9 %19 %10 %16 -%22 = OpAccessChain %21 %8 %16 -%23 = OpLoad %5 %22 -%24 = OpAccessChain %21 %8 %10 -%25 = OpLoad %5 %24 -%26 = OpFAdd %5 %23 %27 -%28 = OpFOrdLessThan %18 %26 %29 -%30 = OpSelect %9 %28 %10 %16 -%47 = OpFunctionCall %1 %41 %28 -%34 = OpFAdd %5 %25 %35 -%36 = OpFOrdLessThan %18 %34 %29 -%37 = OpSelect %9 %36 %10 %16 -%48 = OpFunctionCall %1 %41 %36 -%55 = OpFunctionCall %1 %49 +OpStore %23 %24 +OpBranch %28 +%28 = OpLabel +%10 = OpAccessChain %9 %8 %12 +%13 = OpLoad %5 %10 +%14 = OpAccessChain %9 %8 %15 +%16 = OpLoad %5 %14 +%17 = OpFAdd %5 %13 %18 +%20 = OpFOrdLessThan %19 %17 %21 +%37 = OpFunctionCall %1 %31 %20 +%25 = OpFAdd %5 %16 %26 +%27 = OpFOrdLessThan %19 %25 %21 +%38 = OpFunctionCall %1 %31 %27 +%45 = OpFunctionCall %1 %39 OpReturn OpFunctionEnd -%41 = OpFunction %1 None %39 -%40 = OpFunctionParameter %18 -%42 = OpLabel -OpSelectionMerge %44 None -OpBranchConditional %40 %43 %44 -%43 = OpLabel -OpStore %32 %45 -OpBranch %44 -%44 = OpLabel +%31 = OpFunction %1 None %29 +%30 = OpFunctionParameter %19 +%32 = OpLabel +OpSelectionMerge %34 None +OpBranchConditional %30 %33 %34 +%33 = OpLabel +OpStore %23 %35 +OpBranch %34 +%34 = OpLabel OpReturn OpFunctionEnd -%49 = OpFunction %1 None %2 -%50 = OpLabel -%53 = OpLoad %18 %32 -OpSelectionMerge %52 None -OpBranchConditional %53 %51 %52 -%51 = OpLabel +%39 = OpFunction %1 None %2 +%40 = OpLabel +%43 = OpLoad %19 %23 +OpSelectionMerge %42 None +OpBranchConditional %43 %41 %42 +%41 = OpLabel OpKill -%52 = OpLabel +%42 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag index cdd1b17..717a476 100644 --- a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag @@ -24,63 +24,54 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 37 +; Bound: 29 ; Schema: 0 OpCapability Shader OpCapability DemoteToHelperInvocationEXT OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %8 %13 +OpEntryPoint Fragment %3 "main" %8 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %8 "TEXCOORD" OpDecorate %8 Location 0 -OpDecorate %13 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 %6 = OpTypeVector %5 2 %7 = OpTypePointer Input %6 %8 = OpVariable %7 Input -%9 = OpTypeInt 32 0 -%10 = OpConstant %9 1 -%11 = OpTypeArray %9 %10 -%12 = OpTypePointer Input %11 -%13 = OpVariable %12 Input -%14 = OpTypePointer Input %9 -%16 = OpConstant %9 0 -%18 = OpTypeBool -%21 = OpTypePointer Input %5 -%25 = OpConstant %5 10 -%29 = OpConstant %5 20 +%9 = OpTypePointer Input %5 +%11 = OpTypeInt 32 0 +%12 = OpConstant %11 0 +%14 = OpTypeBool +%16 = OpConstant %5 10 +%18 = OpConstant %11 1 +%21 = OpConstant %5 20 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %30 -%30 = OpLabel -%15 = OpAccessChain %14 %13 %16 -%17 = OpLoad %9 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %9 %19 %10 %16 -%22 = OpAccessChain %21 %8 %16 -%23 = OpLoad %5 %22 -%24 = OpFOrdGreaterThan %18 %23 %25 -OpSelectionMerge %35 None -OpBranchConditional %24 %34 %31 -%34 = OpLabel +OpBranch %22 +%22 = OpLabel +%10 = OpAccessChain %9 %8 %12 +%13 = OpLoad %5 %10 +%15 = OpFOrdGreaterThan %14 %13 %16 +OpSelectionMerge %27 None +OpBranchConditional %15 %26 %23 +%26 = OpLabel OpDemoteToHelperInvocationEXT -OpBranch %35 -%31 = OpLabel -%26 = OpAccessChain %21 %8 %10 -%27 = OpLoad %5 %26 -%28 = OpFOrdGreaterThan %18 %27 %29 -OpSelectionMerge %33 None -OpBranchConditional %28 %32 %33 -%32 = OpLabel +OpBranch %27 +%23 = OpLabel +%17 = OpAccessChain %9 %8 %18 +%19 = OpLoad %5 %17 +%20 = OpFOrdGreaterThan %14 %19 %21 +OpSelectionMerge %25 None +OpBranchConditional %20 %24 %25 +%24 = OpLabel OpDemoteToHelperInvocationEXT -OpBranch %33 -%33 = OpLabel -OpBranch %35 -%35 = OpLabel +OpBranch %25 +%25 = OpLabel +OpBranch %27 +%27 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/discard.frag b/reference/shaders/dxil-builtin/discard.frag index c26a174..48c065a 100644 --- a/reference/shaders/dxil-builtin/discard.frag +++ b/reference/shaders/dxil-builtin/discard.frag @@ -34,79 +34,70 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 48 +; Bound: 40 ; Schema: 0 OpCapability Shader OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %8 %13 +OpEntryPoint Fragment %3 "main" %8 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %8 "TEXCOORD" -OpName %27 "discard_state" -OpName %40 "discard_exit" +OpName %18 "discard_state" +OpName %32 "discard_exit" OpDecorate %8 Location 0 -OpDecorate %13 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 %6 = OpTypeVector %5 2 %7 = OpTypePointer Input %6 %8 = OpVariable %7 Input -%9 = OpTypeInt 32 0 -%10 = OpConstant %9 1 -%11 = OpTypeArray %9 %10 -%12 = OpTypePointer Input %11 -%13 = OpVariable %12 Input -%14 = OpTypePointer Input %9 -%16 = OpConstant %9 0 -%18 = OpTypeBool -%21 = OpTypePointer Input %5 -%25 = OpConstant %5 10 -%26 = OpTypePointer Private %18 -%27 = OpVariable %26 Private -%28 = OpConstantFalse %18 -%32 = OpConstant %5 20 -%39 = OpConstantTrue %18 +%9 = OpTypePointer Input %5 +%11 = OpTypeInt 32 0 +%12 = OpConstant %11 0 +%14 = OpTypeBool +%16 = OpConstant %5 10 +%17 = OpTypePointer Private %14 +%18 = OpVariable %17 Private +%19 = OpConstantFalse %14 +%21 = OpConstant %11 1 +%24 = OpConstant %5 20 +%31 = OpConstantTrue %14 %3 = OpFunction %1 None %2 %4 = OpLabel -OpStore %27 %28 -OpBranch %33 -%33 = OpLabel -%15 = OpAccessChain %14 %13 %16 -%17 = OpLoad %9 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %9 %19 %10 %16 -%22 = OpAccessChain %21 %8 %16 -%23 = OpLoad %5 %22 -%24 = OpFOrdGreaterThan %18 %23 %25 -OpSelectionMerge %38 None -OpBranchConditional %24 %37 %34 -%37 = OpLabel -OpStore %27 %39 -OpBranch %38 -%34 = OpLabel -%29 = OpAccessChain %21 %8 %10 -%30 = OpLoad %5 %29 -%31 = OpFOrdGreaterThan %18 %30 %32 -OpSelectionMerge %36 None -OpBranchConditional %31 %35 %36 -%35 = OpLabel -OpStore %27 %39 -OpBranch %36 -%36 = OpLabel -OpBranch %38 -%38 = OpLabel -%46 = OpFunctionCall %1 %40 +OpStore %18 %19 +OpBranch %25 +%25 = OpLabel +%10 = OpAccessChain %9 %8 %12 +%13 = OpLoad %5 %10 +%15 = OpFOrdGreaterThan %14 %13 %16 +OpSelectionMerge %30 None +OpBranchConditional %15 %29 %26 +%29 = OpLabel +OpStore %18 %31 +OpBranch %30 +%26 = OpLabel +%20 = OpAccessChain %9 %8 %21 +%22 = OpLoad %5 %20 +%23 = OpFOrdGreaterThan %14 %22 %24 +OpSelectionMerge %28 None +OpBranchConditional %23 %27 %28 +%27 = OpLabel +OpStore %18 %31 +OpBranch %28 +%28 = OpLabel +OpBranch %30 +%30 = OpLabel +%38 = OpFunctionCall %1 %32 OpReturn OpFunctionEnd -%40 = OpFunction %1 None %2 -%41 = OpLabel -%44 = OpLoad %18 %27 -OpSelectionMerge %43 None -OpBranchConditional %44 %42 %43 -%42 = OpLabel +%32 = OpFunction %1 None %2 +%33 = OpLabel +%36 = OpLoad %14 %18 +OpSelectionMerge %35 None +OpBranchConditional %36 %34 %35 +%34 = OpLabel OpKill -%43 = OpLabel +%35 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp b/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp index 7a839df..e7a4aa8 100644 --- a/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp +++ b/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp @@ -45,6 +45,7 @@ void main() OpCapability Shader OpCapability Int16 OpCapability Int8 +OpCapability StorageBuffer16BitAccess %80 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %23 diff --git a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp index a82fcd6..07039f7 100644 --- a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp +++ b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp @@ -72,6 +72,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %34 OpExecutionMode %3 LocalSize 64 1 1 diff --git a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp index c626cd6..6fd75c1 100644 --- a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp +++ b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp @@ -6,6 +6,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpCapability DotProductInput4x8BitPackedKHR OpCapability DotProductKHR OpExtension "SPV_KHR_integer_dot_product" diff --git a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag index 74cda90..aa21585 100644 --- a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag @@ -11,11 +11,11 @@ void main() { demote; } - uvec4 _26 = subgroupBallot(INDEX < 100u); - SV_Target.x = _26.x; - SV_Target.y = _26.y; - SV_Target.z = _26.z; - SV_Target.w = _26.w; + uvec4 _17 = subgroupBallot(INDEX < 100u); + SV_Target.x = _17.x; + SV_Target.y = _17.y; + SV_Target.z = _17.z; + SV_Target.w = _17.w; } @@ -24,14 +24,14 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 42 +; Bound: 35 ; Schema: 0 OpCapability Shader OpCapability GroupNonUniformBallot OpCapability DemoteToHelperInvocationEXT OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %7 %10 %14 +OpEntryPoint Fragment %3 "main" %7 %10 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %7 "INDEX" @@ -39,7 +39,6 @@ OpName %10 "SV_Target" OpDecorate %7 Flat OpDecorate %7 Location 0 OpDecorate %10 Location 0 -OpDecorate %14 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -48,47 +47,40 @@ OpDecorate %14 BuiltIn SampleMask %8 = OpTypeVector %5 4 %9 = OpTypePointer Output %8 %10 = OpVariable %9 Output -%11 = OpConstant %5 1 -%12 = OpTypeArray %5 %11 -%13 = OpTypePointer Input %12 -%14 = OpVariable %13 Input -%16 = OpConstant %5 0 -%18 = OpTypeBool -%23 = OpConstant %5 40 -%25 = OpConstant %5 100 -%27 = OpConstant %5 3 -%32 = OpTypePointer Output %5 -%36 = OpConstant %5 2 +%12 = OpTypeBool +%14 = OpConstant %5 40 +%16 = OpConstant %5 100 +%18 = OpConstant %5 3 +%23 = OpTypePointer Output %5 +%25 = OpConstant %5 0 +%27 = OpConstant %5 1 +%29 = OpConstant %5 2 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %38 -%38 = OpLabel -%15 = OpAccessChain %6 %14 %16 -%17 = OpLoad %5 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %5 %19 %11 %16 -%21 = OpLoad %5 %7 -%22 = OpIEqual %18 %21 %23 -OpSelectionMerge %40 None -OpBranchConditional %22 %39 %40 -%39 = OpLabel +OpBranch %31 +%31 = OpLabel +%11 = OpLoad %5 %7 +%13 = OpIEqual %12 %11 %14 +OpSelectionMerge %33 None +OpBranchConditional %13 %32 %33 +%32 = OpLabel OpDemoteToHelperInvocationEXT -OpBranch %40 -%40 = OpLabel -%24 = OpULessThan %18 %21 %25 -%26 = OpGroupNonUniformBallot %8 %27 %24 -%28 = OpCompositeExtract %5 %26 0 -%29 = OpCompositeExtract %5 %26 1 -%30 = OpCompositeExtract %5 %26 2 -%31 = OpCompositeExtract %5 %26 3 -%33 = OpAccessChain %32 %10 %16 -OpStore %33 %28 -%34 = OpAccessChain %32 %10 %11 -OpStore %34 %29 -%35 = OpAccessChain %32 %10 %36 -OpStore %35 %30 -%37 = OpAccessChain %32 %10 %27 -OpStore %37 %31 +OpBranch %33 +%33 = OpLabel +%15 = OpULessThan %12 %11 %16 +%17 = OpGroupNonUniformBallot %8 %18 %15 +%19 = OpCompositeExtract %5 %17 0 +%20 = OpCompositeExtract %5 %17 1 +%21 = OpCompositeExtract %5 %17 2 +%22 = OpCompositeExtract %5 %17 3 +%24 = OpAccessChain %23 %10 %25 +OpStore %24 %19 +%26 = OpAccessChain %23 %10 %27 +OpStore %26 %20 +%28 = OpAccessChain %23 %10 %29 +OpStore %28 %21 +%30 = OpAccessChain %23 %10 %18 +OpStore %30 %22 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag b/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag index 9754d4d..3efd1d9 100644 --- a/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag +++ b/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag @@ -20,11 +20,11 @@ void main() { discard_state = true; } - uvec4 _29 = subgroupBallot(INDEX < 100u); - SV_Target.x = _29.x; - SV_Target.y = _29.y; - SV_Target.z = _29.z; - SV_Target.w = _29.w; + uvec4 _20 = subgroupBallot(INDEX < 100u); + SV_Target.x = _20.x; + SV_Target.y = _20.y; + SV_Target.z = _20.z; + SV_Target.w = _20.w; discard_exit(); } @@ -34,22 +34,21 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 53 +; Bound: 46 ; Schema: 0 OpCapability Shader OpCapability GroupNonUniformBallot OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %7 %10 %14 +OpEntryPoint Fragment %3 "main" %7 %10 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %7 "INDEX" OpName %10 "SV_Target" -OpName %25 "discard_state" -OpName %45 "discard_exit" +OpName %16 "discard_state" +OpName %38 "discard_exit" OpDecorate %7 Flat OpDecorate %7 Location 0 OpDecorate %10 Location 0 -OpDecorate %14 BuiltIn SampleMask %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -58,63 +57,56 @@ OpDecorate %14 BuiltIn SampleMask %8 = OpTypeVector %5 4 %9 = OpTypePointer Output %8 %10 = OpVariable %9 Output -%11 = OpConstant %5 1 -%12 = OpTypeArray %5 %11 -%13 = OpTypePointer Input %12 -%14 = OpVariable %13 Input -%16 = OpConstant %5 0 -%18 = OpTypeBool -%23 = OpConstant %5 40 -%24 = OpTypePointer Private %18 -%25 = OpVariable %24 Private -%26 = OpConstantFalse %18 -%28 = OpConstant %5 100 -%30 = OpConstant %5 3 -%35 = OpTypePointer Output %5 -%39 = OpConstant %5 2 -%44 = OpConstantTrue %18 +%12 = OpTypeBool +%14 = OpConstant %5 40 +%15 = OpTypePointer Private %12 +%16 = OpVariable %15 Private +%17 = OpConstantFalse %12 +%19 = OpConstant %5 100 +%21 = OpConstant %5 3 +%26 = OpTypePointer Output %5 +%28 = OpConstant %5 0 +%30 = OpConstant %5 1 +%32 = OpConstant %5 2 +%37 = OpConstantTrue %12 %3 = OpFunction %1 None %2 %4 = OpLabel -OpStore %25 %26 -OpBranch %41 -%41 = OpLabel -%15 = OpAccessChain %6 %14 %16 -%17 = OpLoad %5 %15 -%19 = OpIEqual %18 %16 %17 -%20 = OpSelect %5 %19 %11 %16 -%21 = OpLoad %5 %7 -%22 = OpIEqual %18 %21 %23 -OpSelectionMerge %43 None -OpBranchConditional %22 %42 %43 -%42 = OpLabel -OpStore %25 %44 -OpBranch %43 -%43 = OpLabel -%27 = OpULessThan %18 %21 %28 -%29 = OpGroupNonUniformBallot %8 %30 %27 -%31 = OpCompositeExtract %5 %29 0 -%32 = OpCompositeExtract %5 %29 1 -%33 = OpCompositeExtract %5 %29 2 -%34 = OpCompositeExtract %5 %29 3 -%36 = OpAccessChain %35 %10 %16 -OpStore %36 %31 -%37 = OpAccessChain %35 %10 %11 -OpStore %37 %32 -%38 = OpAccessChain %35 %10 %39 -OpStore %38 %33 -%40 = OpAccessChain %35 %10 %30 -OpStore %40 %34 -%51 = OpFunctionCall %1 %45 +OpStore %16 %17 +OpBranch %34 +%34 = OpLabel +%11 = OpLoad %5 %7 +%13 = OpIEqual %12 %11 %14 +OpSelectionMerge %36 None +OpBranchConditional %13 %35 %36 +%35 = OpLabel +OpStore %16 %37 +OpBranch %36 +%36 = OpLabel +%18 = OpULessThan %12 %11 %19 +%20 = OpGroupNonUniformBallot %8 %21 %18 +%22 = OpCompositeExtract %5 %20 0 +%23 = OpCompositeExtract %5 %20 1 +%24 = OpCompositeExtract %5 %20 2 +%25 = OpCompositeExtract %5 %20 3 +%27 = OpAccessChain %26 %10 %28 +OpStore %27 %22 +%29 = OpAccessChain %26 %10 %30 +OpStore %29 %23 +%31 = OpAccessChain %26 %10 %32 +OpStore %31 %24 +%33 = OpAccessChain %26 %10 %21 +OpStore %33 %25 +%44 = OpFunctionCall %1 %38 OpReturn OpFunctionEnd -%45 = OpFunction %1 None %2 -%46 = OpLabel -%49 = OpLoad %18 %25 -OpSelectionMerge %48 None -OpBranchConditional %49 %47 %48 -%47 = OpLabel +%38 = OpFunction %1 None %2 +%39 = OpLabel +%42 = OpLoad %12 %16 +OpSelectionMerge %41 None +OpBranchConditional %42 %40 %41 +%40 = OpLabel OpKill -%48 = OpLabel +%41 = OpLabel OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp b/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp index c94f1c8..029d98d 100644 --- a/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp +++ b/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp @@ -161,6 +161,7 @@ OpCapability Float16 OpCapability Int16 OpCapability StorageBufferArrayDynamicIndexing OpCapability ImageQuery +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability StorageBufferArrayNonUniformIndexing OpCapability PhysicalStorageBufferAddresses diff --git a/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp b/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp index 3d51b1b..701d4bf 100644 --- a/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp +++ b/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp @@ -209,6 +209,7 @@ OpCapability Int16 OpCapability StorageBufferArrayDynamicIndexing OpCapability ImageQuery OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability StorageBufferArrayNonUniformIndexing OpCapability PhysicalStorageBufferAddresses diff --git a/reference/shaders/resources/buffer-16bit.ssbo.comp b/reference/shaders/resources/buffer-16bit.ssbo.comp index 2bb9766..138faa0 100644 --- a/reference/shaders/resources/buffer-16bit.ssbo.comp +++ b/reference/shaders/resources/buffer-16bit.ssbo.comp @@ -170,6 +170,7 @@ OpCapability Float16 OpCapability Int16 OpCapability StorageBufferArrayDynamicIndexing OpCapability ImageQuery +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability StorageBufferArrayNonUniformIndexing OpExtension "SPV_EXT_descriptor_indexing" diff --git a/reference/shaders/resources/cbv-array-nonuniform.frag b/reference/shaders/resources/cbv-array-nonuniform.frag index 568d8a5..8fcb778 100644 --- a/reference/shaders/resources/cbv-array-nonuniform.frag +++ b/reference/shaders/resources/cbv-array-nonuniform.frag @@ -60,8 +60,10 @@ OpDecorate %19 Binding 0 OpDecorate %21 Flat OpDecorate %21 Location 0 OpDecorate %23 Location 0 +OpDecorate %27 NonUniform OpDecorate %30 NonUniform OpDecorate %32 NonUniform +OpDecorate %41 NonUniform OpDecorate %43 NonUniform OpDecorate %44 NonUniform %1 = OpTypeVoid diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.frag new file mode 100644 index 0000000..49a7d61 --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.frag @@ -0,0 +1,193 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require + +struct CBVComposite16x8 +{ + float16_t _m0; + float16_t _m1; + float16_t _m2; + float16_t _m3; + float16_t _m4; + float16_t _m5; + float16_t _m6; + float16_t _m7; +}; + +layout(set = 0, binding = 0, std140) uniform _10_12 +{ + vec4 _m0[4]; +} _12; + +layout(set = 0, binding = 0, std140) uniform _16_18 +{ + dvec2 _m0[4]; +} _18; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + f16vec2 _38 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].x)); + f16vec2 _41 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].y)); + f16vec2 _44 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].z)); + f16vec2 _47 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].w)); + CBVComposite16x8 _51 = CBVComposite16x8(_38.x, _38.y, _41.x, _41.y, _44.x, _44.y, _47.x, _47.y); + u64vec2 _82 = doubleBitsToUint64(_18._m0[2u]); + u64vec2 _88 = doubleBitsToUint64(_18._m0[3u]); + SV_Target.x = ((float(_51._m0) + _12._m0[0u].x) + float(_51._m4)) + float(int64_t(_82.x)); + SV_Target.y = ((float(_51._m1) + _12._m0[0u].y) + float(_51._m5)) + float(int64_t(_82.y)); + SV_Target.z = ((float(_51._m2) + _12._m0[0u].z) + float(_51._m6)) + float(int64_t(_88.x)); + SV_Target.w = ((float(_51._m3) + _12._m0[0u].w) + float(_51._m7)) + float(int64_t(_88.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 106 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %20 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %10 "" +OpName %16 "" +OpName %20 "SV_Target" +OpName %50 "CBVComposite16x8" +OpDecorate %9 ArrayStride 16 +OpMemberDecorate %10 0 Offset 0 +OpDecorate %10 Block +OpDecorate %15 ArrayStride 16 +OpMemberDecorate %16 0 Offset 0 +OpDecorate %16 Block +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %18 DescriptorSet 0 +OpDecorate %18 Binding 0 +OpDecorate %20 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 4 +%7 = OpTypeFloat 32 +%8 = OpTypeVector %7 4 +%9 = OpTypeArray %8 %6 +%10 = OpTypeStruct %9 +%11 = OpTypePointer Uniform %10 +%12 = OpVariable %11 Uniform +%13 = OpTypeFloat 64 +%14 = OpTypeVector %13 2 +%15 = OpTypeArray %14 %6 +%16 = OpTypeStruct %15 +%17 = OpTypePointer Uniform %16 +%18 = OpVariable %17 Uniform +%19 = OpTypePointer Output %8 +%20 = OpVariable %19 Output +%21 = OpConstant %5 0 +%22 = OpTypePointer Uniform %8 +%29 = OpTypeFloat 16 +%30 = OpConstant %5 1 +%33 = OpTypeVector %29 2 +%50 = OpTypeStruct %29 %29 %29 %29 %29 %29 %29 %29 +%76 = OpTypeInt 64 0 +%77 = OpConstant %5 2 +%78 = OpTypePointer Uniform %14 +%81 = OpTypeVector %76 2 +%85 = OpConstant %5 3 +%99 = OpTypePointer Output %7 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %104 +%104 = OpLabel +%23 = OpAccessChain %22 %12 %21 %21 +%24 = OpLoad %8 %23 +%25 = OpCompositeExtract %7 %24 0 +%26 = OpCompositeExtract %7 %24 1 +%27 = OpCompositeExtract %7 %24 2 +%28 = OpCompositeExtract %7 %24 3 +%31 = OpAccessChain %22 %12 %21 %30 +%32 = OpLoad %8 %31 +%34 = OpCompositeExtract %7 %32 0 +%35 = OpCompositeExtract %7 %32 1 +%36 = OpCompositeExtract %7 %32 2 +%37 = OpCompositeExtract %7 %32 3 +%38 = OpBitcast %33 %34 +%39 = OpCompositeExtract %29 %38 0 +%40 = OpCompositeExtract %29 %38 1 +%41 = OpBitcast %33 %35 +%42 = OpCompositeExtract %29 %41 0 +%43 = OpCompositeExtract %29 %41 1 +%44 = OpBitcast %33 %36 +%45 = OpCompositeExtract %29 %44 0 +%46 = OpCompositeExtract %29 %44 1 +%47 = OpBitcast %33 %37 +%48 = OpCompositeExtract %29 %47 0 +%49 = OpCompositeExtract %29 %47 1 +%51 = OpCompositeConstruct %50 %39 %40 %42 %43 %45 %46 %48 %49 +%52 = OpCompositeExtract %29 %51 0 +%53 = OpCompositeExtract %29 %51 1 +%54 = OpCompositeExtract %29 %51 2 +%55 = OpCompositeExtract %29 %51 3 +%56 = OpFConvert %7 %52 +%57 = OpFConvert %7 %53 +%58 = OpFConvert %7 %54 +%59 = OpFConvert %7 %55 +%60 = OpFAdd %7 %56 %25 +%61 = OpFAdd %7 %57 %26 +%62 = OpFAdd %7 %58 %27 +%63 = OpFAdd %7 %59 %28 +%64 = OpCompositeExtract %29 %51 4 +%65 = OpCompositeExtract %29 %51 5 +%66 = OpCompositeExtract %29 %51 6 +%67 = OpCompositeExtract %29 %51 7 +%68 = OpFConvert %7 %64 +%69 = OpFConvert %7 %65 +%70 = OpFConvert %7 %66 +%71 = OpFConvert %7 %67 +%72 = OpFAdd %7 %60 %68 +%73 = OpFAdd %7 %61 %69 +%74 = OpFAdd %7 %62 %70 +%75 = OpFAdd %7 %63 %71 +%79 = OpAccessChain %78 %18 %21 %77 +%80 = OpLoad %14 %79 +%82 = OpBitcast %81 %80 +%83 = OpCompositeExtract %76 %82 0 +%84 = OpCompositeExtract %76 %82 1 +%86 = OpAccessChain %78 %18 %21 %85 +%87 = OpLoad %14 %86 +%88 = OpBitcast %81 %87 +%89 = OpCompositeExtract %76 %88 0 +%90 = OpCompositeExtract %76 %88 1 +%91 = OpConvertSToF %7 %83 +%92 = OpConvertSToF %7 %84 +%93 = OpConvertSToF %7 %89 +%94 = OpConvertSToF %7 %90 +%95 = OpFAdd %7 %72 %91 +%96 = OpFAdd %7 %73 %92 +%97 = OpFAdd %7 %74 %93 +%98 = OpFAdd %7 %75 %94 +%100 = OpAccessChain %99 %20 %21 +OpStore %100 %95 +%101 = OpAccessChain %99 %20 %30 +OpStore %101 %96 +%102 = OpAccessChain %99 %20 %77 +OpStore %102 %97 +%103 = OpAccessChain %99 %20 %85 +OpStore %103 %98 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag new file mode 100644 index 0000000..996b3f0 --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag @@ -0,0 +1,263 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_buffer_reference : require + +struct AddCarry +{ + uint _m0; + uint _m1; +}; + +struct CBVComposite16x8 +{ + float16_t _m0; + float16_t _m1; + float16_t _m2; + float16_t _m3; + float16_t _m4; + float16_t _m5; + float16_t _m6; + float16_t _m7; +}; + +layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; +layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; +layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +{ + vec4 value; +}; + +layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite +{ + u64vec2 value; +}; + +layout(push_constant, std430) uniform RootConstants +{ + uvec2 _m0; + uvec2 _m1; + uvec2 _m2; + uvec2 _m3; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + AddCarry _23; + _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); + PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); + AddCarry _43; + _43._m0 = uaddCarry(registers._m0.x, 1u * 16u, _43._m1); + PhysicalPointerFloat4NonWrite _48 = PhysicalPointerFloat4NonWrite(uvec2(_43._m0, registers._m0.y + _43._m1)); + f16vec2 _56 = unpackFloat2x16(floatBitsToUint(_48.value.x)); + f16vec2 _59 = unpackFloat2x16(floatBitsToUint(_48.value.y)); + f16vec2 _62 = unpackFloat2x16(floatBitsToUint(_48.value.z)); + f16vec2 _65 = unpackFloat2x16(floatBitsToUint(_48.value.w)); + CBVComposite16x8 _69 = CBVComposite16x8(_56.x, _56.y, _59.x, _59.y, _62.x, _62.y, _65.x, _65.y); + AddCarry _99; + _99._m0 = uaddCarry(registers._m0.x, 2u * 16u, _99._m1); + PhysicalPointerUint642NonWrite _107 = PhysicalPointerUint642NonWrite(uvec2(_99._m0, registers._m0.y + _99._m1)); + AddCarry _117; + _117._m0 = uaddCarry(registers._m0.x, 3u * 16u, _117._m1); + PhysicalPointerUint642NonWrite _122 = PhysicalPointerUint642NonWrite(uvec2(_117._m0, registers._m0.y + _117._m1)); + SV_Target.x = ((float(_69._m0) + _30.value.x) + float(_69._m4)) + float(int64_t(_107.value.x)); + SV_Target.y = ((float(_69._m1) + _30.value.y) + float(_69._m5)) + float(int64_t(_107.value.y)); + SV_Target.z = ((float(_69._m2) + _30.value.z) + float(_69._m6)) + float(int64_t(_122.value.x)); + SV_Target.w = ((float(_69._m3) + _30.value.w) + float(_69._m7)) + float(int64_t(_122.value.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 142 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Int64 +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %13 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %7 "RootConstants" +OpName %9 "registers" +OpName %13 "SV_Target" +OpName %22 "AddCarry" +OpName %28 "PhysicalPointerFloat4NonWrite" +OpMemberName %28 0 "value" +OpName %68 "CBVComposite16x8" +OpName %105 "PhysicalPointerUint642NonWrite" +OpMemberName %105 0 "value" +OpDecorate %7 Block +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpMemberDecorate %7 2 Offset 16 +OpMemberDecorate %7 3 Offset 24 +OpDecorate %13 Location 0 +OpMemberDecorate %28 0 Offset 0 +OpDecorate %28 Block +OpMemberDecorate %28 0 NonWritable +OpMemberDecorate %105 0 Offset 0 +OpDecorate %105 Block +OpMemberDecorate %105 0 NonWritable +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeVector %5 2 +%7 = OpTypeStruct %6 %6 %6 %6 +%8 = OpTypePointer PushConstant %7 +%9 = OpVariable %8 PushConstant +%10 = OpTypeFloat 32 +%11 = OpTypeVector %10 4 +%12 = OpTypePointer Output %11 +%13 = OpVariable %12 Output +%14 = OpTypePointer PushConstant %6 +%16 = OpConstant %5 0 +%19 = OpConstant %5 16 +%22 = OpTypeStruct %5 %5 +%28 = OpTypeStruct %11 +%29 = OpTypePointer PhysicalStorageBuffer %28 +%31 = OpTypePointer PhysicalStorageBuffer %11 +%38 = OpConstant %5 1 +%40 = OpTypeFloat 16 +%51 = OpTypeVector %40 2 +%68 = OpTypeStruct %40 %40 %40 %40 %40 %40 %40 %40 +%94 = OpConstant %5 2 +%96 = OpTypeInt 64 0 +%104 = OpTypeVector %96 2 +%105 = OpTypeStruct %104 +%106 = OpTypePointer PhysicalStorageBuffer %105 +%108 = OpTypePointer PhysicalStorageBuffer %104 +%113 = OpConstant %5 3 +%135 = OpTypePointer Output %10 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %140 +%140 = OpLabel +%15 = OpAccessChain %14 %9 %16 +%17 = OpLoad %6 %15 +%18 = OpIMul %5 %16 %19 +%20 = OpCompositeExtract %5 %17 0 +%21 = OpCompositeExtract %5 %17 1 +%23 = OpIAddCarry %22 %20 %18 +%24 = OpCompositeExtract %5 %23 0 +%25 = OpCompositeExtract %5 %23 1 +%26 = OpIAdd %5 %21 %25 +%27 = OpCompositeConstruct %6 %24 %26 +%30 = OpBitcast %29 %27 +%32 = OpAccessChain %31 %30 %16 +%33 = OpLoad %11 %32 Aligned 16 +%34 = OpCompositeExtract %10 %33 0 +%35 = OpCompositeExtract %10 %33 1 +%36 = OpCompositeExtract %10 %33 2 +%37 = OpCompositeExtract %10 %33 3 +%39 = OpIMul %5 %38 %19 +%41 = OpCompositeExtract %5 %17 0 +%42 = OpCompositeExtract %5 %17 1 +%43 = OpIAddCarry %22 %41 %39 +%44 = OpCompositeExtract %5 %43 0 +%45 = OpCompositeExtract %5 %43 1 +%46 = OpIAdd %5 %42 %45 +%47 = OpCompositeConstruct %6 %44 %46 +%48 = OpBitcast %29 %47 +%49 = OpAccessChain %31 %48 %16 +%50 = OpLoad %11 %49 Aligned 16 +%52 = OpCompositeExtract %10 %50 0 +%53 = OpCompositeExtract %10 %50 1 +%54 = OpCompositeExtract %10 %50 2 +%55 = OpCompositeExtract %10 %50 3 +%56 = OpBitcast %51 %52 +%57 = OpCompositeExtract %40 %56 0 +%58 = OpCompositeExtract %40 %56 1 +%59 = OpBitcast %51 %53 +%60 = OpCompositeExtract %40 %59 0 +%61 = OpCompositeExtract %40 %59 1 +%62 = OpBitcast %51 %54 +%63 = OpCompositeExtract %40 %62 0 +%64 = OpCompositeExtract %40 %62 1 +%65 = OpBitcast %51 %55 +%66 = OpCompositeExtract %40 %65 0 +%67 = OpCompositeExtract %40 %65 1 +%69 = OpCompositeConstruct %68 %57 %58 %60 %61 %63 %64 %66 %67 +%70 = OpCompositeExtract %40 %69 0 +%71 = OpCompositeExtract %40 %69 1 +%72 = OpCompositeExtract %40 %69 2 +%73 = OpCompositeExtract %40 %69 3 +%74 = OpFConvert %10 %70 +%75 = OpFConvert %10 %71 +%76 = OpFConvert %10 %72 +%77 = OpFConvert %10 %73 +%78 = OpFAdd %10 %74 %34 +%79 = OpFAdd %10 %75 %35 +%80 = OpFAdd %10 %76 %36 +%81 = OpFAdd %10 %77 %37 +%82 = OpCompositeExtract %40 %69 4 +%83 = OpCompositeExtract %40 %69 5 +%84 = OpCompositeExtract %40 %69 6 +%85 = OpCompositeExtract %40 %69 7 +%86 = OpFConvert %10 %82 +%87 = OpFConvert %10 %83 +%88 = OpFConvert %10 %84 +%89 = OpFConvert %10 %85 +%90 = OpFAdd %10 %78 %86 +%91 = OpFAdd %10 %79 %87 +%92 = OpFAdd %10 %80 %88 +%93 = OpFAdd %10 %81 %89 +%95 = OpIMul %5 %94 %19 +%97 = OpCompositeExtract %5 %17 0 +%98 = OpCompositeExtract %5 %17 1 +%99 = OpIAddCarry %22 %97 %95 +%100 = OpCompositeExtract %5 %99 0 +%101 = OpCompositeExtract %5 %99 1 +%102 = OpIAdd %5 %98 %101 +%103 = OpCompositeConstruct %6 %100 %102 +%107 = OpBitcast %106 %103 +%109 = OpAccessChain %108 %107 %16 +%110 = OpLoad %104 %109 Aligned 16 +%111 = OpCompositeExtract %96 %110 0 +%112 = OpCompositeExtract %96 %110 1 +%114 = OpIMul %5 %113 %19 +%115 = OpCompositeExtract %5 %17 0 +%116 = OpCompositeExtract %5 %17 1 +%117 = OpIAddCarry %22 %115 %114 +%118 = OpCompositeExtract %5 %117 0 +%119 = OpCompositeExtract %5 %117 1 +%120 = OpIAdd %5 %116 %119 +%121 = OpCompositeConstruct %6 %118 %120 +%122 = OpBitcast %106 %121 +%123 = OpAccessChain %108 %122 %16 +%124 = OpLoad %104 %123 Aligned 16 +%125 = OpCompositeExtract %96 %124 0 +%126 = OpCompositeExtract %96 %124 1 +%127 = OpConvertSToF %10 %111 +%128 = OpConvertSToF %10 %112 +%129 = OpConvertSToF %10 %125 +%130 = OpConvertSToF %10 %126 +%131 = OpFAdd %10 %90 %127 +%132 = OpFAdd %10 %91 %128 +%133 = OpFAdd %10 %92 %129 +%134 = OpFAdd %10 %93 %130 +%136 = OpAccessChain %135 %13 %16 +OpStore %136 %131 +%137 = OpAccessChain %135 %13 %38 +OpStore %137 %132 +%138 = OpAccessChain %135 %13 %94 +OpStore %138 %133 +%139 = OpAccessChain %135 %13 %113 +OpStore %139 %134 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag new file mode 100644 index 0000000..ec79c5f --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag @@ -0,0 +1,223 @@ +#version 460 +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_buffer_reference : require + +struct AddCarry +{ + uint _m0; + uint _m1; +}; + +layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; +layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; +layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +{ + vec4 value; +}; + +layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite +{ + u64vec2 value; +}; + +layout(push_constant, std430) uniform RootConstants +{ + uvec2 _m0; + uvec2 _m1; + uvec2 _m2; + uvec2 _m3; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + AddCarry _23; + _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); + PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); + AddCarry _42; + _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1); + PhysicalPointerFloat4NonWrite _47 = PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1)); + AddCarry _62; + _62._m0 = uaddCarry(registers._m0.x, 2u * 16u, _62._m1); + PhysicalPointerFloat4NonWrite _67 = PhysicalPointerFloat4NonWrite(uvec2(_62._m0, registers._m0.y + _62._m1)); + AddCarry _83; + _83._m0 = uaddCarry(registers._m0.x, 3u * 16u, _83._m1); + PhysicalPointerUint642NonWrite _91 = PhysicalPointerUint642NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1)); + AddCarry _101; + _101._m0 = uaddCarry(registers._m0.x, 4u * 16u, _101._m1); + PhysicalPointerUint642NonWrite _106 = PhysicalPointerUint642NonWrite(uvec2(_101._m0, registers._m0.y + _101._m1)); + SV_Target.x = ((_47.value.x + _30.value.x) + _67.value.x) + float(int64_t(_91.value.x)); + SV_Target.y = ((_47.value.y + _30.value.y) + _67.value.y) + float(int64_t(_91.value.y)); + SV_Target.z = ((_47.value.z + _30.value.z) + _67.value.z) + float(int64_t(_106.value.x)); + SV_Target.w = ((_47.value.w + _30.value.w) + _67.value.w) + float(int64_t(_106.value.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 126 +; Schema: 0 +OpCapability Shader +OpCapability Int64 +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %13 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %7 "RootConstants" +OpName %9 "registers" +OpName %13 "SV_Target" +OpName %22 "AddCarry" +OpName %28 "PhysicalPointerFloat4NonWrite" +OpMemberName %28 0 "value" +OpName %89 "PhysicalPointerUint642NonWrite" +OpMemberName %89 0 "value" +OpDecorate %7 Block +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpMemberDecorate %7 2 Offset 16 +OpMemberDecorate %7 3 Offset 24 +OpDecorate %13 Location 0 +OpMemberDecorate %28 0 Offset 0 +OpDecorate %28 Block +OpMemberDecorate %28 0 NonWritable +OpMemberDecorate %89 0 Offset 0 +OpDecorate %89 Block +OpMemberDecorate %89 0 NonWritable +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeVector %5 2 +%7 = OpTypeStruct %6 %6 %6 %6 +%8 = OpTypePointer PushConstant %7 +%9 = OpVariable %8 PushConstant +%10 = OpTypeFloat 32 +%11 = OpTypeVector %10 4 +%12 = OpTypePointer Output %11 +%13 = OpVariable %12 Output +%14 = OpTypePointer PushConstant %6 +%16 = OpConstant %5 0 +%19 = OpConstant %5 16 +%22 = OpTypeStruct %5 %5 +%28 = OpTypeStruct %11 +%29 = OpTypePointer PhysicalStorageBuffer %28 +%31 = OpTypePointer PhysicalStorageBuffer %11 +%38 = OpConstant %5 1 +%58 = OpConstant %5 2 +%78 = OpConstant %5 3 +%80 = OpTypeInt 64 0 +%88 = OpTypeVector %80 2 +%89 = OpTypeStruct %88 +%90 = OpTypePointer PhysicalStorageBuffer %89 +%92 = OpTypePointer PhysicalStorageBuffer %88 +%97 = OpConstant %5 4 +%119 = OpTypePointer Output %10 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %124 +%124 = OpLabel +%15 = OpAccessChain %14 %9 %16 +%17 = OpLoad %6 %15 +%18 = OpIMul %5 %16 %19 +%20 = OpCompositeExtract %5 %17 0 +%21 = OpCompositeExtract %5 %17 1 +%23 = OpIAddCarry %22 %20 %18 +%24 = OpCompositeExtract %5 %23 0 +%25 = OpCompositeExtract %5 %23 1 +%26 = OpIAdd %5 %21 %25 +%27 = OpCompositeConstruct %6 %24 %26 +%30 = OpBitcast %29 %27 +%32 = OpAccessChain %31 %30 %16 +%33 = OpLoad %11 %32 Aligned 16 +%34 = OpCompositeExtract %10 %33 0 +%35 = OpCompositeExtract %10 %33 1 +%36 = OpCompositeExtract %10 %33 2 +%37 = OpCompositeExtract %10 %33 3 +%39 = OpIMul %5 %38 %19 +%40 = OpCompositeExtract %5 %17 0 +%41 = OpCompositeExtract %5 %17 1 +%42 = OpIAddCarry %22 %40 %39 +%43 = OpCompositeExtract %5 %42 0 +%44 = OpCompositeExtract %5 %42 1 +%45 = OpIAdd %5 %41 %44 +%46 = OpCompositeConstruct %6 %43 %45 +%47 = OpBitcast %29 %46 +%48 = OpAccessChain %31 %47 %16 +%49 = OpLoad %11 %48 Aligned 16 +%50 = OpCompositeExtract %10 %49 0 +%51 = OpCompositeExtract %10 %49 1 +%52 = OpCompositeExtract %10 %49 2 +%53 = OpCompositeExtract %10 %49 3 +%54 = OpFAdd %10 %50 %34 +%55 = OpFAdd %10 %51 %35 +%56 = OpFAdd %10 %52 %36 +%57 = OpFAdd %10 %53 %37 +%59 = OpIMul %5 %58 %19 +%60 = OpCompositeExtract %5 %17 0 +%61 = OpCompositeExtract %5 %17 1 +%62 = OpIAddCarry %22 %60 %59 +%63 = OpCompositeExtract %5 %62 0 +%64 = OpCompositeExtract %5 %62 1 +%65 = OpIAdd %5 %61 %64 +%66 = OpCompositeConstruct %6 %63 %65 +%67 = OpBitcast %29 %66 +%68 = OpAccessChain %31 %67 %16 +%69 = OpLoad %11 %68 Aligned 16 +%70 = OpCompositeExtract %10 %69 0 +%71 = OpCompositeExtract %10 %69 1 +%72 = OpCompositeExtract %10 %69 2 +%73 = OpCompositeExtract %10 %69 3 +%74 = OpFAdd %10 %54 %70 +%75 = OpFAdd %10 %55 %71 +%76 = OpFAdd %10 %56 %72 +%77 = OpFAdd %10 %57 %73 +%79 = OpIMul %5 %78 %19 +%81 = OpCompositeExtract %5 %17 0 +%82 = OpCompositeExtract %5 %17 1 +%83 = OpIAddCarry %22 %81 %79 +%84 = OpCompositeExtract %5 %83 0 +%85 = OpCompositeExtract %5 %83 1 +%86 = OpIAdd %5 %82 %85 +%87 = OpCompositeConstruct %6 %84 %86 +%91 = OpBitcast %90 %87 +%93 = OpAccessChain %92 %91 %16 +%94 = OpLoad %88 %93 Aligned 16 +%95 = OpCompositeExtract %80 %94 0 +%96 = OpCompositeExtract %80 %94 1 +%98 = OpIMul %5 %97 %19 +%99 = OpCompositeExtract %5 %17 0 +%100 = OpCompositeExtract %5 %17 1 +%101 = OpIAddCarry %22 %99 %98 +%102 = OpCompositeExtract %5 %101 0 +%103 = OpCompositeExtract %5 %101 1 +%104 = OpIAdd %5 %100 %103 +%105 = OpCompositeConstruct %6 %102 %104 +%106 = OpBitcast %90 %105 +%107 = OpAccessChain %92 %106 %16 +%108 = OpLoad %88 %107 Aligned 16 +%109 = OpCompositeExtract %80 %108 0 +%110 = OpCompositeExtract %80 %108 1 +%111 = OpConvertSToF %10 %95 +%112 = OpConvertSToF %10 %96 +%113 = OpConvertSToF %10 %109 +%114 = OpConvertSToF %10 %110 +%115 = OpFAdd %10 %74 %111 +%116 = OpFAdd %10 %75 %112 +%117 = OpFAdd %10 %76 %113 +%118 = OpFAdd %10 %77 %114 +%120 = OpAccessChain %119 %13 %16 +OpStore %120 %115 +%121 = OpAccessChain %119 %13 %38 +OpStore %121 %116 +%122 = OpAccessChain %119 %13 %58 +OpStore %122 %117 +%123 = OpAccessChain %119 %13 %78 +OpStore %123 %118 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag new file mode 100644 index 0000000..4f60fc9 --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag @@ -0,0 +1,244 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_buffer_reference : require + +struct AddCarry +{ + uint _m0; + uint _m1; +}; + +layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; +layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; +layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +{ + vec4 value; +}; + +layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite +{ + u64vec2 value; +}; + +layout(push_constant, std430) uniform RootConstants +{ + uvec2 _m0; + uvec2 _m1; + uvec2 _m2; + uvec2 _m3; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + AddCarry _23; + _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); + PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); + AddCarry _42; + _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1); + f16vec4 _52 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1)).value); + AddCarry _69; + _69._m0 = uaddCarry(registers._m0.x, 2u * 16u, _69._m1); + f16vec4 _77 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value); + AddCarry _95; + _95._m0 = uaddCarry(registers._m0.x, 3u * 16u, _95._m1); + PhysicalPointerUint642NonWrite _103 = PhysicalPointerUint642NonWrite(uvec2(_95._m0, registers._m0.y + _95._m1)); + AddCarry _113; + _113._m0 = uaddCarry(registers._m0.x, 4u * 16u, _113._m1); + PhysicalPointerUint642NonWrite _118 = PhysicalPointerUint642NonWrite(uvec2(_113._m0, registers._m0.y + _113._m1)); + SV_Target.x = ((float(_52.x) + _30.value.x) + float(_77.x)) + float(int64_t(_103.value.x)); + SV_Target.y = ((float(_52.y) + _30.value.y) + float(_77.y)) + float(int64_t(_103.value.y)); + SV_Target.z = ((float(_52.z) + _30.value.z) + float(_77.z)) + float(int64_t(_118.value.x)); + SV_Target.w = ((float(_52.w) + _30.value.w) + float(_77.w)) + float(int64_t(_118.value.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 138 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Int64 +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %13 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %7 "RootConstants" +OpName %9 "registers" +OpName %13 "SV_Target" +OpName %22 "AddCarry" +OpName %28 "PhysicalPointerFloat4NonWrite" +OpMemberName %28 0 "value" +OpName %101 "PhysicalPointerUint642NonWrite" +OpMemberName %101 0 "value" +OpDecorate %7 Block +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpMemberDecorate %7 2 Offset 16 +OpMemberDecorate %7 3 Offset 24 +OpDecorate %13 Location 0 +OpMemberDecorate %28 0 Offset 0 +OpDecorate %28 Block +OpMemberDecorate %28 0 NonWritable +OpMemberDecorate %101 0 Offset 0 +OpDecorate %101 Block +OpMemberDecorate %101 0 NonWritable +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeVector %5 2 +%7 = OpTypeStruct %6 %6 %6 %6 +%8 = OpTypePointer PushConstant %7 +%9 = OpVariable %8 PushConstant +%10 = OpTypeFloat 32 +%11 = OpTypeVector %10 4 +%12 = OpTypePointer Output %11 +%13 = OpVariable %12 Output +%14 = OpTypePointer PushConstant %6 +%16 = OpConstant %5 0 +%19 = OpConstant %5 16 +%22 = OpTypeStruct %5 %5 +%28 = OpTypeStruct %11 +%29 = OpTypePointer PhysicalStorageBuffer %28 +%31 = OpTypePointer PhysicalStorageBuffer %11 +%38 = OpConstant %5 1 +%50 = OpTypeFloat 16 +%51 = OpTypeVector %50 4 +%65 = OpConstant %5 2 +%90 = OpConstant %5 3 +%92 = OpTypeInt 64 0 +%100 = OpTypeVector %92 2 +%101 = OpTypeStruct %100 +%102 = OpTypePointer PhysicalStorageBuffer %101 +%104 = OpTypePointer PhysicalStorageBuffer %100 +%109 = OpConstant %5 4 +%131 = OpTypePointer Output %10 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %136 +%136 = OpLabel +%15 = OpAccessChain %14 %9 %16 +%17 = OpLoad %6 %15 +%18 = OpIMul %5 %16 %19 +%20 = OpCompositeExtract %5 %17 0 +%21 = OpCompositeExtract %5 %17 1 +%23 = OpIAddCarry %22 %20 %18 +%24 = OpCompositeExtract %5 %23 0 +%25 = OpCompositeExtract %5 %23 1 +%26 = OpIAdd %5 %21 %25 +%27 = OpCompositeConstruct %6 %24 %26 +%30 = OpBitcast %29 %27 +%32 = OpAccessChain %31 %30 %16 +%33 = OpLoad %11 %32 Aligned 16 +%34 = OpCompositeExtract %10 %33 0 +%35 = OpCompositeExtract %10 %33 1 +%36 = OpCompositeExtract %10 %33 2 +%37 = OpCompositeExtract %10 %33 3 +%39 = OpIMul %5 %38 %19 +%40 = OpCompositeExtract %5 %17 0 +%41 = OpCompositeExtract %5 %17 1 +%42 = OpIAddCarry %22 %40 %39 +%43 = OpCompositeExtract %5 %42 0 +%44 = OpCompositeExtract %5 %42 1 +%45 = OpIAdd %5 %41 %44 +%46 = OpCompositeConstruct %6 %43 %45 +%47 = OpBitcast %29 %46 +%48 = OpAccessChain %31 %47 %16 +%49 = OpLoad %11 %48 Aligned 16 +%52 = OpFConvert %51 %49 +%53 = OpCompositeExtract %50 %52 0 +%54 = OpCompositeExtract %50 %52 1 +%55 = OpCompositeExtract %50 %52 2 +%56 = OpCompositeExtract %50 %52 3 +%57 = OpFConvert %10 %53 +%58 = OpFConvert %10 %54 +%59 = OpFConvert %10 %55 +%60 = OpFConvert %10 %56 +%61 = OpFAdd %10 %57 %34 +%62 = OpFAdd %10 %58 %35 +%63 = OpFAdd %10 %59 %36 +%64 = OpFAdd %10 %60 %37 +%66 = OpIMul %5 %65 %19 +%67 = OpCompositeExtract %5 %17 0 +%68 = OpCompositeExtract %5 %17 1 +%69 = OpIAddCarry %22 %67 %66 +%70 = OpCompositeExtract %5 %69 0 +%71 = OpCompositeExtract %5 %69 1 +%72 = OpIAdd %5 %68 %71 +%73 = OpCompositeConstruct %6 %70 %72 +%74 = OpBitcast %29 %73 +%75 = OpAccessChain %31 %74 %16 +%76 = OpLoad %11 %75 Aligned 16 +%77 = OpFConvert %51 %76 +%78 = OpCompositeExtract %50 %77 0 +%79 = OpCompositeExtract %50 %77 1 +%80 = OpCompositeExtract %50 %77 2 +%81 = OpCompositeExtract %50 %77 3 +%82 = OpFConvert %10 %78 +%83 = OpFConvert %10 %79 +%84 = OpFConvert %10 %80 +%85 = OpFConvert %10 %81 +%86 = OpFAdd %10 %61 %82 +%87 = OpFAdd %10 %62 %83 +%88 = OpFAdd %10 %63 %84 +%89 = OpFAdd %10 %64 %85 +%91 = OpIMul %5 %90 %19 +%93 = OpCompositeExtract %5 %17 0 +%94 = OpCompositeExtract %5 %17 1 +%95 = OpIAddCarry %22 %93 %91 +%96 = OpCompositeExtract %5 %95 0 +%97 = OpCompositeExtract %5 %95 1 +%98 = OpIAdd %5 %94 %97 +%99 = OpCompositeConstruct %6 %96 %98 +%103 = OpBitcast %102 %99 +%105 = OpAccessChain %104 %103 %16 +%106 = OpLoad %100 %105 Aligned 16 +%107 = OpCompositeExtract %92 %106 0 +%108 = OpCompositeExtract %92 %106 1 +%110 = OpIMul %5 %109 %19 +%111 = OpCompositeExtract %5 %17 0 +%112 = OpCompositeExtract %5 %17 1 +%113 = OpIAddCarry %22 %111 %110 +%114 = OpCompositeExtract %5 %113 0 +%115 = OpCompositeExtract %5 %113 1 +%116 = OpIAdd %5 %112 %115 +%117 = OpCompositeConstruct %6 %114 %116 +%118 = OpBitcast %102 %117 +%119 = OpAccessChain %104 %118 %16 +%120 = OpLoad %100 %119 Aligned 16 +%121 = OpCompositeExtract %92 %120 0 +%122 = OpCompositeExtract %92 %120 1 +%123 = OpConvertSToF %10 %107 +%124 = OpConvertSToF %10 %108 +%125 = OpConvertSToF %10 %121 +%126 = OpConvertSToF %10 %122 +%127 = OpFAdd %10 %86 %123 +%128 = OpFAdd %10 %87 %124 +%129 = OpFAdd %10 %88 %125 +%130 = OpFAdd %10 %89 %126 +%132 = OpAccessChain %131 %13 %16 +OpStore %132 %127 +%133 = OpAccessChain %131 %13 %38 +OpStore %133 %128 +%134 = OpAccessChain %131 %13 %65 +OpStore %134 %129 +%135 = OpAccessChain %131 %13 %90 +OpStore %135 %130 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag new file mode 100644 index 0000000..0e8c333 --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag @@ -0,0 +1,141 @@ +#version 460 +#extension GL_ARB_gpu_shader_int64 : require + +layout(set = 0, binding = 0, std140) uniform _10_12 +{ + vec4 _m0[5]; +} _12; + +layout(set = 0, binding = 0, std140) uniform _16_18 +{ + dvec2 _m0[5]; +} _18; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + u64vec2 _57 = doubleBitsToUint64(_18._m0[3u]); + u64vec2 _63 = doubleBitsToUint64(_18._m0[4u]); + SV_Target.x = ((_12._m0[1u].x + _12._m0[0u].x) + _12._m0[2u].x) + float(int64_t(_57.x)); + SV_Target.y = ((_12._m0[1u].y + _12._m0[0u].y) + _12._m0[2u].y) + float(int64_t(_57.y)); + SV_Target.z = ((_12._m0[1u].z + _12._m0[0u].z) + _12._m0[2u].z) + float(int64_t(_63.x)); + SV_Target.w = ((_12._m0[1u].w + _12._m0[0u].w) + _12._m0[2u].w) + float(int64_t(_63.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 81 +; Schema: 0 +OpCapability Shader +OpCapability Float64 +OpCapability Int64 +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %20 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %10 "" +OpName %16 "" +OpName %20 "SV_Target" +OpDecorate %9 ArrayStride 16 +OpMemberDecorate %10 0 Offset 0 +OpDecorate %10 Block +OpDecorate %15 ArrayStride 16 +OpMemberDecorate %16 0 Offset 0 +OpDecorate %16 Block +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %18 DescriptorSet 0 +OpDecorate %18 Binding 0 +OpDecorate %20 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 5 +%7 = OpTypeFloat 32 +%8 = OpTypeVector %7 4 +%9 = OpTypeArray %8 %6 +%10 = OpTypeStruct %9 +%11 = OpTypePointer Uniform %10 +%12 = OpVariable %11 Uniform +%13 = OpTypeFloat 64 +%14 = OpTypeVector %13 2 +%15 = OpTypeArray %14 %6 +%16 = OpTypeStruct %15 +%17 = OpTypePointer Uniform %16 +%18 = OpVariable %17 Uniform +%19 = OpTypePointer Output %8 +%20 = OpVariable %19 Output +%21 = OpConstant %5 0 +%22 = OpTypePointer Uniform %8 +%29 = OpConstant %5 1 +%40 = OpConstant %5 2 +%51 = OpTypeInt 64 0 +%52 = OpConstant %5 3 +%53 = OpTypePointer Uniform %14 +%56 = OpTypeVector %51 2 +%60 = OpConstant %5 4 +%74 = OpTypePointer Output %7 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %79 +%79 = OpLabel +%23 = OpAccessChain %22 %12 %21 %21 +%24 = OpLoad %8 %23 +%25 = OpCompositeExtract %7 %24 0 +%26 = OpCompositeExtract %7 %24 1 +%27 = OpCompositeExtract %7 %24 2 +%28 = OpCompositeExtract %7 %24 3 +%30 = OpAccessChain %22 %12 %21 %29 +%31 = OpLoad %8 %30 +%32 = OpCompositeExtract %7 %31 0 +%33 = OpCompositeExtract %7 %31 1 +%34 = OpCompositeExtract %7 %31 2 +%35 = OpCompositeExtract %7 %31 3 +%36 = OpFAdd %7 %32 %25 +%37 = OpFAdd %7 %33 %26 +%38 = OpFAdd %7 %34 %27 +%39 = OpFAdd %7 %35 %28 +%41 = OpAccessChain %22 %12 %21 %40 +%42 = OpLoad %8 %41 +%43 = OpCompositeExtract %7 %42 0 +%44 = OpCompositeExtract %7 %42 1 +%45 = OpCompositeExtract %7 %42 2 +%46 = OpCompositeExtract %7 %42 3 +%47 = OpFAdd %7 %36 %43 +%48 = OpFAdd %7 %37 %44 +%49 = OpFAdd %7 %38 %45 +%50 = OpFAdd %7 %39 %46 +%54 = OpAccessChain %53 %18 %21 %52 +%55 = OpLoad %14 %54 +%57 = OpBitcast %56 %55 +%58 = OpCompositeExtract %51 %57 0 +%59 = OpCompositeExtract %51 %57 1 +%61 = OpAccessChain %53 %18 %21 %60 +%62 = OpLoad %14 %61 +%63 = OpBitcast %56 %62 +%64 = OpCompositeExtract %51 %63 0 +%65 = OpCompositeExtract %51 %63 1 +%66 = OpConvertSToF %7 %58 +%67 = OpConvertSToF %7 %59 +%68 = OpConvertSToF %7 %64 +%69 = OpConvertSToF %7 %65 +%70 = OpFAdd %7 %47 %66 +%71 = OpFAdd %7 %48 %67 +%72 = OpFAdd %7 %49 %68 +%73 = OpFAdd %7 %50 %69 +%75 = OpAccessChain %74 %20 %21 +OpStore %75 %70 +%76 = OpAccessChain %74 %20 %29 +OpStore %76 %71 +%77 = OpAccessChain %74 %20 %40 +OpStore %77 %72 +%78 = OpAccessChain %74 %20 %52 +OpStore %78 %73 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag new file mode 100644 index 0000000..1a732b3 --- /dev/null +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag @@ -0,0 +1,164 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require + +layout(set = 0, binding = 0, std140) uniform _10_12 +{ + vec4 _m0[5]; +} _12; + +layout(set = 0, binding = 0, std140) uniform _16_18 +{ + dvec2 _m0[5]; +} _18; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + f16vec4 _34 = f16vec4(_12._m0[1u]); + f16vec4 _50 = f16vec4(_12._m0[2u]); + u64vec2 _69 = doubleBitsToUint64(_18._m0[3u]); + u64vec2 _75 = doubleBitsToUint64(_18._m0[4u]); + SV_Target.x = ((float(_34.x) + _12._m0[0u].x) + float(_50.x)) + float(int64_t(_69.x)); + SV_Target.y = ((float(_34.y) + _12._m0[0u].y) + float(_50.y)) + float(int64_t(_69.y)); + SV_Target.z = ((float(_34.z) + _12._m0[0u].z) + float(_50.z)) + float(int64_t(_75.x)); + SV_Target.w = ((float(_34.w) + _12._m0[0u].w) + float(_50.w)) + float(int64_t(_75.y)); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 93 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %20 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %10 "" +OpName %16 "" +OpName %20 "SV_Target" +OpDecorate %9 ArrayStride 16 +OpMemberDecorate %10 0 Offset 0 +OpDecorate %10 Block +OpDecorate %15 ArrayStride 16 +OpMemberDecorate %16 0 Offset 0 +OpDecorate %16 Block +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %18 DescriptorSet 0 +OpDecorate %18 Binding 0 +OpDecorate %20 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 5 +%7 = OpTypeFloat 32 +%8 = OpTypeVector %7 4 +%9 = OpTypeArray %8 %6 +%10 = OpTypeStruct %9 +%11 = OpTypePointer Uniform %10 +%12 = OpVariable %11 Uniform +%13 = OpTypeFloat 64 +%14 = OpTypeVector %13 2 +%15 = OpTypeArray %14 %6 +%16 = OpTypeStruct %15 +%17 = OpTypePointer Uniform %16 +%18 = OpVariable %17 Uniform +%19 = OpTypePointer Output %8 +%20 = OpVariable %19 Output +%21 = OpConstant %5 0 +%22 = OpTypePointer Uniform %8 +%29 = OpConstant %5 1 +%32 = OpTypeFloat 16 +%33 = OpTypeVector %32 4 +%47 = OpConstant %5 2 +%63 = OpTypeInt 64 0 +%64 = OpConstant %5 3 +%65 = OpTypePointer Uniform %14 +%68 = OpTypeVector %63 2 +%72 = OpConstant %5 4 +%86 = OpTypePointer Output %7 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %91 +%91 = OpLabel +%23 = OpAccessChain %22 %12 %21 %21 +%24 = OpLoad %8 %23 +%25 = OpCompositeExtract %7 %24 0 +%26 = OpCompositeExtract %7 %24 1 +%27 = OpCompositeExtract %7 %24 2 +%28 = OpCompositeExtract %7 %24 3 +%30 = OpAccessChain %22 %12 %21 %29 +%31 = OpLoad %8 %30 +%34 = OpFConvert %33 %31 +%35 = OpCompositeExtract %32 %34 0 +%36 = OpCompositeExtract %32 %34 1 +%37 = OpCompositeExtract %32 %34 2 +%38 = OpCompositeExtract %32 %34 3 +%39 = OpFConvert %7 %35 +%40 = OpFConvert %7 %36 +%41 = OpFConvert %7 %37 +%42 = OpFConvert %7 %38 +%43 = OpFAdd %7 %39 %25 +%44 = OpFAdd %7 %40 %26 +%45 = OpFAdd %7 %41 %27 +%46 = OpFAdd %7 %42 %28 +%48 = OpAccessChain %22 %12 %21 %47 +%49 = OpLoad %8 %48 +%50 = OpFConvert %33 %49 +%51 = OpCompositeExtract %32 %50 0 +%52 = OpCompositeExtract %32 %50 1 +%53 = OpCompositeExtract %32 %50 2 +%54 = OpCompositeExtract %32 %50 3 +%55 = OpFConvert %7 %51 +%56 = OpFConvert %7 %52 +%57 = OpFConvert %7 %53 +%58 = OpFConvert %7 %54 +%59 = OpFAdd %7 %43 %55 +%60 = OpFAdd %7 %44 %56 +%61 = OpFAdd %7 %45 %57 +%62 = OpFAdd %7 %46 %58 +%66 = OpAccessChain %65 %18 %21 %64 +%67 = OpLoad %14 %66 +%69 = OpBitcast %68 %67 +%70 = OpCompositeExtract %63 %69 0 +%71 = OpCompositeExtract %63 %69 1 +%73 = OpAccessChain %65 %18 %21 %72 +%74 = OpLoad %14 %73 +%75 = OpBitcast %68 %74 +%76 = OpCompositeExtract %63 %75 0 +%77 = OpCompositeExtract %63 %75 1 +%78 = OpConvertSToF %7 %70 +%79 = OpConvertSToF %7 %71 +%80 = OpConvertSToF %7 %76 +%81 = OpConvertSToF %7 %77 +%82 = OpFAdd %7 %59 %78 +%83 = OpFAdd %7 %60 %79 +%84 = OpFAdd %7 %61 %80 +%85 = OpFAdd %7 %62 %81 +%87 = OpAccessChain %86 %20 %21 +OpStore %87 %82 +%88 = OpAccessChain %86 %20 %29 +OpStore %88 %83 +%89 = OpAccessChain %86 %20 %47 +OpStore %89 %84 +%90 = OpAccessChain %86 %20 %64 +OpStore %90 %85 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag b/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag index 8b39407..4f08a54 100644 --- a/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag +++ b/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag @@ -92,6 +92,7 @@ OpDecorate %16 Binding 0 OpDecorate %18 Flat OpDecorate %18 Location 0 OpDecorate %20 Location 0 +OpDecorate %86 NonUniform OpDecorate %83 NonUniform OpDecorate %87 NonUniform %1 = OpTypeVoid diff --git a/reference/shaders/resources/cbv.bindless.root-constant.frag b/reference/shaders/resources/cbv.bindless.root-constant.frag index 19939bb..149d9fa 100644 --- a/reference/shaders/resources/cbv.bindless.root-constant.frag +++ b/reference/shaders/resources/cbv.bindless.root-constant.frag @@ -91,6 +91,7 @@ OpDecorate %16 Binding 0 OpDecorate %18 Flat OpDecorate %18 Location 0 OpDecorate %20 Location 0 +OpDecorate %86 NonUniform OpDecorate %83 NonUniform OpDecorate %87 NonUniform %1 = OpTypeVoid diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag new file mode 100644 index 0000000..ba78e81 --- /dev/null +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag @@ -0,0 +1,260 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 5, binding = 0, scalar) uniform BindlessCBV +{ + float16_t _m0[32768]; +} _15[]; + +layout(set = 5, binding = 0, scalar) uniform _19_22 +{ + float _m0[16384]; +} _22[]; + +layout(set = 5, binding = 0, scalar) uniform _26_29 +{ + double _m0[8192]; +} _29[]; + +layout(push_constant, std430) uniform RootConstants +{ + uint _m0; + uint _m1; + uint _m2; + uint _m3; + uint _m4; + uint _m5; + uint _m6; + uint _m7; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + uint _39 = registers._m5 + 2u; + uint _45 = registers._m5 + 1u; + SV_Target.x = (((float(_15[registers._m5]._m0[8u]) + _22[registers._m5]._m0[0u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[4u])))) + _22[_45]._m0[0u]) + float(_29[_39]._m0[0u]); + SV_Target.y = (((float(_15[registers._m5]._m0[10u]) + _22[registers._m5]._m0[1u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[5u])))) + _22[_45]._m0[1u]) + float(_29[_39]._m0[1u]); + SV_Target.z = (((float(_15[registers._m5]._m0[12u]) + _22[registers._m5]._m0[2u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[6u])))) + _22[_45]._m0[2u]) + float(_29[_39]._m0[2u]); + SV_Target.w = (((float(_15[registers._m5]._m0[14u]) + _22[registers._m5]._m0[3u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[7u])))) + _22[_45]._m0[3u]) + float(_29[_39]._m0[3u]); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 149 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability RuntimeDescriptorArray +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_EXT_descriptor_indexing" +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %32 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %6 "RootConstants" +OpName %8 "registers" +OpName %12 "BindlessCBV" +OpName %19 "BindlessCBV" +OpName %26 "BindlessCBV" +OpName %32 "SV_Target" +OpDecorate %6 Block +OpMemberDecorate %6 0 Offset 0 +OpMemberDecorate %6 1 Offset 4 +OpMemberDecorate %6 2 Offset 8 +OpMemberDecorate %6 3 Offset 12 +OpMemberDecorate %6 4 Offset 16 +OpMemberDecorate %6 5 Offset 20 +OpMemberDecorate %6 6 Offset 24 +OpMemberDecorate %6 7 Offset 28 +OpDecorate %11 ArrayStride 2 +OpDecorate %12 Block +OpMemberDecorate %12 0 Offset 0 +OpDecorate %15 DescriptorSet 5 +OpDecorate %15 Binding 0 +OpDecorate %18 ArrayStride 4 +OpDecorate %19 Block +OpMemberDecorate %19 0 Offset 0 +OpDecorate %22 DescriptorSet 5 +OpDecorate %22 Binding 0 +OpDecorate %25 ArrayStride 8 +OpDecorate %26 Block +OpMemberDecorate %26 0 Offset 0 +OpDecorate %29 DescriptorSet 5 +OpDecorate %29 Binding 0 +OpDecorate %32 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5 +%7 = OpTypePointer PushConstant %6 +%8 = OpVariable %7 PushConstant +%9 = OpTypeFloat 16 +%10 = OpConstant %5 32768 +%11 = OpTypeArray %9 %10 +%12 = OpTypeStruct %11 +%13 = OpTypeRuntimeArray %12 +%14 = OpTypePointer Uniform %13 +%15 = OpVariable %14 Uniform +%16 = OpTypeFloat 32 +%17 = OpConstant %5 16384 +%18 = OpTypeArray %16 %17 +%19 = OpTypeStruct %18 +%20 = OpTypeRuntimeArray %19 +%21 = OpTypePointer Uniform %20 +%22 = OpVariable %21 Uniform +%23 = OpTypeFloat 64 +%24 = OpConstant %5 8192 +%25 = OpTypeArray %23 %24 +%26 = OpTypeStruct %25 +%27 = OpTypeRuntimeArray %26 +%28 = OpTypePointer Uniform %27 +%29 = OpVariable %28 Uniform +%30 = OpTypeVector %16 4 +%31 = OpTypePointer Output %30 +%32 = OpVariable %31 Output +%33 = OpTypePointer Uniform %26 +%35 = OpTypePointer PushConstant %5 +%37 = OpConstant %5 5 +%40 = OpConstant %5 2 +%41 = OpTypePointer Uniform %19 +%46 = OpConstant %5 1 +%47 = OpTypePointer Uniform %12 +%57 = OpConstant %5 0 +%58 = OpTypePointer Uniform %16 +%65 = OpConstant %5 3 +%68 = OpConstant %5 8 +%69 = OpTypePointer Uniform %9 +%72 = OpConstant %5 10 +%75 = OpConstant %5 12 +%78 = OpConstant %5 14 +%89 = OpTypeInt 64 0 +%90 = OpConstant %5 4 +%91 = OpTypePointer Uniform %23 +%98 = OpConstant %5 6 +%102 = OpConstant %5 7 +%142 = OpTypePointer Output %16 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %147 +%147 = OpLabel +%36 = OpAccessChain %35 %8 %37 +%38 = OpLoad %5 %36 +%39 = OpIAdd %5 %38 %40 +%34 = OpAccessChain %33 %29 %39 +%43 = OpAccessChain %35 %8 %37 +%44 = OpLoad %5 %43 +%45 = OpIAdd %5 %44 %46 +%42 = OpAccessChain %41 %22 %45 +%49 = OpAccessChain %35 %8 %37 +%50 = OpLoad %5 %49 +%48 = OpAccessChain %47 %15 %50 +%52 = OpAccessChain %35 %8 %37 +%53 = OpLoad %5 %52 +%51 = OpAccessChain %41 %22 %53 +%55 = OpAccessChain %35 %8 %37 +%56 = OpLoad %5 %55 +%54 = OpAccessChain %33 %29 %56 +%59 = OpAccessChain %58 %51 %57 %57 +%60 = OpLoad %16 %59 +%61 = OpAccessChain %58 %51 %57 %46 +%62 = OpLoad %16 %61 +%63 = OpAccessChain %58 %51 %57 %40 +%64 = OpLoad %16 %63 +%66 = OpAccessChain %58 %51 %57 %65 +%67 = OpLoad %16 %66 +%70 = OpAccessChain %69 %48 %57 %68 +%71 = OpLoad %9 %70 +%73 = OpAccessChain %69 %48 %57 %72 +%74 = OpLoad %9 %73 +%76 = OpAccessChain %69 %48 %57 %75 +%77 = OpLoad %9 %76 +%79 = OpAccessChain %69 %48 %57 %78 +%80 = OpLoad %9 %79 +%81 = OpFConvert %16 %71 +%82 = OpFConvert %16 %74 +%83 = OpFConvert %16 %77 +%84 = OpFConvert %16 %80 +%85 = OpFAdd %16 %81 %60 +%86 = OpFAdd %16 %82 %62 +%87 = OpFAdd %16 %83 %64 +%88 = OpFAdd %16 %84 %67 +%92 = OpAccessChain %91 %54 %57 %90 +%93 = OpLoad %23 %92 +%94 = OpBitcast %89 %93 +%95 = OpAccessChain %91 %54 %57 %37 +%96 = OpLoad %23 %95 +%97 = OpBitcast %89 %96 +%99 = OpAccessChain %91 %54 %57 %98 +%100 = OpLoad %23 %99 +%101 = OpBitcast %89 %100 +%103 = OpAccessChain %91 %54 %57 %102 +%104 = OpLoad %23 %103 +%105 = OpBitcast %89 %104 +%106 = OpConvertSToF %16 %94 +%107 = OpConvertSToF %16 %97 +%108 = OpConvertSToF %16 %101 +%109 = OpConvertSToF %16 %105 +%110 = OpFAdd %16 %85 %106 +%111 = OpFAdd %16 %86 %107 +%112 = OpFAdd %16 %87 %108 +%113 = OpFAdd %16 %88 %109 +%114 = OpAccessChain %58 %42 %57 %57 +%115 = OpLoad %16 %114 +%116 = OpAccessChain %58 %42 %57 %46 +%117 = OpLoad %16 %116 +%118 = OpAccessChain %58 %42 %57 %40 +%119 = OpLoad %16 %118 +%120 = OpAccessChain %58 %42 %57 %65 +%121 = OpLoad %16 %120 +%122 = OpFAdd %16 %110 %115 +%123 = OpFAdd %16 %111 %117 +%124 = OpFAdd %16 %112 %119 +%125 = OpFAdd %16 %113 %121 +%126 = OpAccessChain %91 %34 %57 %57 +%127 = OpLoad %23 %126 +%128 = OpAccessChain %91 %34 %57 %46 +%129 = OpLoad %23 %128 +%130 = OpAccessChain %91 %34 %57 %40 +%131 = OpLoad %23 %130 +%132 = OpAccessChain %91 %34 %57 %65 +%133 = OpLoad %23 %132 +%134 = OpFConvert %16 %127 +%135 = OpFConvert %16 %129 +%136 = OpFConvert %16 %131 +%137 = OpFConvert %16 %133 +%138 = OpFAdd %16 %122 %134 +%139 = OpFAdd %16 %123 %135 +%140 = OpFAdd %16 %124 %136 +%141 = OpFAdd %16 %125 %137 +%143 = OpAccessChain %142 %32 %57 +OpStore %143 %138 +%144 = OpAccessChain %142 %32 %46 +OpStore %144 %139 +%145 = OpAccessChain %142 %32 %40 +OpStore %145 %140 +%146 = OpAccessChain %142 %32 %65 +OpStore %146 %141 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag new file mode 100644 index 0000000..eea884f --- /dev/null +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag @@ -0,0 +1,161 @@ +#version 460 +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 0, scalar) uniform _9_11 +{ + float _m0[1024]; +} _11; + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out vec4 SV_Target; + +void main() +{ + uint _18 = INDEX << 4u; + uint _20 = INDEX * 4u; + float _38 = _11._m0[(INDEX * 4u) + 256u] + _11._m0[_20]; + float _39 = _11._m0[(INDEX * 4u) + 257u] + _11._m0[_20]; + uint _43 = (INDEX * 4u) + 512u; + SV_Target.x = (_38 + _11._m0[_43]) + _11._m0[(INDEX * 4u) + 768u]; + SV_Target.y = (_39 + _11._m0[(INDEX * 4u) + 513u]) + _11._m0[(INDEX * 4u) + 769u]; + SV_Target.z = (_38 + _11._m0[(INDEX * 4u) + 514u]) + _11._m0[(INDEX * 4u) + 770u]; + SV_Target.w = (_39 + _11._m0[_43]) + _11._m0[(INDEX * 4u) + 771u]; +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 107 +; Schema: 0 +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %13 %16 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %9 "" +OpName %13 "INDEX" +OpName %16 "SV_Target" +OpDecorate %8 ArrayStride 4 +OpMemberDecorate %9 0 Offset 0 +OpDecorate %9 Block +OpDecorate %11 DescriptorSet 0 +OpDecorate %11 Binding 0 +OpDecorate %13 Flat +OpDecorate %13 Location 0 +OpDecorate %16 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 1024 +%7 = OpTypeFloat 32 +%8 = OpTypeArray %7 %6 +%9 = OpTypeStruct %8 +%10 = OpTypePointer Uniform %9 +%11 = OpVariable %10 Uniform +%12 = OpTypePointer Input %5 +%13 = OpVariable %12 Input +%14 = OpTypeVector %7 4 +%15 = OpTypePointer Output %14 +%16 = OpVariable %15 Output +%19 = OpConstant %5 4 +%21 = OpTypePointer Uniform %7 +%23 = OpConstant %5 0 +%28 = OpConstant %5 256 +%32 = OpConstant %5 1028 +%35 = OpConstant %5 257 +%41 = OpConstant %5 2048 +%44 = OpConstant %5 512 +%48 = OpConstant %5 2052 +%51 = OpConstant %5 513 +%55 = OpConstant %5 2056 +%58 = OpConstant %5 514 +%66 = OpConstant %5 3072 +%69 = OpConstant %5 768 +%73 = OpConstant %5 3076 +%76 = OpConstant %5 769 +%80 = OpConstant %5 3080 +%83 = OpConstant %5 770 +%87 = OpConstant %5 3084 +%90 = OpConstant %5 771 +%97 = OpTypePointer Output %7 +%100 = OpConstant %5 1 +%102 = OpConstant %5 2 +%104 = OpConstant %5 3 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %105 +%105 = OpLabel +%17 = OpLoad %5 %13 +%18 = OpShiftLeftLogical %5 %17 %19 +%20 = OpIMul %5 %17 %19 +%22 = OpAccessChain %21 %11 %23 %20 +%24 = OpLoad %7 %22 +%25 = OpIAdd %5 %18 %6 +%26 = OpIMul %5 %17 %19 +%27 = OpIAdd %5 %26 %28 +%29 = OpAccessChain %21 %11 %23 %27 +%30 = OpLoad %7 %29 +%31 = OpIAdd %5 %18 %32 +%33 = OpIMul %5 %17 %19 +%34 = OpIAdd %5 %33 %35 +%36 = OpAccessChain %21 %11 %23 %34 +%37 = OpLoad %7 %36 +%38 = OpFAdd %7 %30 %24 +%39 = OpFAdd %7 %37 %24 +%40 = OpIAdd %5 %18 %41 +%42 = OpIMul %5 %17 %19 +%43 = OpIAdd %5 %42 %44 +%45 = OpAccessChain %21 %11 %23 %43 +%46 = OpLoad %7 %45 +%47 = OpIAdd %5 %18 %48 +%49 = OpIMul %5 %17 %19 +%50 = OpIAdd %5 %49 %51 +%52 = OpAccessChain %21 %11 %23 %50 +%53 = OpLoad %7 %52 +%54 = OpIAdd %5 %18 %55 +%56 = OpIMul %5 %17 %19 +%57 = OpIAdd %5 %56 %58 +%59 = OpAccessChain %21 %11 %23 %57 +%60 = OpLoad %7 %59 +%61 = OpFAdd %7 %38 %46 +%62 = OpFAdd %7 %39 %53 +%63 = OpFAdd %7 %38 %60 +%64 = OpFAdd %7 %39 %46 +%65 = OpIAdd %5 %18 %66 +%67 = OpIMul %5 %17 %19 +%68 = OpIAdd %5 %67 %69 +%70 = OpAccessChain %21 %11 %23 %68 +%71 = OpLoad %7 %70 +%72 = OpIAdd %5 %18 %73 +%74 = OpIMul %5 %17 %19 +%75 = OpIAdd %5 %74 %76 +%77 = OpAccessChain %21 %11 %23 %75 +%78 = OpLoad %7 %77 +%79 = OpIAdd %5 %18 %80 +%81 = OpIMul %5 %17 %19 +%82 = OpIAdd %5 %81 %83 +%84 = OpAccessChain %21 %11 %23 %82 +%85 = OpLoad %7 %84 +%86 = OpIAdd %5 %18 %87 +%88 = OpIMul %5 %17 %19 +%89 = OpIAdd %5 %88 %90 +%91 = OpAccessChain %21 %11 %23 %89 +%92 = OpLoad %7 %91 +%93 = OpFAdd %7 %61 %71 +%94 = OpFAdd %7 %62 %78 +%95 = OpFAdd %7 %63 %85 +%96 = OpFAdd %7 %64 %92 +%98 = OpAccessChain %97 %16 %23 +OpStore %98 %93 +%99 = OpAccessChain %97 %16 %100 +OpStore %99 %94 +%101 = OpAccessChain %97 %16 %102 +OpStore %101 %95 +%103 = OpAccessChain %97 %16 %104 +OpStore %103 %96 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss new file mode 100644 index 0000000..5c816f7 --- /dev/null +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss @@ -0,0 +1,162 @@ +#version 460 +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_nonuniform_qualifier : require + +struct _17 +{ + vec4 _m0; + uvec4 _m1; +}; + +layout(shaderRecordEXT, std430) buffer SBTBlock +{ + uint _m0[5]; + uint _m1[6]; + uvec2 _m2; + uvec2 _m3; + uvec2 _m4; + uvec2 _m5; + uvec2 _m6; + uvec2 _m7; + uvec2 _m8; + uvec2 _m9; + uvec2 _m10; +} SBT; + +layout(location = 0) rayPayloadInEXT _17 payload; + +vec4 _38; +uvec4 _54; + +void main() +{ + vec4 _37 = _38; + _37.x = uintBitsToFloat(SBT._m0[0u]); + vec4 _39 = _37; + _39.y = float(SBT._m0[1u]); + vec4 _40 = _39; + _40.z = float(int(SBT._m0[2u])); + vec4 _41 = _40; + _41.w = 1.0; + uvec4 _53 = _54; + _53.x = uint(int(uintBitsToFloat(SBT._m1[0u]))); + uvec4 _55 = _53; + _55.y = uint(int(uintBitsToFloat(SBT._m1[1u]))); + uvec4 _56 = _55; + _56.z = SBT._m1[2u]; + uvec4 _57 = _56; + _57.w = SBT._m0[2u]; + payload._m0 = _41; + payload._m1 = _57; +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 64 +; Schema: 0 +OpCapability Shader +OpCapability UniformBufferArrayDynamicIndexing +OpCapability SampledImageArrayDynamicIndexing +OpCapability StorageBufferArrayDynamicIndexing +OpCapability StorageImageArrayDynamicIndexing +OpCapability RayTracingKHR +OpCapability RuntimeDescriptorArray +OpCapability UniformBufferArrayNonUniformIndexing +OpCapability SampledImageArrayNonUniformIndexing +OpCapability StorageBufferArrayNonUniformIndexing +OpCapability StorageImageArrayNonUniformIndexing +OpExtension "SPV_EXT_descriptor_indexing" +OpExtension "SPV_KHR_ray_tracing" +OpMemoryModel Logical GLSL450 +OpEntryPoint MissNV %3 "main" %13 %19 +OpName %3 "main" +OpName %11 "SBTBlock" +OpName %13 "SBT" +OpName %17 "" +OpName %19 "payload" +OpDecorate %7 ArrayStride 4 +OpDecorate %9 ArrayStride 4 +OpDecorate %11 Block +OpMemberDecorate %11 0 Offset 0 +OpMemberDecorate %11 1 Offset 20 +OpMemberDecorate %11 2 Offset 48 +OpMemberDecorate %11 3 Offset 56 +OpMemberDecorate %11 4 Offset 64 +OpMemberDecorate %11 5 Offset 72 +OpMemberDecorate %11 6 Offset 80 +OpMemberDecorate %11 7 Offset 88 +OpMemberDecorate %11 8 Offset 96 +OpMemberDecorate %11 9 Offset 104 +OpMemberDecorate %11 10 Offset 112 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 5 +%7 = OpTypeArray %5 %6 +%8 = OpConstant %5 6 +%9 = OpTypeArray %5 %8 +%10 = OpTypeVector %5 2 +%11 = OpTypeStruct %7 %9 %10 %10 %10 %10 %10 %10 %10 %10 %10 +%12 = OpTypePointer ShaderRecordBufferNV %11 +%13 = OpVariable %12 ShaderRecordBufferNV +%14 = OpTypeFloat 32 +%15 = OpTypeVector %14 4 +%16 = OpTypeVector %5 4 +%17 = OpTypeStruct %15 %16 +%18 = OpTypePointer IncomingRayPayloadNV %17 +%19 = OpVariable %18 IncomingRayPayloadNV +%20 = OpTypePointer ShaderRecordBufferNV %9 +%22 = OpConstant %5 1 +%23 = OpTypePointer ShaderRecordBufferNV %7 +%25 = OpConstant %5 0 +%26 = OpTypePointer ShaderRecordBufferNV %5 +%34 = OpConstant %5 2 +%42 = OpConstant %14 1 +%58 = OpTypePointer IncomingRayPayloadNV %15 +%60 = OpTypePointer IncomingRayPayloadNV %16 +%3 = OpFunction %1 None %2 +%4 = OpLabel +%38 = OpUndef %15 +%54 = OpUndef %16 +OpBranch %62 +%62 = OpLabel +%21 = OpAccessChain %20 %13 %22 +%24 = OpAccessChain %23 %13 %25 +%27 = OpAccessChain %26 %24 %25 +%28 = OpLoad %5 %27 +%29 = OpBitcast %14 %28 +%30 = OpAccessChain %26 %24 %22 +%31 = OpLoad %5 %30 +%32 = OpConvertUToF %14 %31 +%33 = OpAccessChain %26 %24 %34 +%35 = OpLoad %5 %33 +%36 = OpConvertSToF %14 %35 +%37 = OpCompositeInsert %15 %29 %38 0 +%39 = OpCompositeInsert %15 %32 %37 1 +%40 = OpCompositeInsert %15 %36 %39 2 +%41 = OpCompositeInsert %15 %42 %40 3 +%43 = OpAccessChain %26 %21 %25 +%44 = OpLoad %5 %43 +%45 = OpBitcast %14 %44 +%46 = OpAccessChain %26 %21 %22 +%47 = OpLoad %5 %46 +%48 = OpBitcast %14 %47 +%49 = OpAccessChain %26 %21 %34 +%50 = OpLoad %5 %49 +%51 = OpConvertFToS %5 %45 +%52 = OpConvertFToS %5 %48 +%53 = OpCompositeInsert %16 %51 %54 0 +%55 = OpCompositeInsert %16 %52 %53 1 +%56 = OpCompositeInsert %16 %50 %55 2 +%57 = OpCompositeInsert %16 %35 %56 3 +%59 = OpInBoundsAccessChain %58 %19 %25 +OpStore %59 %41 +%61 = OpInBoundsAccessChain %60 %19 %22 +OpStore %61 %57 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag new file mode 100644 index 0000000..567eb93 --- /dev/null +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag @@ -0,0 +1,179 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 0, scalar) uniform _9_11 +{ + float _m0[20]; +} _11; + +layout(set = 0, binding = 0, scalar) uniform _15_17 +{ + double _m0[10]; +} _17; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + SV_Target.x = ((float(float16_t(_11._m0[4u])) + _11._m0[0u]) + float(float16_t(_11._m0[8u]))) + float(int64_t(doubleBitsToUint64(_17._m0[6u]))); + SV_Target.y = ((float(float16_t(_11._m0[5u])) + _11._m0[1u]) + float(float16_t(_11._m0[9u]))) + float(int64_t(doubleBitsToUint64(_17._m0[7u]))); + SV_Target.z = ((float(float16_t(_11._m0[6u])) + _11._m0[2u]) + float(float16_t(_11._m0[10u]))) + float(int64_t(doubleBitsToUint64(_17._m0[8u]))); + SV_Target.w = ((float(float16_t(_11._m0[7u])) + _11._m0[3u]) + float(float16_t(_11._m0[11u]))) + float(int64_t(doubleBitsToUint64(_17._m0[9u]))); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 111 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %20 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %9 "" +OpName %15 "" +OpName %20 "SV_Target" +OpDecorate %8 ArrayStride 4 +OpMemberDecorate %9 0 Offset 0 +OpDecorate %9 Block +OpDecorate %14 ArrayStride 8 +OpMemberDecorate %15 0 Offset 0 +OpDecorate %15 Block +OpDecorate %11 DescriptorSet 0 +OpDecorate %11 Binding 0 +OpDecorate %17 DescriptorSet 0 +OpDecorate %17 Binding 0 +OpDecorate %20 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 20 +%7 = OpTypeFloat 32 +%8 = OpTypeArray %7 %6 +%9 = OpTypeStruct %8 +%10 = OpTypePointer Uniform %9 +%11 = OpVariable %10 Uniform +%12 = OpConstant %5 10 +%13 = OpTypeFloat 64 +%14 = OpTypeArray %13 %12 +%15 = OpTypeStruct %14 +%16 = OpTypePointer Uniform %15 +%17 = OpVariable %16 Uniform +%18 = OpTypeVector %7 4 +%19 = OpTypePointer Output %18 +%20 = OpVariable %19 Output +%21 = OpConstant %5 0 +%22 = OpTypePointer Uniform %7 +%25 = OpConstant %5 1 +%28 = OpConstant %5 2 +%31 = OpConstant %5 3 +%34 = OpConstant %5 4 +%37 = OpTypeFloat 16 +%39 = OpConstant %5 5 +%43 = OpConstant %5 6 +%47 = OpConstant %5 7 +%59 = OpConstant %5 8 +%63 = OpConstant %5 9 +%70 = OpConstant %5 11 +%82 = OpTypeInt 64 0 +%83 = OpTypePointer Uniform %13 +%104 = OpTypePointer Output %7 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %109 +%109 = OpLabel +%23 = OpAccessChain %22 %11 %21 %21 +%24 = OpLoad %7 %23 +%26 = OpAccessChain %22 %11 %21 %25 +%27 = OpLoad %7 %26 +%29 = OpAccessChain %22 %11 %21 %28 +%30 = OpLoad %7 %29 +%32 = OpAccessChain %22 %11 %21 %31 +%33 = OpLoad %7 %32 +%35 = OpAccessChain %22 %11 %21 %34 +%36 = OpLoad %7 %35 +%38 = OpFConvert %37 %36 +%40 = OpAccessChain %22 %11 %21 %39 +%41 = OpLoad %7 %40 +%42 = OpFConvert %37 %41 +%44 = OpAccessChain %22 %11 %21 %43 +%45 = OpLoad %7 %44 +%46 = OpFConvert %37 %45 +%48 = OpAccessChain %22 %11 %21 %47 +%49 = OpLoad %7 %48 +%50 = OpFConvert %37 %49 +%51 = OpFConvert %7 %38 +%52 = OpFConvert %7 %42 +%53 = OpFConvert %7 %46 +%54 = OpFConvert %7 %50 +%55 = OpFAdd %7 %51 %24 +%56 = OpFAdd %7 %52 %27 +%57 = OpFAdd %7 %53 %30 +%58 = OpFAdd %7 %54 %33 +%60 = OpAccessChain %22 %11 %21 %59 +%61 = OpLoad %7 %60 +%62 = OpFConvert %37 %61 +%64 = OpAccessChain %22 %11 %21 %63 +%65 = OpLoad %7 %64 +%66 = OpFConvert %37 %65 +%67 = OpAccessChain %22 %11 %21 %12 +%68 = OpLoad %7 %67 +%69 = OpFConvert %37 %68 +%71 = OpAccessChain %22 %11 %21 %70 +%72 = OpLoad %7 %71 +%73 = OpFConvert %37 %72 +%74 = OpFConvert %7 %62 +%75 = OpFConvert %7 %66 +%76 = OpFConvert %7 %69 +%77 = OpFConvert %7 %73 +%78 = OpFAdd %7 %55 %74 +%79 = OpFAdd %7 %56 %75 +%80 = OpFAdd %7 %57 %76 +%81 = OpFAdd %7 %58 %77 +%84 = OpAccessChain %83 %17 %21 %43 +%85 = OpLoad %13 %84 +%86 = OpBitcast %82 %85 +%87 = OpAccessChain %83 %17 %21 %47 +%88 = OpLoad %13 %87 +%89 = OpBitcast %82 %88 +%90 = OpAccessChain %83 %17 %21 %59 +%91 = OpLoad %13 %90 +%92 = OpBitcast %82 %91 +%93 = OpAccessChain %83 %17 %21 %63 +%94 = OpLoad %13 %93 +%95 = OpBitcast %82 %94 +%96 = OpConvertSToF %7 %86 +%97 = OpConvertSToF %7 %89 +%98 = OpConvertSToF %7 %92 +%99 = OpConvertSToF %7 %95 +%100 = OpFAdd %7 %78 %96 +%101 = OpFAdd %7 %79 %97 +%102 = OpFAdd %7 %80 %98 +%103 = OpFAdd %7 %81 %99 +%105 = OpAccessChain %104 %20 %21 +OpStore %105 %100 +%106 = OpAccessChain %104 %20 %25 +OpStore %106 %101 +%107 = OpAccessChain %104 %20 %28 +OpStore %107 %102 +%108 = OpAccessChain %104 %20 %31 +OpStore %108 %103 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag new file mode 100644 index 0000000..af4eb5b --- /dev/null +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag @@ -0,0 +1,124 @@ +#version 460 + +layout(push_constant, std430) uniform RootConstants +{ + uint _m0; + uint _m1; + uint _m2; + uint _m3; + uint _m4; + uint _m5; + uint _m6; + uint _m7; + uint _m8; + uint _m9; + uint _m10; + uint _m11; + uint _m12; + uint _m13; + uint _m14; + uint _m15; +} registers; + +layout(location = 0) out vec2 SV_Target; + +void main() +{ + float _18 = uintBitsToFloat(registers._m4); + float _35 = float(registers._m2 + registers._m5); + float _45 = float(int(registers._m3 + registers._m6)); + SV_Target.x = ((uintBitsToFloat(registers._m0) + _18) + _35) + _45; + SV_Target.y = ((uintBitsToFloat(registers._m1) + _18) + _35) + _45; +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 53 +; Schema: 0 +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %12 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %6 "RootConstants" +OpName %8 "registers" +OpName %12 "SV_Target" +OpDecorate %6 Block +OpMemberDecorate %6 0 Offset 0 +OpMemberDecorate %6 1 Offset 4 +OpMemberDecorate %6 2 Offset 8 +OpMemberDecorate %6 3 Offset 12 +OpMemberDecorate %6 4 Offset 16 +OpMemberDecorate %6 5 Offset 20 +OpMemberDecorate %6 6 Offset 24 +OpMemberDecorate %6 7 Offset 28 +OpMemberDecorate %6 8 Offset 32 +OpMemberDecorate %6 9 Offset 36 +OpMemberDecorate %6 10 Offset 40 +OpMemberDecorate %6 11 Offset 44 +OpMemberDecorate %6 12 Offset 48 +OpMemberDecorate %6 13 Offset 52 +OpMemberDecorate %6 14 Offset 56 +OpMemberDecorate %6 15 Offset 60 +OpDecorate %12 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 +%7 = OpTypePointer PushConstant %6 +%8 = OpVariable %7 PushConstant +%9 = OpTypeFloat 32 +%10 = OpTypeVector %9 2 +%11 = OpTypePointer Output %10 +%12 = OpVariable %11 Output +%13 = OpTypePointer PushConstant %5 +%15 = OpConstant %5 4 +%17 = OpConstant %5 0 +%23 = OpConstant %5 1 +%29 = OpConstant %5 5 +%32 = OpConstant %5 2 +%39 = OpConstant %5 6 +%42 = OpConstant %5 3 +%48 = OpTypePointer Output %9 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %51 +%51 = OpLabel +%14 = OpAccessChain %13 %8 %15 +%16 = OpLoad %5 %14 +%18 = OpBitcast %9 %16 +%19 = OpAccessChain %13 %8 %17 +%20 = OpLoad %5 %19 +%21 = OpBitcast %9 %20 +%22 = OpAccessChain %13 %8 %23 +%24 = OpLoad %5 %22 +%25 = OpBitcast %9 %24 +%26 = OpFAdd %9 %21 %18 +%27 = OpFAdd %9 %25 %18 +%28 = OpAccessChain %13 %8 %29 +%30 = OpLoad %5 %28 +%31 = OpAccessChain %13 %8 %32 +%33 = OpLoad %5 %31 +%34 = OpIAdd %5 %33 %30 +%35 = OpConvertUToF %9 %34 +%36 = OpFAdd %9 %26 %35 +%37 = OpFAdd %9 %27 %35 +%38 = OpAccessChain %13 %8 %39 +%40 = OpLoad %5 %38 +%41 = OpAccessChain %13 %8 %42 +%43 = OpLoad %5 %41 +%44 = OpIAdd %5 %43 %40 +%45 = OpConvertSToF %9 %44 +%46 = OpFAdd %9 %36 %45 +%47 = OpFAdd %9 %37 %45 +%49 = OpAccessChain %48 %12 %17 +OpStore %49 %46 +%50 = OpAccessChain %48 %12 %23 +OpStore %50 %47 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag new file mode 100644 index 0000000..8eb931e --- /dev/null +++ b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag @@ -0,0 +1,313 @@ +#version 460 +#extension GL_ARB_gpu_shader_int64 : require +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_buffer_reference : require + +struct AddCarry +{ + uint _m0; + uint _m1; +}; + +layout(buffer_reference) buffer PhysicalPointerFloatNonWrite; +layout(buffer_reference) buffer PhysicalPointerUint64NonWrite; +layout(buffer_reference) buffer PhysicalPointerHalfNonWrite; +layout(buffer_reference, std430) readonly buffer PhysicalPointerFloatNonWrite +{ + float value; +}; + +layout(buffer_reference, std430) readonly buffer PhysicalPointerUint64NonWrite +{ + uint64_t value; +}; + +layout(buffer_reference, std430) readonly buffer PhysicalPointerHalfNonWrite +{ + float16_t value; +}; + +layout(push_constant, std430) uniform RootConstants +{ + uvec2 _m0; + uvec2 _m1; + uvec2 _m2; + uvec2 _m3; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + AddCarry _21; + _21._m0 = uaddCarry(registers._m0.x, 0u, _21._m1); + AddCarry _35; + _35._m0 = uaddCarry(registers._m0.x, 4u, _35._m1); + AddCarry _46; + _46._m0 = uaddCarry(registers._m0.x, 8u, _46._m1); + AddCarry _57; + _57._m0 = uaddCarry(registers._m0.x, 12u, _57._m1); + AddCarry _69; + _69._m0 = uaddCarry(registers._m0.x, 32u, _69._m1); + AddCarry _83; + _83._m0 = uaddCarry(registers._m0.x, 40u, _83._m1); + AddCarry _94; + _94._m0 = uaddCarry(registers._m0.x, 48u, _94._m1); + AddCarry _105; + _105._m0 = uaddCarry(registers._m0.x, 56u, _105._m1); + AddCarry _125; + _125._m0 = uaddCarry(registers._m0.x, 16u, _125._m1); + AddCarry _139; + _139._m0 = uaddCarry(registers._m0.x, 20u, _139._m1); + AddCarry _150; + _150._m0 = uaddCarry(registers._m0.x, 24u, _150._m1); + AddCarry _161; + _161._m0 = uaddCarry(registers._m0.x, 28u, _161._m1); + SV_Target.x = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_21._m0, registers._m0.y + _21._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_125._m0, registers._m0.y + _125._m1)).value); + SV_Target.y = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_35._m0, registers._m0.y + _35._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_139._m0, registers._m0.y + _139._m1)).value); + SV_Target.z = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_94._m0, registers._m0.y + _94._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_46._m0, registers._m0.y + _46._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_150._m0, registers._m0.y + _150._m1)).value); + SV_Target.w = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_105._m0, registers._m0.y + _105._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_57._m0, registers._m0.y + _57._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_161._m0, registers._m0.y + _161._m1)).value); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 187 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Int64 +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %13 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %7 "RootConstants" +OpName %9 "registers" +OpName %13 "SV_Target" +OpName %20 "AddCarry" +OpName %26 "PhysicalPointerFloatNonWrite" +OpMemberName %26 0 "value" +OpName %74 "PhysicalPointerUint64NonWrite" +OpMemberName %74 0 "value" +OpName %130 "PhysicalPointerHalfNonWrite" +OpMemberName %130 0 "value" +OpDecorate %7 Block +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpMemberDecorate %7 2 Offset 16 +OpMemberDecorate %7 3 Offset 24 +OpDecorate %13 Location 0 +OpMemberDecorate %26 0 Offset 0 +OpDecorate %26 Block +OpMemberDecorate %26 0 NonWritable +OpMemberDecorate %74 0 Offset 0 +OpDecorate %74 Block +OpMemberDecorate %74 0 NonWritable +OpMemberDecorate %130 0 Offset 0 +OpDecorate %130 Block +OpMemberDecorate %130 0 NonWritable +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeVector %5 2 +%7 = OpTypeStruct %6 %6 %6 %6 +%8 = OpTypePointer PushConstant %7 +%9 = OpVariable %8 PushConstant +%10 = OpTypeFloat 32 +%11 = OpTypeVector %10 4 +%12 = OpTypePointer Output %11 +%13 = OpVariable %12 Output +%14 = OpTypePointer PushConstant %6 +%16 = OpConstant %5 0 +%20 = OpTypeStruct %5 %5 +%26 = OpTypeStruct %10 +%27 = OpTypePointer PhysicalStorageBuffer %26 +%29 = OpTypePointer PhysicalStorageBuffer %10 +%32 = OpConstant %5 4 +%43 = OpConstant %5 8 +%54 = OpConstant %5 12 +%65 = OpConstant %5 32 +%66 = OpTypeInt 64 0 +%74 = OpTypeStruct %66 +%75 = OpTypePointer PhysicalStorageBuffer %74 +%77 = OpTypePointer PhysicalStorageBuffer %66 +%80 = OpConstant %5 40 +%91 = OpConstant %5 48 +%102 = OpConstant %5 56 +%121 = OpConstant %5 16 +%122 = OpTypeFloat 16 +%130 = OpTypeStruct %122 +%131 = OpTypePointer PhysicalStorageBuffer %130 +%133 = OpTypePointer PhysicalStorageBuffer %122 +%136 = OpConstant %5 20 +%147 = OpConstant %5 24 +%158 = OpConstant %5 28 +%177 = OpTypePointer Output %10 +%180 = OpConstant %5 1 +%182 = OpConstant %5 2 +%184 = OpConstant %5 3 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %185 +%185 = OpLabel +%15 = OpAccessChain %14 %9 %16 +%17 = OpLoad %6 %15 +%18 = OpCompositeExtract %5 %17 0 +%19 = OpCompositeExtract %5 %17 1 +%21 = OpIAddCarry %20 %18 %16 +%22 = OpCompositeExtract %5 %21 0 +%23 = OpCompositeExtract %5 %21 1 +%24 = OpIAdd %5 %19 %23 +%25 = OpCompositeConstruct %6 %22 %24 +%28 = OpBitcast %27 %25 +%30 = OpAccessChain %29 %28 %16 +%31 = OpLoad %10 %30 Aligned 4 +%33 = OpCompositeExtract %5 %17 0 +%34 = OpCompositeExtract %5 %17 1 +%35 = OpIAddCarry %20 %33 %32 +%36 = OpCompositeExtract %5 %35 0 +%37 = OpCompositeExtract %5 %35 1 +%38 = OpIAdd %5 %34 %37 +%39 = OpCompositeConstruct %6 %36 %38 +%40 = OpBitcast %27 %39 +%41 = OpAccessChain %29 %40 %16 +%42 = OpLoad %10 %41 Aligned 4 +%44 = OpCompositeExtract %5 %17 0 +%45 = OpCompositeExtract %5 %17 1 +%46 = OpIAddCarry %20 %44 %43 +%47 = OpCompositeExtract %5 %46 0 +%48 = OpCompositeExtract %5 %46 1 +%49 = OpIAdd %5 %45 %48 +%50 = OpCompositeConstruct %6 %47 %49 +%51 = OpBitcast %27 %50 +%52 = OpAccessChain %29 %51 %16 +%53 = OpLoad %10 %52 Aligned 4 +%55 = OpCompositeExtract %5 %17 0 +%56 = OpCompositeExtract %5 %17 1 +%57 = OpIAddCarry %20 %55 %54 +%58 = OpCompositeExtract %5 %57 0 +%59 = OpCompositeExtract %5 %57 1 +%60 = OpIAdd %5 %56 %59 +%61 = OpCompositeConstruct %6 %58 %60 +%62 = OpBitcast %27 %61 +%63 = OpAccessChain %29 %62 %16 +%64 = OpLoad %10 %63 Aligned 4 +%67 = OpCompositeExtract %5 %17 0 +%68 = OpCompositeExtract %5 %17 1 +%69 = OpIAddCarry %20 %67 %65 +%70 = OpCompositeExtract %5 %69 0 +%71 = OpCompositeExtract %5 %69 1 +%72 = OpIAdd %5 %68 %71 +%73 = OpCompositeConstruct %6 %70 %72 +%76 = OpBitcast %75 %73 +%78 = OpAccessChain %77 %76 %16 +%79 = OpLoad %66 %78 Aligned 8 +%81 = OpCompositeExtract %5 %17 0 +%82 = OpCompositeExtract %5 %17 1 +%83 = OpIAddCarry %20 %81 %80 +%84 = OpCompositeExtract %5 %83 0 +%85 = OpCompositeExtract %5 %83 1 +%86 = OpIAdd %5 %82 %85 +%87 = OpCompositeConstruct %6 %84 %86 +%88 = OpBitcast %75 %87 +%89 = OpAccessChain %77 %88 %16 +%90 = OpLoad %66 %89 Aligned 8 +%92 = OpCompositeExtract %5 %17 0 +%93 = OpCompositeExtract %5 %17 1 +%94 = OpIAddCarry %20 %92 %91 +%95 = OpCompositeExtract %5 %94 0 +%96 = OpCompositeExtract %5 %94 1 +%97 = OpIAdd %5 %93 %96 +%98 = OpCompositeConstruct %6 %95 %97 +%99 = OpBitcast %75 %98 +%100 = OpAccessChain %77 %99 %16 +%101 = OpLoad %66 %100 Aligned 8 +%103 = OpCompositeExtract %5 %17 0 +%104 = OpCompositeExtract %5 %17 1 +%105 = OpIAddCarry %20 %103 %102 +%106 = OpCompositeExtract %5 %105 0 +%107 = OpCompositeExtract %5 %105 1 +%108 = OpIAdd %5 %104 %107 +%109 = OpCompositeConstruct %6 %106 %108 +%110 = OpBitcast %75 %109 +%111 = OpAccessChain %77 %110 %16 +%112 = OpLoad %66 %111 Aligned 8 +%113 = OpConvertSToF %10 %79 +%114 = OpConvertSToF %10 %90 +%115 = OpConvertSToF %10 %101 +%116 = OpConvertSToF %10 %112 +%117 = OpFAdd %10 %113 %31 +%118 = OpFAdd %10 %114 %42 +%119 = OpFAdd %10 %115 %53 +%120 = OpFAdd %10 %116 %64 +%123 = OpCompositeExtract %5 %17 0 +%124 = OpCompositeExtract %5 %17 1 +%125 = OpIAddCarry %20 %123 %121 +%126 = OpCompositeExtract %5 %125 0 +%127 = OpCompositeExtract %5 %125 1 +%128 = OpIAdd %5 %124 %127 +%129 = OpCompositeConstruct %6 %126 %128 +%132 = OpBitcast %131 %129 +%134 = OpAccessChain %133 %132 %16 +%135 = OpLoad %122 %134 Aligned 2 +%137 = OpCompositeExtract %5 %17 0 +%138 = OpCompositeExtract %5 %17 1 +%139 = OpIAddCarry %20 %137 %136 +%140 = OpCompositeExtract %5 %139 0 +%141 = OpCompositeExtract %5 %139 1 +%142 = OpIAdd %5 %138 %141 +%143 = OpCompositeConstruct %6 %140 %142 +%144 = OpBitcast %131 %143 +%145 = OpAccessChain %133 %144 %16 +%146 = OpLoad %122 %145 Aligned 2 +%148 = OpCompositeExtract %5 %17 0 +%149 = OpCompositeExtract %5 %17 1 +%150 = OpIAddCarry %20 %148 %147 +%151 = OpCompositeExtract %5 %150 0 +%152 = OpCompositeExtract %5 %150 1 +%153 = OpIAdd %5 %149 %152 +%154 = OpCompositeConstruct %6 %151 %153 +%155 = OpBitcast %131 %154 +%156 = OpAccessChain %133 %155 %16 +%157 = OpLoad %122 %156 Aligned 2 +%159 = OpCompositeExtract %5 %17 0 +%160 = OpCompositeExtract %5 %17 1 +%161 = OpIAddCarry %20 %159 %158 +%162 = OpCompositeExtract %5 %161 0 +%163 = OpCompositeExtract %5 %161 1 +%164 = OpIAdd %5 %160 %163 +%165 = OpCompositeConstruct %6 %162 %164 +%166 = OpBitcast %131 %165 +%167 = OpAccessChain %133 %166 %16 +%168 = OpLoad %122 %167 Aligned 2 +%169 = OpFConvert %10 %135 +%170 = OpFConvert %10 %146 +%171 = OpFConvert %10 %157 +%172 = OpFConvert %10 %168 +%173 = OpFAdd %10 %117 %169 +%174 = OpFAdd %10 %118 %170 +%175 = OpFAdd %10 %119 %171 +%176 = OpFAdd %10 %120 %172 +%178 = OpAccessChain %177 %13 %16 +OpStore %178 %173 +%179 = OpAccessChain %177 %13 %180 +OpStore %179 %174 +%181 = OpAccessChain %177 %13 %182 +OpStore %181 %175 +%183 = OpAccessChain %177 %13 %184 +OpStore %183 %176 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen b/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen index 9312a4f..d8abf60 100644 --- a/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen +++ b/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen @@ -36,6 +36,7 @@ OpCapability UniformBufferArrayDynamicIndexing OpCapability SampledImageArrayDynamicIndexing OpCapability StorageBufferArrayDynamicIndexing OpCapability StorageImageArrayDynamicIndexing +OpCapability StorageBuffer16BitAccess OpCapability RayTracingKHR OpCapability RuntimeDescriptorArray OpCapability UniformBufferArrayNonUniformIndexing diff --git a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss index 7ebdc2c..e4815b2 100644 --- a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss +++ b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss @@ -105,8 +105,8 @@ void main() uint _59 = _58 & 1u; vec4 _67 = texelFetch(_21[registers._m0 + _59], ivec2(uvec2(0u)), int(0u)); vec4 _80 = texelFetch(_21[registers._m0 + _58], ivec2(uvec2(0u)), int(0u)); - vec4 _99 = texelFetch(_21[((SBT._m7.x >> 6u) + 17u) + _58], ivec2(uvec2(0u)), int(0u)); - vec4 _119 = imageLoad(_25[((SBT._m8.x >> 6u) + 18u) + _58], ivec2(uvec2(0u))); + vec4 _99 = texelFetch(_21[nonuniformEXT(((SBT._m7.x >> 6u) + 17u) + _58)], ivec2(uvec2(0u)), int(0u)); + vec4 _119 = imageLoad(_25[nonuniformEXT(((SBT._m8.x >> 6u) + 18u) + _58)], ivec2(uvec2(0u))); uint _146 = ((SBT._m9.x >> 6u) + 13u) + _58; vec4 _169 = uintBitsToFloat(uvec4(SBT._m0[0u], SBT._m0[1u], SBT._m0[2u], SBT._m0[3u])); vec4 _182 = uintBitsToFloat(uvec4(SBT._m0[4u], 0u, 0u, 0u)); @@ -114,7 +114,7 @@ void main() _196._m0 = uaddCarry(SBT._m6.x, 1u * 16u, _196._m1); PhysicalPointerFloat4NonWrite _203 = PhysicalPointerFloat4NonWrite(uvec2(_196._m0, SBT._m6.y + _196._m1)); vec4 _232 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _59], _36[(SBT._m10.x >> 5u) + 13u])), vec2(0.5), 0.0); - vec4 _258 = textureLod(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)]), vec2(0.5), 0.0); + vec4 _258 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)])), vec2(0.5), 0.0); AddCarry _274; _274._m0 = uaddCarry(SBT._m2.x, (_58 * 16u) + 0u, _274._m1); PhysicalPointerFloat4NonWrite _279 = PhysicalPointerFloat4NonWrite(uvec2(_274._m0, SBT._m2.y + _274._m1)); @@ -243,7 +243,12 @@ OpDecorate %32 Binding 0 OpDecorate %36 DescriptorSet 2 OpDecorate %36 Binding 0 OpDecorate %47 NonUniform +OpDecorate %97 NonUniform +OpDecorate %98 NonUniform +OpDecorate %117 NonUniform +OpDecorate %118 NonUniform OpDecorate %130 NonUniform +OpDecorate %146 NonUniform OpDecorate %140 NonUniform OpDecorate %147 NonUniform OpMemberDecorate %201 0 Offset 0 @@ -251,6 +256,9 @@ OpDecorate %201 Block OpMemberDecorate %201 0 NonWritable OpDecorate %227 NonUniform OpDecorate %229 NonUniform +OpDecorate %255 NonUniform +OpDecorate %256 NonUniform +OpDecorate %257 NonUniform OpMemberDecorate %293 0 Offset 0 OpDecorate %293 Block OpMemberDecorate %293 0 NonWritable diff --git a/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag new file mode 100644 index 0000000..ba78e81 --- /dev/null +++ b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag @@ -0,0 +1,260 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 5, binding = 0, scalar) uniform BindlessCBV +{ + float16_t _m0[32768]; +} _15[]; + +layout(set = 5, binding = 0, scalar) uniform _19_22 +{ + float _m0[16384]; +} _22[]; + +layout(set = 5, binding = 0, scalar) uniform _26_29 +{ + double _m0[8192]; +} _29[]; + +layout(push_constant, std430) uniform RootConstants +{ + uint _m0; + uint _m1; + uint _m2; + uint _m3; + uint _m4; + uint _m5; + uint _m6; + uint _m7; +} registers; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + uint _39 = registers._m5 + 2u; + uint _45 = registers._m5 + 1u; + SV_Target.x = (((float(_15[registers._m5]._m0[8u]) + _22[registers._m5]._m0[0u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[4u])))) + _22[_45]._m0[0u]) + float(_29[_39]._m0[0u]); + SV_Target.y = (((float(_15[registers._m5]._m0[10u]) + _22[registers._m5]._m0[1u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[5u])))) + _22[_45]._m0[1u]) + float(_29[_39]._m0[1u]); + SV_Target.z = (((float(_15[registers._m5]._m0[12u]) + _22[registers._m5]._m0[2u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[6u])))) + _22[_45]._m0[2u]) + float(_29[_39]._m0[2u]); + SV_Target.w = (((float(_15[registers._m5]._m0[14u]) + _22[registers._m5]._m0[3u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[7u])))) + _22[_45]._m0[3u]) + float(_29[_39]._m0[3u]); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 149 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability RuntimeDescriptorArray +OpCapability PhysicalStorageBufferAddresses +OpExtension "SPV_EXT_descriptor_indexing" +OpExtension "SPV_KHR_physical_storage_buffer" +OpMemoryModel PhysicalStorageBuffer64 GLSL450 +OpEntryPoint Fragment %3 "main" %32 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %6 "RootConstants" +OpName %8 "registers" +OpName %12 "BindlessCBV" +OpName %19 "BindlessCBV" +OpName %26 "BindlessCBV" +OpName %32 "SV_Target" +OpDecorate %6 Block +OpMemberDecorate %6 0 Offset 0 +OpMemberDecorate %6 1 Offset 4 +OpMemberDecorate %6 2 Offset 8 +OpMemberDecorate %6 3 Offset 12 +OpMemberDecorate %6 4 Offset 16 +OpMemberDecorate %6 5 Offset 20 +OpMemberDecorate %6 6 Offset 24 +OpMemberDecorate %6 7 Offset 28 +OpDecorate %11 ArrayStride 2 +OpDecorate %12 Block +OpMemberDecorate %12 0 Offset 0 +OpDecorate %15 DescriptorSet 5 +OpDecorate %15 Binding 0 +OpDecorate %18 ArrayStride 4 +OpDecorate %19 Block +OpMemberDecorate %19 0 Offset 0 +OpDecorate %22 DescriptorSet 5 +OpDecorate %22 Binding 0 +OpDecorate %25 ArrayStride 8 +OpDecorate %26 Block +OpMemberDecorate %26 0 Offset 0 +OpDecorate %29 DescriptorSet 5 +OpDecorate %29 Binding 0 +OpDecorate %32 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5 +%7 = OpTypePointer PushConstant %6 +%8 = OpVariable %7 PushConstant +%9 = OpTypeFloat 16 +%10 = OpConstant %5 32768 +%11 = OpTypeArray %9 %10 +%12 = OpTypeStruct %11 +%13 = OpTypeRuntimeArray %12 +%14 = OpTypePointer Uniform %13 +%15 = OpVariable %14 Uniform +%16 = OpTypeFloat 32 +%17 = OpConstant %5 16384 +%18 = OpTypeArray %16 %17 +%19 = OpTypeStruct %18 +%20 = OpTypeRuntimeArray %19 +%21 = OpTypePointer Uniform %20 +%22 = OpVariable %21 Uniform +%23 = OpTypeFloat 64 +%24 = OpConstant %5 8192 +%25 = OpTypeArray %23 %24 +%26 = OpTypeStruct %25 +%27 = OpTypeRuntimeArray %26 +%28 = OpTypePointer Uniform %27 +%29 = OpVariable %28 Uniform +%30 = OpTypeVector %16 4 +%31 = OpTypePointer Output %30 +%32 = OpVariable %31 Output +%33 = OpTypePointer Uniform %26 +%35 = OpTypePointer PushConstant %5 +%37 = OpConstant %5 5 +%40 = OpConstant %5 2 +%41 = OpTypePointer Uniform %19 +%46 = OpConstant %5 1 +%47 = OpTypePointer Uniform %12 +%57 = OpConstant %5 0 +%58 = OpTypePointer Uniform %16 +%65 = OpConstant %5 3 +%68 = OpConstant %5 8 +%69 = OpTypePointer Uniform %9 +%72 = OpConstant %5 10 +%75 = OpConstant %5 12 +%78 = OpConstant %5 14 +%89 = OpTypeInt 64 0 +%90 = OpConstant %5 4 +%91 = OpTypePointer Uniform %23 +%98 = OpConstant %5 6 +%102 = OpConstant %5 7 +%142 = OpTypePointer Output %16 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %147 +%147 = OpLabel +%36 = OpAccessChain %35 %8 %37 +%38 = OpLoad %5 %36 +%39 = OpIAdd %5 %38 %40 +%34 = OpAccessChain %33 %29 %39 +%43 = OpAccessChain %35 %8 %37 +%44 = OpLoad %5 %43 +%45 = OpIAdd %5 %44 %46 +%42 = OpAccessChain %41 %22 %45 +%49 = OpAccessChain %35 %8 %37 +%50 = OpLoad %5 %49 +%48 = OpAccessChain %47 %15 %50 +%52 = OpAccessChain %35 %8 %37 +%53 = OpLoad %5 %52 +%51 = OpAccessChain %41 %22 %53 +%55 = OpAccessChain %35 %8 %37 +%56 = OpLoad %5 %55 +%54 = OpAccessChain %33 %29 %56 +%59 = OpAccessChain %58 %51 %57 %57 +%60 = OpLoad %16 %59 +%61 = OpAccessChain %58 %51 %57 %46 +%62 = OpLoad %16 %61 +%63 = OpAccessChain %58 %51 %57 %40 +%64 = OpLoad %16 %63 +%66 = OpAccessChain %58 %51 %57 %65 +%67 = OpLoad %16 %66 +%70 = OpAccessChain %69 %48 %57 %68 +%71 = OpLoad %9 %70 +%73 = OpAccessChain %69 %48 %57 %72 +%74 = OpLoad %9 %73 +%76 = OpAccessChain %69 %48 %57 %75 +%77 = OpLoad %9 %76 +%79 = OpAccessChain %69 %48 %57 %78 +%80 = OpLoad %9 %79 +%81 = OpFConvert %16 %71 +%82 = OpFConvert %16 %74 +%83 = OpFConvert %16 %77 +%84 = OpFConvert %16 %80 +%85 = OpFAdd %16 %81 %60 +%86 = OpFAdd %16 %82 %62 +%87 = OpFAdd %16 %83 %64 +%88 = OpFAdd %16 %84 %67 +%92 = OpAccessChain %91 %54 %57 %90 +%93 = OpLoad %23 %92 +%94 = OpBitcast %89 %93 +%95 = OpAccessChain %91 %54 %57 %37 +%96 = OpLoad %23 %95 +%97 = OpBitcast %89 %96 +%99 = OpAccessChain %91 %54 %57 %98 +%100 = OpLoad %23 %99 +%101 = OpBitcast %89 %100 +%103 = OpAccessChain %91 %54 %57 %102 +%104 = OpLoad %23 %103 +%105 = OpBitcast %89 %104 +%106 = OpConvertSToF %16 %94 +%107 = OpConvertSToF %16 %97 +%108 = OpConvertSToF %16 %101 +%109 = OpConvertSToF %16 %105 +%110 = OpFAdd %16 %85 %106 +%111 = OpFAdd %16 %86 %107 +%112 = OpFAdd %16 %87 %108 +%113 = OpFAdd %16 %88 %109 +%114 = OpAccessChain %58 %42 %57 %57 +%115 = OpLoad %16 %114 +%116 = OpAccessChain %58 %42 %57 %46 +%117 = OpLoad %16 %116 +%118 = OpAccessChain %58 %42 %57 %40 +%119 = OpLoad %16 %118 +%120 = OpAccessChain %58 %42 %57 %65 +%121 = OpLoad %16 %120 +%122 = OpFAdd %16 %110 %115 +%123 = OpFAdd %16 %111 %117 +%124 = OpFAdd %16 %112 %119 +%125 = OpFAdd %16 %113 %121 +%126 = OpAccessChain %91 %34 %57 %57 +%127 = OpLoad %23 %126 +%128 = OpAccessChain %91 %34 %57 %46 +%129 = OpLoad %23 %128 +%130 = OpAccessChain %91 %34 %57 %40 +%131 = OpLoad %23 %130 +%132 = OpAccessChain %91 %34 %57 %65 +%133 = OpLoad %23 %132 +%134 = OpFConvert %16 %127 +%135 = OpFConvert %16 %129 +%136 = OpFConvert %16 %131 +%137 = OpFConvert %16 %133 +%138 = OpFAdd %16 %122 %134 +%139 = OpFAdd %16 %123 %135 +%140 = OpFAdd %16 %124 %136 +%141 = OpFAdd %16 %125 %137 +%143 = OpAccessChain %142 %32 %57 +OpStore %143 %138 +%144 = OpAccessChain %142 %32 %46 +OpStore %144 %139 +%145 = OpAccessChain %142 %32 %40 +OpStore %145 %140 +%146 = OpAccessChain %142 %32 %65 +OpStore %146 %141 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag new file mode 100644 index 0000000..32d7dcf --- /dev/null +++ b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag @@ -0,0 +1,231 @@ +#version 460 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require +#extension GL_ARB_gpu_shader_int64 : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 0, scalar) uniform _9_11 +{ + float16_t _m0[32]; +} _11; + +layout(set = 0, binding = 0, scalar) uniform _15_17 +{ + float _m0[16]; +} _17; + +layout(set = 0, binding = 0, scalar) uniform _21_23 +{ + double _m0[8]; +} _23; + +layout(set = 0, binding = 1, scalar) uniform _26_28 +{ + float _m0[4]; +} _28; + +layout(set = 0, binding = 2, scalar) uniform _30_32 +{ + double _m0[4]; +} _32; + +layout(location = 0) out vec4 SV_Target; + +void main() +{ + SV_Target.x = (((float(_11._m0[8u]) + _17._m0[0u]) + float(int64_t(doubleBitsToUint64(_23._m0[4u])))) + _28._m0[0u]) + float(_32._m0[0u]); + SV_Target.y = (((float(_11._m0[10u]) + _17._m0[1u]) + float(int64_t(doubleBitsToUint64(_23._m0[5u])))) + _28._m0[1u]) + float(_32._m0[1u]); + SV_Target.z = (((float(_11._m0[12u]) + _17._m0[2u]) + float(int64_t(doubleBitsToUint64(_23._m0[6u])))) + _28._m0[2u]) + float(_32._m0[2u]); + SV_Target.w = (((float(_11._m0[14u]) + _17._m0[3u]) + float(int64_t(doubleBitsToUint64(_23._m0[7u])))) + _28._m0[3u]) + float(_32._m0[3u]); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 129 +; Schema: 0 +OpCapability Shader +OpCapability Float16 +OpCapability Float64 +OpCapability Int64 +OpCapability UniformAndStorageBuffer16BitAccess +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %3 "main" %35 +OpExecutionMode %3 OriginUpperLeft +OpName %3 "main" +OpName %9 "" +OpName %15 "" +OpName %21 "" +OpName %26 "" +OpName %30 "" +OpName %35 "SV_Target" +OpDecorate %8 ArrayStride 2 +OpMemberDecorate %9 0 Offset 0 +OpDecorate %9 Block +OpDecorate %14 ArrayStride 4 +OpMemberDecorate %15 0 Offset 0 +OpDecorate %15 Block +OpDecorate %20 ArrayStride 8 +OpMemberDecorate %21 0 Offset 0 +OpDecorate %21 Block +OpDecorate %11 DescriptorSet 0 +OpDecorate %11 Binding 0 +OpDecorate %17 DescriptorSet 0 +OpDecorate %17 Binding 0 +OpDecorate %23 DescriptorSet 0 +OpDecorate %23 Binding 0 +OpDecorate %25 ArrayStride 4 +OpMemberDecorate %26 0 Offset 0 +OpDecorate %26 Block +OpDecorate %28 DescriptorSet 0 +OpDecorate %28 Binding 1 +OpDecorate %29 ArrayStride 8 +OpMemberDecorate %30 0 Offset 0 +OpDecorate %30 Block +OpDecorate %32 DescriptorSet 0 +OpDecorate %32 Binding 2 +OpDecorate %35 Location 0 +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpConstant %5 32 +%7 = OpTypeFloat 16 +%8 = OpTypeArray %7 %6 +%9 = OpTypeStruct %8 +%10 = OpTypePointer Uniform %9 +%11 = OpVariable %10 Uniform +%12 = OpConstant %5 16 +%13 = OpTypeFloat 32 +%14 = OpTypeArray %13 %12 +%15 = OpTypeStruct %14 +%16 = OpTypePointer Uniform %15 +%17 = OpVariable %16 Uniform +%18 = OpConstant %5 8 +%19 = OpTypeFloat 64 +%20 = OpTypeArray %19 %18 +%21 = OpTypeStruct %20 +%22 = OpTypePointer Uniform %21 +%23 = OpVariable %22 Uniform +%24 = OpConstant %5 4 +%25 = OpTypeArray %13 %24 +%26 = OpTypeStruct %25 +%27 = OpTypePointer Uniform %26 +%28 = OpVariable %27 Uniform +%29 = OpTypeArray %19 %24 +%30 = OpTypeStruct %29 +%31 = OpTypePointer Uniform %30 +%32 = OpVariable %31 Uniform +%33 = OpTypeVector %13 4 +%34 = OpTypePointer Output %33 +%35 = OpVariable %34 Output +%36 = OpConstant %5 0 +%37 = OpTypePointer Uniform %13 +%40 = OpConstant %5 1 +%43 = OpConstant %5 2 +%46 = OpConstant %5 3 +%49 = OpTypePointer Uniform %7 +%52 = OpConstant %5 10 +%55 = OpConstant %5 12 +%58 = OpConstant %5 14 +%69 = OpTypeInt 64 0 +%70 = OpTypePointer Uniform %19 +%74 = OpConstant %5 5 +%78 = OpConstant %5 6 +%82 = OpConstant %5 7 +%122 = OpTypePointer Output %13 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %127 +%127 = OpLabel +%38 = OpAccessChain %37 %17 %36 %36 +%39 = OpLoad %13 %38 +%41 = OpAccessChain %37 %17 %36 %40 +%42 = OpLoad %13 %41 +%44 = OpAccessChain %37 %17 %36 %43 +%45 = OpLoad %13 %44 +%47 = OpAccessChain %37 %17 %36 %46 +%48 = OpLoad %13 %47 +%50 = OpAccessChain %49 %11 %36 %18 +%51 = OpLoad %7 %50 +%53 = OpAccessChain %49 %11 %36 %52 +%54 = OpLoad %7 %53 +%56 = OpAccessChain %49 %11 %36 %55 +%57 = OpLoad %7 %56 +%59 = OpAccessChain %49 %11 %36 %58 +%60 = OpLoad %7 %59 +%61 = OpFConvert %13 %51 +%62 = OpFConvert %13 %54 +%63 = OpFConvert %13 %57 +%64 = OpFConvert %13 %60 +%65 = OpFAdd %13 %61 %39 +%66 = OpFAdd %13 %62 %42 +%67 = OpFAdd %13 %63 %45 +%68 = OpFAdd %13 %64 %48 +%71 = OpAccessChain %70 %23 %36 %24 +%72 = OpLoad %19 %71 +%73 = OpBitcast %69 %72 +%75 = OpAccessChain %70 %23 %36 %74 +%76 = OpLoad %19 %75 +%77 = OpBitcast %69 %76 +%79 = OpAccessChain %70 %23 %36 %78 +%80 = OpLoad %19 %79 +%81 = OpBitcast %69 %80 +%83 = OpAccessChain %70 %23 %36 %82 +%84 = OpLoad %19 %83 +%85 = OpBitcast %69 %84 +%86 = OpConvertSToF %13 %73 +%87 = OpConvertSToF %13 %77 +%88 = OpConvertSToF %13 %81 +%89 = OpConvertSToF %13 %85 +%90 = OpFAdd %13 %65 %86 +%91 = OpFAdd %13 %66 %87 +%92 = OpFAdd %13 %67 %88 +%93 = OpFAdd %13 %68 %89 +%94 = OpAccessChain %37 %28 %36 %36 +%95 = OpLoad %13 %94 +%96 = OpAccessChain %37 %28 %36 %40 +%97 = OpLoad %13 %96 +%98 = OpAccessChain %37 %28 %36 %43 +%99 = OpLoad %13 %98 +%100 = OpAccessChain %37 %28 %36 %46 +%101 = OpLoad %13 %100 +%102 = OpFAdd %13 %90 %95 +%103 = OpFAdd %13 %91 %97 +%104 = OpFAdd %13 %92 %99 +%105 = OpFAdd %13 %93 %101 +%106 = OpAccessChain %70 %32 %36 %36 +%107 = OpLoad %19 %106 +%108 = OpAccessChain %70 %32 %36 %40 +%109 = OpLoad %19 %108 +%110 = OpAccessChain %70 %32 %36 %43 +%111 = OpLoad %19 %110 +%112 = OpAccessChain %70 %32 %36 %46 +%113 = OpLoad %19 %112 +%114 = OpFConvert %13 %107 +%115 = OpFConvert %13 %109 +%116 = OpFConvert %13 %111 +%117 = OpFConvert %13 %113 +%118 = OpFAdd %13 %102 %114 +%119 = OpFAdd %13 %103 %115 +%120 = OpFAdd %13 %104 %116 +%121 = OpFAdd %13 %105 %117 +%123 = OpAccessChain %122 %35 %36 +OpStore %123 %118 +%124 = OpAccessChain %122 %35 %40 +OpStore %124 %119 +%125 = OpAccessChain %122 %35 %43 +OpStore %125 %120 +%126 = OpAccessChain %122 %35 %46 +OpStore %126 %121 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag index f494f31..e5ee27b 100644 --- a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag +++ b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag @@ -99,6 +99,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" diff --git a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag index d70287c..b9c3f99 100644 --- a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag +++ b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag @@ -125,6 +125,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" diff --git a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag index 941d6dc..6bc6537 100644 --- a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag +++ b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag @@ -88,6 +88,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpExtension "SPV_EXT_descriptor_indexing" OpMemoryModel Logical GLSL450 diff --git a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag index 088e381..8c122b8 100644 --- a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag +++ b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag @@ -112,6 +112,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpExtension "SPV_EXT_descriptor_indexing" OpMemoryModel Logical GLSL450 diff --git a/reference/shaders/semantics/clip-distance-flatten.vert b/reference/shaders/semantics/clip-distance-flatten.vert index f80f694..5a53683 100644 --- a/reference/shaders/semantics/clip-distance-flatten.vert +++ b/reference/shaders/semantics/clip-distance-flatten.vert @@ -59,8 +59,8 @@ OpDecorate %20 BuiltIn ClipDistance %23 = OpConstant %14 0 %26 = OpConstant %14 1 %35 = OpConstant %14 3 -%37 = OpTypePointer Output %5 -%45 = OpConstant %5 1 +%38 = OpConstant %5 1 +%40 = OpTypePointer Output %5 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %49 @@ -77,24 +77,24 @@ OpBranch %49 %33 = OpLoad %5 %32 %34 = OpAccessChain %21 %8 %35 %36 = OpLoad %5 %34 -%38 = OpAccessChain %37 %13 %23 -OpStore %38 %29 -%39 = OpAccessChain %37 %13 %26 -OpStore %39 %31 -%40 = OpAccessChain %37 %13 %15 -OpStore %40 %33 -%41 = OpAccessChain %37 %13 %35 -OpStore %41 %36 -%42 = OpAccessChain %37 %20 %23 -OpStore %42 %24 -%43 = OpAccessChain %37 %20 %26 -OpStore %43 %27 -%44 = OpFAdd %5 %24 %45 -%46 = OpFAdd %5 %27 %45 -%47 = OpAccessChain %37 %20 %15 -OpStore %47 %44 -%48 = OpAccessChain %37 %20 %35 -OpStore %48 %46 +%37 = OpFAdd %5 %24 %38 +%39 = OpFAdd %5 %27 %38 +%41 = OpAccessChain %40 %13 %23 +OpStore %41 %29 +%42 = OpAccessChain %40 %13 %26 +OpStore %42 %31 +%43 = OpAccessChain %40 %13 %15 +OpStore %43 %33 +%44 = OpAccessChain %40 %13 %35 +OpStore %44 %36 +%45 = OpAccessChain %40 %20 %23 +OpStore %45 %24 +%46 = OpAccessChain %40 %20 %26 +OpStore %46 %27 +%47 = OpAccessChain %40 %20 %15 +OpStore %47 %37 +%48 = OpAccessChain %40 %20 %35 +OpStore %48 %39 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/semantics/clip-distance-rows.vert b/reference/shaders/semantics/clip-distance-rows.vert index ec89d09..e57eb95 100644 --- a/reference/shaders/semantics/clip-distance-rows.vert +++ b/reference/shaders/semantics/clip-distance-rows.vert @@ -53,8 +53,8 @@ OpDecorate %17 BuiltIn ClipDistance %20 = OpConstant %13 0 %23 = OpConstant %13 1 %28 = OpConstant %13 3 -%30 = OpTypePointer Output %5 -%37 = OpConstant %5 1 +%31 = OpConstant %5 1 +%32 = OpTypePointer Output %5 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %39 @@ -68,19 +68,19 @@ OpBranch %39 %26 = OpLoad %5 %25 %27 = OpAccessChain %9 %8 %28 %29 = OpLoad %5 %27 -%31 = OpAccessChain %30 %12 %20 -OpStore %31 %21 -%32 = OpAccessChain %30 %12 %23 -OpStore %32 %24 -%33 = OpAccessChain %30 %12 %14 -OpStore %33 %26 -%34 = OpAccessChain %30 %12 %28 -OpStore %34 %29 -%35 = OpAccessChain %30 %17 %20 -OpStore %35 %18 -%36 = OpFAdd %5 %18 %37 -%38 = OpAccessChain %30 %17 %23 -OpStore %38 %36 +%30 = OpFAdd %5 %18 %31 +%33 = OpAccessChain %32 %12 %20 +OpStore %33 %21 +%34 = OpAccessChain %32 %12 %23 +OpStore %34 %24 +%35 = OpAccessChain %32 %12 %14 +OpStore %35 %26 +%36 = OpAccessChain %32 %12 %28 +OpStore %36 %29 +%37 = OpAccessChain %32 %17 %20 +OpStore %37 %18 +%38 = OpAccessChain %32 %17 %23 +OpStore %38 %30 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/semantics/coverage.frag b/reference/shaders/semantics/coverage.frag index b41b0f6..1b8066d 100644 --- a/reference/shaders/semantics/coverage.frag +++ b/reference/shaders/semantics/coverage.frag @@ -44,10 +44,10 @@ OpDecorate %15 BuiltIn SampleMask %15 = OpVariable %14 Input %16 = OpTypePointer Input %9 %18 = OpConstant %9 0 -%20 = OpTypePointer Output %5 -%22 = OpConstant %5 1 -%25 = OpConstant %9 2 -%27 = OpConstant %9 3 +%21 = OpConstant %9 3 +%22 = OpTypePointer Output %5 +%24 = OpConstant %5 1 +%27 = OpConstant %9 2 %29 = OpTypePointer Output %9 %3 = OpFunction %1 None %2 %4 = OpLabel @@ -55,17 +55,17 @@ OpBranch %31 %31 = OpLabel %17 = OpAccessChain %16 %15 %18 %19 = OpLoad %9 %17 -%21 = OpAccessChain %20 %8 %18 -OpStore %21 %22 -%23 = OpAccessChain %20 %8 %10 -OpStore %23 %22 -%24 = OpAccessChain %20 %8 %25 -OpStore %24 %22 -%26 = OpAccessChain %20 %8 %27 -OpStore %26 %22 -%28 = OpBitwiseAnd %9 %19 %27 +%20 = OpBitwiseAnd %9 %19 %21 +%23 = OpAccessChain %22 %8 %18 +OpStore %23 %24 +%25 = OpAccessChain %22 %8 %10 +OpStore %25 %24 +%26 = OpAccessChain %22 %8 %27 +OpStore %26 %24 +%28 = OpAccessChain %22 %8 %21 +OpStore %28 %24 %30 = OpAccessChain %29 %13 %18 -OpStore %30 %28 +OpStore %30 %20 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/semantics/inner-coverage.noglsl.frag b/reference/shaders/semantics/inner-coverage.noglsl.frag index 27f0f81..2d574d7 100644 --- a/reference/shaders/semantics/inner-coverage.noglsl.frag +++ b/reference/shaders/semantics/inner-coverage.noglsl.frag @@ -1,71 +1,62 @@ ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 41 +; Bound: 33 ; Schema: 0 OpCapability Shader OpCapability FragmentFullyCoveredEXT OpExtension "SPV_EXT_fragment_fully_covered" OpMemoryModel Logical GLSL450 -OpEntryPoint Fragment %3 "main" %7 %12 %21 +OpEntryPoint Fragment %3 "main" %7 %10 OpExecutionMode %3 OriginUpperLeft OpName %3 "main" OpName %7 "SV_Target" -OpName %26 "discard_state" -OpName %33 "discard_exit" +OpName %18 "discard_state" +OpName %25 "discard_exit" OpDecorate %7 Location 0 -OpDecorate %12 BuiltIn SampleMask -OpDecorate %21 BuiltIn FullyCoveredEXT +OpDecorate %10 BuiltIn FullyCoveredEXT %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 %6 = OpTypePointer Output %5 %7 = OpVariable %6 Output -%8 = OpTypeInt 32 0 -%9 = OpConstant %8 1 -%10 = OpTypeArray %8 %9 -%11 = OpTypePointer Input %10 -%12 = OpVariable %11 Input -%13 = OpTypePointer Input %8 -%15 = OpConstant %8 0 -%17 = OpTypeBool -%20 = OpTypePointer Input %17 -%21 = OpVariable %20 Input -%25 = OpTypePointer Private %17 -%26 = OpVariable %25 Private -%27 = OpConstantFalse %17 -%28 = OpConstant %5 1 -%32 = OpConstantTrue %17 +%8 = OpTypeBool +%9 = OpTypePointer Input %8 +%10 = OpVariable %9 Input +%12 = OpTypeInt 32 0 +%14 = OpConstant %12 1 +%15 = OpConstant %12 0 +%17 = OpTypePointer Private %8 +%18 = OpVariable %17 Private +%19 = OpConstantFalse %8 +%20 = OpConstant %5 1 +%24 = OpConstantTrue %8 %3 = OpFunction %1 None %2 %4 = OpLabel -OpStore %26 %27 -OpBranch %29 -%29 = OpLabel -%14 = OpAccessChain %13 %12 %15 -%16 = OpLoad %8 %14 -%18 = OpIEqual %17 %15 %16 -%19 = OpSelect %8 %18 %9 %15 -%22 = OpLoad %17 %21 -%23 = OpSelect %8 %22 %9 %15 -%24 = OpIEqual %17 %23 %15 -OpSelectionMerge %31 None -OpBranchConditional %24 %30 %31 -%30 = OpLabel -OpStore %26 %32 -OpBranch %31 -%31 = OpLabel -OpStore %7 %28 -%39 = OpFunctionCall %1 %33 +OpStore %18 %19 +OpBranch %21 +%21 = OpLabel +%11 = OpLoad %8 %10 +%13 = OpSelect %12 %11 %14 %15 +%16 = OpIEqual %8 %13 %15 +OpSelectionMerge %23 None +OpBranchConditional %16 %22 %23 +%22 = OpLabel +OpStore %18 %24 +OpBranch %23 +%23 = OpLabel +OpStore %7 %20 +%31 = OpFunctionCall %1 %25 OpReturn OpFunctionEnd -%33 = OpFunction %1 None %2 -%34 = OpLabel -%37 = OpLoad %17 %26 -OpSelectionMerge %36 None -OpBranchConditional %37 %35 %36 -%35 = OpLabel +%25 = OpFunction %1 None %2 +%26 = OpLabel +%29 = OpLoad %8 %18 +OpSelectionMerge %28 None +OpBranchConditional %29 %27 %28 +%27 = OpLabel OpKill -%36 = OpLabel +%28 = OpLabel OpReturn OpFunctionEnd diff --git a/reference/shaders/semantics/stencil-ref.frag b/reference/shaders/semantics/stencil-ref.frag index 222cbba..5d86f7e 100644 --- a/reference/shaders/semantics/stencil-ref.frag +++ b/reference/shaders/semantics/stencil-ref.frag @@ -46,26 +46,26 @@ OpDecorate %13 BuiltIn FragStencilRefEXT %11 = OpTypeInt 32 0 %12 = OpTypePointer Output %11 %13 = OpVariable %12 Output -%15 = OpTypePointer Output %5 -%17 = OpConstant %11 0 -%19 = OpConstant %11 1 -%21 = OpConstant %11 2 -%23 = OpConstant %11 3 +%16 = OpTypePointer Output %5 +%18 = OpConstant %11 0 +%20 = OpConstant %11 1 +%22 = OpConstant %11 2 +%24 = OpConstant %11 3 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %25 %25 = OpLabel %14 = OpLoad %5 %7 -%16 = OpAccessChain %15 %10 %17 -OpStore %16 %14 -%18 = OpAccessChain %15 %10 %19 -OpStore %18 %14 -%20 = OpAccessChain %15 %10 %21 -OpStore %20 %14 -%22 = OpAccessChain %15 %10 %23 -OpStore %22 %14 -%24 = OpConvertFToU %11 %14 -OpStore %13 %24 +%15 = OpConvertFToU %11 %14 +%17 = OpAccessChain %16 %10 %18 +OpStore %17 %14 +%19 = OpAccessChain %16 %10 %20 +OpStore %19 %14 +%21 = OpAccessChain %16 %10 %22 +OpStore %21 %14 +%23 = OpAccessChain %16 %10 %24 +OpStore %23 %14 +OpStore %13 %15 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/stages/hull-arrays.tesc b/reference/shaders/stages/hull-arrays.tesc index 7c4f171..ecc21e4 100644 --- a/reference/shaders/stages/hull-arrays.tesc +++ b/reference/shaders/stages/hull-arrays.tesc @@ -15,10 +15,10 @@ void hull_main() void patch_main() { - gl_TessLevelInner[0u] = HSValue[1u][2u] + HSValue[0u][1u]; gl_TessLevelOuter[0u] = VSValue[0u][2u]; gl_TessLevelOuter[1u] = VSValue[1u][1u]; gl_TessLevelOuter[2u] = VSValue[2u][0u]; + gl_TessLevelInner[0u] = HSValue[1u][2u] + HSValue[0u][1u]; PATCH[0u] = VSValue[3u][2u]; PATCH[1u] = VSValue[4u][1u]; } @@ -147,28 +147,28 @@ OpBranch %85 %55 = OpAccessChain %44 %16 %34 %13 %56 = OpLoad %5 %55 %57 = OpFAdd %5 %56 %54 -%58 = OpAccessChain %44 %23 %31 -OpStore %58 %57 -%59 = OpAccessChain %29 %12 %31 %13 -%60 = OpLoad %5 %59 -%61 = OpAccessChain %44 %20 %31 -OpStore %61 %60 -%62 = OpAccessChain %29 %12 %34 %34 +%58 = OpAccessChain %29 %12 %31 %13 +%59 = OpLoad %5 %58 +%60 = OpAccessChain %29 %12 %34 %34 +%61 = OpLoad %5 %60 +%62 = OpAccessChain %29 %12 %13 %31 %63 = OpLoad %5 %62 -%64 = OpAccessChain %44 %20 %34 -OpStore %64 %63 -%65 = OpAccessChain %29 %12 %13 %31 -%66 = OpLoad %5 %65 -%67 = OpAccessChain %44 %20 %13 -OpStore %67 %66 -%68 = OpAccessChain %29 %12 %7 %13 -%69 = OpLoad %5 %68 -%70 = OpAccessChain %44 %24 %31 -OpStore %70 %69 -%71 = OpAccessChain %29 %12 %17 %34 -%72 = OpLoad %5 %71 +%64 = OpAccessChain %29 %12 %7 %13 +%65 = OpLoad %5 %64 +%66 = OpAccessChain %29 %12 %17 %34 +%67 = OpLoad %5 %66 +%68 = OpAccessChain %44 %20 %31 +OpStore %68 %59 +%69 = OpAccessChain %44 %20 %34 +OpStore %69 %61 +%70 = OpAccessChain %44 %20 %13 +OpStore %70 %63 +%71 = OpAccessChain %44 %23 %31 +OpStore %71 %57 +%72 = OpAccessChain %44 %24 %31 +OpStore %72 %65 %73 = OpAccessChain %44 %24 %34 -OpStore %73 %72 +OpStore %73 %67 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/stages/hull-single-cp.tesc b/reference/shaders/stages/hull-single-cp.tesc index b7aa73f..c6669e3 100644 --- a/reference/shaders/stages/hull-single-cp.tesc +++ b/reference/shaders/stages/hull-single-cp.tesc @@ -12,10 +12,10 @@ void hull_main() void patch_main() { - gl_TessLevelInner[0u] = HSValue[0u]; gl_TessLevelOuter[0u] = VSValue[0u]; gl_TessLevelOuter[1u] = VSValue[1u]; gl_TessLevelOuter[2u] = VSValue[2u]; + gl_TessLevelInner[0u] = HSValue[0u]; PATCH = VSValue[3u]; } @@ -84,7 +84,7 @@ OpDecorate %41 BuiltIn InvocationId %31 = OpConstant %6 0 %40 = OpTypePointer Input %6 %41 = OpVariable %40 Input -%56 = OpConstant %6 3 +%52 = OpConstant %6 3 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %60 @@ -116,23 +116,23 @@ OpBranch %64 %64 = OpLabel %43 = OpAccessChain %23 %14 %31 %44 = OpLoad %5 %43 -%45 = OpAccessChain %23 %22 %31 -OpStore %45 %44 -%46 = OpAccessChain %29 %10 %31 -%47 = OpLoad %5 %46 -%48 = OpAccessChain %23 %18 %31 -OpStore %48 %47 -%49 = OpAccessChain %29 %10 %11 +%45 = OpAccessChain %29 %10 %31 +%46 = OpLoad %5 %45 +%47 = OpAccessChain %29 %10 %11 +%48 = OpLoad %5 %47 +%49 = OpAccessChain %29 %10 %19 %50 = OpLoad %5 %49 -%51 = OpAccessChain %23 %18 %11 -OpStore %51 %50 -%52 = OpAccessChain %29 %10 %19 -%53 = OpLoad %5 %52 -%54 = OpAccessChain %23 %18 %19 -OpStore %54 %53 -%55 = OpAccessChain %29 %10 %56 -%57 = OpLoad %5 %55 -OpStore %24 %57 +%51 = OpAccessChain %29 %10 %52 +%53 = OpLoad %5 %51 +%54 = OpAccessChain %23 %18 %31 +OpStore %54 %46 +%55 = OpAccessChain %23 %18 %11 +OpStore %55 %48 +%56 = OpAccessChain %23 %18 %19 +OpStore %56 %50 +%57 = OpAccessChain %23 %22 %31 +OpStore %57 %44 +OpStore %24 %53 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/stages/hull.tesc b/reference/shaders/stages/hull.tesc index e4fac52..d0e5f68 100644 --- a/reference/shaders/stages/hull.tesc +++ b/reference/shaders/stages/hull.tesc @@ -31,12 +31,12 @@ void patch_main() _66[3u] = VSValue[3u]; uint _79 = uint(int(VSValue[0u])); _66[_79] += 40.0; - gl_TessLevelInner[0u] = HSValue[0u]; - gl_TessLevelInner[1u] = HSValue[1u]; gl_TessLevelOuter[0u] = VSValue[0u]; gl_TessLevelOuter[1u] = VSValue[1u]; gl_TessLevelOuter[2u] = VSValue[2u]; gl_TessLevelOuter[3u] = HSValue[0u] + VSValue[0u]; + gl_TessLevelInner[0u] = HSValue[0u]; + gl_TessLevelInner[1u] = HSValue[1u]; PATCH = _12._m0[0u].x + _66[3u]; } @@ -122,7 +122,7 @@ OpDecorate %33 BuiltIn InvocationId %41 = OpTypePointer Function %7 %50 = OpConstant %5 3 %57 = OpConstant %7 40 -%95 = OpTypePointer Uniform %8 +%89 = OpTypePointer Uniform %8 %102 = OpTypeBool %3 = OpFunction %1 None %2 %4 = OpLabel @@ -206,27 +206,27 @@ OpStore %78 %77 OpStore %80 %82 %83 = OpAccessChain %26 %20 %39 %84 = OpLoad %7 %83 -%85 = OpAccessChain %26 %25 %39 -OpStore %85 %84 -%86 = OpAccessChain %26 %20 %6 -%87 = OpLoad %7 %86 -%88 = OpAccessChain %26 %25 %6 -OpStore %88 %87 -%89 = OpAccessChain %26 %21 %39 -OpStore %89 %68 -%90 = OpAccessChain %26 %21 %6 -OpStore %90 %71 -%91 = OpAccessChain %26 %21 %22 -OpStore %91 %74 -%92 = OpFAdd %7 %84 %68 -%93 = OpAccessChain %26 %21 %50 -OpStore %93 %92 -%94 = OpLoad %7 %78 -%96 = OpAccessChain %95 %12 %39 %39 -%97 = OpLoad %8 %96 -%98 = OpCompositeExtract %7 %97 0 -%99 = OpFAdd %7 %98 %94 -OpStore %27 %99 +%85 = OpAccessChain %26 %20 %6 +%86 = OpLoad %7 %85 +%87 = OpFAdd %7 %84 %68 +%88 = OpLoad %7 %78 +%90 = OpAccessChain %89 %12 %39 %39 +%91 = OpLoad %8 %90 +%92 = OpCompositeExtract %7 %91 0 +%93 = OpFAdd %7 %92 %88 +%94 = OpAccessChain %26 %21 %39 +OpStore %94 %68 +%95 = OpAccessChain %26 %21 %6 +OpStore %95 %71 +%96 = OpAccessChain %26 %21 %22 +OpStore %96 %74 +%97 = OpAccessChain %26 %21 %50 +OpStore %97 %87 +%98 = OpAccessChain %26 %25 %39 +OpStore %98 %84 +%99 = OpAccessChain %26 %25 %6 +OpStore %99 %86 +OpStore %27 %93 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/stages/stage-input-output.16bit-io.frag b/reference/shaders/stages/stage-input-output.16bit-io.frag index 06852c4..7229252 100644 --- a/reference/shaders/stages/stage-input-output.16bit-io.frag +++ b/reference/shaders/stages/stage-input-output.16bit-io.frag @@ -81,10 +81,10 @@ OpDecorate %21 Location 2 %30 = OpConstant %25 1 %33 = OpTypePointer Input %5 %39 = OpConstant %5 0x1p+3 -%41 = OpTypePointer Output %5 -%45 = OpConstant %13 65528 -%47 = OpTypePointer Output %9 -%53 = OpConstant %13 2 +%42 = OpConstant %13 65528 +%45 = OpConstant %13 2 +%46 = OpTypePointer Output %5 +%49 = OpTypePointer Output %9 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %54 @@ -102,20 +102,20 @@ OpBranch %54 %37 = OpLoad %5 %36 %38 = OpFMul %5 %35 %39 %40 = OpFMul %5 %37 %39 -%42 = OpAccessChain %41 %17 %26 -OpStore %42 %38 -%43 = OpAccessChain %41 %17 %30 -OpStore %43 %40 -%44 = OpIMul %13 %28 %45 -%46 = OpIMul %13 %32 %45 -%48 = OpAccessChain %47 %19 %26 -%49 = OpBitcast %9 %44 -OpStore %48 %49 -%50 = OpAccessChain %47 %19 %30 -%51 = OpBitcast %9 %46 +%41 = OpIMul %13 %28 %42 +%43 = OpIMul %13 %32 %42 +%44 = OpShiftLeftLogical %13 %22 %45 +%47 = OpAccessChain %46 %17 %26 +OpStore %47 %38 +%48 = OpAccessChain %46 %17 %30 +OpStore %48 %40 +%50 = OpAccessChain %49 %19 %26 +%51 = OpBitcast %9 %41 OpStore %50 %51 -%52 = OpShiftLeftLogical %13 %22 %53 -OpStore %21 %52 +%52 = OpAccessChain %49 %19 %30 +%53 = OpBitcast %9 %43 +OpStore %52 %53 +OpStore %21 %44 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/stages/stage-input-output.frag b/reference/shaders/stages/stage-input-output.frag index 98d5992..1d2acfd 100644 --- a/reference/shaders/stages/stage-input-output.frag +++ b/reference/shaders/stages/stage-input-output.frag @@ -87,10 +87,10 @@ OpDecorate %21 Location 2 %34 = OpTypePointer Input %5 %37 = OpTypeFloat 16 %43 = OpConstant %37 0x1p+3 -%45 = OpTypePointer Output %5 -%51 = OpConstant %23 65528 -%53 = OpTypePointer Output %9 -%59 = OpConstant %23 2 +%46 = OpConstant %23 65528 +%49 = OpConstant %23 2 +%50 = OpTypePointer Output %5 +%55 = OpTypePointer Output %9 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %61 @@ -111,22 +111,22 @@ OpBranch %61 %41 = OpFConvert %37 %40 %42 = OpFMul %37 %38 %43 %44 = OpFMul %37 %41 %43 -%46 = OpAccessChain %45 %17 %27 -%47 = OpFConvert %5 %42 -OpStore %46 %47 -%48 = OpAccessChain %45 %17 %31 -%49 = OpFConvert %5 %44 -OpStore %48 %49 -%50 = OpIMul %23 %29 %51 -%52 = OpIMul %23 %33 %51 -%54 = OpAccessChain %53 %19 %27 -%55 = OpSConvert %9 %50 -OpStore %54 %55 -%56 = OpAccessChain %53 %19 %31 -%57 = OpSConvert %9 %52 +%45 = OpIMul %23 %29 %46 +%47 = OpIMul %23 %33 %46 +%48 = OpShiftLeftLogical %23 %24 %49 +%51 = OpAccessChain %50 %17 %27 +%52 = OpFConvert %5 %42 +OpStore %51 %52 +%53 = OpAccessChain %50 %17 %31 +%54 = OpFConvert %5 %44 +OpStore %53 %54 +%56 = OpAccessChain %55 %19 %27 +%57 = OpSConvert %9 %45 OpStore %56 %57 -%58 = OpShiftLeftLogical %23 %24 %59 -%60 = OpUConvert %13 %58 +%58 = OpAccessChain %55 %19 %31 +%59 = OpSConvert %9 %47 +OpStore %58 %59 +%60 = OpUConvert %13 %48 OpStore %21 %60 OpReturn OpFunctionEnd diff --git a/reference/shaders/stages/vertex-array-output.vert b/reference/shaders/stages/vertex-array-output.vert index 1995650..5d146ad 100644 --- a/reference/shaders/stages/vertex-array-output.vert +++ b/reference/shaders/stages/vertex-array-output.vert @@ -73,10 +73,10 @@ OpDecorate %18 Location 5 %24 = OpConstant %11 1 %27 = OpConstant %11 2 %30 = OpConstant %11 3 -%32 = OpTypePointer Output %5 -%42 = OpConstant %5 1 -%51 = OpConstant %5 2 -%60 = OpConstant %5 3 +%33 = OpConstant %5 1 +%38 = OpConstant %5 2 +%43 = OpConstant %5 3 +%47 = OpTypePointer Output %5 %3 = OpFunction %1 None %2 %4 = OpLabel OpBranch %72 @@ -89,65 +89,65 @@ OpBranch %72 %28 = OpLoad %5 %26 %29 = OpAccessChain %19 %8 %30 %31 = OpLoad %5 %29 -%33 = OpAccessChain %32 %10 %21 -OpStore %33 %22 -%34 = OpAccessChain %32 %10 %24 -OpStore %34 %25 -%35 = OpAccessChain %32 %10 %27 -OpStore %35 %28 -%36 = OpAccessChain %32 %10 %30 -OpStore %36 %31 -%37 = OpAccessChain %32 %15 %21 %21 -OpStore %37 %22 -%38 = OpAccessChain %32 %15 %21 %24 -OpStore %38 %25 -%39 = OpAccessChain %32 %15 %21 %27 -OpStore %39 %28 -%40 = OpAccessChain %32 %15 %21 %30 -OpStore %40 %31 -%41 = OpFAdd %5 %22 %42 -%43 = OpFAdd %5 %25 %42 -%44 = OpFAdd %5 %28 %42 -%45 = OpFAdd %5 %31 %42 -%46 = OpAccessChain %32 %15 %24 %21 -OpStore %46 %41 -%47 = OpAccessChain %32 %15 %24 %24 -OpStore %47 %43 -%48 = OpAccessChain %32 %15 %24 %27 -OpStore %48 %44 -%49 = OpAccessChain %32 %15 %24 %30 -OpStore %49 %45 -%50 = OpFAdd %5 %22 %51 -%52 = OpFAdd %5 %25 %51 -%53 = OpFAdd %5 %28 %51 -%54 = OpFAdd %5 %31 %51 -%55 = OpAccessChain %32 %15 %27 %21 -OpStore %55 %50 -%56 = OpAccessChain %32 %15 %27 %24 -OpStore %56 %52 -%57 = OpAccessChain %32 %15 %27 %27 -OpStore %57 %53 -%58 = OpAccessChain %32 %15 %27 %30 -OpStore %58 %54 -%59 = OpFAdd %5 %22 %60 -%61 = OpFAdd %5 %25 %60 -%62 = OpFAdd %5 %28 %60 -%63 = OpFAdd %5 %31 %60 -%64 = OpAccessChain %32 %15 %30 %21 -OpStore %64 %59 -%65 = OpAccessChain %32 %15 %30 %24 -OpStore %65 %61 -%66 = OpAccessChain %32 %15 %30 %27 -OpStore %66 %62 -%67 = OpAccessChain %32 %15 %30 %30 -OpStore %67 %63 -%68 = OpAccessChain %32 %18 %21 +%32 = OpFAdd %5 %22 %33 +%34 = OpFAdd %5 %25 %33 +%35 = OpFAdd %5 %28 %33 +%36 = OpFAdd %5 %31 %33 +%37 = OpFAdd %5 %22 %38 +%39 = OpFAdd %5 %25 %38 +%40 = OpFAdd %5 %28 %38 +%41 = OpFAdd %5 %31 %38 +%42 = OpFAdd %5 %22 %43 +%44 = OpFAdd %5 %25 %43 +%45 = OpFAdd %5 %28 %43 +%46 = OpFAdd %5 %31 %43 +%48 = OpAccessChain %47 %10 %21 +OpStore %48 %22 +%49 = OpAccessChain %47 %10 %24 +OpStore %49 %25 +%50 = OpAccessChain %47 %10 %27 +OpStore %50 %28 +%51 = OpAccessChain %47 %10 %30 +OpStore %51 %31 +%52 = OpAccessChain %47 %15 %21 %21 +OpStore %52 %22 +%53 = OpAccessChain %47 %15 %21 %24 +OpStore %53 %25 +%54 = OpAccessChain %47 %15 %21 %27 +OpStore %54 %28 +%55 = OpAccessChain %47 %15 %21 %30 +OpStore %55 %31 +%56 = OpAccessChain %47 %15 %24 %21 +OpStore %56 %32 +%57 = OpAccessChain %47 %15 %24 %24 +OpStore %57 %34 +%58 = OpAccessChain %47 %15 %24 %27 +OpStore %58 %35 +%59 = OpAccessChain %47 %15 %24 %30 +OpStore %59 %36 +%60 = OpAccessChain %47 %15 %27 %21 +OpStore %60 %37 +%61 = OpAccessChain %47 %15 %27 %24 +OpStore %61 %39 +%62 = OpAccessChain %47 %15 %27 %27 +OpStore %62 %40 +%63 = OpAccessChain %47 %15 %27 %30 +OpStore %63 %41 +%64 = OpAccessChain %47 %15 %30 %21 +OpStore %64 %42 +%65 = OpAccessChain %47 %15 %30 %24 +OpStore %65 %44 +%66 = OpAccessChain %47 %15 %30 %27 +OpStore %66 %45 +%67 = OpAccessChain %47 %15 %30 %30 +OpStore %67 %46 +%68 = OpAccessChain %47 %18 %21 OpStore %68 %22 -%69 = OpAccessChain %32 %18 %24 +%69 = OpAccessChain %47 %18 %24 OpStore %69 %25 -%70 = OpAccessChain %32 %18 %27 +%70 = OpAccessChain %47 %18 %27 OpStore %70 %28 -%71 = OpAccessChain %32 %18 %30 +%71 = OpAccessChain %47 %18 %30 OpStore %71 %31 OpReturn OpFunctionEnd diff --git a/reference/shaders/vectorization/copy-byte-address.ssbo.comp b/reference/shaders/vectorization/copy-byte-address.ssbo.comp index 4ff513d..2a2b9eb 100644 --- a/reference/shaders/vectorization/copy-byte-address.ssbo.comp +++ b/reference/shaders/vectorization/copy-byte-address.ssbo.comp @@ -183,6 +183,7 @@ OpCapability Float16 OpCapability Float64 OpCapability Int64 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %114 %119 OpExecutionMode %3 LocalSize 2 1 1 diff --git a/reference/shaders/vectorization/copy-half2.ssbo.comp b/reference/shaders/vectorization/copy-half2.ssbo.comp index 59f9cf3..42704dd 100644 --- a/reference/shaders/vectorization/copy-half2.ssbo.comp +++ b/reference/shaders/vectorization/copy-half2.ssbo.comp @@ -37,6 +37,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %18 OpExecutionMode %3 LocalSize 2 1 1 diff --git a/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp index 02f759d..6f04b21 100644 --- a/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp +++ b/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp @@ -61,6 +61,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" diff --git a/reference/shaders/vectorization/copy-half3.ssbo.comp b/reference/shaders/vectorization/copy-half3.ssbo.comp index 72b6c4a..bda36cb 100644 --- a/reference/shaders/vectorization/copy-half3.ssbo.comp +++ b/reference/shaders/vectorization/copy-half3.ssbo.comp @@ -38,6 +38,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %18 OpExecutionMode %3 LocalSize 2 1 1 diff --git a/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp index 82224d6..48b394e 100644 --- a/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp +++ b/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp @@ -70,6 +70,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" diff --git a/reference/shaders/vectorization/copy-half4.ssbo.comp b/reference/shaders/vectorization/copy-half4.ssbo.comp index cd0f4cd..ba2e329 100644 --- a/reference/shaders/vectorization/copy-half4.ssbo.comp +++ b/reference/shaders/vectorization/copy-half4.ssbo.comp @@ -37,6 +37,7 @@ void main() OpCapability Shader OpCapability Float16 OpCapability Int16 +OpCapability StorageBuffer16BitAccess OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %18 OpExecutionMode %3 LocalSize 2 1 1 diff --git a/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp index 145e9c4..7661e74 100644 --- a/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp +++ b/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp @@ -61,6 +61,7 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability GroupNonUniformBallot +OpCapability StorageBuffer16BitAccess OpCapability RuntimeDescriptorArray OpCapability PhysicalStorageBufferAddresses OpExtension "SPV_EXT_descriptor_indexing" diff --git a/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil Binary files differnew file mode 100644 index 0000000..dc55350 --- /dev/null +++ b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil diff --git a/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil Binary files differnew file mode 100644 index 0000000..0739985 --- /dev/null +++ b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil diff --git a/shaders/resources/cbv-legacy-fp16-fp64.frag b/shaders/resources/cbv-legacy-fp16-fp64.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag new file mode 100644 index 0000000..7bc5a19 --- /dev/null +++ b/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag @@ -0,0 +1,21 @@ +cbuffer Cbuf +{ + float4 a; + half4 b; + int64_t4 c; +}; + +cbuffer Cbuf1 : register(b1) +{ + float4 d; +}; + +cbuffer Cbuf2 : register(b2) +{ + double4 e; +}; + +float4 main() : SV_Target +{ + return a + float4(b) + float4(c) + d + float4(e); +} diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag new file mode 100644 index 0000000..2ae7750 --- /dev/null +++ b/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag @@ -0,0 +1,12 @@ +cbuffer Cbuf +{ + float a[64]; + float2 b[64]; + float3 c[64]; + float4 d[64]; +}; + +float4 main(nointerpolation uint index : INDEX) : SV_Target +{ + return a[index].xxxx + b[index].xyxy + c[index].xyzx + d[index]; +} diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss b/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss new file mode 100644 index 0000000..3ce3a8e --- /dev/null +++ b/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss @@ -0,0 +1,26 @@ +cbuffer A : register(b0, space15) +{ + float a; + uint b; + int c; +}; + +cbuffer B : register(b1, space15) +{ + float2 a2; + uint b2; + int c2; +}; + +struct Payload +{ + float4 f; + int4 i; +}; + +[shader("miss")] +void main(inout Payload payload) +{ + payload.f = float4(a, b, c, 1.0); + payload.i = int4(a2, b2, c); +} diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag new file mode 100644 index 0000000..039e61c --- /dev/null +++ b/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag @@ -0,0 +1,14 @@ +struct Half8 { min16float4 lo; min16float4 hi; }; + +cbuffer Cbuf +{ + float4 a; + Half8 b; + int64_t4 c; +}; + +float4 main() : SV_Target +{ + Half8 half8 = b; + return a + float4(half8.lo) + float4(half8.hi) + float4(c); +} diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag new file mode 100644 index 0000000..fbc0db9 --- /dev/null +++ b/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag @@ -0,0 +1,18 @@ +cbuffer A : register(b0, space0) +{ + float a; + uint b; + int c; +}; + +cbuffer B : register(b0, space1) +{ + float2 a2; + uint b2; + int c2; +}; + +float2 main() : SV_Target +{ + return (a + a2) + (b + b2) + (c + c2); +} diff --git a/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag b/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag new file mode 100644 index 0000000..39cc769 --- /dev/null +++ b/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag @@ -0,0 +1,15 @@ +cbuffer Buf : register(b0) +{ + float4 a; + half4 c; + int64_t4 b; +}; + +float4 main() : SV_Target +{ + float4 res = 0.0.xxxx; + res += a; + res += float4(b); + res += float4(c); + return res; +} diff --git a/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag new file mode 100644 index 0000000..7bc5a19 --- /dev/null +++ b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag @@ -0,0 +1,21 @@ +cbuffer Cbuf +{ + float4 a; + half4 b; + int64_t4 c; +}; + +cbuffer Cbuf1 : register(b1) +{ + float4 d; +}; + +cbuffer Cbuf2 : register(b2) +{ + double4 e; +}; + +float4 main() : SV_Target +{ + return a + float4(b) + float4(c) + d + float4(e); +} diff --git a/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag new file mode 100644 index 0000000..7bc5a19 --- /dev/null +++ b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag @@ -0,0 +1,21 @@ +cbuffer Cbuf +{ + float4 a; + half4 b; + int64_t4 c; +}; + +cbuffer Cbuf1 : register(b1) +{ + float4 d; +}; + +cbuffer Cbuf2 : register(b2) +{ + double4 e; +}; + +float4 main() : SV_Target +{ + return a + float4(b) + float4(c) + d + float4(e); +} diff --git a/test_shaders.py b/test_shaders.py index 5afc278..8cb46d6 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -97,6 +97,8 @@ def cross_compile_dxil(shader, args, paths, is_asm): dxil_cmd += ['-denorm', 'ftz'] if '.denorm-preserve.' in shader: dxil_cmd += ['-denorm', 'preserve'] + if '.no-legacy-cbuf-layout.' in shader: + dxil_cmd += ['-no-legacy-cbuf-layout'] subprocess.check_call(dxil_cmd) else: dxil_path = shader @@ -343,7 +345,7 @@ def main(): action = 'store_true', help = 'Execute tests in parallel. Useful for doing regression quickly, but bad for debugging and stat output.') parser.add_argument('--dxc', - default = './external/dxc-build/output/bin/dxc', + default = './external/dxc-build/bin/dxc', help = 'Explicit path to DXC') parser.add_argument('--dxil-spirv', default = './dxil-spirv', |