Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/HansKristian-Work/dxil-spirv.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hans-Kristian Arntzen <post@arntzen-software.no>, 2022-03-30 19:51:32 +0300
committer: GitHub <noreply@github.com>, 2022-03-30 19:51:32 +0300
commit: 76b6e8a4e713124fd731710ccaa5e17f7bde5b7b (patch)
tree: 6f5302e05be16952247161715874ae933b804559
parent: 53cf014c04cfa3d1509e3bc474fcce913b625f2e (diff)
parent: 5bbf6a4df3ddad372e8131c637cf2f38ddd256db (diff)
Merge pull request #104 from HansKristian-Work/modern-cbuffer-layout
Implement modern CBV layout
-rwxr-xr-x build_dxc.sh 4
-rw-r--r-- cfg_structurizer.cpp 4
-rwxr-xr-x checkout_dxc.sh 11
-rw-r--r-- dxil_converter.cpp 239
-rw-r--r-- dxil_converter.hpp 3
-rw-r--r-- opcodes/converter_impl.hpp 13
-rw-r--r-- opcodes/dxil/dxil_arithmetic.cpp 33
-rw-r--r-- opcodes/dxil/dxil_buffer.cpp 293
-rw-r--r-- opcodes/dxil/dxil_buffer.hpp 10
-rw-r--r-- opcodes/dxil/dxil_common.cpp 285
-rw-r--r-- opcodes/dxil/dxil_common.hpp 25
-rw-r--r-- opcodes/dxil/dxil_resources.cpp 560
-rw-r--r-- opcodes/dxil/dxil_resources.hpp 1
-rw-r--r-- opcodes/opcodes_dxil_builtins.cpp 80
-rw-r--r-- opcodes/opcodes_llvm_builtins.cpp 29
-rw-r--r-- reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil 105
-rw-r--r-- reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil 146
-rw-r--r-- reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag 461
-rw-r--r-- reference/shaders/dxil-builtin/clip.demote-to-helper.frag 85
-rw-r--r-- reference/shaders/dxil-builtin/clip.frag 117
-rw-r--r-- reference/shaders/dxil-builtin/discard.demote-to-helper.frag 67
-rw-r--r-- reference/shaders/dxil-builtin/discard.frag 101
-rw-r--r-- reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp 1
-rw-r--r-- reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp 1
-rw-r--r-- reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp 1
-rw-r--r-- reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag 84
-rw-r--r-- reference/shaders/dxil-builtin/wave-active-ballot-discard.frag 116
-rw-r--r-- reference/shaders/resources/buffer-16bit.ssbo.bindless.comp 1
-rw-r--r-- reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp 1
-rw-r--r-- reference/shaders/resources/buffer-16bit.ssbo.comp 1
-rw-r--r-- reference/shaders/resources/cbv-array-nonuniform.frag 2
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.frag 193
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag 263
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag 223
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag 244
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag 141
-rw-r--r-- reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag 164
-rw-r--r-- reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag 1
-rw-r--r-- reference/shaders/resources/cbv.bindless.root-constant.frag 1
-rw-r--r-- reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag 260
-rw-r--r-- reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag 161
-rw-r--r-- reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss 162
-rw-r--r-- reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag 179
-rw-r--r-- reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag 124
-rw-r--r-- reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag 313
-rw-r--r-- reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen 1
-rw-r--r-- reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss 14
-rw-r--r-- reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag 260
-rw-r--r-- reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag 231
-rw-r--r-- reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag 1
-rw-r--r-- reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag 1
-rw-r--r-- reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag 1
-rw-r--r-- reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag 1
-rw-r--r-- reference/shaders/semantics/clip-distance-flatten.vert 40
-rw-r--r-- reference/shaders/semantics/clip-distance-rows.vert 30
-rw-r--r-- reference/shaders/semantics/coverage.frag 28
-rw-r--r-- reference/shaders/semantics/inner-coverage.noglsl.frag 83
-rw-r--r-- reference/shaders/semantics/stencil-ref.frag 30
-rw-r--r-- reference/shaders/stages/hull-arrays.tesc 42
-rw-r--r-- reference/shaders/stages/hull-single-cp.tesc 36
-rw-r--r-- reference/shaders/stages/hull.tesc 48
-rw-r--r-- reference/shaders/stages/stage-input-output.16bit-io.frag 34
-rw-r--r-- reference/shaders/stages/stage-input-output.frag 38
-rw-r--r-- reference/shaders/stages/vertex-array-output.vert 120
-rw-r--r-- reference/shaders/vectorization/copy-byte-address.ssbo.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half2.ssbo.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half3.ssbo.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half4.ssbo.comp 1
-rw-r--r-- reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp 1
-rw-r--r-- shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil bin 0 -> 1428 bytes
-rw-r--r-- shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil bin 0 -> 1492 bytes
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.frag 14
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag 14
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag 14
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag 14
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.sm60.frag 14
-rw-r--r-- shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag 14
-rw-r--r-- shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag 21
-rw-r--r-- shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag 12
-rw-r--r-- shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss 26
-rw-r--r-- shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag 14
-rw-r--r-- shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag 18
-rw-r--r-- shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag 15
-rw-r--r-- shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag 21
-rw-r--r-- shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag 21
-rwxr-xr-x test_shaders.py 4
88 files changed, 5252 insertions, 1339 deletions
diff --git a/build_dxc.sh b/build_dxc.sh
index 411ddea..0db1b5b 100755
--- a/build_dxc.sh
+++ b/build_dxc.sh
@@ -13,6 +13,6 @@ fi
echo "Building DXC."
mkdir -p external/dxc-build
cd external/dxc-build
-cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -DCMAKE_INSTALL_PREFIX=output $(cat ../DirectXShaderCompiler/utils/cmake-predefined-config-params) -G Ninja -DSPIRV_WERROR=OFF
-cmake --build . --config $PROFILE --target install ${NPROC}
+cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -C ../DirectXShaderCompiler/cmake/caches/PredefinedParams.cmake -G Ninja -DSPIRV_WERROR=OFF
+cmake --build . --config $PROFILE ${NPROC}
diff --git a/cfg_structurizer.cpp b/cfg_structurizer.cpp
index d7b0ee1..c7497ae 100644
--- a/cfg_structurizer.cpp
+++ b/cfg_structurizer.cpp
@@ -1311,7 +1311,9 @@ void CFGStructurizer::insert_phi(PHINode &node)
merge_phi.type_id = module.get_builder().makeBoolType();
Operation *op = module.allocate_op(spv::OpSelect, module.allocate_id(), phi.type_id);
- op->add_ids({ merge_phi.id, dominated_incoming->id, frontier_phi.id });
+ op->add_id(merge_phi.id);
+ op->add_id(dominated_incoming->id);
+ op->add_id(frontier_phi.id);
dominated_incoming->block->ir.operations.push_back(op);
dominated_incoming->id = op->id;
diff --git a/checkout_dxc.sh b/checkout_dxc.sh
index 441c832..295e47e 100755
--- a/checkout_dxc.sh
+++ b/checkout_dxc.sh
@@ -1,13 +1,6 @@
#!/bin/bash
-# Commit before GatherCmp regression
-DXC_REV=19360a8fa63ee29925f59328c261c1c920402bfd
-
-if [ -z $PROTOCOL ]; then
- PROTOCOL=git
-fi
-
-echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0."
+DXC_REV=2dc067b561f17d09d8012a1ded05bf0f6253fea5
if [ -d external/DirectXShaderCompiler ]; then
echo "Updating DirectXShaderCompiler to revision $DXC_REV."
@@ -19,7 +12,7 @@ else
echo "Cloning DirectXShaderCompiler revision $DXC_REV."
mkdir -p external
cd external
- git clone $PROTOCOL://github.com/Microsoft/DirectXShaderCompiler.git
+ git clone https://github.com/Microsoft/DirectXShaderCompiler.git
cd DirectXShaderCompiler
git checkout $DXC_REV
git submodule update --init
diff --git a/dxil_converter.cpp b/dxil_converter.cpp
index b99b225..f36052a 100644
--- a/dxil_converter.cpp
+++ b/dxil_converter.cpp
@@ -280,17 +280,58 @@ Converter::Impl::create_bindless_heap_variable_alias_group(const BindlessInfo &b
return decls;
}
+spv::Id Converter::Impl::create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name,
+ unsigned cbv_size)
+{
+ auto &builder = spirv_module.get_builder();
+
+ unsigned element_size = raw_width_to_bits(raw_decl.width) * raw_vecsize_to_vecsize(raw_decl.vecsize) / 8;
+ unsigned array_length = (cbv_size + element_size - 1) / element_size;
+
+ // It seems like we will have to bitcast ourselves away from vec4 here after loading.
+ spv::Id size_id = builder.makeUintConstant(array_length, false);
+ spv::Id element_type = builder.makeFloatType(raw_width_to_bits(raw_decl.width));
+ if (raw_decl.vecsize != RawVecSize::V1)
+ element_type = builder.makeVectorType(element_type, raw_vecsize_to_vecsize(raw_decl.vecsize));
+ spv::Id member_array_type = builder.makeArrayType(element_type, size_id, element_size);
+
+ builder.addDecoration(member_array_type, spv::DecorationArrayStride, element_size);
+
+ spv::Id type_id = get_struct_type({ member_array_type }, name.c_str());
+ builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
+ builder.addDecoration(type_id, spv::DecorationBlock);
+
+ if (range_size != 1)
+ {
+ if (range_size == ~0u)
+ type_id = builder.makeRuntimeArray(type_id);
+ else
+ type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
+ }
+
+ if (raw_decl.width == RawWidth::B16)
+ builder.addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess);
+
+ return create_variable(spv::StorageClassUniform,
+ type_id, name.empty() ? nullptr : name.c_str());
+}
+
spv::Id Converter::Impl::create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name)
{
spv::Id type_id = build_ssbo_runtime_array_type(*this,
raw_width_to_bits(raw_decl.width),
raw_vecsize_to_vecsize(raw_decl.vecsize),
range_size, "SSBO");
+
+ if (raw_decl.width == RawWidth::B16)
+ builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
+
return create_variable(spv::StorageClassStorageBuffer, type_id, name.empty() ? nullptr : name.c_str());
}
-Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_variable_alias_group(
- const Vector<RawDeclaration> &raw_decls, uint32_t range_size, const String &name)
+Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_raw_ssbo_variable_alias_group(
+ const Vector<RawDeclaration> &raw_decls,
+ uint32_t range_size, const String &name)
{
Vector<RawDeclarationVariable> group;
group.reserve(raw_decls.size());
@@ -299,6 +340,17 @@ Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_variable
return group;
}
+Vector<Converter::Impl::RawDeclarationVariable> Converter::Impl::create_ubo_variable_alias_group(
+ const Vector<RawDeclaration> &raw_decls,
+ uint32_t range_size, const String &name, unsigned cbv_size)
+{
+ Vector<RawDeclarationVariable> group;
+ group.reserve(raw_decls.size());
+ for (auto &decl : raw_decls)
+ group.push_back({ decl, create_ubo_variable(decl, range_size, name, cbv_size) });
+ return group;
+}
+
spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info)
{
auto itr = std::find_if(bindless_resources.begin(), bindless_resources.end(), [&](const BindlessResource &resource) {
@@ -371,6 +423,8 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info)
type_id = build_ssbo_runtime_array_type(*this, bits, raw_vecsize_to_vecsize(info.raw_vecsize),
~0u, "SSBO");
storage = spv::StorageClassStorageBuffer;
+ if (bits == 16)
+ builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
}
else
{
@@ -433,6 +487,8 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info)
type_id = build_ssbo_runtime_array_type(*this, bits, raw_vecsize_to_vecsize(info.raw_vecsize),
~0u, "SSBO");
storage = spv::StorageClassStorageBuffer;
+ if (bits == 16)
+ builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
}
else
{
@@ -463,9 +519,29 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info)
case DXIL::ResourceType::CBV:
{
- type_id = builder().makeVectorType(builder().makeFloatType(32), 4);
- type_id = builder().makeArrayType(type_id, builder().makeUintConstant(64 * 1024 / 16), 16);
- builder().addDecoration(type_id, spv::DecorationArrayStride, 16);
+ unsigned bits;
+ if (info.component == DXIL::ComponentType::U16)
+ bits = 16;
+ else if (info.component == DXIL::ComponentType::U32)
+ bits = 32;
+ else if (info.component == DXIL::ComponentType::U64)
+ bits = 64;
+ else
+ {
+ LOGE("Invalid component type for UBO.\n");
+ return 0;
+ }
+
+ unsigned vecsize = raw_vecsize_to_vecsize(info.raw_vecsize);
+ type_id = builder().makeFloatType(bits);
+ if (vecsize > 1)
+ type_id = builder().makeVectorType(type_id, vecsize);
+
+ unsigned element_size = (bits / 8) * vecsize;
+ unsigned num_elements = 0x10000 / element_size;
+
+ type_id = builder().makeArrayType(type_id, builder().makeUintConstant(num_elements), element_size);
+ builder().addDecoration(type_id, spv::DecorationArrayStride, element_size);
type_id = get_struct_type({ type_id }, "BindlessCBV");
builder().addDecoration(type_id, spv::DecorationBlock);
if (options.bindless_cbv_ssbo_emulation)
@@ -473,6 +549,14 @@ spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info)
builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
type_id = builder().makeRuntimeArray(type_id);
storage = options.bindless_cbv_ssbo_emulation ? spv::StorageClassStorageBuffer : spv::StorageClassUniform;
+
+ if (bits == 16)
+ {
+ if (options.bindless_cbv_ssbo_emulation)
+ builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
+ else
+ builder().addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess);
+ }
break;
}
@@ -740,33 +824,52 @@ bool Converter::Impl::analyze_aliased_access(const AccessTracking &tracking,
if (raw_access_16bit &&
descriptor_type != VulkanDescriptorType::SSBO &&
+ descriptor_type != VulkanDescriptorType::UBO &&
descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
{
- LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO or BDA.\n");
+ LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
return false;
}
if (raw_access_64bit &&
descriptor_type != VulkanDescriptorType::SSBO &&
+ descriptor_type != VulkanDescriptorType::UBO &&
descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
{
- LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO or BDA.\n");
+ LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
return false;
}
- // Only SSBO can be reclared with different types.
+ // Only SSBO and UBO can be reclared with different types.
// Typed descriptors are always scalar.
- aliased_access.requires_alias_decoration = descriptor_type == VulkanDescriptorType::SSBO &&
+ aliased_access.requires_alias_decoration = (descriptor_type == VulkanDescriptorType::SSBO ||
+ descriptor_type == VulkanDescriptorType::UBO) &&
aliased_access.raw_declarations.size() > 1;
- // If we only emit one 16-bit or 64-bit SSBO, we need to override the component type of that meta declaration.
- aliased_access.override_primary_component_types = descriptor_type == VulkanDescriptorType::SSBO &&
+ // If we only emit one 16-bit or 64-bit SSBO/UBO, we need to override the component type of that meta declaration.
+ aliased_access.override_primary_component_types = (descriptor_type == VulkanDescriptorType::SSBO ||
+ descriptor_type == VulkanDescriptorType::UBO) &&
aliased_access.raw_declarations.size() == 1;
// If the SSBO is never actually accessed (UAV counters for example), fudge the default type.
if (descriptor_type == VulkanDescriptorType::SSBO && aliased_access.raw_declarations.empty())
aliased_access.raw_declarations.push_back({ RawWidth::B32, RawVecSize::V1 });
+ // If the CBV is never actually accessed, fudge the default legacy CBV type.
+ if (descriptor_type == VulkanDescriptorType::UBO && aliased_access.raw_declarations.empty())
+ aliased_access.raw_declarations.push_back({ RawWidth::B32, RawVecSize::V4 });
+
+ // Safeguard against unused variables where we never end up setting any primary component type.
+ if ((descriptor_type == VulkanDescriptorType::SSBO ||
+ descriptor_type == VulkanDescriptorType::UBO) &&
+ aliased_access.raw_declarations.size() == 1)
+ {
+ aliased_access.primary_component_type =
+ raw_width_to_component_type(aliased_access.raw_declarations.front().width);
+ aliased_access.primary_raw_vecsize = aliased_access.raw_declarations.front().vecsize;
+ aliased_access.override_primary_component_types = true;
+ }
+
return true;
}
@@ -1049,7 +1152,7 @@ bool Converter::Impl::emit_srvs(const llvm::MDNode *srvs)
if (type_id)
ref.var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str());
else if (aliased_access.requires_alias_decoration)
- ref.var_alias_group = create_variable_alias_group(aliased_access.raw_declarations, range_size, name);
+ ref.var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
else
{
assert(aliased_access.raw_declarations.size() == 1);
@@ -1596,7 +1699,7 @@ bool Converter::Impl::emit_uavs(const llvm::MDNode *uavs)
storage = spv::StorageClassStorageBuffer;
if (aliased_access.requires_alias_decoration)
- var_alias_group = create_variable_alias_group(aliased_access.raw_declarations, range_size, name);
+ var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
else
{
assert(aliased_access.raw_declarations.size() == 1);
@@ -1756,6 +1859,11 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
if (need_resource_remapping && resource_mapping_iface && !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
return false;
+ auto &access_meta = cbv_access_tracking[index];
+ AliasedAccess aliased_access;
+ if (!analyze_aliased_access(access_meta, VulkanDescriptorType::UBO, aliased_access))
+ return false;
+
cbv_index_to_reference.resize(std::max(cbv_index_to_reference.size(), size_t(index + 1)));
if (range_size != 1)
@@ -1777,6 +1885,8 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
bindless_info.kind = DXIL::ResourceKind::CBuffer;
bindless_info.desc_set = vulkan_binding.buffer.descriptor_set;
bindless_info.binding = vulkan_binding.buffer.binding;
+ bindless_info.component = aliased_access.primary_component_type;
+ bindless_info.raw_vecsize = aliased_access.primary_raw_vecsize;
if (local_root_signature_entry >= 0)
{
@@ -1789,8 +1899,6 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
return false;
}
- spv::Id var_id = create_bindless_heap_variable(bindless_info);
-
uint32_t heap_offset = local_table_entry.offset_in_heap;
heap_offset += bind_register - local_table_entry.register_index;
@@ -1801,7 +1909,17 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
}
auto &ref = cbv_index_to_reference[index];
- ref.var_id = var_id;
+
+ if (aliased_access.requires_alias_decoration)
+ {
+ ref.var_alias_group = create_bindless_heap_variable_alias_group(
+ bindless_info, aliased_access.raw_declarations);
+ }
+ else
+ {
+ ref.var_id = create_bindless_heap_variable(bindless_info);
+ }
+
ref.base_offset = heap_offset;
ref.base_resource_is_array = range_size != 1;
ref.bindless = true;
@@ -1851,8 +1969,6 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
}
else if (vulkan_binding.buffer.bindless.use_heap)
{
- spv::Id var_id = create_bindless_heap_variable(bindless_info);
-
// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
// The exception is with lib_* where we access resources by variable, not through
// createResource() >_____<.
@@ -1861,7 +1977,17 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
heap_offset -= bind_register;
auto &ref = cbv_index_to_reference[index];
- ref.var_id = var_id;
+
+ if (aliased_access.requires_alias_decoration)
+ {
+ ref.var_alias_group = create_bindless_heap_variable_alias_group(
+ bindless_info, aliased_access.raw_declarations);
+ }
+ else
+ {
+ ref.var_id = create_bindless_heap_variable(bindless_info);
+ }
+
ref.push_constant_member = vulkan_binding.buffer.root_constant_index + root_descriptor_count;
ref.base_offset = heap_offset;
ref.base_resource_is_array = range_size != 1;
@@ -1870,35 +1996,47 @@ bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs)
}
else
{
- unsigned vec4_length = (cbv_size + 15) / 16;
-
- // It seems like we will have to bitcast ourselves away from vec4 here after loading.
- spv::Id member_array_type = builder.makeArrayType(builder.makeVectorType(builder.makeFloatType(32), 4),
- builder.makeUintConstant(vec4_length, false), 16);
-
- builder.addDecoration(member_array_type, spv::DecorationArrayStride, 16);
-
- spv::Id type_id = get_struct_type({ member_array_type }, name.c_str());
- builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
- builder.addDecoration(type_id, spv::DecorationBlock);
+ auto &ref = cbv_index_to_reference[index];
- if (range_size != 1)
+ if (aliased_access.requires_alias_decoration)
{
- if (range_size == ~0u)
- type_id = builder.makeRuntimeArray(type_id);
- else
- type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
+ ref.var_alias_group = create_ubo_variable_alias_group(
+ aliased_access.raw_declarations, range_size, name, cbv_size);
+ }
+ else
+ {
+ assert(aliased_access.raw_declarations.size() == 1);
+ ref.var_id = create_ubo_variable(aliased_access.raw_declarations.front(), range_size, name, cbv_size);
}
- spv::Id var_id = create_variable(spv::StorageClassUniform, type_id, name.empty() ? nullptr : name.c_str());
-
- builder.addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
- builder.addDecoration(var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
-
- auto &ref = cbv_index_to_reference[index];
- ref.var_id = var_id;
ref.base_resource_is_array = range_size != 1;
ref.resource_kind = DXIL::ResourceKind::CBuffer;
+
+ if (ref.var_id)
+ {
+ auto &meta = handle_to_resource_meta[ref.var_id];
+ meta = {};
+ meta.kind = ref.resource_kind;
+ meta.var_id = ref.var_id;
+ meta.storage = spv::StorageClassUniform;
+ meta.component_type = aliased_access.primary_component_type;
+ meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;
+ builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
+ builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
+ }
+
+ for (auto &var : ref.var_alias_group)
+ {
+ auto &meta = handle_to_resource_meta[var.var_id];
+ meta = {};
+ meta.kind = ref.resource_kind;
+ meta.var_id = var.var_id;
+ meta.storage = spv::StorageClassUniform;
+ meta.component_type = raw_width_to_component_type(var.declaration.width);
+ meta.raw_component_vecsize = var.declaration.vecsize;
+ builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
+ builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
+ }
}
}
@@ -2194,9 +2332,10 @@ bool Converter::Impl::emit_shader_record_buffer()
{
case LocalRootSignatureType::Constants:
{
+ spv::Id array_size_id = builder.makeUintConstant(elem.constants.num_words);
+ spv::Id u32_type = builder.makeUintType(32);
spv::Id member_type_id =
- builder.makeArrayType(builder.makeUintType(32),
- builder.makeUintConstant(elem.constants.num_words), 4);
+ builder.makeArrayType(u32_type, array_size_id, 4);
builder.addDecoration(member_type_id, spv::DecorationArrayStride, 4);
member_types.push_back(member_type_id);
offsets.push_back(current_offset);
@@ -2441,7 +2580,8 @@ bool Converter::Impl::emit_global_heaps()
auto actual_component_type = DXIL::ComponentType::U32;
info.format = spv::ImageFormatUnknown;
- if (annotation->resource_kind != DXIL::ResourceKind::RawBuffer &&
+ if (annotation->resource_type != DXIL::ResourceType::CBV &&
+ annotation->resource_kind != DXIL::ResourceKind::RawBuffer &&
annotation->resource_kind != DXIL::ResourceKind::StructuredBuffer)
{
actual_component_type = normalize_component_type(annotation->component_type);
@@ -2529,6 +2669,7 @@ bool Converter::Impl::emit_global_heaps()
return false;
}
vulkan_binding = vulkan_cbv_binding.buffer;
+ vulkan_binding.descriptor_type = VulkanDescriptorType::UBO;
break;
}
@@ -3105,10 +3246,14 @@ spv::Id Converter::Impl::get_type_id(const llvm::Type *type)
}
case llvm::Type::TypeID::ArrayTyID:
+ {
if (type->getArrayNumElements() == 0)
return 0;
- return builder.makeArrayType(get_type_id(type->getArrayElementType()),
- builder.makeUintConstant(type->getArrayNumElements(), false), 0);
+
+ spv::Id array_size_id = builder.makeUintConstant(type->getArrayNumElements());
+ spv::Id element_type_id = get_type_id(type->getArrayElementType());
+ return builder.makeArrayType(element_type_id, array_size_id, 0);
+ }
case llvm::Type::TypeID::StructTyID:
{
diff --git a/dxil_converter.hpp b/dxil_converter.hpp
index fa0e487..10210a4 100644
--- a/dxil_converter.hpp
+++ b/dxil_converter.hpp
@@ -89,7 +89,8 @@ enum class VulkanDescriptorType : unsigned
Identity = 0,
SSBO = 1,
TexelBuffer = 2,
- BufferDeviceAddress = 3
+ BufferDeviceAddress = 3,
+ UBO = 4
};
struct VulkanBinding
diff --git a/opcodes/converter_impl.hpp b/opcodes/converter_impl.hpp
index 6008d9a..5d8b99c 100644
--- a/opcodes/converter_impl.hpp
+++ b/opcodes/converter_impl.hpp
@@ -191,8 +191,10 @@ struct Converter::Impl
bool has_atomic_64bit = false;
bool raw_access_buffer_declarations[unsigned(RawWidth::Count)][unsigned(RawVecSize::Count)] = {};
};
+ UnorderedMap<uint32_t, AccessTracking> cbv_access_tracking;
UnorderedMap<uint32_t, AccessTracking> srv_access_tracking;
UnorderedMap<uint32_t, AccessTracking> uav_access_tracking;
+ UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_cbv_resource_index_map;
UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_srv_resource_index_map;
UnorderedMap<const llvm::Value *, uint32_t> llvm_value_to_uav_resource_index_map;
UnorderedSet<const llvm::Value *> llvm_values_using_update_counter;
@@ -514,10 +516,15 @@ struct Converter::Impl
spv::Id create_bindless_heap_variable(const BindlessInfo &info);
Vector<RawDeclarationVariable> create_bindless_heap_variable_alias_group(
- const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls);
- Vector<RawDeclarationVariable> create_variable_alias_group(
- const Vector<RawDeclaration> &raw_decls, uint32_t range_size, const String &name);
+ const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls);
+ Vector<RawDeclarationVariable> create_raw_ssbo_variable_alias_group(
+ const Vector<RawDeclaration> &raw_decls,
+ uint32_t range_size, const String &name);
+ Vector<RawDeclarationVariable> create_ubo_variable_alias_group(
+ const Vector<RawDeclaration> &raw_decls,
+ uint32_t range_size, const String &name, unsigned cbv_size);
spv::Id create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name);
+ spv::Id create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name, unsigned cbv_size);
struct BindlessResource
{
diff --git a/opcodes/dxil/dxil_arithmetic.cpp b/opcodes/dxil/dxil_arithmetic.cpp
index 33ab03d..0f9b057 100644
--- a/opcodes/dxil/dxil_arithmetic.cpp
+++ b/opcodes/dxil/dxil_arithmetic.cpp
@@ -27,12 +27,13 @@ bool emit_imad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
// FIXME: Do we need to deal with intermediate mul overflow here somehow?
Operation *mul = impl.allocate(spv::OpIMul, impl.get_type_id(instruction->getType()));
- mul->add_ids(
- { impl.get_id_for_value(instruction->getOperand(1)), impl.get_id_for_value(instruction->getOperand(2)) });
+ mul->add_id(impl.get_id_for_value(instruction->getOperand(1)));
+ mul->add_id(impl.get_id_for_value(instruction->getOperand(2)));
impl.add(mul);
Operation *add = impl.allocate(spv::OpIAdd, instruction);
- add->add_ids({ mul->id, impl.get_id_for_value(instruction->getOperand(3)) });
+ add->add_id(mul->id);
+ add->add_id(impl.get_id_for_value(instruction->getOperand(3)));
impl.add(add);
return true;
}
@@ -68,11 +69,8 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
Operation *op = impl.allocate(spv::OpExtInst, instruction);
op->add_id(impl.glsl_std450_ext);
op->add_literal(GLSLstd450Fma);
- op->add_ids({
- impl.get_id_for_value(instruction->getOperand(1)),
- impl.get_id_for_value(instruction->getOperand(2)),
- impl.get_id_for_value(instruction->getOperand(3)),
- });
+ for (unsigned i = 1; i < 4; i++)
+ op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
impl.add(op);
}
@@ -144,10 +142,8 @@ bool emit_dxil_std450_binary_instruction(GLSLstd450 opcode, Converter::Impl &imp
Operation *op = impl.allocate(spv::OpExtInst, instruction);
op->add_id(impl.glsl_std450_ext);
op->add_literal(opcode);
- op->add_ids({
- impl.get_id_for_value(instruction->getOperand(1)),
- impl.get_id_for_value(instruction->getOperand(2))
- });
+ for (unsigned i = 1; i < 3; i++)
+ op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
impl.add(op);
return true;
@@ -162,9 +158,9 @@ bool emit_dxil_std450_trinary_instruction(GLSLstd450 opcode, Converter::Impl &im
Operation *op = impl.allocate(spv::OpExtInst, instruction);
op->add_id(impl.glsl_std450_ext);
op->add_literal(opcode);
- op->add_ids({ impl.get_id_for_value(instruction->getOperand(1)),
- impl.get_id_for_value(instruction->getOperand(2)),
- impl.get_id_for_value(instruction->getOperand(3)) });
+
+ for (unsigned i = 1; i < 4; i++)
+ op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
impl.add(op);
return true;
@@ -283,11 +279,8 @@ static spv::Id clamp_bitfield_width(Converter::Impl &impl, spv::Id offset, spv::
static spv::Id mask_input(Converter::Impl &impl, const llvm::Value *value)
{
Operation *op = impl.allocate(spv::OpBitwiseAnd, impl.get_type_id(value->getType()));
- op->add_ids({
- impl.get_id_for_value(value),
- impl.builder().makeUintConstant(31),
- });
-
+ op->add_id(impl.get_id_for_value(value));
+ op->add_id(impl.builder().makeUintConstant(31));
impl.add(op);
return op->id;
}
diff --git a/opcodes/dxil/dxil_buffer.cpp b/opcodes/dxil/dxil_buffer.cpp
index 0a5b60a..cf0aa4a 100644
--- a/opcodes/dxil/dxil_buffer.cpp
+++ b/opcodes/dxil/dxil_buffer.cpp
@@ -24,153 +24,6 @@
namespace dxil_spv
{
-static spv::Id build_index_divider_fallback(Converter::Impl &impl, const llvm::Value *offset, unsigned addr_shift_log2)
-{
- auto &builder = impl.builder();
- Operation *op = impl.allocate(spv::OpShiftRightLogical, builder.makeUintType(32));
- op->add_ids({ impl.get_id_for_value(offset), builder.makeUintConstant(addr_shift_log2) });
- impl.add(op);
- return op->id;
-}
-
-bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride,
- uint32_t addr_shift_log2, unsigned vecsize,
- RawBufferAccessSplit &split)
-{
- unsigned element_size = (1u << addr_shift_log2) * vecsize;
-
- // Base case first, a constant value.
- if (const auto *const_addr = llvm::dyn_cast<llvm::ConstantInt>(index))
- {
- int64_t constant_offset = const_addr->getUniqueInteger().getSExtValue();
- constant_offset *= stride;
-
- // Always pass scalar constant dividers through.
- // Building a fallback divider helps nothing.
- if (vecsize == 1 || constant_offset % int(element_size) == 0)
- {
- split = {};
- split.bias = constant_offset / element_size;
- return true;
- }
- else
- return false;
- }
-
- const llvm::ConstantInt *scale = nullptr;
- const llvm::ConstantInt *bias = nullptr;
- bool scale_log2 = false;
- bool bias_is_add = false;
- bool bias_negate = false;
-
- while (!scale && llvm::isa<llvm::BinaryOperator>(index))
- {
- auto *binop = llvm::cast<llvm::BinaryOperator>(index);
- auto *lhs = binop->getOperand(0);
- auto *rhs = binop->getOperand(1);
- if (!bias && (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add ||
- binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub ||
- binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Or ||
- binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Xor))
- {
- if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs))
- {
- bias = const_lhs;
- index = rhs;
- }
- else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
- {
- bias = const_rhs;
- index = lhs;
- }
- else
- break;
-
- // DXC tends to be emit shift + or in some cases.
- // We can turn this back into mul + add in most cases.
- bias_negate = binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub;
- bias_is_add =
- binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add ||
- bias_negate;
- }
- else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Shl)
- {
- if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
- {
- scale = const_rhs;
- index = lhs;
- }
- else
- break;
-
- scale_log2 = true;
- }
- else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Mul)
- {
- if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs))
- {
- scale = const_lhs;
- index = rhs;
- }
- else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
- {
- scale = const_rhs;
- index = lhs;
- }
- else
- break;
-
- scale_log2 = false;
- }
- else
- break;
- }
-
- if (!scale && !bias)
- {
- // We cannot split anything, but we might be able to vectorize if the stride alone carries us.
- if (stride % element_size == 0)
- {
- split = {};
- split.scale = stride / element_size;
- split.dynamic_index = index;
- return true;
- }
- else
- return false;
- }
-
- uint64_t scale_factor = 1;
- if (scale)
- scale_factor = scale->getUniqueInteger().getZExtValue();
- if (scale_log2)
- scale_factor = 1ull << scale_factor;
-
- int64_t bias_factor = 0;
- if (bias)
- bias_factor = bias->getUniqueInteger().getSExtValue();
- if (bias_negate)
- bias_factor = -bias_factor;
-
- // If there is no bit overlap between scale_factor and bias_factor
- // then the bitwise OR is equivalent to add.
- if (!bias_is_add && (scale_factor & bias_factor) != 0)
- return false;
-
- scale_factor *= stride;
- bias_factor *= stride;
-
- if (scale_factor % element_size == 0 && bias_factor % element_size == 0 && index)
- {
- split.scale = scale_factor / element_size;
- split.bias = bias_factor / int(element_size);
- split.dynamic_index = index;
- return true;
- }
- else
- return false;
-}
-
bool raw_access_byte_address_can_vectorize(Converter::Impl &impl, const llvm::Type *type,
const llvm::Value *byte_offset,
unsigned vecsize)
@@ -280,61 +133,6 @@ static spv::Id build_accumulate_offsets(Converter::Impl &impl, const spv::Id *id
return accumulated_id;
}
-static spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset,
- unsigned addr_shift_log2, unsigned vecsize)
-{
- auto &builder = impl.builder();
- // Attempt to do trivial constant folding to make output a little more sensible to read.
- // Try to find an expression for offset which is "constant0 * offset + constant1",
- // where constant0 and constant1 are aligned with addr_shift_log2.
-
- spv::Id index_id;
- RawBufferAccessSplit split = {};
-
- if (extract_raw_buffer_access_split(offset, 1, addr_shift_log2, vecsize, split))
- {
- if (!split.dynamic_index)
- return builder.makeUintConstant(split.bias);
-
- spv::Op bias_opcode = split.bias > 0 ? spv::OpIAdd : spv::OpISub;
- if (bias_opcode == spv::OpISub)
- split.bias = -split.bias;
-
- spv::Id scaled_id;
- if (split.scale != 1)
- {
- Operation *scale_op = impl.allocate(spv::OpIMul, builder.makeUintType(32));
- scale_op->add_id(impl.get_id_for_value(split.dynamic_index));
- scale_op->add_id(builder.makeUintConstant(split.scale));
- impl.add(scale_op);
- scaled_id = scale_op->id;
- }
- else
- scaled_id = impl.get_id_for_value(split.dynamic_index);
-
- spv::Id bias_id;
- if (split.bias != 0)
- {
- Operation *bias_op = impl.allocate(bias_opcode, builder.makeUintType(32));
- bias_op->add_id(scaled_id);
- bias_op->add_id(builder.makeUintConstant(split.bias));
- impl.add(bias_op);
- bias_id = bias_op->id;
- }
- else
- bias_id = scaled_id;
-
- index_id = bias_id;
- }
- else
- {
- assert(vecsize == 1);
- index_id = build_index_divider_fallback(impl, offset, addr_shift_log2);
- }
-
- return index_id;
-}
-
static spv::Id build_structured_index(Converter::Impl &impl, const llvm::Value *index,
unsigned stride,
const llvm::Value *byte_offset,
@@ -410,20 +208,6 @@ static spv::Id build_structured_index(Converter::Impl &impl, const llvm::Value *
}
}
-static bool type_is_16bit(const llvm::Type *data_type)
-{
- return data_type->getTypeID() == llvm::Type::TypeID::HalfTyID ||
- (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
- data_type->getIntegerBitWidth() == 16);
-}
-
-static bool type_is_64bit(const llvm::Type *data_type)
-{
- return data_type->getTypeID() == llvm::Type::TypeID::DoubleTyID ||
- (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
- data_type->getIntegerBitWidth() == 64);
-}
-
unsigned raw_buffer_data_type_to_addr_shift_log2(Converter::Impl &impl, const llvm::Type *data_type)
{
// A 16-bit raw load is only actually 16-bit if native 16-bit operations are enabled.
@@ -591,30 +375,6 @@ static spv::Id build_physical_pointer_address_for_raw_load_store(Converter::Impl
return emit_u32x2_u32_add(impl, ptr_id, byte_offset_id);
}
-static void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type,
- spv::Id &physical_type_id, spv::Op &value_cast_op)
-{
- if (type_is_16bit(element_type) && !impl.execution_mode_meta.native_16bit_operations &&
- impl.options.min_precision_prefer_native_16bit)
- {
- if (element_type->getTypeID() == llvm::Type::TypeID::HalfTyID)
- {
- physical_type_id = impl.get_type_id(DXIL::ComponentType::F32, 1, 1);
- value_cast_op = spv::OpFConvert;
- }
- else
- {
- physical_type_id = impl.get_type_id(DXIL::ComponentType::U32, 1, 1);
- value_cast_op = spv::OpUConvert;
- }
- }
- else
- {
- physical_type_id = impl.get_type_id(element_type);
- value_cast_op = spv::OpNop;
- }
-}
-
static bool emit_physical_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction,
const Converter::Impl::PhysicalPointerMeta &ptr_meta,
uint32_t mask = 0, uint32_t alignment = 0)
@@ -702,21 +462,6 @@ static RawWidth get_buffer_access_bits_per_component(
return RawWidth::B32;
}
-static spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta,
- spv::Id default_id, RawWidth width, RawVecSize vecsize)
-{
- for (auto &alias : meta.var_alias_group)
- {
- if (alias.declaration.width == width && alias.declaration.vecsize == vecsize)
- {
- default_id = alias.var_id;
- break;
- }
- }
-
- return default_id;
-}
-
bool emit_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction)
{
// Elide dead loads.
@@ -1425,12 +1170,11 @@ bool emit_atomic_binop_instruction(Converter::Impl &impl, const llvm::CallInst *
}
Operation *op = impl.allocate(opcode, instruction, impl.get_type_id(component_type, 1, 1));
- op->add_ids({
- counter_ptr_op->id,
- builder.makeUintConstant(spv::ScopeDevice),
- builder.makeUintConstant(0), // Relaxed
- impl.fixup_store_type_atomic(component_type, 1, impl.get_id_for_value(instruction->getOperand(6))),
- });
+
+ op->add_id(counter_ptr_op->id);
+ op->add_id(builder.makeUintConstant(spv::ScopeDevice));
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(impl.fixup_store_type_atomic(component_type, 1, impl.get_id_for_value(instruction->getOperand(6))));
impl.add(op);
impl.fixup_load_type_atomic(component_type, 1, instruction);
@@ -1519,14 +1263,12 @@ bool emit_atomic_cmpxchg_instruction(Converter::Impl &impl, const llvm::CallInst
comparison_id = impl.fixup_store_type_atomic(component_type, 1, comparison_id);
new_value_id = impl.fixup_store_type_atomic(component_type, 1, new_value_id);
- op->add_ids({
- counter_ptr_op->id,
- builder.makeUintConstant(spv::ScopeDevice),
- builder.makeUintConstant(0), // Relaxed
- builder.makeUintConstant(0), // Relaxed
- new_value_id,
- comparison_id,
- });
+ op->add_id(counter_ptr_op->id);
+ op->add_id(builder.makeUintConstant(spv::ScopeDevice));
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(new_value_id);
+ op->add_id(comparison_id);
impl.add(op);
impl.fixup_load_type_atomic(component_type, 1, instruction);
@@ -1553,7 +1295,10 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C
{
counter_ptr_op = impl.allocate(spv::OpImageTexelPointer,
builder.makePointer(spv::StorageClassImage, builder.makeUintType(32)));
- counter_ptr_op->add_ids({ meta.counter_var_id, builder.makeUintConstant(0), builder.makeUintConstant(0) });
+
+ counter_ptr_op->add_id(meta.counter_var_id);
+ counter_ptr_op->add_id(builder.makeUintConstant(0));
+ counter_ptr_op->add_id(builder.makeUintConstant(0));
if (meta.non_uniform)
builder.addDecoration(counter_ptr_op->id, spv::DecorationNonUniformEXT);
@@ -1562,9 +1307,11 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C
impl.add(counter_ptr_op);
Operation *op = impl.allocate(spv::OpAtomicIAdd, instruction);
- op->add_ids({ counter_ptr_op->id, builder.makeUintConstant(spv::ScopeDevice),
- builder.makeUintConstant(0), // Relaxed.
- builder.makeUintConstant(direction) });
+
+ op->add_id(counter_ptr_op->id);
+ op->add_id(builder.makeUintConstant(spv::ScopeDevice));
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(builder.makeUintConstant(direction));
impl.add(op);
diff --git a/opcodes/dxil/dxil_buffer.hpp b/opcodes/dxil/dxil_buffer.hpp
index 5f8a090..0f8ea5d 100644
--- a/opcodes/dxil/dxil_buffer.hpp
+++ b/opcodes/dxil/dxil_buffer.hpp
@@ -37,16 +37,6 @@ bool emit_buffer_update_counter_instruction(Converter::Impl &impl, const llvm::C
unsigned raw_buffer_data_type_to_addr_shift_log2(Converter::Impl &impl, const llvm::Type *data_type);
-struct RawBufferAccessSplit
-{
- uint64_t scale;
- int64_t bias;
- const llvm::Value *dynamic_index;
-};
-bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride,
- uint32_t addr_shift_log2, unsigned vecsize,
- RawBufferAccessSplit &split);
-
bool raw_access_byte_address_can_vectorize(Converter::Impl &impl, const llvm::Type *type,
const llvm::Value *byte_offset, unsigned vecsize);
diff --git a/opcodes/dxil/dxil_common.cpp b/opcodes/dxil/dxil_common.cpp
index c76d8da..a26f571 100644
--- a/opcodes/dxil/dxil_common.cpp
+++ b/opcodes/dxil/dxil_common.cpp
@@ -79,4 +79,289 @@ spv::Id emit_u32x2_u32_add(Converter::Impl &impl, spv::Id u32x2_value, spv::Id u
spv::Id addr_vec = impl.build_vector(uint_type, addr_elems, 2);
return addr_vec;
}
+
+unsigned get_type_scalar_alignment(Converter::Impl &impl, const llvm::Type *type)
+{
+ unsigned scalar_alignment;
+ switch (type->getTypeID())
+ {
+ case llvm::Type::TypeID::IntegerTyID:
+ scalar_alignment = type->getIntegerBitWidth() / 8;
+ break;
+ case llvm::Type::TypeID::HalfTyID:
+ scalar_alignment = 2;
+ break;
+ case llvm::Type::TypeID::FloatTyID:
+ scalar_alignment = 4;
+ break;
+ case llvm::Type::TypeID::DoubleTyID:
+ scalar_alignment = 8;
+ break;
+ default:
+ LOGE("Invalid type for scalar alignment query.\n");
+ return 1;
+ }
+
+ if (!impl.execution_mode_meta.native_16bit_operations && scalar_alignment == 2)
+ scalar_alignment = 4;
+
+ return scalar_alignment;
+}
+
+spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta,
+ spv::Id default_id, RawWidth width, RawVecSize vecsize)
+{
+ for (auto &alias : meta.var_alias_group)
+ {
+ if (alias.declaration.width == width && alias.declaration.vecsize == vecsize)
+ {
+ default_id = alias.var_id;
+ break;
+ }
+ }
+
+ return default_id;
+}
+
+bool type_is_16bit(const llvm::Type *data_type)
+{
+ return data_type->getTypeID() == llvm::Type::TypeID::HalfTyID ||
+ (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
+ data_type->getIntegerBitWidth() == 16);
+}
+
+bool type_is_64bit(const llvm::Type *data_type)
+{
+ return data_type->getTypeID() == llvm::Type::TypeID::DoubleTyID ||
+ (data_type->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
+ data_type->getIntegerBitWidth() == 64);
+}
+
+void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type,
+ spv::Id &physical_type_id, spv::Op &value_cast_op)
+{
+ if (type_is_16bit(element_type) && !impl.execution_mode_meta.native_16bit_operations &&
+ impl.options.min_precision_prefer_native_16bit)
+ {
+ if (element_type->getTypeID() == llvm::Type::TypeID::HalfTyID)
+ {
+ physical_type_id = impl.get_type_id(DXIL::ComponentType::F32, 1, 1);
+ value_cast_op = spv::OpFConvert;
+ }
+ else
+ {
+ physical_type_id = impl.get_type_id(DXIL::ComponentType::U32, 1, 1);
+ value_cast_op = spv::OpUConvert;
+ }
+ }
+ else
+ {
+ physical_type_id = impl.get_type_id(element_type);
+ value_cast_op = spv::OpNop;
+ }
+}
+
+static spv::Id build_index_divider_fallback(Converter::Impl &impl, const llvm::Value *offset, unsigned addr_shift_log2)
+{
+ auto &builder = impl.builder();
+ Operation *op = impl.allocate(spv::OpShiftRightLogical, builder.makeUintType(32));
+ op->add_id(impl.get_id_for_value(offset));
+ op->add_id(builder.makeUintConstant(addr_shift_log2));
+ impl.add(op);
+ return op->id;
+}
+
+bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride,
+ uint32_t addr_shift_log2, unsigned vecsize,
+ RawBufferAccessSplit &split)
+{
+ unsigned element_size = (1u << addr_shift_log2) * vecsize;
+
+ // Base case first, a constant value.
+ if (const auto *const_addr = llvm::dyn_cast<llvm::ConstantInt>(index))
+ {
+ int64_t constant_offset = const_addr->getUniqueInteger().getSExtValue();
+ constant_offset *= stride;
+
+ // Always pass scalar constant dividers through.
+ // Building a fallback divider helps nothing.
+ if (vecsize == 1 || constant_offset % int(element_size) == 0)
+ {
+ split = {};
+ split.bias = constant_offset / element_size;
+ return true;
+ }
+ else
+ return false;
+ }
+
+ const llvm::ConstantInt *scale = nullptr;
+ const llvm::ConstantInt *bias = nullptr;
+ bool scale_log2 = false;
+ bool bias_is_add = false;
+ bool bias_negate = false;
+
+ while (!scale && llvm::isa<llvm::BinaryOperator>(index))
+ {
+ auto *binop = llvm::cast<llvm::BinaryOperator>(index);
+ auto *lhs = binop->getOperand(0);
+ auto *rhs = binop->getOperand(1);
+ if (!bias && (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add ||
+ binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub ||
+ binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Or ||
+ binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Xor))
+ {
+ if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs))
+ {
+ bias = const_lhs;
+ index = rhs;
+ }
+ else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
+ {
+ bias = const_rhs;
+ index = lhs;
+ }
+ else
+ break;
+
+ // DXC tends to emit shift + or in some cases.
+ // We can turn this back into mul + add in most cases.
+ bias_negate = binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Sub;
+ bias_is_add =
+ binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Add ||
+ bias_negate;
+ }
+ else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Shl)
+ {
+ if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
+ {
+ scale = const_rhs;
+ index = lhs;
+ }
+ else
+ break;
+
+ scale_log2 = true;
+ }
+ else if (binop->getOpcode() == llvm::BinaryOperator::BinaryOps::Mul)
+ {
+ if (const auto *const_lhs = llvm::dyn_cast<llvm::ConstantInt>(lhs))
+ {
+ scale = const_lhs;
+ index = rhs;
+ }
+ else if (const auto *const_rhs = llvm::dyn_cast<llvm::ConstantInt>(rhs))
+ {
+ scale = const_rhs;
+ index = lhs;
+ }
+ else
+ break;
+
+ scale_log2 = false;
+ }
+ else
+ break;
+ }
+
+ if (!scale && !bias)
+ {
+ // We cannot split anything, but we might be able to vectorize if the stride alone carries us.
+ if (stride % element_size == 0)
+ {
+ split = {};
+ split.scale = stride / element_size;
+ split.dynamic_index = index;
+ return true;
+ }
+ else
+ return false;
+ }
+
+ uint64_t scale_factor = 1;
+ if (scale)
+ scale_factor = scale->getUniqueInteger().getZExtValue();
+ if (scale_log2)
+ scale_factor = 1ull << scale_factor;
+
+ int64_t bias_factor = 0;
+ if (bias)
+ bias_factor = bias->getUniqueInteger().getSExtValue();
+ if (bias_negate)
+ bias_factor = -bias_factor;
+
+ // If there is no bit overlap between scale_factor and bias_factor
+ // then the bitwise OR is equivalent to add.
+ if (!bias_is_add && (scale_factor & bias_factor) != 0)
+ return false;
+
+ scale_factor *= stride;
+ bias_factor *= stride;
+
+ if (scale_factor % element_size == 0 && bias_factor % element_size == 0 && index)
+ {
+ split.scale = scale_factor / element_size;
+ split.bias = bias_factor / int(element_size);
+ split.dynamic_index = index;
+ return true;
+ }
+ else
+ return false;
+}
+
+spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset,
+ unsigned addr_shift_log2, unsigned vecsize)
+{
+ auto &builder = impl.builder();
+ // Attempt to do trivial constant folding to make output a little more sensible to read.
+ // Try to find an expression for offset which is "constant0 * offset + constant1",
+ // where constant0 and constant1 are aligned with addr_shift_log2.
+
+ spv::Id index_id;
+ RawBufferAccessSplit split = {};
+
+ if (extract_raw_buffer_access_split(offset, 1, addr_shift_log2, vecsize, split))
+ {
+ if (!split.dynamic_index)
+ return builder.makeUintConstant(split.bias);
+
+ spv::Op bias_opcode = split.bias > 0 ? spv::OpIAdd : spv::OpISub;
+ if (bias_opcode == spv::OpISub)
+ split.bias = -split.bias;
+
+ spv::Id scaled_id;
+ if (split.scale != 1)
+ {
+ Operation *scale_op = impl.allocate(spv::OpIMul, builder.makeUintType(32));
+ scale_op->add_id(impl.get_id_for_value(split.dynamic_index));
+ scale_op->add_id(builder.makeUintConstant(split.scale));
+ impl.add(scale_op);
+ scaled_id = scale_op->id;
+ }
+ else
+ scaled_id = impl.get_id_for_value(split.dynamic_index);
+
+ spv::Id bias_id;
+ if (split.bias != 0)
+ {
+ Operation *bias_op = impl.allocate(bias_opcode, builder.makeUintType(32));
+ bias_op->add_id(scaled_id);
+ bias_op->add_id(builder.makeUintConstant(split.bias));
+ impl.add(bias_op);
+ bias_id = bias_op->id;
+ }
+ else
+ bias_id = scaled_id;
+
+ index_id = bias_id;
+ }
+ else
+ {
+ assert(vecsize == 1);
+ index_id = build_index_divider_fallback(impl, offset, addr_shift_log2);
+ }
+
+ return index_id;
+}
+
} // namespace dxil_spv
diff --git a/opcodes/dxil/dxil_common.hpp b/opcodes/dxil/dxil_common.hpp
index c13f0f9..e50278b 100644
--- a/opcodes/dxil/dxil_common.hpp
+++ b/opcodes/dxil/dxil_common.hpp
@@ -19,9 +19,34 @@
#pragma once
#include "SpvBuilder.h"
#include "opcodes/opcodes.hpp"
+#include "opcodes/converter_impl.hpp"
namespace dxil_spv
{
bool get_constant_operand(const llvm::CallInst *value, unsigned index, uint32_t *operand);
spv::Id emit_u32x2_u32_add(Converter::Impl &impl, spv::Id u32x2_value, spv::Id u32_value);
+unsigned get_type_scalar_alignment(Converter::Impl &impl, const llvm::Type *type);
+
+spv::Id get_buffer_alias_handle(Converter::Impl &impl, const Converter::Impl::ResourceMeta &meta,
+ spv::Id default_id, RawWidth width, RawVecSize vecsize);
+
+bool type_is_16bit(const llvm::Type *data_type);
+bool type_is_64bit(const llvm::Type *data_type);
+
+void get_physical_load_store_cast_info(Converter::Impl &impl, const llvm::Type *element_type,
+ spv::Id &physical_type_id, spv::Op &value_cast_op);
+
+struct RawBufferAccessSplit
+{
+ uint64_t scale;
+ int64_t bias;
+ const llvm::Value *dynamic_index;
+};
+
+bool extract_raw_buffer_access_split(const llvm::Value *index, unsigned stride,
+ uint32_t addr_shift_log2, unsigned vecsize,
+ RawBufferAccessSplit &split);
+
+spv::Id build_index_divider(Converter::Impl &impl, const llvm::Value *offset,
+ unsigned addr_shift_log2, unsigned vecsize);
}
diff --git a/opcodes/dxil/dxil_resources.cpp b/opcodes/dxil/dxil_resources.cpp
index 1bfdfc2..00275db 100644
--- a/opcodes/dxil/dxil_resources.cpp
+++ b/opcodes/dxil/dxil_resources.cpp
@@ -150,11 +150,9 @@ static void fixup_builtin_load(Converter::Impl &impl, spv::Id var_id, const llvm
else if (builtin == spv::BuiltInFrontFacing)
{
Operation *cast_op = impl.allocate(spv::OpSelect, builder.makeUintType(32));
- cast_op->add_ids({
- impl.get_id_for_value(instruction),
- builder.makeUintConstant(~0u),
- builder.makeUintConstant(0),
- });
+ cast_op->add_id(impl.get_id_for_value(instruction));
+ cast_op->add_id(builder.makeUintConstant(~0u));
+ cast_op->add_id(builder.makeUintConstant(0));
impl.add(cast_op);
impl.rewrite_value(instruction, cast_op->id);
}
@@ -272,7 +270,9 @@ static spv::Id build_attribute_offset(spv::Id id, Converter::Impl &impl)
auto &builder = impl.builder();
{
Operation *op = impl.allocate(spv::OpBitFieldSExtract, builder.makeUintType(32));
- op->add_ids({ id, builder.makeUintConstant(0), builder.makeUintConstant(4) });
+ op->add_id(id);
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(builder.makeUintConstant(4));
id = op->id;
impl.add(op);
}
@@ -664,11 +664,15 @@ static bool build_load_resource_handle(Converter::Impl &impl, spv::Id base_resou
auto storage = get_resource_storage_class(impl, base_resource_id);
is_non_uniform = false;
+ // If we index based on SBT, we must assume non-uniform, even for resources
+ // which are not arrayed, since in theory, the dispatch can process different SBTs concurrently,
+ // perhaps even within the same subgroup, so we have to be defensive.
+ if (reference.local_root_signature_entry >= 0)
+ is_non_uniform = true;
+
if (reference.base_resource_is_array || reference.bindless)
{
- if (reference.base_resource_is_array)
- is_non_uniform = instruction_is_non_uniform;
- else if (reference.local_root_signature_entry >= 0)
+ if (reference.base_resource_is_array && instruction_offset_value && instruction_is_non_uniform)
is_non_uniform = true;
type_id = builder.getContainedTypeId(type_id);
@@ -725,7 +729,10 @@ static bool build_load_resource_handle(Converter::Impl &impl, spv::Id base_resou
{
*value_id = resource_id;
impl.rewrite_value(instruction, resource_id);
- // Not technically needed, but to be safe against weird compilers ...
+
+ // Generally, we want to add NonUniformEXT after access chain for UBO/SSBO,
+ // but there is a special case in non-uniform OpArrayLength, where we will use this pointer
+ // directly, so mark it as non-uniform here.
if (is_non_uniform)
builder.addDecoration(resource_id, spv::DecorationNonUniformEXT);
}
@@ -1244,8 +1251,13 @@ static bool emit_create_handle(Converter::Impl &impl, const llvm::CallInst *inst
case DXIL::ResourceType::CBV:
{
auto &reference = get_resource_reference(impl, resource_type, instruction, resource_range);
- spv::Id base_cbv_id = reference.var_id;
- spv::Id type_id = builder.getDerefTypeId(base_cbv_id);
+ const LocalRootSignatureEntry *local_root_signature_entry = nullptr;
+ if (reference.local_root_signature_entry >= 0)
+ local_root_signature_entry = &impl.local_root_signature[reference.local_root_signature_entry];
+
+ // Special case root constants since these resources point directly to
+ // the push constant block or SBT and not to any concrete resource,
+ // so we cannot deduce storage classes properly.
if (resource_is_physical_pointer(impl, reference))
{
@@ -1258,93 +1270,82 @@ static bool emit_create_handle(Converter::Impl &impl, const llvm::CallInst *inst
meta.kind = reference.resource_kind;
impl.rewrite_value(instruction, ptr_id);
}
- else if (reference.base_resource_is_array || reference.bindless)
+ else if (reference.var_id != 0 && reference.var_id == impl.root_constant_id)
+ {
+ // Point directly to root constants.
+ impl.rewrite_value(instruction, reference.var_id);
+ unsigned member_offset = reference.push_constant_member;
+ impl.handle_to_root_member_offset[instruction] = member_offset;
+ }
+ else if (local_root_signature_entry && local_root_signature_entry->type == LocalRootSignatureType::Constants)
{
- if (reference.local_root_signature_entry >= 0)
- non_uniform = true;
- else if (!reference.base_resource_is_array)
- non_uniform = false;
+ // Access chain into the desired member once.
+ spv::Id id = build_shader_record_access_chain(impl, reference.local_root_signature_entry);
+
+ auto &meta = impl.handle_to_resource_meta[id];
+ meta = {};
+ meta.storage = spv::StorageClassShaderRecordBufferKHR;
+ meta.kind = DXIL::ResourceKind::CBuffer;
+ impl.handle_to_root_member_offset[instruction] = reference.local_root_signature_entry;
+ impl.rewrite_value(instruction, id);
+ }
+ else
+ {
+ bool is_non_uniform = false;
bool ssbo = reference.bindless && impl.options.bindless_cbv_ssbo_emulation;
auto storage = ssbo ? spv::StorageClassStorageBuffer : spv::StorageClassUniform;
- auto desc_type = ssbo ? DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT : DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT;
+ auto descriptor_type = ssbo ? DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT : DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT;
- type_id = builder.getContainedTypeId(type_id);
- Operation *op = impl.allocate(spv::OpAccessChain, instruction, builder.makePointer(storage, type_id));
- op->add_id(base_cbv_id);
+ Vector<Converter::Impl::RawDeclarationVariable> raw_declarations;
+ spv::Id loaded_id = 0;
+ spv::Id resource_id = 0;
+ raw_declarations.reserve(reference.var_alias_group.size());
- if (reference.bindless)
+ if (reference.var_id)
{
- spv::Id offset_id = build_bindless_heap_offset(impl, reference, desc_type,
- reference.base_resource_is_array ? instruction_offset : nullptr);
- if (!offset_id)
+ resource_id = reference.var_id;
+ if (!build_load_resource_handle(impl, resource_id, reference, descriptor_type, instruction,
+ instruction_offset, non_uniform, is_non_uniform,
+ nullptr, &loaded_id, nullptr))
{
- LOGE("Failed to load CBV bindless offset.\n");
+ LOGE("Failed to load CBV resource handle.\n");
return false;
}
- op->add_id(offset_id);
}
- else
+
+ for (auto &alias : reference.var_alias_group)
{
- op->add_id(impl.get_id_for_value(instruction_offset));
+ resource_id = alias.var_id;
+ if (!build_load_resource_handle(impl, resource_id, reference, descriptor_type,
+ instruction, instruction_offset, non_uniform, is_non_uniform,
+ nullptr, &loaded_id, nullptr))
+ {
+ LOGE("Failed to load CBV resource handle.\n");
+ return false;
+ }
+
+ raw_declarations.push_back({ alias.declaration, loaded_id });
}
- impl.add(op);
- impl.rewrite_value(instruction, op->id);
+ auto &incoming_meta = impl.handle_to_resource_meta[resource_id];
- auto &meta = impl.handle_to_resource_meta[op->id];
- meta = {};
- meta.non_uniform = non_uniform;
+ auto &meta = impl.handle_to_resource_meta[loaded_id];
+ meta = incoming_meta;
+ meta.non_uniform = is_non_uniform;
meta.storage = storage;
+ meta.var_alias_group = std::move(raw_declarations);
meta.kind = DXIL::ResourceKind::CBuffer;
- if (meta.non_uniform)
+ if (is_non_uniform)
{
if (ssbo)
builder.addCapability(spv::CapabilityStorageBufferArrayNonUniformIndexingEXT);
else
builder.addCapability(spv::CapabilityUniformBufferArrayNonUniformIndexingEXT);
- builder.addDecoration(op->id, spv::DecorationNonUniformEXT);
builder.addExtension("SPV_EXT_descriptor_indexing");
}
}
- else if (reference.local_root_signature_entry >= 0)
- {
- // Either we have root constants or a physical storage pointer here.
- // CBufferLoad functions will deal with that. If we have a physical storage pointer, we can load it here.
- auto &local_entry = impl.local_root_signature[reference.local_root_signature_entry];
-
- if (local_entry.type == LocalRootSignatureType::Descriptor)
- {
- spv::Id id = build_root_descriptor_load_physical_pointer(impl, reference);
- auto &meta = impl.handle_to_resource_meta[id];
- meta = {};
- meta.storage = spv::StorageClassPhysicalStorageBuffer;
- meta.kind = DXIL::ResourceKind::CBuffer;
- impl.rewrite_value(instruction, id);
- }
- else
- {
- // Access chain into the desired member once.
- spv::Id id = build_shader_record_access_chain(impl, reference.local_root_signature_entry);
-
- auto &meta = impl.handle_to_resource_meta[id];
- meta = {};
- meta.storage = spv::StorageClassShaderRecordBufferKHR;
- meta.kind = DXIL::ResourceKind::CBuffer;
- impl.handle_to_root_member_offset[instruction] = reference.local_root_signature_entry;
- impl.rewrite_value(instruction, id);
- }
- }
- else
- {
- impl.rewrite_value(instruction, base_cbv_id);
- if (base_cbv_id == impl.root_constant_id)
- {
- unsigned member_offset = reference.push_constant_member;
- impl.handle_to_root_member_offset[instruction] = member_offset;
- }
- }
break;
}
@@ -1538,47 +1539,158 @@ bool emit_annotate_handle_instruction(Converter::Impl &impl, const llvm::CallIns
meta.binding_index, meta.offset, meta.non_uniform);
}
-static bool emit_cbuffer_load_legacy_physical_pointer(Converter::Impl &impl, const llvm::CallInst *instruction)
+static bool build_bitcast_32x4_to_16x8_composite(Converter::Impl &impl, const llvm::CallInst *instruction,
+ spv::Id loaded_id)
+{
+ auto &builder = impl.builder();
+
+ Vector<spv::Id> member_types(8);
+ spv::Id type_id = impl.get_type_id(instruction->getType()->getStructElementType(0));
+ for (auto &type : member_types)
+ type = type_id;
+
+ spv::Id vec2_type_id = builder.makeVectorType(type_id, 2);
+
+ spv::Id u32_composites[4];
+ for (unsigned i = 0; i < 4; i++)
+ {
+ auto *extract_op = impl.allocate(spv::OpCompositeExtract, builder.makeFloatType(32));
+ extract_op->add_id(loaded_id);
+ extract_op->add_literal(i);
+ impl.add(extract_op);
+ u32_composites[i] = extract_op->id;
+ }
+
+ spv::Id u16_composites[8];
+ for (unsigned i = 0; i < 4; i++)
+ {
+ auto *bitcast_op = impl.allocate(spv::OpBitcast, vec2_type_id);
+ bitcast_op->add_id(u32_composites[i]);
+ impl.add(bitcast_op);
+
+ for (unsigned j = 0; j < 2; j++)
+ {
+ auto *extract = impl.allocate(spv::OpCompositeExtract, type_id);
+ extract->add_id(bitcast_op->id);
+ extract->add_literal(j);
+ impl.add(extract);
+ u16_composites[2 * i + j] = extract->id;
+ }
+ }
+
+ spv::Id struct_type_id = impl.get_struct_type(member_types, "CBVComposite16x8");
+ auto *composite = impl.allocate(spv::OpCompositeConstruct, struct_type_id);
+ for (auto &comp : u16_composites)
+ composite->add_id(comp);
+ impl.add(composite);
+ impl.rewrite_value(instruction, composite->id);
+ return true;
+}
+
+static bool emit_cbuffer_load_physical_pointer(Converter::Impl &impl, const llvm::CallInst *instruction)
{
auto &builder = impl.builder();
spv::Id member_index = impl.get_id_for_value(instruction->getOperand(2));
+ bool scalar_load = instruction->getType()->getTypeID() != llvm::Type::TypeID::StructTyID;
+ unsigned scalar_alignment;
+ spv::Id byteaddr_id;
+ uint32_t alignment;
- auto *mul_op = impl.allocate(spv::OpIMul, builder.makeUintType(32));
- mul_op->add_id(member_index);
- mul_op->add_id(builder.makeUintConstant(16));
- impl.add(mul_op);
+ const llvm::Type *result_component_type;
- spv::Id addr_vec = emit_u32x2_u32_add(impl, impl.get_id_for_value(instruction->getOperand(1)), mul_op->id);
+ if (!scalar_load)
+ {
+ auto *mul_op = impl.allocate(spv::OpIMul, builder.makeUintType(32));
+ mul_op->add_id(member_index);
+ mul_op->add_id(builder.makeUintConstant(16));
+ impl.add(mul_op);
+ byteaddr_id = mul_op->id;
+ result_component_type = instruction->getType()->getStructElementType(0);
+ scalar_alignment = get_type_scalar_alignment(impl, result_component_type);
+ alignment = 16;
+ }
+ else
+ {
+ byteaddr_id = member_index;
+ // DXIL emits the alignment, but we cannot trust it, DXC is completely buggy here and emits
+ // obviously bogus alignment values.
+ // Use scalar alignment.
+ result_component_type = instruction->getType();
+ alignment = get_type_scalar_alignment(impl, instruction->getType());
+ scalar_alignment = alignment;
+ }
+
+ // Handle min16float where we want FP16 value, but FP32 physical.
+ spv::Op value_cast_op = spv::OpNop;
+ spv::Id physical_type_id = 0;
+ get_physical_load_store_cast_info(impl, result_component_type, physical_type_id, value_cast_op);
+
+ spv::Id addr_vec = emit_u32x2_u32_add(impl, impl.get_id_for_value(instruction->getOperand(1)), byteaddr_id);
auto *result_type = instruction->getType();
- spv::Id vec_type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4);
+ unsigned physical_vecsize;
+ spv::Id result_type_id;
+
+ if (scalar_load)
+ {
+ result_type_id = impl.get_type_id(result_type);
+ physical_vecsize = 1;
+ }
+ else
+ {
+ if (scalar_alignment != 2)
+ {
+ physical_vecsize = 16 / scalar_alignment;
+ result_type_id = builder.makeVectorType(physical_type_id, physical_vecsize);
+ }
+ else
+ {
+ result_type_id = builder.makeVectorType(builder.makeFloatType(32), 4);
+ physical_vecsize = 4;
+ }
+ }
+
Converter::Impl::PhysicalPointerMeta ptr_meta = {};
ptr_meta.nonwritable = true;
- spv::Id ptr_type_id = impl.get_physical_pointer_block_type(vec_type_id, ptr_meta);
+ spv::Id ptr_type_id = impl.get_physical_pointer_block_type(result_type_id, ptr_meta);
auto *ptr_bitcast_op = impl.allocate(spv::OpBitcast, ptr_type_id);
ptr_bitcast_op->add_id(addr_vec);
impl.add(ptr_bitcast_op);
- auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, vec_type_id));
+ auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, result_type_id));
chain_op->add_id(ptr_bitcast_op->id);
chain_op->add_id(builder.makeUintConstant(0));
impl.add(chain_op);
- auto *load_op = impl.allocate(spv::OpLoad, instruction, vec_type_id);
+ auto *load_op = impl.allocate(spv::OpLoad, instruction, result_type_id);
load_op->add_id(chain_op->id);
load_op->add_literal(spv::MemoryAccessAlignedMask);
- load_op->add_literal(16);
+ load_op->add_literal(alignment);
impl.add(load_op);
+ // Handle f16x8 loads.
+ if (!scalar_load && scalar_alignment == 2)
+ return build_bitcast_32x4_to_16x8_composite(impl, instruction, load_op->id);
+ else if (value_cast_op != spv::OpNop)
+ {
+ spv::Id type_id = impl.get_type_id(result_component_type);
+ if (physical_vecsize != 1)
+ type_id = builder.makeVectorType(type_id, physical_vecsize);
+ auto *cast_op = impl.allocate(value_cast_op, type_id);
+ cast_op->add_id(impl.get_id_for_value(instruction));
+ impl.add(cast_op);
+ impl.rewrite_value(instruction, cast_op->id);
+ }
+
return true;
}
-static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llvm::CallInst *instruction,
- spv::Id base_ptr,
- spv::StorageClass storage,
- unsigned index_offset, unsigned num_elements)
+static bool emit_cbuffer_load_from_uints(Converter::Impl &impl, const llvm::CallInst *instruction,
+ spv::Id base_ptr,
+ spv::StorageClass storage,
+ unsigned index_offset, unsigned num_elements)
{
auto &builder = impl.builder();
@@ -1589,18 +1701,56 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv
return false;
}
- unsigned member_index = 4 * unsigned(constant_int->getUniqueInteger().getZExtValue());
+ // CBufferLoad vs CBufferLoadLegacy
+ bool scalar_load = instruction->getType()->getTypeID() != llvm::Type::TypeID::StructTyID;
+ auto member_index = unsigned(constant_int->getUniqueInteger().getZExtValue());
+
+ // In scalar load, we index by byte offset. Ignore alignment, we read from registers.
+ if (scalar_load)
+ {
+ if (member_index % 4)
+ {
+ LOGE("Scalar CBufferLoad on root constant buffer is not aligned to 4 bytes.\n");
+ return false;
+ }
+ member_index /= 4;
+
+ if (get_type_scalar_alignment(impl, instruction->getType()) != 4)
+ {
+ LOGE("Attempting to use root constant buffer with non-32bit type.\n");
+ return false;
+ }
+ }
+ else
+ {
+ // In legacy load, we index in terms of float4[]s.
+ member_index *= 4;
+
+ if (get_type_scalar_alignment(impl, instruction->getType()->getStructElementType(0)) != 4)
+ {
+ LOGE("Attempting to use root constant buffer with non-32bit type.\n");
+ return false;
+ }
+ }
+
member_index += index_offset;
if (member_index >= num_elements)
+ {
+ LOGE("Root constant CBV is accessed out of bounds. (%u > %u).\n", member_index, num_elements);
return false;
+ }
- unsigned num_words = std::min(4u, num_elements - member_index);
+ unsigned num_words = std::min(scalar_load ? 1u : 4u, num_elements - member_index);
auto *result_type = instruction->getType();
// Root constants are emitted as uints as they are typically used as indices.
- bool need_bitcast = result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::IntegerTyID;
+ bool need_bitcast;
+ if (scalar_load)
+ need_bitcast = result_type->getTypeID() != llvm::Type::TypeID::IntegerTyID;
+ else
+ need_bitcast = result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::IntegerTyID;
spv::Id elements[4];
for (unsigned i = 0; i < 4; i++)
@@ -1625,10 +1775,21 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv
elements[i] = builder.makeUintConstant(0);
}
- spv::Id id = impl.build_vector(builder.makeUintType(32), elements, 4);
+ spv::Id id;
+
+ if (scalar_load)
+ id = elements[0];
+ else
+ id = impl.build_vector(builder.makeUintType(32), elements, 4);
+
if (need_bitcast)
{
- spv::Id type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4);
+ spv::Id type_id;
+ if (scalar_load)
+ type_id = impl.get_type_id(result_type);
+ else
+ type_id = builder.makeVectorType(impl.get_type_id(result_type->getStructElementType(0)), 4);
+
auto *op = impl.allocate(spv::OpBitcast, instruction, type_id);
op->add_id(id);
impl.add(op);
@@ -1641,23 +1802,121 @@ static bool emit_cbuffer_load_legacy_from_uints(Converter::Impl &impl, const llv
return true;
}
-static bool emit_cbuffer_load_legacy_shader_record(Converter::Impl &impl, const llvm::CallInst *instruction,
- unsigned local_root_signature_entry)
+static bool emit_cbuffer_load_shader_record(Converter::Impl &impl, const llvm::CallInst *instruction,
+ unsigned local_root_signature_entry)
{
auto &entry = impl.local_root_signature[local_root_signature_entry];
- return emit_cbuffer_load_legacy_from_uints(impl, instruction,
- impl.get_id_for_value(instruction->getOperand(1)),
- spv::StorageClassShaderRecordBufferKHR,
- 0, entry.constants.num_words);
+ return emit_cbuffer_load_from_uints(impl, instruction,
+ impl.get_id_for_value(instruction->getOperand(1)),
+ spv::StorageClassShaderRecordBufferKHR,
+ 0, entry.constants.num_words);
+}
+
+static bool emit_cbuffer_load_root_constant(Converter::Impl &impl, const llvm::CallInst *instruction)
+{
+ return emit_cbuffer_load_from_uints(impl, instruction,
+ impl.root_constant_id,
+ spv::StorageClassPushConstant,
+ impl.handle_to_root_member_offset[instruction->getOperand(1)],
+ impl.root_constant_num_words + impl.root_descriptor_count);
}
-static bool emit_cbuffer_load_legacy_root_constant(Converter::Impl &impl, const llvm::CallInst *instruction)
+bool emit_cbuffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction)
{
- return emit_cbuffer_load_legacy_from_uints(impl, instruction,
- impl.root_constant_id,
- spv::StorageClassPushConstant,
- impl.handle_to_root_member_offset[instruction->getOperand(1)],
- impl.root_constant_num_words + impl.root_descriptor_count);
+ auto &builder = impl.builder();
+
+ // This always returns a scalar.
+ spv::Id ptr_id = impl.get_id_for_value(instruction->getOperand(1));
+ if (!ptr_id)
+ return false;
+
+ if (ptr_id == impl.root_constant_id)
+ {
+ return emit_cbuffer_load_root_constant(impl, instruction);
+ }
+ else
+ {
+ auto &meta = impl.handle_to_resource_meta[ptr_id];
+
+ if (meta.storage == spv::StorageClassPhysicalStorageBuffer)
+ {
+ return emit_cbuffer_load_physical_pointer(impl, instruction);
+ }
+ else if (meta.storage == spv::StorageClassShaderRecordBufferKHR)
+ {
+ return emit_cbuffer_load_shader_record(impl, instruction,
+ impl.handle_to_root_member_offset[instruction->getOperand(1)]);
+ }
+
+ // Handle min16float where we want FP16 value, but FP32 physical.
+ spv::Op value_cast_op = spv::OpNop;
+ spv::Id physical_type_id = 0;
+ get_physical_load_store_cast_info(impl, instruction->getType(), physical_type_id, value_cast_op);
+
+ unsigned addr_shift;
+ RawWidth raw_width;
+ switch (get_type_scalar_alignment(impl, instruction->getType()))
+ {
+ case 2:
+ raw_width = RawWidth::B16;
+ addr_shift = 1;
+ break;
+
+ case 4:
+ raw_width = RawWidth::B32;
+ addr_shift = 2;
+ break;
+
+ case 8:
+ raw_width = RawWidth::B64;
+ addr_shift = 3;
+ break;
+
+ default:
+ return false;
+ }
+
+ unsigned raw_bits = raw_width_to_bits(raw_width);
+ ptr_id = get_buffer_alias_handle(impl, meta, ptr_id, raw_width, RawVecSize::V1);
+
+ spv::Id array_index_id = build_index_divider(impl, instruction->getOperand(2), addr_shift, 1);
+
+ Operation *access_chain_op = impl.allocate(
+ spv::OpAccessChain, builder.makePointer(meta.storage, builder.makeFloatType(raw_bits)));
+ access_chain_op->add_ids({ ptr_id, builder.makeUintConstant(0), array_index_id });
+ impl.add(access_chain_op);
+
+ if (meta.non_uniform)
+ builder.addDecoration(access_chain_op->id, spv::DecorationNonUniformEXT);
+
+ bool need_bitcast = false;
+ auto *result_type = instruction->getType();
+ if (result_type->getTypeID() == llvm::Type::TypeID::IntegerTyID)
+ need_bitcast = true;
+
+ Operation *load_op = impl.allocate(spv::OpLoad, instruction, builder.makeFloatType(raw_bits));
+ load_op->add_id(access_chain_op->id);
+ impl.add(load_op);
+
+ if (need_bitcast)
+ {
+ Operation *op = impl.allocate(spv::OpBitcast, builder.makeUintType(raw_bits));
+ op->add_id(load_op->id);
+ impl.add(op);
+ impl.rewrite_value(instruction, op->id);
+ }
+
+ // Handle min16float4 value cast scenarios.
+ if (value_cast_op != spv::OpNop)
+ {
+ auto *cast_op = impl.allocate(value_cast_op, impl.get_type_id(instruction->getType()));
+ cast_op->add_id(impl.get_id_for_value(instruction));
+ impl.add(cast_op);
+ impl.rewrite_value(instruction, cast_op->id);
+ }
+
+ return true;
+ }
}
bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::CallInst *instruction)
@@ -1672,64 +1931,101 @@ bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::Cal
if (ptr_id == impl.root_constant_id)
{
- return emit_cbuffer_load_legacy_root_constant(impl, instruction);
+ return emit_cbuffer_load_root_constant(impl, instruction);
}
else
{
- auto itr = impl.handle_to_resource_meta.find(ptr_id);
- bool non_uniform = false;
- spv::StorageClass storage = spv::StorageClassUniform;
+ auto &meta = impl.handle_to_resource_meta[ptr_id];
+
+ auto *result_type = instruction->getType();
- if (itr != impl.handle_to_resource_meta.end())
+ if (result_type->getTypeID() != llvm::Type::TypeID::StructTyID)
{
- non_uniform = itr->second.non_uniform;
- storage = itr->second.storage;
+ LOGE("CBufferLoadLegacy: return type must be struct.\n");
+ return false;
}
- if (storage == spv::StorageClassPhysicalStorageBuffer)
+ if (meta.storage == spv::StorageClassPhysicalStorageBuffer)
{
- return emit_cbuffer_load_legacy_physical_pointer(impl, instruction);
+ return emit_cbuffer_load_physical_pointer(impl, instruction);
}
- else if (storage == spv::StorageClassShaderRecordBufferKHR)
+ else if (meta.storage == spv::StorageClassShaderRecordBufferKHR)
{
- return emit_cbuffer_load_legacy_shader_record(impl, instruction,
- impl.handle_to_root_member_offset[instruction->getOperand(1)]);
+ return emit_cbuffer_load_shader_record(impl, instruction,
+ impl.handle_to_root_member_offset[instruction->getOperand(1)]);
}
+ // Handle min16float where we want FP16 value, but FP32 physical.
+ auto *result_component_type = result_type->getStructElementType(0);
+ spv::Op value_cast_op = spv::OpNop;
+ spv::Id physical_type_id = 0;
+ get_physical_load_store_cast_info(impl, result_component_type, physical_type_id, value_cast_op);
+
+ RawVecSize alias_vecsize;
+ RawWidth alias_width;
+ unsigned scalar_alignment = get_type_scalar_alignment(impl, result_component_type);
+ unsigned bits, vecsize;
+
+ if (scalar_alignment == 8)
+ {
+ alias_width = RawWidth::B64;
+ alias_vecsize = RawVecSize::V2;
+ }
+ else
+ {
+ alias_width = RawWidth::B32;
+ alias_vecsize = RawVecSize::V4;
+ }
+
+ bits = raw_width_to_bits(alias_width);
+ vecsize = raw_vecsize_to_vecsize(alias_vecsize);
+
+ ptr_id = get_buffer_alias_handle(impl, meta, ptr_id, alias_width, alias_vecsize);
+
spv::Id vec4_index = impl.get_id_for_value(instruction->getOperand(2));
- Operation *access_chain_op = impl.allocate(
- spv::OpAccessChain, builder.makePointer(storage, builder.makeVectorType(builder.makeFloatType(32), 4)));
+ spv::Id vector_type_id = builder.makeVectorType(builder.makeFloatType(bits), vecsize);
+ Operation *access_chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(meta.storage, vector_type_id));
access_chain_op->add_ids({ ptr_id, builder.makeUintConstant(0), vec4_index });
impl.add(access_chain_op);
- if (non_uniform)
+ if (meta.non_uniform)
builder.addDecoration(access_chain_op->id, spv::DecorationNonUniformEXT);
bool need_bitcast = false;
- auto *result_type = instruction->getType();
- if (result_type->getTypeID() != llvm::Type::TypeID::StructTyID)
- return false;
- if (result_type->getStructNumElements() != 4)
- return false;
- if (result_type->getStructElementType(0)->getTypeID() != llvm::Type::TypeID::FloatTyID)
+ if (result_type->getStructElementType(0)->getTypeID() == llvm::Type::TypeID::IntegerTyID)
need_bitcast = true;
- Operation *load_op =
- impl.allocate(spv::OpLoad, instruction, builder.makeVectorType(builder.makeFloatType(32), 4));
+ Operation *load_op = impl.allocate(spv::OpLoad, instruction, vector_type_id);
load_op->add_id(access_chain_op->id);
impl.add(load_op);
- if (need_bitcast)
+ if (scalar_alignment == 2)
{
- Operation *op = impl.allocate(spv::OpBitcast, builder.makeVectorType(builder.makeUintType(32), 4));
+ // Special case, need to bitcast and build a struct with 8 elements instead.
+ if (!build_bitcast_32x4_to_16x8_composite(impl, instruction, load_op->id))
+ return false;
+ }
+ else if (need_bitcast)
+ {
+ spv::Id uint_vector_type_id = builder.makeVectorType(builder.makeUintType(bits), vecsize);
+ Operation *op = impl.allocate(spv::OpBitcast, uint_vector_type_id);
- assert(result_type->getStructElementType(0)->getTypeID() == llvm::Type::TypeID::IntegerTyID);
op->add_id(load_op->id);
impl.add(op);
impl.rewrite_value(instruction, op->id);
}
- return true;
+
+ // If we have min-precision loads, we might have to truncate here.
+ if (value_cast_op != spv::OpNop)
+ {
+ auto *cast_op = impl.allocate(value_cast_op, builder.makeVectorType(impl.get_type_id(result_component_type), vecsize));
+ cast_op->add_id(impl.get_id_for_value(instruction));
+ impl.add(cast_op);
+ impl.rewrite_value(instruction, cast_op->id);
+ }
}
+
+ return true;
}
} // namespace dxil_spv
diff --git a/opcodes/dxil/dxil_resources.hpp b/opcodes/dxil/dxil_resources.hpp
index dc8c842..d04784b 100644
--- a/opcodes/dxil/dxil_resources.hpp
+++ b/opcodes/dxil/dxil_resources.hpp
@@ -32,6 +32,7 @@ bool emit_create_handle_from_heap_instruction(Converter::Impl &impl, const llvm:
bool emit_create_handle_from_binding_instruction(Converter::Impl &impl, const llvm::CallInst *instruction);
bool emit_annotate_handle_instruction(Converter::Impl &impl, const llvm::CallInst *instruction);
+bool emit_cbuffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction);
bool emit_cbuffer_load_legacy_instruction(Converter::Impl &impl, const llvm::CallInst *instruction);
template <GLSLstd450 opcode>
diff --git a/opcodes/opcodes_dxil_builtins.cpp b/opcodes/opcodes_dxil_builtins.cpp
index 74bf633..9099cc1 100644
--- a/opcodes/opcodes_dxil_builtins.cpp
+++ b/opcodes/opcodes_dxil_builtins.cpp
@@ -48,6 +48,7 @@ struct DXILDispatcher
OP(CreateHandle) = emit_create_handle_instruction;
OP(CreateHandleForLib) = emit_create_handle_for_lib_instruction;
OP(CBufferLoadLegacy) = emit_cbuffer_load_legacy_instruction;
+ OP(CBufferLoad) = emit_cbuffer_load_instruction;
OP(EvalSnapped) = emit_interpolate_dispatch<GLSLstd450InterpolateAtOffset>;
OP(EvalSampleIndex) = emit_interpolate_dispatch<GLSLstd450InterpolateAtSample>;
OP(EvalCentroid) = emit_interpolate_dispatch<GLSLstd450InterpolateAtCentroid>;
@@ -356,7 +357,12 @@ bool emit_dxil_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
return false;
}
- return global_dispatcher.builder_lut[opcode](impl, instruction);
+ if (!global_dispatcher.builder_lut[opcode](impl, instruction))
+ {
+ LOGE("Failed DXIL opcode %u.\n", opcode);
+ return false;
+ }
+ return true;
}
static void update_raw_access_tracking_from_vector_type(Converter::Impl::AccessTracking &tracking,
@@ -463,6 +469,64 @@ get_resource_meta_from_buffer_op(Converter::Impl &impl, const llvm::CallInst *in
return { DXIL::ResourceKind::Invalid, 0 };
}
+static void analyze_dxil_cbuffer_load(Converter::Impl &impl, const llvm::CallInst *instruction)
+{
+ Converter::Impl::AccessTracking *tracking = nullptr;
+ auto itr = impl.llvm_value_to_cbv_resource_index_map.find(instruction->getOperand(1));
+ if (itr != impl.llvm_value_to_cbv_resource_index_map.end())
+ tracking = &impl.cbv_access_tracking[itr->second];
+
+ if (!tracking)
+ {
+ auto annotate_itr = impl.llvm_annotate_handle_uses.find(instruction->getOperand(1));
+ if (annotate_itr != impl.llvm_annotate_handle_uses.end())
+ tracking = &annotate_itr->second.tracking;
+ }
+
+ if (tracking)
+ {
+ if (instruction->getType()->getTypeID() == llvm::Type::TypeID::StructTyID)
+ {
+ // Legacy float4 model. However, it seems like DXIL also supports f16x8, f32x4 and f64x2 ... :(
+ switch (get_type_scalar_alignment(impl, instruction->getType()->getStructElementType(0)))
+ {
+ case 2:
+ case 4:
+ // We'll bit-cast on-demand for f16x8.
+ tracking->raw_access_buffer_declarations[int(RawWidth::B32)][int(RawVecSize::V4)] = true;
+ break;
+
+ case 8:
+ tracking->raw_access_buffer_declarations[int(RawWidth::B64)][int(RawVecSize::V2)] = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else
+ {
+ switch (get_type_scalar_alignment(impl, instruction->getType()))
+ {
+ case 2:
+ tracking->raw_access_buffer_declarations[int(RawWidth::B16)][int(RawVecSize::V1)] = true;
+ break;
+
+ case 4:
+ tracking->raw_access_buffer_declarations[int(RawWidth::B32)][int(RawVecSize::V1)] = true;
+ break;
+
+ case 8:
+ tracking->raw_access_buffer_declarations[int(RawWidth::B64)][int(RawVecSize::V1)] = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+}
+
static void analyze_dxil_buffer_load(Converter::Impl &impl, const llvm::CallInst *instruction, DXIL::Op opcode)
{
Converter::Impl::AccessTracking *tracking = nullptr;
@@ -653,6 +717,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn
impl.llvm_value_to_uav_resource_index_map[instruction] = resource_range;
else if (static_cast<DXIL::ResourceType>(resource_type_operand) == DXIL::ResourceType::SRV)
impl.llvm_value_to_srv_resource_index_map[instruction] = resource_range;
+ else if (static_cast<DXIL::ResourceType>(resource_type_operand) == DXIL::ResourceType::CBV)
+ impl.llvm_value_to_cbv_resource_index_map[instruction] = resource_range;
if (impl.options.descriptor_qa_enabled && impl.options.descriptor_qa_sink_handles)
impl.resource_handle_to_block[instruction] = bb;
@@ -669,6 +735,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn
impl.llvm_value_to_uav_resource_index_map[instruction] = itr->second.meta_index;
else if (itr->second.type == DXIL::ResourceType::SRV)
impl.llvm_value_to_srv_resource_index_map[instruction] = itr->second.meta_index;
+ else if (itr->second.type == DXIL::ResourceType::CBV)
+ impl.llvm_value_to_cbv_resource_index_map[instruction] = itr->second.meta_index;
impl.llvm_active_global_resource_variables.insert(itr->second.variable);
@@ -726,7 +794,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn
if (use.resource_kind == DXIL::ResourceKind::StructuredBuffer)
use.stride = params;
- else if (use.resource_kind != DXIL::ResourceKind::RawBuffer)
+ else if (use.resource_kind != DXIL::ResourceKind::RawBuffer &&
+ use.resource_kind != DXIL::ResourceKind::CBuffer)
use.component_type = DXIL::ComponentType(params & 0xff);
}
else if (meta.resource_op == DXIL::Op::CreateHandleFromBinding ||
@@ -736,6 +805,8 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn
impl.llvm_value_to_uav_resource_index_map[instruction] = meta.binding_index;
else if (meta.resource_type == DXIL::ResourceType::SRV)
impl.llvm_value_to_srv_resource_index_map[instruction] = meta.binding_index;
+ else if (meta.resource_type == DXIL::ResourceType::CBV)
+ impl.llvm_value_to_cbv_resource_index_map[instruction] = meta.binding_index;
}
if (impl.options.descriptor_qa_enabled && impl.options.descriptor_qa_sink_handles)
@@ -751,6 +822,11 @@ bool analyze_dxil_resource_instruction(Converter::Impl &impl, const llvm::CallIn
analyze_dxil_buffer_store(impl, instruction, op);
break;
+ case DXIL::Op::CBufferLoad:
+ case DXIL::Op::CBufferLoadLegacy:
+ analyze_dxil_cbuffer_load(impl, instruction);
+ break;
+
case DXIL::Op::BufferUpdateCounter:
{
impl.llvm_values_using_update_counter.insert(instruction->getOperand(1));
diff --git a/opcodes/opcodes_llvm_builtins.cpp b/opcodes/opcodes_llvm_builtins.cpp
index 3d2b144..f62a50a 100644
--- a/opcodes/opcodes_llvm_builtins.cpp
+++ b/opcodes/opcodes_llvm_builtins.cpp
@@ -1092,11 +1092,8 @@ bool emit_select_instruction(Converter::Impl &impl, const llvm::SelectInst *inst
{
Operation *op = impl.allocate(spv::OpSelect, instruction);
- op->add_ids({
- impl.get_id_for_value(instruction->getOperand(0)),
- impl.get_id_for_value(instruction->getOperand(1)),
- impl.get_id_for_value(instruction->getOperand(2)),
- });
+ for (unsigned i = 0; i < 3; i++)
+ op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
impl.add(op);
return true;
@@ -1114,11 +1111,11 @@ bool emit_cmpxchg_instruction(Converter::Impl &impl, const llvm::AtomicCmpXchgIn
atomic_op->add_id(impl.get_id_for_value(instruction->getPointerOperand()));
- atomic_op->add_ids({ builder.makeUintConstant(spv::ScopeWorkgroup),
- builder.makeUintConstant(0), // Relaxed
- builder.makeUintConstant(0), // Relaxed
- impl.get_id_for_value(instruction->getNewValOperand()),
- impl.get_id_for_value(instruction->getCompareOperand()) });
+ atomic_op->add_id(builder.makeUintConstant(spv::ScopeWorkgroup));
+ atomic_op->add_id(builder.makeUintConstant(0));
+ atomic_op->add_id(builder.makeUintConstant(0));
+ atomic_op->add_id(impl.get_id_for_value(instruction->getNewValOperand()));
+ atomic_op->add_id(impl.get_id_for_value(instruction->getCompareOperand()));
impl.add(atomic_op);
@@ -1196,11 +1193,9 @@ bool emit_atomicrmw_instruction(Converter::Impl &impl, const llvm::AtomicRMWInst
op->add_id(impl.get_id_for_value(instruction->getPointerOperand()));
- op->add_ids({
- builder.makeUintConstant(spv::ScopeWorkgroup),
- builder.makeUintConstant(0),
- impl.get_id_for_value(instruction->getValOperand()),
- });
+ op->add_id(builder.makeUintConstant(spv::ScopeWorkgroup));
+ op->add_id(builder.makeUintConstant(0));
+ op->add_id(impl.get_id_for_value(instruction->getValOperand()));
impl.add(op);
return true;
@@ -1209,7 +1204,9 @@ bool emit_atomicrmw_instruction(Converter::Impl &impl, const llvm::AtomicRMWInst
bool emit_shufflevector_instruction(Converter::Impl &impl, const llvm::ShuffleVectorInst *inst)
{
Operation *op = impl.allocate(spv::OpVectorShuffle, inst);
- op->add_ids({ impl.get_id_for_value(inst->getOperand(0)), impl.get_id_for_value(inst->getOperand(1)) });
+
+ for (unsigned i = 0; i < 2; i++)
+ op->add_id(impl.get_id_for_value(inst->getOperand(i)));
unsigned num_outputs = inst->getType()->getVectorNumElements();
for (unsigned i = 0; i < num_outputs; i++)
diff --git a/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
new file mode 100644
index 0000000..a9846a6
--- /dev/null
+++ b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
@@ -0,0 +1,105 @@
+#version 460
+#extension GL_EXT_nonuniform_qualifier : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 0, binding = 0, scalar) uniform BindlessCBV
+{
+ float _m0[16384];
+} _12[];
+
+layout(set = 0, binding = 0, std140) uniform _16_19
+{
+ vec4 _m0[4096];
+} _19[];
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ uint _34 = floatBitsToUint(_12[1u]._m0[1u]) >> 2u;
+ SV_Target.x = _12[0u]._m0[_34];
+ SV_Target.y = _12[0u]._m0[_34];
+ SV_Target.z = _12[0u]._m0[_34];
+ SV_Target.w = _12[0u]._m0[_34];
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 45
+; Schema: 0
+OpCapability Shader
+OpCapability RuntimeDescriptorArray
+OpExtension "SPV_EXT_descriptor_indexing"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %21
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %9 "BindlessCBV"
+OpName %16 "BindlessCBV"
+OpName %21 "SV_Target"
+OpDecorate %8 ArrayStride 4
+OpDecorate %9 Block
+OpMemberDecorate %9 0 Offset 0
+OpDecorate %12 DescriptorSet 0
+OpDecorate %12 Binding 0
+OpDecorate %15 ArrayStride 16
+OpDecorate %16 Block
+OpMemberDecorate %16 0 Offset 0
+OpDecorate %19 DescriptorSet 0
+OpDecorate %19 Binding 0
+OpDecorate %21 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeFloat 32
+%6 = OpTypeInt 32 0
+%7 = OpConstant %6 16384
+%8 = OpTypeArray %5 %7
+%9 = OpTypeStruct %8
+%10 = OpTypeRuntimeArray %9
+%11 = OpTypePointer Uniform %10
+%12 = OpVariable %11 Uniform
+%13 = OpTypeVector %5 4
+%14 = OpConstant %6 4096
+%15 = OpTypeArray %13 %14
+%16 = OpTypeStruct %15
+%17 = OpTypeRuntimeArray %16
+%18 = OpTypePointer Uniform %17
+%19 = OpVariable %18 Uniform
+%20 = OpTypePointer Output %13
+%21 = OpVariable %20 Output
+%22 = OpTypePointer Uniform %9
+%24 = OpConstant %6 0
+%26 = OpConstant %6 1
+%27 = OpTypePointer Uniform %16
+%29 = OpConstant %6 2
+%30 = OpTypePointer Uniform %5
+%37 = OpTypePointer Output %5
+%42 = OpConstant %6 3
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %43
+%43 = OpLabel
+%23 = OpAccessChain %22 %12 %24
+%25 = OpAccessChain %22 %12 %26
+%28 = OpAccessChain %27 %19 %29
+%31 = OpAccessChain %30 %25 %24 %26
+%32 = OpLoad %5 %31
+%33 = OpBitcast %6 %32
+%34 = OpShiftRightLogical %6 %33 %29
+%35 = OpAccessChain %30 %23 %24 %34
+%36 = OpLoad %5 %35
+%38 = OpAccessChain %37 %21 %24
+OpStore %38 %36
+%39 = OpAccessChain %37 %21 %26
+OpStore %39 %36
+%40 = OpAccessChain %37 %21 %29
+OpStore %40 %36
+%41 = OpAccessChain %37 %21 %42
+OpStore %41 %36
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
new file mode 100644
index 0000000..98b135b
--- /dev/null
+++ b/reference/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
@@ -0,0 +1,146 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_nonuniform_qualifier : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 0, binding = 0, scalar) uniform BindlessCBV
+{
+ float _m0[16384];
+} _12[];
+
+layout(set = 0, binding = 0, scalar) uniform _16_19
+{
+ float16_t _m0[32768];
+} _19[];
+
+layout(set = 0, binding = 0, scalar) uniform _23_26
+{
+ double _m0[8192];
+} _26[];
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ float _53 = (_12[0u]._m0[1u] + float(_19[1u]._m0[1u])) + float(doubleBitsToUint64(_26[2u]._m0[1u]));
+ SV_Target.x = _53;
+ SV_Target.y = _53;
+ SV_Target.z = _53;
+ SV_Target.w = _53;
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 62
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability RuntimeDescriptorArray
+OpExtension "SPV_EXT_descriptor_indexing"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %29
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %9 "BindlessCBV"
+OpName %16 "BindlessCBV"
+OpName %23 "BindlessCBV"
+OpName %29 "SV_Target"
+OpDecorate %8 ArrayStride 4
+OpDecorate %9 Block
+OpMemberDecorate %9 0 Offset 0
+OpDecorate %12 DescriptorSet 0
+OpDecorate %12 Binding 0
+OpDecorate %15 ArrayStride 2
+OpDecorate %16 Block
+OpMemberDecorate %16 0 Offset 0
+OpDecorate %19 DescriptorSet 0
+OpDecorate %19 Binding 0
+OpDecorate %22 ArrayStride 8
+OpDecorate %23 Block
+OpMemberDecorate %23 0 Offset 0
+OpDecorate %26 DescriptorSet 0
+OpDecorate %26 Binding 0
+OpDecorate %29 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeFloat 32
+%6 = OpTypeInt 32 0
+%7 = OpConstant %6 16384
+%8 = OpTypeArray %5 %7
+%9 = OpTypeStruct %8
+%10 = OpTypeRuntimeArray %9
+%11 = OpTypePointer Uniform %10
+%12 = OpVariable %11 Uniform
+%13 = OpTypeFloat 16
+%14 = OpConstant %6 32768
+%15 = OpTypeArray %13 %14
+%16 = OpTypeStruct %15
+%17 = OpTypeRuntimeArray %16
+%18 = OpTypePointer Uniform %17
+%19 = OpVariable %18 Uniform
+%20 = OpTypeFloat 64
+%21 = OpConstant %6 8192
+%22 = OpTypeArray %20 %21
+%23 = OpTypeStruct %22
+%24 = OpTypeRuntimeArray %23
+%25 = OpTypePointer Uniform %24
+%26 = OpVariable %25 Uniform
+%27 = OpTypeVector %5 4
+%28 = OpTypePointer Output %27
+%29 = OpVariable %28 Output
+%30 = OpTypePointer Uniform %9
+%32 = OpConstant %6 0
+%33 = OpTypePointer Uniform %16
+%35 = OpConstant %6 1
+%36 = OpTypePointer Uniform %23
+%38 = OpConstant %6 2
+%39 = OpTypePointer Uniform %5
+%42 = OpTypePointer Uniform %13
+%45 = OpTypeInt 64 0
+%46 = OpTypePointer Uniform %20
+%54 = OpTypePointer Output %5
+%59 = OpConstant %6 3
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %60
+%60 = OpLabel
+%31 = OpAccessChain %30 %12 %32
+%34 = OpAccessChain %33 %19 %35
+%37 = OpAccessChain %36 %26 %38
+%40 = OpAccessChain %39 %31 %32 %35
+%41 = OpLoad %5 %40
+%43 = OpAccessChain %42 %34 %32 %35
+%44 = OpLoad %13 %43
+%47 = OpAccessChain %46 %37 %32 %35
+%48 = OpLoad %20 %47
+%49 = OpBitcast %45 %48
+%50 = OpFConvert %5 %44
+%51 = OpConvertUToF %5 %49
+%52 = OpFAdd %5 %41 %50
+%53 = OpFAdd %5 %52 %51
+%55 = OpAccessChain %54 %29 %32
+OpStore %55 %53
+%56 = OpAccessChain %54 %29 %35
+OpStore %56 %53
+%57 = OpAccessChain %54 %29 %38
+OpStore %57 %53
+%58 = OpAccessChain %54 %29 %59
+OpStore %58 %53
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag b/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag
index 8c9b213..d99f257 100644
--- a/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag
+++ b/reference/shaders/descriptor_qa/early-2.bindless.descriptor-qa.frag
@@ -44,8 +44,8 @@ bool discard_state;
void descriptor_qa_report_fault(uint fault_type, uint heap_offset, uint cookie, uint heap_index, uint descriptor_type, uint actual_descriptor_type, uint instruction)
{
- uint _69 = atomicAdd(QAGlobalData.fault_atomic, 1u);
- if (_69 == 0u)
+ uint _61 = atomicAdd(QAGlobalData.fault_atomic, 1u);
+ if (_61 == 0u)
{
QAGlobalData.failed_cookie = cookie;
QAGlobalData.failed_offset = heap_offset;
@@ -61,15 +61,15 @@ void descriptor_qa_report_fault(uint fault_type, uint heap_offset, uint cookie,
uint descriptor_qa_check(uint heap_offset, uint descriptor_type_mask, uint instruction)
{
- uint _103 = QAHeapData.descriptor_count;
- uint _105 = QAHeapData.heap_index;
- uvec2 _107 = QAHeapData.cookies_descriptor_info[heap_offset];
- uint _115 = QAGlobalData.live_status_table[_107.x >> 5u];
- uint _126 = (uint(heap_offset >= _103) | (((_107.y & descriptor_type_mask) == descriptor_type_mask) ? 0u : 2u)) | (((_115 & (1u << (_107.x & 31u))) != 0u) ? 0u : 4u);
- if (_126 != 0u)
+ uint _95 = QAHeapData.descriptor_count;
+ uint _97 = QAHeapData.heap_index;
+ uvec2 _99 = QAHeapData.cookies_descriptor_info[heap_offset];
+ uint _107 = QAGlobalData.live_status_table[_99.x >> 5u];
+ uint _118 = (uint(heap_offset >= _95) | (((_99.y & descriptor_type_mask) == descriptor_type_mask) ? 0u : 2u)) | (((_107 & (1u << (_99.x & 31u))) != 0u) ? 0u : 4u);
+ if (_118 != 0u)
{
- descriptor_qa_report_fault(_126, heap_offset, _107.x, _105, descriptor_type_mask, _107.y, instruction);
- return _103;
+ descriptor_qa_report_fault(_118, heap_offset, _99.x, _97, descriptor_type_mask, _99.y, instruction);
+ return _95;
}
return heap_offset;
}
@@ -89,12 +89,12 @@ void main()
{
discard_state = true;
}
- uint _50 = descriptor_qa_check(registers._m0, 1u, 1u);
- vec4 _141 = texture(sampler2D(_13[_50], _17[registers._m2]), vec2(UV.x, UV.y));
- SV_Target.x = _141.x;
- SV_Target.y = _141.y;
- SV_Target.z = _141.z;
- SV_Target.w = _141.w;
+ uint _42 = descriptor_qa_check(registers._m0, 1u, 1u);
+ vec4 _133 = texture(sampler2D(_13[_42], _17[registers._m2]), vec2(UV.x, UV.y));
+ SV_Target.x = _133.x;
+ SV_Target.y = _133.y;
+ SV_Target.z = _133.z;
+ SV_Target.w = _133.w;
discard_exit();
}
@@ -104,7 +104,7 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 164
+; Bound: 156
; Schema: 0
OpCapability Shader
OpCapability RuntimeDescriptorArray
@@ -112,44 +112,44 @@ OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
OpExtension "SPV_KHR_physical_storage_buffer"
OpMemoryModel PhysicalStorageBuffer64 GLSL450
-OpEntryPoint Fragment %3 "main" %20 %23 %27
+OpEntryPoint Fragment %3 "main" %20 %23
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %6 "RootConstants"
OpName %8 "registers"
OpName %20 "UV"
OpName %23 "SV_Target"
-OpName %43 "discard_state"
-OpName %53 "DescriptorHeapGlobalQAData"
-OpMemberName %53 0 "failed_shader_hash"
-OpMemberName %53 1 "failed_offset"
-OpMemberName %53 2 "failed_heap"
-OpMemberName %53 3 "failed_cookie"
-OpMemberName %53 4 "fault_atomic"
-OpMemberName %53 5 "failed_instruction"
-OpMemberName %53 6 "failed_descriptor_type_mask"
-OpMemberName %53 7 "actual_descriptor_type_mask"
-OpMemberName %53 8 "fault_type"
-OpMemberName %53 9 "live_status_table"
-OpName %55 "QAGlobalData"
-OpName %64 "descriptor_qa_report_fault"
-OpName %57 "fault_type"
-OpName %58 "heap_offset"
-OpName %59 "cookie"
-OpName %60 "heap_index"
-OpName %61 "descriptor_type"
-OpName %62 "actual_descriptor_type"
-OpName %63 "instruction"
-OpName %93 "DescriptorHeapQAData"
-OpMemberName %93 0 "descriptor_count"
-OpMemberName %93 1 "heap_index"
-OpMemberName %93 2 "cookies_descriptor_info"
-OpName %95 "QAHeapData"
-OpName %100 "descriptor_qa_check"
-OpName %97 "heap_offset"
-OpName %98 "descriptor_type_mask"
-OpName %99 "instruction"
-OpName %156 "discard_exit"
+OpName %35 "discard_state"
+OpName %45 "DescriptorHeapGlobalQAData"
+OpMemberName %45 0 "failed_shader_hash"
+OpMemberName %45 1 "failed_offset"
+OpMemberName %45 2 "failed_heap"
+OpMemberName %45 3 "failed_cookie"
+OpMemberName %45 4 "fault_atomic"
+OpMemberName %45 5 "failed_instruction"
+OpMemberName %45 6 "failed_descriptor_type_mask"
+OpMemberName %45 7 "actual_descriptor_type_mask"
+OpMemberName %45 8 "fault_type"
+OpMemberName %45 9 "live_status_table"
+OpName %47 "QAGlobalData"
+OpName %56 "descriptor_qa_report_fault"
+OpName %49 "fault_type"
+OpName %50 "heap_offset"
+OpName %51 "cookie"
+OpName %52 "heap_index"
+OpName %53 "descriptor_type"
+OpName %54 "actual_descriptor_type"
+OpName %55 "instruction"
+OpName %85 "DescriptorHeapQAData"
+OpMemberName %85 0 "descriptor_count"
+OpMemberName %85 1 "heap_index"
+OpMemberName %85 2 "cookies_descriptor_info"
+OpName %87 "QAHeapData"
+OpName %92 "descriptor_qa_check"
+OpName %89 "heap_offset"
+OpName %90 "descriptor_type_mask"
+OpName %91 "instruction"
+OpName %148 "discard_exit"
OpDecorate %6 Block
OpMemberDecorate %6 0 Offset 0
OpMemberDecorate %6 1 Offset 4
@@ -165,29 +165,28 @@ OpDecorate %17 DescriptorSet 2
OpDecorate %17 Binding 0
OpDecorate %20 Location 0
OpDecorate %23 Location 0
-OpDecorate %27 BuiltIn SampleMask
-OpDecorate %52 ArrayStride 4
-OpMemberDecorate %53 0 Offset 0
-OpMemberDecorate %53 1 Offset 8
-OpMemberDecorate %53 2 Offset 12
-OpMemberDecorate %53 3 Offset 16
-OpMemberDecorate %53 4 Offset 20
-OpMemberDecorate %53 5 Offset 24
-OpMemberDecorate %53 6 Offset 28
-OpMemberDecorate %53 7 Offset 32
-OpMemberDecorate %53 8 Offset 36
-OpMemberDecorate %53 9 Offset 40
-OpDecorate %53 Block
-OpDecorate %55 DescriptorSet 10
-OpDecorate %55 Binding 10
-OpDecorate %92 ArrayStride 8
-OpMemberDecorate %93 0 Offset 0
-OpMemberDecorate %93 1 Offset 4
-OpMemberDecorate %93 2 Offset 8
-OpDecorate %93 Block
-OpDecorate %95 DescriptorSet 10
-OpDecorate %95 Binding 11
-OpDecorate %95 NonWritable
+OpDecorate %44 ArrayStride 4
+OpMemberDecorate %45 0 Offset 0
+OpMemberDecorate %45 1 Offset 8
+OpMemberDecorate %45 2 Offset 12
+OpMemberDecorate %45 3 Offset 16
+OpMemberDecorate %45 4 Offset 20
+OpMemberDecorate %45 5 Offset 24
+OpMemberDecorate %45 6 Offset 28
+OpMemberDecorate %45 7 Offset 32
+OpMemberDecorate %45 8 Offset 36
+OpMemberDecorate %45 9 Offset 40
+OpDecorate %45 Block
+OpDecorate %47 DescriptorSet 10
+OpDecorate %47 Binding 10
+OpDecorate %84 ArrayStride 8
+OpMemberDecorate %85 0 Offset 0
+OpMemberDecorate %85 1 Offset 4
+OpMemberDecorate %85 2 Offset 8
+OpDecorate %85 Block
+OpDecorate %87 DescriptorSet 10
+OpDecorate %87 Binding 11
+OpDecorate %87 NonWritable
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeInt 32 0
@@ -209,177 +208,169 @@ OpDecorate %95 NonWritable
%21 = OpTypeVector %9 4
%22 = OpTypePointer Output %21
%23 = OpVariable %22 Output
-%24 = OpConstant %5 1
-%25 = OpTypeArray %5 %24
-%26 = OpTypePointer Input %25
-%27 = OpVariable %26 Input
-%28 = OpTypePointer Input %5
-%30 = OpConstant %5 0
-%32 = OpTypeBool
-%35 = OpTypePointer Input %9
-%41 = OpConstant %9 0
-%42 = OpTypePointer Private %32
-%43 = OpVariable %42 Private
-%44 = OpConstantFalse %32
-%45 = OpTypePointer UniformConstant %10
-%47 = OpTypePointer PushConstant %5
-%51 = OpTypeVector %5 2
-%52 = OpTypeRuntimeArray %5
-%53 = OpTypeStruct %51 %5 %5 %5 %5 %5 %5 %5 %5 %52
-%54 = OpTypePointer StorageBuffer %53
-%55 = OpVariable %54 StorageBuffer
-%56 = OpTypeFunction %1 %5 %5 %5 %5 %5 %5 %5
-%66 = OpTypePointer StorageBuffer %5
-%68 = OpConstant %5 4
-%74 = OpConstant %5 3
-%77 = OpConstant %5 2
-%79 = OpConstant %5 6
-%81 = OpConstant %5 7
-%83 = OpConstant %5 5
-%84 = OpConstant %5 3735928559
-%85 = OpConstantComposite %51 %84 %30
-%86 = OpTypePointer StorageBuffer %51
-%88 = OpConstant %5 72
-%90 = OpConstant %5 8
-%92 = OpTypeRuntimeArray %51
-%93 = OpTypeStruct %5 %5 %92
-%94 = OpTypePointer StorageBuffer %93
-%95 = OpVariable %94 StorageBuffer
-%96 = OpTypeFunction %5 %5 %5 %5
-%112 = OpConstant %5 31
-%114 = OpConstant %5 9
-%134 = OpTypePointer UniformConstant %14
-%139 = OpTypeSampledImage %10
-%147 = OpTypePointer Output %9
-%155 = OpConstantTrue %32
+%24 = OpTypePointer Input %9
+%26 = OpConstant %5 0
+%29 = OpConstant %5 1
+%31 = OpTypeBool
+%33 = OpConstant %9 0
+%34 = OpTypePointer Private %31
+%35 = OpVariable %34 Private
+%36 = OpConstantFalse %31
+%37 = OpTypePointer UniformConstant %10
+%39 = OpTypePointer PushConstant %5
+%43 = OpTypeVector %5 2
+%44 = OpTypeRuntimeArray %5
+%45 = OpTypeStruct %43 %5 %5 %5 %5 %5 %5 %5 %5 %44
+%46 = OpTypePointer StorageBuffer %45
+%47 = OpVariable %46 StorageBuffer
+%48 = OpTypeFunction %1 %5 %5 %5 %5 %5 %5 %5
+%58 = OpTypePointer StorageBuffer %5
+%60 = OpConstant %5 4
+%66 = OpConstant %5 3
+%69 = OpConstant %5 2
+%71 = OpConstant %5 6
+%73 = OpConstant %5 7
+%75 = OpConstant %5 5
+%76 = OpConstant %5 3735928559
+%77 = OpConstantComposite %43 %76 %26
+%78 = OpTypePointer StorageBuffer %43
+%80 = OpConstant %5 72
+%82 = OpConstant %5 8
+%84 = OpTypeRuntimeArray %43
+%85 = OpTypeStruct %5 %5 %84
+%86 = OpTypePointer StorageBuffer %85
+%87 = OpVariable %86 StorageBuffer
+%88 = OpTypeFunction %5 %5 %5 %5
+%104 = OpConstant %5 31
+%106 = OpConstant %5 9
+%126 = OpTypePointer UniformConstant %14
+%131 = OpTypeSampledImage %10
+%139 = OpTypePointer Output %9
+%147 = OpConstantTrue %31
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpStore %43 %44
-OpBranch %152
-%152 = OpLabel
-%29 = OpAccessChain %28 %27 %30
-%31 = OpLoad %5 %29
-%33 = OpIEqual %32 %30 %31
-%34 = OpSelect %5 %33 %24 %30
-%36 = OpAccessChain %35 %20 %30
-%37 = OpLoad %9 %36
-%38 = OpAccessChain %35 %20 %24
-%39 = OpLoad %9 %38
-%40 = OpFOrdLessThan %32 %37 %41
-OpSelectionMerge %154 None
-OpBranchConditional %40 %153 %154
-%153 = OpLabel
-OpStore %43 %155
-OpBranch %154
-%154 = OpLabel
-%48 = OpAccessChain %47 %8 %30
-%49 = OpLoad %5 %48
-%50 = OpFunctionCall %5 %100 %49 %24 %24
-%46 = OpAccessChain %45 %13 %50
-%133 = OpLoad %10 %46
-%136 = OpAccessChain %47 %8 %77
-%137 = OpLoad %5 %136
-%135 = OpAccessChain %134 %17 %137
-%138 = OpLoad %14 %135
-%140 = OpSampledImage %139 %133 %138
-%142 = OpCompositeConstruct %18 %37 %39
-%141 = OpImageSampleImplicitLod %21 %140 %142 None
-%143 = OpCompositeExtract %9 %141 0
-%144 = OpCompositeExtract %9 %141 1
-%145 = OpCompositeExtract %9 %141 2
-%146 = OpCompositeExtract %9 %141 3
-%148 = OpAccessChain %147 %23 %30
-OpStore %148 %143
-%149 = OpAccessChain %147 %23 %24
-OpStore %149 %144
-%150 = OpAccessChain %147 %23 %77
-OpStore %150 %145
-%151 = OpAccessChain %147 %23 %74
-OpStore %151 %146
-%162 = OpFunctionCall %1 %156
+OpStore %35 %36
+OpBranch %144
+%144 = OpLabel
+%25 = OpAccessChain %24 %20 %26
+%27 = OpLoad %9 %25
+%28 = OpAccessChain %24 %20 %29
+%30 = OpLoad %9 %28
+%32 = OpFOrdLessThan %31 %27 %33
+OpSelectionMerge %146 None
+OpBranchConditional %32 %145 %146
+%145 = OpLabel
+OpStore %35 %147
+OpBranch %146
+%146 = OpLabel
+%40 = OpAccessChain %39 %8 %26
+%41 = OpLoad %5 %40
+%42 = OpFunctionCall %5 %92 %41 %29 %29
+%38 = OpAccessChain %37 %13 %42
+%125 = OpLoad %10 %38
+%128 = OpAccessChain %39 %8 %69
+%129 = OpLoad %5 %128
+%127 = OpAccessChain %126 %17 %129
+%130 = OpLoad %14 %127
+%132 = OpSampledImage %131 %125 %130
+%134 = OpCompositeConstruct %18 %27 %30
+%133 = OpImageSampleImplicitLod %21 %132 %134 None
+%135 = OpCompositeExtract %9 %133 0
+%136 = OpCompositeExtract %9 %133 1
+%137 = OpCompositeExtract %9 %133 2
+%138 = OpCompositeExtract %9 %133 3
+%140 = OpAccessChain %139 %23 %26
+OpStore %140 %135
+%141 = OpAccessChain %139 %23 %29
+OpStore %141 %136
+%142 = OpAccessChain %139 %23 %69
+OpStore %142 %137
+%143 = OpAccessChain %139 %23 %66
+OpStore %143 %138
+%154 = OpFunctionCall %1 %148
OpReturn
OpFunctionEnd
-%64 = OpFunction %1 None %56
-%57 = OpFunctionParameter %5
-%58 = OpFunctionParameter %5
-%59 = OpFunctionParameter %5
-%60 = OpFunctionParameter %5
-%61 = OpFunctionParameter %5
-%62 = OpFunctionParameter %5
-%63 = OpFunctionParameter %5
-%65 = OpLabel
-%67 = OpAccessChain %66 %55 %68
-%69 = OpAtomicIAdd %5 %67 %24 %30 %24
-%70 = OpIEqual %32 %69 %30
-OpSelectionMerge %72 None
-OpBranchConditional %70 %71 %72
-%71 = OpLabel
-%73 = OpAccessChain %66 %55 %74
-OpStore %73 %59
-%75 = OpAccessChain %66 %55 %24
-OpStore %75 %58
-%76 = OpAccessChain %66 %55 %77
-OpStore %76 %60
-%78 = OpAccessChain %66 %55 %79
-OpStore %78 %61
-%80 = OpAccessChain %66 %55 %81
-OpStore %80 %62
-%82 = OpAccessChain %66 %55 %83
-OpStore %82 %63
-%87 = OpAccessChain %86 %55 %30
-OpStore %87 %85
-OpMemoryBarrier %24 %88
-%89 = OpAccessChain %66 %55 %90
-OpStore %89 %57
-OpBranch %72
-%72 = OpLabel
+%56 = OpFunction %1 None %48
+%49 = OpFunctionParameter %5
+%50 = OpFunctionParameter %5
+%51 = OpFunctionParameter %5
+%52 = OpFunctionParameter %5
+%53 = OpFunctionParameter %5
+%54 = OpFunctionParameter %5
+%55 = OpFunctionParameter %5
+%57 = OpLabel
+%59 = OpAccessChain %58 %47 %60
+%61 = OpAtomicIAdd %5 %59 %29 %26 %29
+%62 = OpIEqual %31 %61 %26
+OpSelectionMerge %64 None
+OpBranchConditional %62 %63 %64
+%63 = OpLabel
+%65 = OpAccessChain %58 %47 %66
+OpStore %65 %51
+%67 = OpAccessChain %58 %47 %29
+OpStore %67 %50
+%68 = OpAccessChain %58 %47 %69
+OpStore %68 %52
+%70 = OpAccessChain %58 %47 %71
+OpStore %70 %53
+%72 = OpAccessChain %58 %47 %73
+OpStore %72 %54
+%74 = OpAccessChain %58 %47 %75
+OpStore %74 %55
+%79 = OpAccessChain %78 %47 %26
+OpStore %79 %77
+OpMemoryBarrier %29 %80
+%81 = OpAccessChain %58 %47 %82
+OpStore %81 %49
+OpBranch %64
+%64 = OpLabel
OpReturn
OpFunctionEnd
-%100 = OpFunction %5 None %96
-%97 = OpFunctionParameter %5
-%98 = OpFunctionParameter %5
-%99 = OpFunctionParameter %5
-%101 = OpLabel
-%102 = OpAccessChain %66 %95 %30
-%103 = OpLoad %5 %102
-%104 = OpAccessChain %66 %95 %24
-%105 = OpLoad %5 %104
-%106 = OpAccessChain %86 %95 %77 %97
-%107 = OpLoad %51 %106
-%108 = OpCompositeExtract %5 %107 0
-%110 = OpShiftRightLogical %5 %108 %83
-%111 = OpBitwiseAnd %5 %108 %112
-%109 = OpCompositeExtract %5 %107 1
-%113 = OpAccessChain %66 %55 %114 %110
-%115 = OpLoad %5 %113
-%116 = OpShiftLeftLogical %5 %24 %111
-%117 = OpBitwiseAnd %5 %115 %116
-%118 = OpINotEqual %32 %117 %30
-%119 = OpBitwiseAnd %5 %109 %98
-%120 = OpIEqual %32 %119 %98
-%121 = OpUGreaterThanEqual %32 %97 %103
-%122 = OpSelect %5 %121 %24 %30
-%123 = OpSelect %5 %120 %30 %77
-%124 = OpSelect %5 %118 %30 %68
-%125 = OpBitwiseOr %5 %122 %123
-%126 = OpBitwiseOr %5 %125 %124
-%127 = OpINotEqual %32 %126 %30
-OpSelectionMerge %129 None
-OpBranchConditional %127 %128 %129
-%128 = OpLabel
-%130 = OpFunctionCall %1 %64 %126 %97 %108 %105 %98 %109 %99
-OpReturnValue %103
-%129 = OpLabel
-OpReturnValue %97
+%92 = OpFunction %5 None %88
+%89 = OpFunctionParameter %5
+%90 = OpFunctionParameter %5
+%91 = OpFunctionParameter %5
+%93 = OpLabel
+%94 = OpAccessChain %58 %87 %26
+%95 = OpLoad %5 %94
+%96 = OpAccessChain %58 %87 %29
+%97 = OpLoad %5 %96
+%98 = OpAccessChain %78 %87 %69 %89
+%99 = OpLoad %43 %98
+%100 = OpCompositeExtract %5 %99 0
+%102 = OpShiftRightLogical %5 %100 %75
+%103 = OpBitwiseAnd %5 %100 %104
+%101 = OpCompositeExtract %5 %99 1
+%105 = OpAccessChain %58 %47 %106 %102
+%107 = OpLoad %5 %105
+%108 = OpShiftLeftLogical %5 %29 %103
+%109 = OpBitwiseAnd %5 %107 %108
+%110 = OpINotEqual %31 %109 %26
+%111 = OpBitwiseAnd %5 %101 %90
+%112 = OpIEqual %31 %111 %90
+%113 = OpUGreaterThanEqual %31 %89 %95
+%114 = OpSelect %5 %113 %29 %26
+%115 = OpSelect %5 %112 %26 %69
+%116 = OpSelect %5 %110 %26 %60
+%117 = OpBitwiseOr %5 %114 %115
+%118 = OpBitwiseOr %5 %117 %116
+%119 = OpINotEqual %31 %118 %26
+OpSelectionMerge %121 None
+OpBranchConditional %119 %120 %121
+%120 = OpLabel
+%122 = OpFunctionCall %1 %56 %118 %89 %100 %97 %90 %101 %91
+OpReturnValue %95
+%121 = OpLabel
+OpReturnValue %89
OpFunctionEnd
-%156 = OpFunction %1 None %2
-%157 = OpLabel
-%160 = OpLoad %32 %43
-OpSelectionMerge %159 None
-OpBranchConditional %160 %158 %159
-%158 = OpLabel
+%148 = OpFunction %1 None %2
+%149 = OpLabel
+%152 = OpLoad %31 %35
+OpSelectionMerge %151 None
+OpBranchConditional %152 %150 %151
+%150 = OpLabel
OpKill
-%159 = OpLabel
+%151 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag
index 7697f58..a52b58b 100644
--- a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag
+++ b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag
@@ -3,9 +3,9 @@
layout(location = 0) in vec2 TEXCOORD;
-void demote_cond(bool _37)
+void demote_cond(bool _27)
{
- if (_37)
+ if (_27)
{
demote;
}
@@ -13,10 +13,8 @@ void demote_cond(bool _37)
void main()
{
- bool _28 = (TEXCOORD.x + (-10.0)) < 0.0;
- demote_cond(_28);
- bool _33 = (TEXCOORD.y + (-20.0)) < 0.0;
- demote_cond(_33);
+ demote_cond((TEXCOORD.x + (-10.0)) < 0.0);
+ demote_cond((TEXCOORD.y + (-20.0)) < 0.0);
}
@@ -25,69 +23,58 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 46
+; Bound: 36
; Schema: 0
OpCapability Shader
OpCapability DemoteToHelperInvocationEXT
OpExtension "SPV_EXT_demote_to_helper_invocation"
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %8 %13
+OpEntryPoint Fragment %3 "main" %8
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %8 "TEXCOORD"
-OpName %38 "demote_cond"
+OpName %28 "demote_cond"
OpDecorate %8 Location 0
-OpDecorate %13 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeFloat 32
%6 = OpTypeVector %5 2
%7 = OpTypePointer Input %6
%8 = OpVariable %7 Input
-%9 = OpTypeInt 32 0
-%10 = OpConstant %9 1
-%11 = OpTypeArray %9 %10
-%12 = OpTypePointer Input %11
-%13 = OpVariable %12 Input
-%14 = OpTypePointer Input %9
-%16 = OpConstant %9 0
-%18 = OpTypeBool
-%21 = OpTypePointer Input %5
-%27 = OpConstant %5 -10
-%29 = OpConstant %5 0
-%32 = OpConstant %5 -20
-%36 = OpTypeFunction %1 %18
+%9 = OpTypePointer Input %5
+%11 = OpTypeInt 32 0
+%12 = OpConstant %11 0
+%15 = OpConstant %11 1
+%18 = OpConstant %5 -10
+%19 = OpTypeBool
+%21 = OpConstant %5 0
+%23 = OpConstant %5 -20
+%26 = OpTypeFunction %1 %19
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpBranch %35
-%35 = OpLabel
-%15 = OpAccessChain %14 %13 %16
-%17 = OpLoad %9 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %9 %19 %10 %16
-%22 = OpAccessChain %21 %8 %16
-%23 = OpLoad %5 %22
-%24 = OpAccessChain %21 %8 %10
-%25 = OpLoad %5 %24
-%26 = OpFAdd %5 %23 %27
-%28 = OpFOrdLessThan %18 %26 %29
-%30 = OpSelect %9 %28 %10 %16
-%43 = OpFunctionCall %1 %38 %28
-%31 = OpFAdd %5 %25 %32
-%33 = OpFOrdLessThan %18 %31 %29
-%34 = OpSelect %9 %33 %10 %16
-%44 = OpFunctionCall %1 %38 %33
+OpBranch %25
+%25 = OpLabel
+%10 = OpAccessChain %9 %8 %12
+%13 = OpLoad %5 %10
+%14 = OpAccessChain %9 %8 %15
+%16 = OpLoad %5 %14
+%17 = OpFAdd %5 %13 %18
+%20 = OpFOrdLessThan %19 %17 %21
+%33 = OpFunctionCall %1 %28 %20
+%22 = OpFAdd %5 %16 %23
+%24 = OpFOrdLessThan %19 %22 %21
+%34 = OpFunctionCall %1 %28 %24
OpReturn
OpFunctionEnd
-%38 = OpFunction %1 None %36
-%37 = OpFunctionParameter %18
-%39 = OpLabel
-OpSelectionMerge %41 None
-OpBranchConditional %37 %40 %41
-%40 = OpLabel
+%28 = OpFunction %1 None %26
+%27 = OpFunctionParameter %19
+%29 = OpLabel
+OpSelectionMerge %31 None
+OpBranchConditional %27 %30 %31
+%30 = OpLabel
OpDemoteToHelperInvocationEXT
-OpBranch %41
-%41 = OpLabel
+OpBranch %31
+%31 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/clip.frag b/reference/shaders/dxil-builtin/clip.frag
index 942e587..fb07ef5 100644
--- a/reference/shaders/dxil-builtin/clip.frag
+++ b/reference/shaders/dxil-builtin/clip.frag
@@ -3,9 +3,9 @@
layout(location = 0) in vec2 TEXCOORD;
bool discard_state;
-void discard_cond(bool _40)
+void discard_cond(bool _30)
{
- if (_40)
+ if (_30)
{
discard_state = true;
}
@@ -22,10 +22,8 @@ void discard_exit()
void main()
{
discard_state = false;
- bool _28 = (TEXCOORD.x + (-10.0)) < 0.0;
- discard_cond(_28);
- bool _36 = (TEXCOORD.y + (-20.0)) < 0.0;
- discard_cond(_36);
+ discard_cond((TEXCOORD.x + (-10.0)) < 0.0);
+ discard_cond((TEXCOORD.y + (-20.0)) < 0.0);
discard_exit();
}
@@ -35,85 +33,74 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 57
+; Bound: 47
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %8 %13
+OpEntryPoint Fragment %3 "main" %8
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %8 "TEXCOORD"
-OpName %32 "discard_state"
-OpName %41 "discard_cond"
-OpName %49 "discard_exit"
+OpName %23 "discard_state"
+OpName %31 "discard_cond"
+OpName %39 "discard_exit"
OpDecorate %8 Location 0
-OpDecorate %13 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeFloat 32
%6 = OpTypeVector %5 2
%7 = OpTypePointer Input %6
%8 = OpVariable %7 Input
-%9 = OpTypeInt 32 0
-%10 = OpConstant %9 1
-%11 = OpTypeArray %9 %10
-%12 = OpTypePointer Input %11
-%13 = OpVariable %12 Input
-%14 = OpTypePointer Input %9
-%16 = OpConstant %9 0
-%18 = OpTypeBool
-%21 = OpTypePointer Input %5
-%27 = OpConstant %5 -10
-%29 = OpConstant %5 0
-%31 = OpTypePointer Private %18
-%32 = OpVariable %31 Private
-%33 = OpConstantFalse %18
-%35 = OpConstant %5 -20
-%39 = OpTypeFunction %1 %18
-%45 = OpConstantTrue %18
+%9 = OpTypePointer Input %5
+%11 = OpTypeInt 32 0
+%12 = OpConstant %11 0
+%15 = OpConstant %11 1
+%18 = OpConstant %5 -10
+%19 = OpTypeBool
+%21 = OpConstant %5 0
+%22 = OpTypePointer Private %19
+%23 = OpVariable %22 Private
+%24 = OpConstantFalse %19
+%26 = OpConstant %5 -20
+%29 = OpTypeFunction %1 %19
+%35 = OpConstantTrue %19
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpStore %32 %33
-OpBranch %38
-%38 = OpLabel
-%15 = OpAccessChain %14 %13 %16
-%17 = OpLoad %9 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %9 %19 %10 %16
-%22 = OpAccessChain %21 %8 %16
-%23 = OpLoad %5 %22
-%24 = OpAccessChain %21 %8 %10
-%25 = OpLoad %5 %24
-%26 = OpFAdd %5 %23 %27
-%28 = OpFOrdLessThan %18 %26 %29
-%30 = OpSelect %9 %28 %10 %16
-%47 = OpFunctionCall %1 %41 %28
-%34 = OpFAdd %5 %25 %35
-%36 = OpFOrdLessThan %18 %34 %29
-%37 = OpSelect %9 %36 %10 %16
-%48 = OpFunctionCall %1 %41 %36
-%55 = OpFunctionCall %1 %49
+OpStore %23 %24
+OpBranch %28
+%28 = OpLabel
+%10 = OpAccessChain %9 %8 %12
+%13 = OpLoad %5 %10
+%14 = OpAccessChain %9 %8 %15
+%16 = OpLoad %5 %14
+%17 = OpFAdd %5 %13 %18
+%20 = OpFOrdLessThan %19 %17 %21
+%37 = OpFunctionCall %1 %31 %20
+%25 = OpFAdd %5 %16 %26
+%27 = OpFOrdLessThan %19 %25 %21
+%38 = OpFunctionCall %1 %31 %27
+%45 = OpFunctionCall %1 %39
OpReturn
OpFunctionEnd
-%41 = OpFunction %1 None %39
-%40 = OpFunctionParameter %18
-%42 = OpLabel
-OpSelectionMerge %44 None
-OpBranchConditional %40 %43 %44
-%43 = OpLabel
-OpStore %32 %45
-OpBranch %44
-%44 = OpLabel
+%31 = OpFunction %1 None %29
+%30 = OpFunctionParameter %19
+%32 = OpLabel
+OpSelectionMerge %34 None
+OpBranchConditional %30 %33 %34
+%33 = OpLabel
+OpStore %23 %35
+OpBranch %34
+%34 = OpLabel
OpReturn
OpFunctionEnd
-%49 = OpFunction %1 None %2
-%50 = OpLabel
-%53 = OpLoad %18 %32
-OpSelectionMerge %52 None
-OpBranchConditional %53 %51 %52
-%51 = OpLabel
+%39 = OpFunction %1 None %2
+%40 = OpLabel
+%43 = OpLoad %19 %23
+OpSelectionMerge %42 None
+OpBranchConditional %43 %41 %42
+%41 = OpLabel
OpKill
-%52 = OpLabel
+%42 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag
index cdd1b17..717a476 100644
--- a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag
+++ b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag
@@ -24,63 +24,54 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 37
+; Bound: 29
; Schema: 0
OpCapability Shader
OpCapability DemoteToHelperInvocationEXT
OpExtension "SPV_EXT_demote_to_helper_invocation"
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %8 %13
+OpEntryPoint Fragment %3 "main" %8
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %8 "TEXCOORD"
OpDecorate %8 Location 0
-OpDecorate %13 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeFloat 32
%6 = OpTypeVector %5 2
%7 = OpTypePointer Input %6
%8 = OpVariable %7 Input
-%9 = OpTypeInt 32 0
-%10 = OpConstant %9 1
-%11 = OpTypeArray %9 %10
-%12 = OpTypePointer Input %11
-%13 = OpVariable %12 Input
-%14 = OpTypePointer Input %9
-%16 = OpConstant %9 0
-%18 = OpTypeBool
-%21 = OpTypePointer Input %5
-%25 = OpConstant %5 10
-%29 = OpConstant %5 20
+%9 = OpTypePointer Input %5
+%11 = OpTypeInt 32 0
+%12 = OpConstant %11 0
+%14 = OpTypeBool
+%16 = OpConstant %5 10
+%18 = OpConstant %11 1
+%21 = OpConstant %5 20
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpBranch %30
-%30 = OpLabel
-%15 = OpAccessChain %14 %13 %16
-%17 = OpLoad %9 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %9 %19 %10 %16
-%22 = OpAccessChain %21 %8 %16
-%23 = OpLoad %5 %22
-%24 = OpFOrdGreaterThan %18 %23 %25
-OpSelectionMerge %35 None
-OpBranchConditional %24 %34 %31
-%34 = OpLabel
+OpBranch %22
+%22 = OpLabel
+%10 = OpAccessChain %9 %8 %12
+%13 = OpLoad %5 %10
+%15 = OpFOrdGreaterThan %14 %13 %16
+OpSelectionMerge %27 None
+OpBranchConditional %15 %26 %23
+%26 = OpLabel
OpDemoteToHelperInvocationEXT
-OpBranch %35
-%31 = OpLabel
-%26 = OpAccessChain %21 %8 %10
-%27 = OpLoad %5 %26
-%28 = OpFOrdGreaterThan %18 %27 %29
-OpSelectionMerge %33 None
-OpBranchConditional %28 %32 %33
-%32 = OpLabel
+OpBranch %27
+%23 = OpLabel
+%17 = OpAccessChain %9 %8 %18
+%19 = OpLoad %5 %17
+%20 = OpFOrdGreaterThan %14 %19 %21
+OpSelectionMerge %25 None
+OpBranchConditional %20 %24 %25
+%24 = OpLabel
OpDemoteToHelperInvocationEXT
-OpBranch %33
-%33 = OpLabel
-OpBranch %35
-%35 = OpLabel
+OpBranch %25
+%25 = OpLabel
+OpBranch %27
+%27 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/discard.frag b/reference/shaders/dxil-builtin/discard.frag
index c26a174..48c065a 100644
--- a/reference/shaders/dxil-builtin/discard.frag
+++ b/reference/shaders/dxil-builtin/discard.frag
@@ -34,79 +34,70 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 48
+; Bound: 40
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %8 %13
+OpEntryPoint Fragment %3 "main" %8
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %8 "TEXCOORD"
-OpName %27 "discard_state"
-OpName %40 "discard_exit"
+OpName %18 "discard_state"
+OpName %32 "discard_exit"
OpDecorate %8 Location 0
-OpDecorate %13 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeFloat 32
%6 = OpTypeVector %5 2
%7 = OpTypePointer Input %6
%8 = OpVariable %7 Input
-%9 = OpTypeInt 32 0
-%10 = OpConstant %9 1
-%11 = OpTypeArray %9 %10
-%12 = OpTypePointer Input %11
-%13 = OpVariable %12 Input
-%14 = OpTypePointer Input %9
-%16 = OpConstant %9 0
-%18 = OpTypeBool
-%21 = OpTypePointer Input %5
-%25 = OpConstant %5 10
-%26 = OpTypePointer Private %18
-%27 = OpVariable %26 Private
-%28 = OpConstantFalse %18
-%32 = OpConstant %5 20
-%39 = OpConstantTrue %18
+%9 = OpTypePointer Input %5
+%11 = OpTypeInt 32 0
+%12 = OpConstant %11 0
+%14 = OpTypeBool
+%16 = OpConstant %5 10
+%17 = OpTypePointer Private %14
+%18 = OpVariable %17 Private
+%19 = OpConstantFalse %14
+%21 = OpConstant %11 1
+%24 = OpConstant %5 20
+%31 = OpConstantTrue %14
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpStore %27 %28
-OpBranch %33
-%33 = OpLabel
-%15 = OpAccessChain %14 %13 %16
-%17 = OpLoad %9 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %9 %19 %10 %16
-%22 = OpAccessChain %21 %8 %16
-%23 = OpLoad %5 %22
-%24 = OpFOrdGreaterThan %18 %23 %25
-OpSelectionMerge %38 None
-OpBranchConditional %24 %37 %34
-%37 = OpLabel
-OpStore %27 %39
-OpBranch %38
-%34 = OpLabel
-%29 = OpAccessChain %21 %8 %10
-%30 = OpLoad %5 %29
-%31 = OpFOrdGreaterThan %18 %30 %32
-OpSelectionMerge %36 None
-OpBranchConditional %31 %35 %36
-%35 = OpLabel
-OpStore %27 %39
-OpBranch %36
-%36 = OpLabel
-OpBranch %38
-%38 = OpLabel
-%46 = OpFunctionCall %1 %40
+OpStore %18 %19
+OpBranch %25
+%25 = OpLabel
+%10 = OpAccessChain %9 %8 %12
+%13 = OpLoad %5 %10
+%15 = OpFOrdGreaterThan %14 %13 %16
+OpSelectionMerge %30 None
+OpBranchConditional %15 %29 %26
+%29 = OpLabel
+OpStore %18 %31
+OpBranch %30
+%26 = OpLabel
+%20 = OpAccessChain %9 %8 %21
+%22 = OpLoad %5 %20
+%23 = OpFOrdGreaterThan %14 %22 %24
+OpSelectionMerge %28 None
+OpBranchConditional %23 %27 %28
+%27 = OpLabel
+OpStore %18 %31
+OpBranch %28
+%28 = OpLabel
+OpBranch %30
+%30 = OpLabel
+%38 = OpFunctionCall %1 %32
OpReturn
OpFunctionEnd
-%40 = OpFunction %1 None %2
-%41 = OpLabel
-%44 = OpLoad %18 %27
-OpSelectionMerge %43 None
-OpBranchConditional %44 %42 %43
-%42 = OpLabel
+%32 = OpFunction %1 None %2
+%33 = OpLabel
+%36 = OpLoad %14 %18
+OpSelectionMerge %35 None
+OpBranchConditional %36 %34 %35
+%34 = OpLabel
OpKill
-%43 = OpLabel
+%35 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp b/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp
index 7a839df..e7a4aa8 100644
--- a/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp
+++ b/reference/shaders/dxil-builtin/pack-unpack.ssbo.sm66.comp
@@ -45,6 +45,7 @@ void main()
OpCapability Shader
OpCapability Int16
OpCapability Int8
+OpCapability StorageBuffer16BitAccess
%80 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %23
diff --git a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp
index a82fcd6..07039f7 100644
--- a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp
+++ b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.comp
@@ -72,6 +72,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %34
OpExecutionMode %3 LocalSize 64 1 1
diff --git a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp
index c626cd6..6fd75c1 100644
--- a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp
+++ b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp
@@ -6,6 +6,7 @@
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpCapability DotProductInput4x8BitPackedKHR
OpCapability DotProductKHR
OpExtension "SPV_KHR_integer_dot_product"
diff --git a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag
index 74cda90..aa21585 100644
--- a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag
+++ b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag
@@ -11,11 +11,11 @@ void main()
{
demote;
}
- uvec4 _26 = subgroupBallot(INDEX < 100u);
- SV_Target.x = _26.x;
- SV_Target.y = _26.y;
- SV_Target.z = _26.z;
- SV_Target.w = _26.w;
+ uvec4 _17 = subgroupBallot(INDEX < 100u);
+ SV_Target.x = _17.x;
+ SV_Target.y = _17.y;
+ SV_Target.z = _17.z;
+ SV_Target.w = _17.w;
}
@@ -24,14 +24,14 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 42
+; Bound: 35
; Schema: 0
OpCapability Shader
OpCapability GroupNonUniformBallot
OpCapability DemoteToHelperInvocationEXT
OpExtension "SPV_EXT_demote_to_helper_invocation"
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %7 %10 %14
+OpEntryPoint Fragment %3 "main" %7 %10
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %7 "INDEX"
@@ -39,7 +39,6 @@ OpName %10 "SV_Target"
OpDecorate %7 Flat
OpDecorate %7 Location 0
OpDecorate %10 Location 0
-OpDecorate %14 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeInt 32 0
@@ -48,47 +47,40 @@ OpDecorate %14 BuiltIn SampleMask
%8 = OpTypeVector %5 4
%9 = OpTypePointer Output %8
%10 = OpVariable %9 Output
-%11 = OpConstant %5 1
-%12 = OpTypeArray %5 %11
-%13 = OpTypePointer Input %12
-%14 = OpVariable %13 Input
-%16 = OpConstant %5 0
-%18 = OpTypeBool
-%23 = OpConstant %5 40
-%25 = OpConstant %5 100
-%27 = OpConstant %5 3
-%32 = OpTypePointer Output %5
-%36 = OpConstant %5 2
+%12 = OpTypeBool
+%14 = OpConstant %5 40
+%16 = OpConstant %5 100
+%18 = OpConstant %5 3
+%23 = OpTypePointer Output %5
+%25 = OpConstant %5 0
+%27 = OpConstant %5 1
+%29 = OpConstant %5 2
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpBranch %38
-%38 = OpLabel
-%15 = OpAccessChain %6 %14 %16
-%17 = OpLoad %5 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %5 %19 %11 %16
-%21 = OpLoad %5 %7
-%22 = OpIEqual %18 %21 %23
-OpSelectionMerge %40 None
-OpBranchConditional %22 %39 %40
-%39 = OpLabel
+OpBranch %31
+%31 = OpLabel
+%11 = OpLoad %5 %7
+%13 = OpIEqual %12 %11 %14
+OpSelectionMerge %33 None
+OpBranchConditional %13 %32 %33
+%32 = OpLabel
OpDemoteToHelperInvocationEXT
-OpBranch %40
-%40 = OpLabel
-%24 = OpULessThan %18 %21 %25
-%26 = OpGroupNonUniformBallot %8 %27 %24
-%28 = OpCompositeExtract %5 %26 0
-%29 = OpCompositeExtract %5 %26 1
-%30 = OpCompositeExtract %5 %26 2
-%31 = OpCompositeExtract %5 %26 3
-%33 = OpAccessChain %32 %10 %16
-OpStore %33 %28
-%34 = OpAccessChain %32 %10 %11
-OpStore %34 %29
-%35 = OpAccessChain %32 %10 %36
-OpStore %35 %30
-%37 = OpAccessChain %32 %10 %27
-OpStore %37 %31
+OpBranch %33
+%33 = OpLabel
+%15 = OpULessThan %12 %11 %16
+%17 = OpGroupNonUniformBallot %8 %18 %15
+%19 = OpCompositeExtract %5 %17 0
+%20 = OpCompositeExtract %5 %17 1
+%21 = OpCompositeExtract %5 %17 2
+%22 = OpCompositeExtract %5 %17 3
+%24 = OpAccessChain %23 %10 %25
+OpStore %24 %19
+%26 = OpAccessChain %23 %10 %27
+OpStore %26 %20
+%28 = OpAccessChain %23 %10 %29
+OpStore %28 %21
+%30 = OpAccessChain %23 %10 %18
+OpStore %30 %22
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag b/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag
index 9754d4d..3efd1d9 100644
--- a/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag
+++ b/reference/shaders/dxil-builtin/wave-active-ballot-discard.frag
@@ -20,11 +20,11 @@ void main()
{
discard_state = true;
}
- uvec4 _29 = subgroupBallot(INDEX < 100u);
- SV_Target.x = _29.x;
- SV_Target.y = _29.y;
- SV_Target.z = _29.z;
- SV_Target.w = _29.w;
+ uvec4 _20 = subgroupBallot(INDEX < 100u);
+ SV_Target.x = _20.x;
+ SV_Target.y = _20.y;
+ SV_Target.z = _20.z;
+ SV_Target.w = _20.w;
discard_exit();
}
@@ -34,22 +34,21 @@ void main()
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 53
+; Bound: 46
; Schema: 0
OpCapability Shader
OpCapability GroupNonUniformBallot
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %7 %10 %14
+OpEntryPoint Fragment %3 "main" %7 %10
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %7 "INDEX"
OpName %10 "SV_Target"
-OpName %25 "discard_state"
-OpName %45 "discard_exit"
+OpName %16 "discard_state"
+OpName %38 "discard_exit"
OpDecorate %7 Flat
OpDecorate %7 Location 0
OpDecorate %10 Location 0
-OpDecorate %14 BuiltIn SampleMask
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeInt 32 0
@@ -58,63 +57,56 @@ OpDecorate %14 BuiltIn SampleMask
%8 = OpTypeVector %5 4
%9 = OpTypePointer Output %8
%10 = OpVariable %9 Output
-%11 = OpConstant %5 1
-%12 = OpTypeArray %5 %11
-%13 = OpTypePointer Input %12
-%14 = OpVariable %13 Input
-%16 = OpConstant %5 0
-%18 = OpTypeBool
-%23 = OpConstant %5 40
-%24 = OpTypePointer Private %18
-%25 = OpVariable %24 Private
-%26 = OpConstantFalse %18
-%28 = OpConstant %5 100
-%30 = OpConstant %5 3
-%35 = OpTypePointer Output %5
-%39 = OpConstant %5 2
-%44 = OpConstantTrue %18
+%12 = OpTypeBool
+%14 = OpConstant %5 40
+%15 = OpTypePointer Private %12
+%16 = OpVariable %15 Private
+%17 = OpConstantFalse %12
+%19 = OpConstant %5 100
+%21 = OpConstant %5 3
+%26 = OpTypePointer Output %5
+%28 = OpConstant %5 0
+%30 = OpConstant %5 1
+%32 = OpConstant %5 2
+%37 = OpConstantTrue %12
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpStore %25 %26
-OpBranch %41
-%41 = OpLabel
-%15 = OpAccessChain %6 %14 %16
-%17 = OpLoad %5 %15
-%19 = OpIEqual %18 %16 %17
-%20 = OpSelect %5 %19 %11 %16
-%21 = OpLoad %5 %7
-%22 = OpIEqual %18 %21 %23
-OpSelectionMerge %43 None
-OpBranchConditional %22 %42 %43
-%42 = OpLabel
-OpStore %25 %44
-OpBranch %43
-%43 = OpLabel
-%27 = OpULessThan %18 %21 %28
-%29 = OpGroupNonUniformBallot %8 %30 %27
-%31 = OpCompositeExtract %5 %29 0
-%32 = OpCompositeExtract %5 %29 1
-%33 = OpCompositeExtract %5 %29 2
-%34 = OpCompositeExtract %5 %29 3
-%36 = OpAccessChain %35 %10 %16
-OpStore %36 %31
-%37 = OpAccessChain %35 %10 %11
-OpStore %37 %32
-%38 = OpAccessChain %35 %10 %39
-OpStore %38 %33
-%40 = OpAccessChain %35 %10 %30
-OpStore %40 %34
-%51 = OpFunctionCall %1 %45
+OpStore %16 %17
+OpBranch %34
+%34 = OpLabel
+%11 = OpLoad %5 %7
+%13 = OpIEqual %12 %11 %14
+OpSelectionMerge %36 None
+OpBranchConditional %13 %35 %36
+%35 = OpLabel
+OpStore %16 %37
+OpBranch %36
+%36 = OpLabel
+%18 = OpULessThan %12 %11 %19
+%20 = OpGroupNonUniformBallot %8 %21 %18
+%22 = OpCompositeExtract %5 %20 0
+%23 = OpCompositeExtract %5 %20 1
+%24 = OpCompositeExtract %5 %20 2
+%25 = OpCompositeExtract %5 %20 3
+%27 = OpAccessChain %26 %10 %28
+OpStore %27 %22
+%29 = OpAccessChain %26 %10 %30
+OpStore %29 %23
+%31 = OpAccessChain %26 %10 %32
+OpStore %31 %24
+%33 = OpAccessChain %26 %10 %21
+OpStore %33 %25
+%44 = OpFunctionCall %1 %38
OpReturn
OpFunctionEnd
-%45 = OpFunction %1 None %2
-%46 = OpLabel
-%49 = OpLoad %18 %25
-OpSelectionMerge %48 None
-OpBranchConditional %49 %47 %48
-%47 = OpLabel
+%38 = OpFunction %1 None %2
+%39 = OpLabel
+%42 = OpLoad %12 %16
+OpSelectionMerge %41 None
+OpBranchConditional %42 %40 %41
+%40 = OpLabel
OpKill
-%48 = OpLabel
+%41 = OpLabel
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp b/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp
index c94f1c8..029d98d 100644
--- a/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp
+++ b/reference/shaders/resources/buffer-16bit.ssbo.bindless.comp
@@ -161,6 +161,7 @@ OpCapability Float16
OpCapability Int16
OpCapability StorageBufferArrayDynamicIndexing
OpCapability ImageQuery
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability StorageBufferArrayNonUniformIndexing
OpCapability PhysicalStorageBufferAddresses
diff --git a/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp b/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp
index 3d51b1b..701d4bf 100644
--- a/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp
+++ b/reference/shaders/resources/buffer-16bit.ssbo.bindless.ssbo-align.comp
@@ -209,6 +209,7 @@ OpCapability Int16
OpCapability StorageBufferArrayDynamicIndexing
OpCapability ImageQuery
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability StorageBufferArrayNonUniformIndexing
OpCapability PhysicalStorageBufferAddresses
diff --git a/reference/shaders/resources/buffer-16bit.ssbo.comp b/reference/shaders/resources/buffer-16bit.ssbo.comp
index 2bb9766..138faa0 100644
--- a/reference/shaders/resources/buffer-16bit.ssbo.comp
+++ b/reference/shaders/resources/buffer-16bit.ssbo.comp
@@ -170,6 +170,7 @@ OpCapability Float16
OpCapability Int16
OpCapability StorageBufferArrayDynamicIndexing
OpCapability ImageQuery
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability StorageBufferArrayNonUniformIndexing
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/reference/shaders/resources/cbv-array-nonuniform.frag b/reference/shaders/resources/cbv-array-nonuniform.frag
index 568d8a5..8fcb778 100644
--- a/reference/shaders/resources/cbv-array-nonuniform.frag
+++ b/reference/shaders/resources/cbv-array-nonuniform.frag
@@ -60,8 +60,10 @@ OpDecorate %19 Binding 0
OpDecorate %21 Flat
OpDecorate %21 Location 0
OpDecorate %23 Location 0
+OpDecorate %27 NonUniform
OpDecorate %30 NonUniform
OpDecorate %32 NonUniform
+OpDecorate %41 NonUniform
OpDecorate %43 NonUniform
OpDecorate %44 NonUniform
%1 = OpTypeVoid
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.frag
new file mode 100644
index 0000000..49a7d61
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.frag
@@ -0,0 +1,193 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+
+struct CBVComposite16x8
+{
+ float16_t _m0;
+ float16_t _m1;
+ float16_t _m2;
+ float16_t _m3;
+ float16_t _m4;
+ float16_t _m5;
+ float16_t _m6;
+ float16_t _m7;
+};
+
+layout(set = 0, binding = 0, std140) uniform _10_12
+{
+ vec4 _m0[4];
+} _12;
+
+layout(set = 0, binding = 0, std140) uniform _16_18
+{
+ dvec2 _m0[4];
+} _18;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ f16vec2 _38 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].x));
+ f16vec2 _41 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].y));
+ f16vec2 _44 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].z));
+ f16vec2 _47 = unpackFloat2x16(floatBitsToUint(_12._m0[1u].w));
+ CBVComposite16x8 _51 = CBVComposite16x8(_38.x, _38.y, _41.x, _41.y, _44.x, _44.y, _47.x, _47.y);
+ u64vec2 _82 = doubleBitsToUint64(_18._m0[2u]);
+ u64vec2 _88 = doubleBitsToUint64(_18._m0[3u]);
+ SV_Target.x = ((float(_51._m0) + _12._m0[0u].x) + float(_51._m4)) + float(int64_t(_82.x));
+ SV_Target.y = ((float(_51._m1) + _12._m0[0u].y) + float(_51._m5)) + float(int64_t(_82.y));
+ SV_Target.z = ((float(_51._m2) + _12._m0[0u].z) + float(_51._m6)) + float(int64_t(_88.x));
+ SV_Target.w = ((float(_51._m3) + _12._m0[0u].w) + float(_51._m7)) + float(int64_t(_88.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 106
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %20
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %10 ""
+OpName %16 ""
+OpName %20 "SV_Target"
+OpName %50 "CBVComposite16x8"
+OpDecorate %9 ArrayStride 16
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %10 Block
+OpDecorate %15 ArrayStride 16
+OpMemberDecorate %16 0 Offset 0
+OpDecorate %16 Block
+OpDecorate %12 DescriptorSet 0
+OpDecorate %12 Binding 0
+OpDecorate %18 DescriptorSet 0
+OpDecorate %18 Binding 0
+OpDecorate %20 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 4
+%7 = OpTypeFloat 32
+%8 = OpTypeVector %7 4
+%9 = OpTypeArray %8 %6
+%10 = OpTypeStruct %9
+%11 = OpTypePointer Uniform %10
+%12 = OpVariable %11 Uniform
+%13 = OpTypeFloat 64
+%14 = OpTypeVector %13 2
+%15 = OpTypeArray %14 %6
+%16 = OpTypeStruct %15
+%17 = OpTypePointer Uniform %16
+%18 = OpVariable %17 Uniform
+%19 = OpTypePointer Output %8
+%20 = OpVariable %19 Output
+%21 = OpConstant %5 0
+%22 = OpTypePointer Uniform %8
+%29 = OpTypeFloat 16
+%30 = OpConstant %5 1
+%33 = OpTypeVector %29 2
+%50 = OpTypeStruct %29 %29 %29 %29 %29 %29 %29 %29
+%76 = OpTypeInt 64 0
+%77 = OpConstant %5 2
+%78 = OpTypePointer Uniform %14
+%81 = OpTypeVector %76 2
+%85 = OpConstant %5 3
+%99 = OpTypePointer Output %7
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %104
+%104 = OpLabel
+%23 = OpAccessChain %22 %12 %21 %21
+%24 = OpLoad %8 %23
+%25 = OpCompositeExtract %7 %24 0
+%26 = OpCompositeExtract %7 %24 1
+%27 = OpCompositeExtract %7 %24 2
+%28 = OpCompositeExtract %7 %24 3
+%31 = OpAccessChain %22 %12 %21 %30
+%32 = OpLoad %8 %31
+%34 = OpCompositeExtract %7 %32 0
+%35 = OpCompositeExtract %7 %32 1
+%36 = OpCompositeExtract %7 %32 2
+%37 = OpCompositeExtract %7 %32 3
+%38 = OpBitcast %33 %34
+%39 = OpCompositeExtract %29 %38 0
+%40 = OpCompositeExtract %29 %38 1
+%41 = OpBitcast %33 %35
+%42 = OpCompositeExtract %29 %41 0
+%43 = OpCompositeExtract %29 %41 1
+%44 = OpBitcast %33 %36
+%45 = OpCompositeExtract %29 %44 0
+%46 = OpCompositeExtract %29 %44 1
+%47 = OpBitcast %33 %37
+%48 = OpCompositeExtract %29 %47 0
+%49 = OpCompositeExtract %29 %47 1
+%51 = OpCompositeConstruct %50 %39 %40 %42 %43 %45 %46 %48 %49
+%52 = OpCompositeExtract %29 %51 0
+%53 = OpCompositeExtract %29 %51 1
+%54 = OpCompositeExtract %29 %51 2
+%55 = OpCompositeExtract %29 %51 3
+%56 = OpFConvert %7 %52
+%57 = OpFConvert %7 %53
+%58 = OpFConvert %7 %54
+%59 = OpFConvert %7 %55
+%60 = OpFAdd %7 %56 %25
+%61 = OpFAdd %7 %57 %26
+%62 = OpFAdd %7 %58 %27
+%63 = OpFAdd %7 %59 %28
+%64 = OpCompositeExtract %29 %51 4
+%65 = OpCompositeExtract %29 %51 5
+%66 = OpCompositeExtract %29 %51 6
+%67 = OpCompositeExtract %29 %51 7
+%68 = OpFConvert %7 %64
+%69 = OpFConvert %7 %65
+%70 = OpFConvert %7 %66
+%71 = OpFConvert %7 %67
+%72 = OpFAdd %7 %60 %68
+%73 = OpFAdd %7 %61 %69
+%74 = OpFAdd %7 %62 %70
+%75 = OpFAdd %7 %63 %71
+%79 = OpAccessChain %78 %18 %21 %77
+%80 = OpLoad %14 %79
+%82 = OpBitcast %81 %80
+%83 = OpCompositeExtract %76 %82 0
+%84 = OpCompositeExtract %76 %82 1
+%86 = OpAccessChain %78 %18 %21 %85
+%87 = OpLoad %14 %86
+%88 = OpBitcast %81 %87
+%89 = OpCompositeExtract %76 %88 0
+%90 = OpCompositeExtract %76 %88 1
+%91 = OpConvertSToF %7 %83
+%92 = OpConvertSToF %7 %84
+%93 = OpConvertSToF %7 %89
+%94 = OpConvertSToF %7 %90
+%95 = OpFAdd %7 %72 %91
+%96 = OpFAdd %7 %73 %92
+%97 = OpFAdd %7 %74 %93
+%98 = OpFAdd %7 %75 %94
+%100 = OpAccessChain %99 %20 %21
+OpStore %100 %95
+%101 = OpAccessChain %99 %20 %30
+OpStore %101 %96
+%102 = OpAccessChain %99 %20 %77
+OpStore %102 %97
+%103 = OpAccessChain %99 %20 %85
+OpStore %103 %98
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag
new file mode 100644
index 0000000..996b3f0
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag
@@ -0,0 +1,263 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_buffer_reference : require
+
+struct AddCarry
+{
+ uint _m0;
+ uint _m1;
+};
+
+struct CBVComposite16x8
+{
+ float16_t _m0;
+ float16_t _m1;
+ float16_t _m2;
+ float16_t _m3;
+ float16_t _m4;
+ float16_t _m5;
+ float16_t _m6;
+ float16_t _m7;
+};
+
+layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite;
+layout(buffer_reference) buffer PhysicalPointerUint642NonWrite;
+layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite
+{
+ vec4 value;
+};
+
+layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite
+{
+ u64vec2 value;
+};
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uvec2 _m0;
+ uvec2 _m1;
+ uvec2 _m2;
+ uvec2 _m3;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ AddCarry _23;
+ _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1);
+ PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1));
+ AddCarry _43;
+ _43._m0 = uaddCarry(registers._m0.x, 1u * 16u, _43._m1);
+ PhysicalPointerFloat4NonWrite _48 = PhysicalPointerFloat4NonWrite(uvec2(_43._m0, registers._m0.y + _43._m1));
+ f16vec2 _56 = unpackFloat2x16(floatBitsToUint(_48.value.x));
+ f16vec2 _59 = unpackFloat2x16(floatBitsToUint(_48.value.y));
+ f16vec2 _62 = unpackFloat2x16(floatBitsToUint(_48.value.z));
+ f16vec2 _65 = unpackFloat2x16(floatBitsToUint(_48.value.w));
+ CBVComposite16x8 _69 = CBVComposite16x8(_56.x, _56.y, _59.x, _59.y, _62.x, _62.y, _65.x, _65.y);
+ AddCarry _99;
+ _99._m0 = uaddCarry(registers._m0.x, 2u * 16u, _99._m1);
+ PhysicalPointerUint642NonWrite _107 = PhysicalPointerUint642NonWrite(uvec2(_99._m0, registers._m0.y + _99._m1));
+ AddCarry _117;
+ _117._m0 = uaddCarry(registers._m0.x, 3u * 16u, _117._m1);
+ PhysicalPointerUint642NonWrite _122 = PhysicalPointerUint642NonWrite(uvec2(_117._m0, registers._m0.y + _117._m1));
+ SV_Target.x = ((float(_69._m0) + _30.value.x) + float(_69._m4)) + float(int64_t(_107.value.x));
+ SV_Target.y = ((float(_69._m1) + _30.value.y) + float(_69._m5)) + float(int64_t(_107.value.y));
+ SV_Target.z = ((float(_69._m2) + _30.value.z) + float(_69._m6)) + float(int64_t(_122.value.x));
+ SV_Target.w = ((float(_69._m3) + _30.value.w) + float(_69._m7)) + float(int64_t(_122.value.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 142
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Int64
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %13
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %7 "RootConstants"
+OpName %9 "registers"
+OpName %13 "SV_Target"
+OpName %22 "AddCarry"
+OpName %28 "PhysicalPointerFloat4NonWrite"
+OpMemberName %28 0 "value"
+OpName %68 "CBVComposite16x8"
+OpName %105 "PhysicalPointerUint642NonWrite"
+OpMemberName %105 0 "value"
+OpDecorate %7 Block
+OpMemberDecorate %7 0 Offset 0
+OpMemberDecorate %7 1 Offset 8
+OpMemberDecorate %7 2 Offset 16
+OpMemberDecorate %7 3 Offset 24
+OpDecorate %13 Location 0
+OpMemberDecorate %28 0 Offset 0
+OpDecorate %28 Block
+OpMemberDecorate %28 0 NonWritable
+OpMemberDecorate %105 0 Offset 0
+OpDecorate %105 Block
+OpMemberDecorate %105 0 NonWritable
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeVector %5 2
+%7 = OpTypeStruct %6 %6 %6 %6
+%8 = OpTypePointer PushConstant %7
+%9 = OpVariable %8 PushConstant
+%10 = OpTypeFloat 32
+%11 = OpTypeVector %10 4
+%12 = OpTypePointer Output %11
+%13 = OpVariable %12 Output
+%14 = OpTypePointer PushConstant %6
+%16 = OpConstant %5 0
+%19 = OpConstant %5 16
+%22 = OpTypeStruct %5 %5
+%28 = OpTypeStruct %11
+%29 = OpTypePointer PhysicalStorageBuffer %28
+%31 = OpTypePointer PhysicalStorageBuffer %11
+%38 = OpConstant %5 1
+%40 = OpTypeFloat 16
+%51 = OpTypeVector %40 2
+%68 = OpTypeStruct %40 %40 %40 %40 %40 %40 %40 %40
+%94 = OpConstant %5 2
+%96 = OpTypeInt 64 0
+%104 = OpTypeVector %96 2
+%105 = OpTypeStruct %104
+%106 = OpTypePointer PhysicalStorageBuffer %105
+%108 = OpTypePointer PhysicalStorageBuffer %104
+%113 = OpConstant %5 3
+%135 = OpTypePointer Output %10
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %140
+%140 = OpLabel
+%15 = OpAccessChain %14 %9 %16
+%17 = OpLoad %6 %15
+%18 = OpIMul %5 %16 %19
+%20 = OpCompositeExtract %5 %17 0
+%21 = OpCompositeExtract %5 %17 1
+%23 = OpIAddCarry %22 %20 %18
+%24 = OpCompositeExtract %5 %23 0
+%25 = OpCompositeExtract %5 %23 1
+%26 = OpIAdd %5 %21 %25
+%27 = OpCompositeConstruct %6 %24 %26
+%30 = OpBitcast %29 %27
+%32 = OpAccessChain %31 %30 %16
+%33 = OpLoad %11 %32 Aligned 16
+%34 = OpCompositeExtract %10 %33 0
+%35 = OpCompositeExtract %10 %33 1
+%36 = OpCompositeExtract %10 %33 2
+%37 = OpCompositeExtract %10 %33 3
+%39 = OpIMul %5 %38 %19
+%41 = OpCompositeExtract %5 %17 0
+%42 = OpCompositeExtract %5 %17 1
+%43 = OpIAddCarry %22 %41 %39
+%44 = OpCompositeExtract %5 %43 0
+%45 = OpCompositeExtract %5 %43 1
+%46 = OpIAdd %5 %42 %45
+%47 = OpCompositeConstruct %6 %44 %46
+%48 = OpBitcast %29 %47
+%49 = OpAccessChain %31 %48 %16
+%50 = OpLoad %11 %49 Aligned 16
+%52 = OpCompositeExtract %10 %50 0
+%53 = OpCompositeExtract %10 %50 1
+%54 = OpCompositeExtract %10 %50 2
+%55 = OpCompositeExtract %10 %50 3
+%56 = OpBitcast %51 %52
+%57 = OpCompositeExtract %40 %56 0
+%58 = OpCompositeExtract %40 %56 1
+%59 = OpBitcast %51 %53
+%60 = OpCompositeExtract %40 %59 0
+%61 = OpCompositeExtract %40 %59 1
+%62 = OpBitcast %51 %54
+%63 = OpCompositeExtract %40 %62 0
+%64 = OpCompositeExtract %40 %62 1
+%65 = OpBitcast %51 %55
+%66 = OpCompositeExtract %40 %65 0
+%67 = OpCompositeExtract %40 %65 1
+%69 = OpCompositeConstruct %68 %57 %58 %60 %61 %63 %64 %66 %67
+%70 = OpCompositeExtract %40 %69 0
+%71 = OpCompositeExtract %40 %69 1
+%72 = OpCompositeExtract %40 %69 2
+%73 = OpCompositeExtract %40 %69 3
+%74 = OpFConvert %10 %70
+%75 = OpFConvert %10 %71
+%76 = OpFConvert %10 %72
+%77 = OpFConvert %10 %73
+%78 = OpFAdd %10 %74 %34
+%79 = OpFAdd %10 %75 %35
+%80 = OpFAdd %10 %76 %36
+%81 = OpFAdd %10 %77 %37
+%82 = OpCompositeExtract %40 %69 4
+%83 = OpCompositeExtract %40 %69 5
+%84 = OpCompositeExtract %40 %69 6
+%85 = OpCompositeExtract %40 %69 7
+%86 = OpFConvert %10 %82
+%87 = OpFConvert %10 %83
+%88 = OpFConvert %10 %84
+%89 = OpFConvert %10 %85
+%90 = OpFAdd %10 %78 %86
+%91 = OpFAdd %10 %79 %87
+%92 = OpFAdd %10 %80 %88
+%93 = OpFAdd %10 %81 %89
+%95 = OpIMul %5 %94 %19
+%97 = OpCompositeExtract %5 %17 0
+%98 = OpCompositeExtract %5 %17 1
+%99 = OpIAddCarry %22 %97 %95
+%100 = OpCompositeExtract %5 %99 0
+%101 = OpCompositeExtract %5 %99 1
+%102 = OpIAdd %5 %98 %101
+%103 = OpCompositeConstruct %6 %100 %102
+%107 = OpBitcast %106 %103
+%109 = OpAccessChain %108 %107 %16
+%110 = OpLoad %104 %109 Aligned 16
+%111 = OpCompositeExtract %96 %110 0
+%112 = OpCompositeExtract %96 %110 1
+%114 = OpIMul %5 %113 %19
+%115 = OpCompositeExtract %5 %17 0
+%116 = OpCompositeExtract %5 %17 1
+%117 = OpIAddCarry %22 %115 %114
+%118 = OpCompositeExtract %5 %117 0
+%119 = OpCompositeExtract %5 %117 1
+%120 = OpIAdd %5 %116 %119
+%121 = OpCompositeConstruct %6 %118 %120
+%122 = OpBitcast %106 %121
+%123 = OpAccessChain %108 %122 %16
+%124 = OpLoad %104 %123 Aligned 16
+%125 = OpCompositeExtract %96 %124 0
+%126 = OpCompositeExtract %96 %124 1
+%127 = OpConvertSToF %10 %111
+%128 = OpConvertSToF %10 %112
+%129 = OpConvertSToF %10 %125
+%130 = OpConvertSToF %10 %126
+%131 = OpFAdd %10 %90 %127
+%132 = OpFAdd %10 %91 %128
+%133 = OpFAdd %10 %92 %129
+%134 = OpFAdd %10 %93 %130
+%136 = OpAccessChain %135 %13 %16
+OpStore %136 %131
+%137 = OpAccessChain %135 %13 %38
+OpStore %137 %132
+%138 = OpAccessChain %135 %13 %94
+OpStore %138 %133
+%139 = OpAccessChain %135 %13 %113
+OpStore %139 %134
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
new file mode 100644
index 0000000..ec79c5f
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
@@ -0,0 +1,223 @@
+#version 460
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_buffer_reference : require
+
+struct AddCarry
+{
+ uint _m0;
+ uint _m1;
+};
+
+layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite;
+layout(buffer_reference) buffer PhysicalPointerUint642NonWrite;
+layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite
+{
+ vec4 value;
+};
+
+layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite
+{
+ u64vec2 value;
+};
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uvec2 _m0;
+ uvec2 _m1;
+ uvec2 _m2;
+ uvec2 _m3;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ AddCarry _23;
+ _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1);
+ PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1));
+ AddCarry _42;
+ _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1);
+ PhysicalPointerFloat4NonWrite _47 = PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1));
+ AddCarry _62;
+ _62._m0 = uaddCarry(registers._m0.x, 2u * 16u, _62._m1);
+ PhysicalPointerFloat4NonWrite _67 = PhysicalPointerFloat4NonWrite(uvec2(_62._m0, registers._m0.y + _62._m1));
+ AddCarry _83;
+ _83._m0 = uaddCarry(registers._m0.x, 3u * 16u, _83._m1);
+ PhysicalPointerUint642NonWrite _91 = PhysicalPointerUint642NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1));
+ AddCarry _101;
+ _101._m0 = uaddCarry(registers._m0.x, 4u * 16u, _101._m1);
+ PhysicalPointerUint642NonWrite _106 = PhysicalPointerUint642NonWrite(uvec2(_101._m0, registers._m0.y + _101._m1));
+ SV_Target.x = ((_47.value.x + _30.value.x) + _67.value.x) + float(int64_t(_91.value.x));
+ SV_Target.y = ((_47.value.y + _30.value.y) + _67.value.y) + float(int64_t(_91.value.y));
+ SV_Target.z = ((_47.value.z + _30.value.z) + _67.value.z) + float(int64_t(_106.value.x));
+ SV_Target.w = ((_47.value.w + _30.value.w) + _67.value.w) + float(int64_t(_106.value.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 126
+; Schema: 0
+OpCapability Shader
+OpCapability Int64
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %13
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %7 "RootConstants"
+OpName %9 "registers"
+OpName %13 "SV_Target"
+OpName %22 "AddCarry"
+OpName %28 "PhysicalPointerFloat4NonWrite"
+OpMemberName %28 0 "value"
+OpName %89 "PhysicalPointerUint642NonWrite"
+OpMemberName %89 0 "value"
+OpDecorate %7 Block
+OpMemberDecorate %7 0 Offset 0
+OpMemberDecorate %7 1 Offset 8
+OpMemberDecorate %7 2 Offset 16
+OpMemberDecorate %7 3 Offset 24
+OpDecorate %13 Location 0
+OpMemberDecorate %28 0 Offset 0
+OpDecorate %28 Block
+OpMemberDecorate %28 0 NonWritable
+OpMemberDecorate %89 0 Offset 0
+OpDecorate %89 Block
+OpMemberDecorate %89 0 NonWritable
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeVector %5 2
+%7 = OpTypeStruct %6 %6 %6 %6
+%8 = OpTypePointer PushConstant %7
+%9 = OpVariable %8 PushConstant
+%10 = OpTypeFloat 32
+%11 = OpTypeVector %10 4
+%12 = OpTypePointer Output %11
+%13 = OpVariable %12 Output
+%14 = OpTypePointer PushConstant %6
+%16 = OpConstant %5 0
+%19 = OpConstant %5 16
+%22 = OpTypeStruct %5 %5
+%28 = OpTypeStruct %11
+%29 = OpTypePointer PhysicalStorageBuffer %28
+%31 = OpTypePointer PhysicalStorageBuffer %11
+%38 = OpConstant %5 1
+%58 = OpConstant %5 2
+%78 = OpConstant %5 3
+%80 = OpTypeInt 64 0
+%88 = OpTypeVector %80 2
+%89 = OpTypeStruct %88
+%90 = OpTypePointer PhysicalStorageBuffer %89
+%92 = OpTypePointer PhysicalStorageBuffer %88
+%97 = OpConstant %5 4
+%119 = OpTypePointer Output %10
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %124
+%124 = OpLabel
+%15 = OpAccessChain %14 %9 %16
+%17 = OpLoad %6 %15
+%18 = OpIMul %5 %16 %19
+%20 = OpCompositeExtract %5 %17 0
+%21 = OpCompositeExtract %5 %17 1
+%23 = OpIAddCarry %22 %20 %18
+%24 = OpCompositeExtract %5 %23 0
+%25 = OpCompositeExtract %5 %23 1
+%26 = OpIAdd %5 %21 %25
+%27 = OpCompositeConstruct %6 %24 %26
+%30 = OpBitcast %29 %27
+%32 = OpAccessChain %31 %30 %16
+%33 = OpLoad %11 %32 Aligned 16
+%34 = OpCompositeExtract %10 %33 0
+%35 = OpCompositeExtract %10 %33 1
+%36 = OpCompositeExtract %10 %33 2
+%37 = OpCompositeExtract %10 %33 3
+%39 = OpIMul %5 %38 %19
+%40 = OpCompositeExtract %5 %17 0
+%41 = OpCompositeExtract %5 %17 1
+%42 = OpIAddCarry %22 %40 %39
+%43 = OpCompositeExtract %5 %42 0
+%44 = OpCompositeExtract %5 %42 1
+%45 = OpIAdd %5 %41 %44
+%46 = OpCompositeConstruct %6 %43 %45
+%47 = OpBitcast %29 %46
+%48 = OpAccessChain %31 %47 %16
+%49 = OpLoad %11 %48 Aligned 16
+%50 = OpCompositeExtract %10 %49 0
+%51 = OpCompositeExtract %10 %49 1
+%52 = OpCompositeExtract %10 %49 2
+%53 = OpCompositeExtract %10 %49 3
+%54 = OpFAdd %10 %50 %34
+%55 = OpFAdd %10 %51 %35
+%56 = OpFAdd %10 %52 %36
+%57 = OpFAdd %10 %53 %37
+%59 = OpIMul %5 %58 %19
+%60 = OpCompositeExtract %5 %17 0
+%61 = OpCompositeExtract %5 %17 1
+%62 = OpIAddCarry %22 %60 %59
+%63 = OpCompositeExtract %5 %62 0
+%64 = OpCompositeExtract %5 %62 1
+%65 = OpIAdd %5 %61 %64
+%66 = OpCompositeConstruct %6 %63 %65
+%67 = OpBitcast %29 %66
+%68 = OpAccessChain %31 %67 %16
+%69 = OpLoad %11 %68 Aligned 16
+%70 = OpCompositeExtract %10 %69 0
+%71 = OpCompositeExtract %10 %69 1
+%72 = OpCompositeExtract %10 %69 2
+%73 = OpCompositeExtract %10 %69 3
+%74 = OpFAdd %10 %54 %70
+%75 = OpFAdd %10 %55 %71
+%76 = OpFAdd %10 %56 %72
+%77 = OpFAdd %10 %57 %73
+%79 = OpIMul %5 %78 %19
+%81 = OpCompositeExtract %5 %17 0
+%82 = OpCompositeExtract %5 %17 1
+%83 = OpIAddCarry %22 %81 %79
+%84 = OpCompositeExtract %5 %83 0
+%85 = OpCompositeExtract %5 %83 1
+%86 = OpIAdd %5 %82 %85
+%87 = OpCompositeConstruct %6 %84 %86
+%91 = OpBitcast %90 %87
+%93 = OpAccessChain %92 %91 %16
+%94 = OpLoad %88 %93 Aligned 16
+%95 = OpCompositeExtract %80 %94 0
+%96 = OpCompositeExtract %80 %94 1
+%98 = OpIMul %5 %97 %19
+%99 = OpCompositeExtract %5 %17 0
+%100 = OpCompositeExtract %5 %17 1
+%101 = OpIAddCarry %22 %99 %98
+%102 = OpCompositeExtract %5 %101 0
+%103 = OpCompositeExtract %5 %101 1
+%104 = OpIAdd %5 %100 %103
+%105 = OpCompositeConstruct %6 %102 %104
+%106 = OpBitcast %90 %105
+%107 = OpAccessChain %92 %106 %16
+%108 = OpLoad %88 %107 Aligned 16
+%109 = OpCompositeExtract %80 %108 0
+%110 = OpCompositeExtract %80 %108 1
+%111 = OpConvertSToF %10 %95
+%112 = OpConvertSToF %10 %96
+%113 = OpConvertSToF %10 %109
+%114 = OpConvertSToF %10 %110
+%115 = OpFAdd %10 %74 %111
+%116 = OpFAdd %10 %75 %112
+%117 = OpFAdd %10 %76 %113
+%118 = OpFAdd %10 %77 %114
+%120 = OpAccessChain %119 %13 %16
+OpStore %120 %115
+%121 = OpAccessChain %119 %13 %38
+OpStore %121 %116
+%122 = OpAccessChain %119 %13 %58
+OpStore %122 %117
+%123 = OpAccessChain %119 %13 %78
+OpStore %123 %118
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
new file mode 100644
index 0000000..4f60fc9
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
@@ -0,0 +1,244 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_buffer_reference : require
+
+struct AddCarry
+{
+ uint _m0;
+ uint _m1;
+};
+
+layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite;
+layout(buffer_reference) buffer PhysicalPointerUint642NonWrite;
+layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite
+{
+ vec4 value;
+};
+
+layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite
+{
+ u64vec2 value;
+};
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uvec2 _m0;
+ uvec2 _m1;
+ uvec2 _m2;
+ uvec2 _m3;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ AddCarry _23;
+ _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1);
+ PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1));
+ AddCarry _42;
+ _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1);
+ f16vec4 _52 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1)).value);
+ AddCarry _69;
+ _69._m0 = uaddCarry(registers._m0.x, 2u * 16u, _69._m1);
+ f16vec4 _77 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value);
+ AddCarry _95;
+ _95._m0 = uaddCarry(registers._m0.x, 3u * 16u, _95._m1);
+ PhysicalPointerUint642NonWrite _103 = PhysicalPointerUint642NonWrite(uvec2(_95._m0, registers._m0.y + _95._m1));
+ AddCarry _113;
+ _113._m0 = uaddCarry(registers._m0.x, 4u * 16u, _113._m1);
+ PhysicalPointerUint642NonWrite _118 = PhysicalPointerUint642NonWrite(uvec2(_113._m0, registers._m0.y + _113._m1));
+ SV_Target.x = ((float(_52.x) + _30.value.x) + float(_77.x)) + float(int64_t(_103.value.x));
+ SV_Target.y = ((float(_52.y) + _30.value.y) + float(_77.y)) + float(int64_t(_103.value.y));
+ SV_Target.z = ((float(_52.z) + _30.value.z) + float(_77.z)) + float(int64_t(_118.value.x));
+ SV_Target.w = ((float(_52.w) + _30.value.w) + float(_77.w)) + float(int64_t(_118.value.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 138
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Int64
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %13
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %7 "RootConstants"
+OpName %9 "registers"
+OpName %13 "SV_Target"
+OpName %22 "AddCarry"
+OpName %28 "PhysicalPointerFloat4NonWrite"
+OpMemberName %28 0 "value"
+OpName %101 "PhysicalPointerUint642NonWrite"
+OpMemberName %101 0 "value"
+OpDecorate %7 Block
+OpMemberDecorate %7 0 Offset 0
+OpMemberDecorate %7 1 Offset 8
+OpMemberDecorate %7 2 Offset 16
+OpMemberDecorate %7 3 Offset 24
+OpDecorate %13 Location 0
+OpMemberDecorate %28 0 Offset 0
+OpDecorate %28 Block
+OpMemberDecorate %28 0 NonWritable
+OpMemberDecorate %101 0 Offset 0
+OpDecorate %101 Block
+OpMemberDecorate %101 0 NonWritable
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeVector %5 2
+%7 = OpTypeStruct %6 %6 %6 %6
+%8 = OpTypePointer PushConstant %7
+%9 = OpVariable %8 PushConstant
+%10 = OpTypeFloat 32
+%11 = OpTypeVector %10 4
+%12 = OpTypePointer Output %11
+%13 = OpVariable %12 Output
+%14 = OpTypePointer PushConstant %6
+%16 = OpConstant %5 0
+%19 = OpConstant %5 16
+%22 = OpTypeStruct %5 %5
+%28 = OpTypeStruct %11
+%29 = OpTypePointer PhysicalStorageBuffer %28
+%31 = OpTypePointer PhysicalStorageBuffer %11
+%38 = OpConstant %5 1
+%50 = OpTypeFloat 16
+%51 = OpTypeVector %50 4
+%65 = OpConstant %5 2
+%90 = OpConstant %5 3
+%92 = OpTypeInt 64 0
+%100 = OpTypeVector %92 2
+%101 = OpTypeStruct %100
+%102 = OpTypePointer PhysicalStorageBuffer %101
+%104 = OpTypePointer PhysicalStorageBuffer %100
+%109 = OpConstant %5 4
+%131 = OpTypePointer Output %10
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %136
+%136 = OpLabel
+%15 = OpAccessChain %14 %9 %16
+%17 = OpLoad %6 %15
+%18 = OpIMul %5 %16 %19
+%20 = OpCompositeExtract %5 %17 0
+%21 = OpCompositeExtract %5 %17 1
+%23 = OpIAddCarry %22 %20 %18
+%24 = OpCompositeExtract %5 %23 0
+%25 = OpCompositeExtract %5 %23 1
+%26 = OpIAdd %5 %21 %25
+%27 = OpCompositeConstruct %6 %24 %26
+%30 = OpBitcast %29 %27
+%32 = OpAccessChain %31 %30 %16
+%33 = OpLoad %11 %32 Aligned 16
+%34 = OpCompositeExtract %10 %33 0
+%35 = OpCompositeExtract %10 %33 1
+%36 = OpCompositeExtract %10 %33 2
+%37 = OpCompositeExtract %10 %33 3
+%39 = OpIMul %5 %38 %19
+%40 = OpCompositeExtract %5 %17 0
+%41 = OpCompositeExtract %5 %17 1
+%42 = OpIAddCarry %22 %40 %39
+%43 = OpCompositeExtract %5 %42 0
+%44 = OpCompositeExtract %5 %42 1
+%45 = OpIAdd %5 %41 %44
+%46 = OpCompositeConstruct %6 %43 %45
+%47 = OpBitcast %29 %46
+%48 = OpAccessChain %31 %47 %16
+%49 = OpLoad %11 %48 Aligned 16
+%52 = OpFConvert %51 %49
+%53 = OpCompositeExtract %50 %52 0
+%54 = OpCompositeExtract %50 %52 1
+%55 = OpCompositeExtract %50 %52 2
+%56 = OpCompositeExtract %50 %52 3
+%57 = OpFConvert %10 %53
+%58 = OpFConvert %10 %54
+%59 = OpFConvert %10 %55
+%60 = OpFConvert %10 %56
+%61 = OpFAdd %10 %57 %34
+%62 = OpFAdd %10 %58 %35
+%63 = OpFAdd %10 %59 %36
+%64 = OpFAdd %10 %60 %37
+%66 = OpIMul %5 %65 %19
+%67 = OpCompositeExtract %5 %17 0
+%68 = OpCompositeExtract %5 %17 1
+%69 = OpIAddCarry %22 %67 %66
+%70 = OpCompositeExtract %5 %69 0
+%71 = OpCompositeExtract %5 %69 1
+%72 = OpIAdd %5 %68 %71
+%73 = OpCompositeConstruct %6 %70 %72
+%74 = OpBitcast %29 %73
+%75 = OpAccessChain %31 %74 %16
+%76 = OpLoad %11 %75 Aligned 16
+%77 = OpFConvert %51 %76
+%78 = OpCompositeExtract %50 %77 0
+%79 = OpCompositeExtract %50 %77 1
+%80 = OpCompositeExtract %50 %77 2
+%81 = OpCompositeExtract %50 %77 3
+%82 = OpFConvert %10 %78
+%83 = OpFConvert %10 %79
+%84 = OpFConvert %10 %80
+%85 = OpFConvert %10 %81
+%86 = OpFAdd %10 %61 %82
+%87 = OpFAdd %10 %62 %83
+%88 = OpFAdd %10 %63 %84
+%89 = OpFAdd %10 %64 %85
+%91 = OpIMul %5 %90 %19
+%93 = OpCompositeExtract %5 %17 0
+%94 = OpCompositeExtract %5 %17 1
+%95 = OpIAddCarry %22 %93 %91
+%96 = OpCompositeExtract %5 %95 0
+%97 = OpCompositeExtract %5 %95 1
+%98 = OpIAdd %5 %94 %97
+%99 = OpCompositeConstruct %6 %96 %98
+%103 = OpBitcast %102 %99
+%105 = OpAccessChain %104 %103 %16
+%106 = OpLoad %100 %105 Aligned 16
+%107 = OpCompositeExtract %92 %106 0
+%108 = OpCompositeExtract %92 %106 1
+%110 = OpIMul %5 %109 %19
+%111 = OpCompositeExtract %5 %17 0
+%112 = OpCompositeExtract %5 %17 1
+%113 = OpIAddCarry %22 %111 %110
+%114 = OpCompositeExtract %5 %113 0
+%115 = OpCompositeExtract %5 %113 1
+%116 = OpIAdd %5 %112 %115
+%117 = OpCompositeConstruct %6 %114 %116
+%118 = OpBitcast %102 %117
+%119 = OpAccessChain %104 %118 %16
+%120 = OpLoad %100 %119 Aligned 16
+%121 = OpCompositeExtract %92 %120 0
+%122 = OpCompositeExtract %92 %120 1
+%123 = OpConvertSToF %10 %107
+%124 = OpConvertSToF %10 %108
+%125 = OpConvertSToF %10 %121
+%126 = OpConvertSToF %10 %122
+%127 = OpFAdd %10 %86 %123
+%128 = OpFAdd %10 %87 %124
+%129 = OpFAdd %10 %88 %125
+%130 = OpFAdd %10 %89 %126
+%132 = OpAccessChain %131 %13 %16
+OpStore %132 %127
+%133 = OpAccessChain %131 %13 %38
+OpStore %133 %128
+%134 = OpAccessChain %131 %13 %65
+OpStore %134 %129
+%135 = OpAccessChain %131 %13 %90
+OpStore %135 %130
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
new file mode 100644
index 0000000..0e8c333
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
@@ -0,0 +1,141 @@
+#version 460
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout(set = 0, binding = 0, std140) uniform _10_12
+{
+ vec4 _m0[5];
+} _12;
+
+layout(set = 0, binding = 0, std140) uniform _16_18
+{
+ dvec2 _m0[5];
+} _18;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ u64vec2 _57 = doubleBitsToUint64(_18._m0[3u]);
+ u64vec2 _63 = doubleBitsToUint64(_18._m0[4u]);
+ SV_Target.x = ((_12._m0[1u].x + _12._m0[0u].x) + _12._m0[2u].x) + float(int64_t(_57.x));
+ SV_Target.y = ((_12._m0[1u].y + _12._m0[0u].y) + _12._m0[2u].y) + float(int64_t(_57.y));
+ SV_Target.z = ((_12._m0[1u].z + _12._m0[0u].z) + _12._m0[2u].z) + float(int64_t(_63.x));
+ SV_Target.w = ((_12._m0[1u].w + _12._m0[0u].w) + _12._m0[2u].w) + float(int64_t(_63.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 81
+; Schema: 0
+OpCapability Shader
+OpCapability Float64
+OpCapability Int64
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %20
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %10 ""
+OpName %16 ""
+OpName %20 "SV_Target"
+OpDecorate %9 ArrayStride 16
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %10 Block
+OpDecorate %15 ArrayStride 16
+OpMemberDecorate %16 0 Offset 0
+OpDecorate %16 Block
+OpDecorate %12 DescriptorSet 0
+OpDecorate %12 Binding 0
+OpDecorate %18 DescriptorSet 0
+OpDecorate %18 Binding 0
+OpDecorate %20 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 5
+%7 = OpTypeFloat 32
+%8 = OpTypeVector %7 4
+%9 = OpTypeArray %8 %6
+%10 = OpTypeStruct %9
+%11 = OpTypePointer Uniform %10
+%12 = OpVariable %11 Uniform
+%13 = OpTypeFloat 64
+%14 = OpTypeVector %13 2
+%15 = OpTypeArray %14 %6
+%16 = OpTypeStruct %15
+%17 = OpTypePointer Uniform %16
+%18 = OpVariable %17 Uniform
+%19 = OpTypePointer Output %8
+%20 = OpVariable %19 Output
+%21 = OpConstant %5 0
+%22 = OpTypePointer Uniform %8
+%29 = OpConstant %5 1
+%40 = OpConstant %5 2
+%51 = OpTypeInt 64 0
+%52 = OpConstant %5 3
+%53 = OpTypePointer Uniform %14
+%56 = OpTypeVector %51 2
+%60 = OpConstant %5 4
+%74 = OpTypePointer Output %7
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %79
+%79 = OpLabel
+%23 = OpAccessChain %22 %12 %21 %21
+%24 = OpLoad %8 %23
+%25 = OpCompositeExtract %7 %24 0
+%26 = OpCompositeExtract %7 %24 1
+%27 = OpCompositeExtract %7 %24 2
+%28 = OpCompositeExtract %7 %24 3
+%30 = OpAccessChain %22 %12 %21 %29
+%31 = OpLoad %8 %30
+%32 = OpCompositeExtract %7 %31 0
+%33 = OpCompositeExtract %7 %31 1
+%34 = OpCompositeExtract %7 %31 2
+%35 = OpCompositeExtract %7 %31 3
+%36 = OpFAdd %7 %32 %25
+%37 = OpFAdd %7 %33 %26
+%38 = OpFAdd %7 %34 %27
+%39 = OpFAdd %7 %35 %28
+%41 = OpAccessChain %22 %12 %21 %40
+%42 = OpLoad %8 %41
+%43 = OpCompositeExtract %7 %42 0
+%44 = OpCompositeExtract %7 %42 1
+%45 = OpCompositeExtract %7 %42 2
+%46 = OpCompositeExtract %7 %42 3
+%47 = OpFAdd %7 %36 %43
+%48 = OpFAdd %7 %37 %44
+%49 = OpFAdd %7 %38 %45
+%50 = OpFAdd %7 %39 %46
+%54 = OpAccessChain %53 %18 %21 %52
+%55 = OpLoad %14 %54
+%57 = OpBitcast %56 %55
+%58 = OpCompositeExtract %51 %57 0
+%59 = OpCompositeExtract %51 %57 1
+%61 = OpAccessChain %53 %18 %21 %60
+%62 = OpLoad %14 %61
+%63 = OpBitcast %56 %62
+%64 = OpCompositeExtract %51 %63 0
+%65 = OpCompositeExtract %51 %63 1
+%66 = OpConvertSToF %7 %58
+%67 = OpConvertSToF %7 %59
+%68 = OpConvertSToF %7 %64
+%69 = OpConvertSToF %7 %65
+%70 = OpFAdd %7 %47 %66
+%71 = OpFAdd %7 %48 %67
+%72 = OpFAdd %7 %49 %68
+%73 = OpFAdd %7 %50 %69
+%75 = OpAccessChain %74 %20 %21
+OpStore %75 %70
+%76 = OpAccessChain %74 %20 %29
+OpStore %76 %71
+%77 = OpAccessChain %74 %20 %40
+OpStore %77 %72
+%78 = OpAccessChain %74 %20 %52
+OpStore %78 %73
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
new file mode 100644
index 0000000..1a732b3
--- /dev/null
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
@@ -0,0 +1,164 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+
+layout(set = 0, binding = 0, std140) uniform _10_12
+{
+ vec4 _m0[5];
+} _12;
+
+layout(set = 0, binding = 0, std140) uniform _16_18
+{
+ dvec2 _m0[5];
+} _18;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ f16vec4 _34 = f16vec4(_12._m0[1u]);
+ f16vec4 _50 = f16vec4(_12._m0[2u]);
+ u64vec2 _69 = doubleBitsToUint64(_18._m0[3u]);
+ u64vec2 _75 = doubleBitsToUint64(_18._m0[4u]);
+ SV_Target.x = ((float(_34.x) + _12._m0[0u].x) + float(_50.x)) + float(int64_t(_69.x));
+ SV_Target.y = ((float(_34.y) + _12._m0[0u].y) + float(_50.y)) + float(int64_t(_69.y));
+ SV_Target.z = ((float(_34.z) + _12._m0[0u].z) + float(_50.z)) + float(int64_t(_75.x));
+ SV_Target.w = ((float(_34.w) + _12._m0[0u].w) + float(_50.w)) + float(int64_t(_75.y));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 93
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %20
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %10 ""
+OpName %16 ""
+OpName %20 "SV_Target"
+OpDecorate %9 ArrayStride 16
+OpMemberDecorate %10 0 Offset 0
+OpDecorate %10 Block
+OpDecorate %15 ArrayStride 16
+OpMemberDecorate %16 0 Offset 0
+OpDecorate %16 Block
+OpDecorate %12 DescriptorSet 0
+OpDecorate %12 Binding 0
+OpDecorate %18 DescriptorSet 0
+OpDecorate %18 Binding 0
+OpDecorate %20 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 5
+%7 = OpTypeFloat 32
+%8 = OpTypeVector %7 4
+%9 = OpTypeArray %8 %6
+%10 = OpTypeStruct %9
+%11 = OpTypePointer Uniform %10
+%12 = OpVariable %11 Uniform
+%13 = OpTypeFloat 64
+%14 = OpTypeVector %13 2
+%15 = OpTypeArray %14 %6
+%16 = OpTypeStruct %15
+%17 = OpTypePointer Uniform %16
+%18 = OpVariable %17 Uniform
+%19 = OpTypePointer Output %8
+%20 = OpVariable %19 Output
+%21 = OpConstant %5 0
+%22 = OpTypePointer Uniform %8
+%29 = OpConstant %5 1
+%32 = OpTypeFloat 16
+%33 = OpTypeVector %32 4
+%47 = OpConstant %5 2
+%63 = OpTypeInt 64 0
+%64 = OpConstant %5 3
+%65 = OpTypePointer Uniform %14
+%68 = OpTypeVector %63 2
+%72 = OpConstant %5 4
+%86 = OpTypePointer Output %7
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %91
+%91 = OpLabel
+%23 = OpAccessChain %22 %12 %21 %21
+%24 = OpLoad %8 %23
+%25 = OpCompositeExtract %7 %24 0
+%26 = OpCompositeExtract %7 %24 1
+%27 = OpCompositeExtract %7 %24 2
+%28 = OpCompositeExtract %7 %24 3
+%30 = OpAccessChain %22 %12 %21 %29
+%31 = OpLoad %8 %30
+%34 = OpFConvert %33 %31
+%35 = OpCompositeExtract %32 %34 0
+%36 = OpCompositeExtract %32 %34 1
+%37 = OpCompositeExtract %32 %34 2
+%38 = OpCompositeExtract %32 %34 3
+%39 = OpFConvert %7 %35
+%40 = OpFConvert %7 %36
+%41 = OpFConvert %7 %37
+%42 = OpFConvert %7 %38
+%43 = OpFAdd %7 %39 %25
+%44 = OpFAdd %7 %40 %26
+%45 = OpFAdd %7 %41 %27
+%46 = OpFAdd %7 %42 %28
+%48 = OpAccessChain %22 %12 %21 %47
+%49 = OpLoad %8 %48
+%50 = OpFConvert %33 %49
+%51 = OpCompositeExtract %32 %50 0
+%52 = OpCompositeExtract %32 %50 1
+%53 = OpCompositeExtract %32 %50 2
+%54 = OpCompositeExtract %32 %50 3
+%55 = OpFConvert %7 %51
+%56 = OpFConvert %7 %52
+%57 = OpFConvert %7 %53
+%58 = OpFConvert %7 %54
+%59 = OpFAdd %7 %43 %55
+%60 = OpFAdd %7 %44 %56
+%61 = OpFAdd %7 %45 %57
+%62 = OpFAdd %7 %46 %58
+%66 = OpAccessChain %65 %18 %21 %64
+%67 = OpLoad %14 %66
+%69 = OpBitcast %68 %67
+%70 = OpCompositeExtract %63 %69 0
+%71 = OpCompositeExtract %63 %69 1
+%73 = OpAccessChain %65 %18 %21 %72
+%74 = OpLoad %14 %73
+%75 = OpBitcast %68 %74
+%76 = OpCompositeExtract %63 %75 0
+%77 = OpCompositeExtract %63 %75 1
+%78 = OpConvertSToF %7 %70
+%79 = OpConvertSToF %7 %71
+%80 = OpConvertSToF %7 %76
+%81 = OpConvertSToF %7 %77
+%82 = OpFAdd %7 %59 %78
+%83 = OpFAdd %7 %60 %79
+%84 = OpFAdd %7 %61 %80
+%85 = OpFAdd %7 %62 %81
+%87 = OpAccessChain %86 %20 %21
+OpStore %87 %82
+%88 = OpAccessChain %86 %20 %29
+OpStore %88 %83
+%89 = OpAccessChain %86 %20 %47
+OpStore %89 %84
+%90 = OpAccessChain %86 %20 %64
+OpStore %90 %85
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag b/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag
index 8b39407..4f08a54 100644
--- a/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag
+++ b/reference/shaders/resources/cbv.bindless.root-constant.cbv-as-ssbo.frag
@@ -92,6 +92,7 @@ OpDecorate %16 Binding 0
OpDecorate %18 Flat
OpDecorate %18 Location 0
OpDecorate %20 Location 0
+OpDecorate %86 NonUniform
OpDecorate %83 NonUniform
OpDecorate %87 NonUniform
%1 = OpTypeVoid
diff --git a/reference/shaders/resources/cbv.bindless.root-constant.frag b/reference/shaders/resources/cbv.bindless.root-constant.frag
index 19939bb..149d9fa 100644
--- a/reference/shaders/resources/cbv.bindless.root-constant.frag
+++ b/reference/shaders/resources/cbv.bindless.root-constant.frag
@@ -91,6 +91,7 @@ OpDecorate %16 Binding 0
OpDecorate %18 Flat
OpDecorate %18 Location 0
OpDecorate %20 Location 0
+OpDecorate %86 NonUniform
OpDecorate %83 NonUniform
OpDecorate %87 NonUniform
%1 = OpTypeVoid
diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag
new file mode 100644
index 0000000..ba78e81
--- /dev/null
+++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag
@@ -0,0 +1,260 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_nonuniform_qualifier : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 5, binding = 0, scalar) uniform BindlessCBV
+{
+ float16_t _m0[32768];
+} _15[];
+
+layout(set = 5, binding = 0, scalar) uniform _19_22
+{
+ float _m0[16384];
+} _22[];
+
+layout(set = 5, binding = 0, scalar) uniform _26_29
+{
+ double _m0[8192];
+} _29[];
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uint _m0;
+ uint _m1;
+ uint _m2;
+ uint _m3;
+ uint _m4;
+ uint _m5;
+ uint _m6;
+ uint _m7;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ uint _39 = registers._m5 + 2u;
+ uint _45 = registers._m5 + 1u;
+ SV_Target.x = (((float(_15[registers._m5]._m0[8u]) + _22[registers._m5]._m0[0u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[4u])))) + _22[_45]._m0[0u]) + float(_29[_39]._m0[0u]);
+ SV_Target.y = (((float(_15[registers._m5]._m0[10u]) + _22[registers._m5]._m0[1u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[5u])))) + _22[_45]._m0[1u]) + float(_29[_39]._m0[1u]);
+ SV_Target.z = (((float(_15[registers._m5]._m0[12u]) + _22[registers._m5]._m0[2u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[6u])))) + _22[_45]._m0[2u]) + float(_29[_39]._m0[2u]);
+ SV_Target.w = (((float(_15[registers._m5]._m0[14u]) + _22[registers._m5]._m0[3u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[7u])))) + _22[_45]._m0[3u]) + float(_29[_39]._m0[3u]);
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 149
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability RuntimeDescriptorArray
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_EXT_descriptor_indexing"
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %32
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %6 "RootConstants"
+OpName %8 "registers"
+OpName %12 "BindlessCBV"
+OpName %19 "BindlessCBV"
+OpName %26 "BindlessCBV"
+OpName %32 "SV_Target"
+OpDecorate %6 Block
+OpMemberDecorate %6 0 Offset 0
+OpMemberDecorate %6 1 Offset 4
+OpMemberDecorate %6 2 Offset 8
+OpMemberDecorate %6 3 Offset 12
+OpMemberDecorate %6 4 Offset 16
+OpMemberDecorate %6 5 Offset 20
+OpMemberDecorate %6 6 Offset 24
+OpMemberDecorate %6 7 Offset 28
+OpDecorate %11 ArrayStride 2
+OpDecorate %12 Block
+OpMemberDecorate %12 0 Offset 0
+OpDecorate %15 DescriptorSet 5
+OpDecorate %15 Binding 0
+OpDecorate %18 ArrayStride 4
+OpDecorate %19 Block
+OpMemberDecorate %19 0 Offset 0
+OpDecorate %22 DescriptorSet 5
+OpDecorate %22 Binding 0
+OpDecorate %25 ArrayStride 8
+OpDecorate %26 Block
+OpMemberDecorate %26 0 Offset 0
+OpDecorate %29 DescriptorSet 5
+OpDecorate %29 Binding 0
+OpDecorate %32 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5
+%7 = OpTypePointer PushConstant %6
+%8 = OpVariable %7 PushConstant
+%9 = OpTypeFloat 16
+%10 = OpConstant %5 32768
+%11 = OpTypeArray %9 %10
+%12 = OpTypeStruct %11
+%13 = OpTypeRuntimeArray %12
+%14 = OpTypePointer Uniform %13
+%15 = OpVariable %14 Uniform
+%16 = OpTypeFloat 32
+%17 = OpConstant %5 16384
+%18 = OpTypeArray %16 %17
+%19 = OpTypeStruct %18
+%20 = OpTypeRuntimeArray %19
+%21 = OpTypePointer Uniform %20
+%22 = OpVariable %21 Uniform
+%23 = OpTypeFloat 64
+%24 = OpConstant %5 8192
+%25 = OpTypeArray %23 %24
+%26 = OpTypeStruct %25
+%27 = OpTypeRuntimeArray %26
+%28 = OpTypePointer Uniform %27
+%29 = OpVariable %28 Uniform
+%30 = OpTypeVector %16 4
+%31 = OpTypePointer Output %30
+%32 = OpVariable %31 Output
+%33 = OpTypePointer Uniform %26
+%35 = OpTypePointer PushConstant %5
+%37 = OpConstant %5 5
+%40 = OpConstant %5 2
+%41 = OpTypePointer Uniform %19
+%46 = OpConstant %5 1
+%47 = OpTypePointer Uniform %12
+%57 = OpConstant %5 0
+%58 = OpTypePointer Uniform %16
+%65 = OpConstant %5 3
+%68 = OpConstant %5 8
+%69 = OpTypePointer Uniform %9
+%72 = OpConstant %5 10
+%75 = OpConstant %5 12
+%78 = OpConstant %5 14
+%89 = OpTypeInt 64 0
+%90 = OpConstant %5 4
+%91 = OpTypePointer Uniform %23
+%98 = OpConstant %5 6
+%102 = OpConstant %5 7
+%142 = OpTypePointer Output %16
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %147
+%147 = OpLabel
+%36 = OpAccessChain %35 %8 %37
+%38 = OpLoad %5 %36
+%39 = OpIAdd %5 %38 %40
+%34 = OpAccessChain %33 %29 %39
+%43 = OpAccessChain %35 %8 %37
+%44 = OpLoad %5 %43
+%45 = OpIAdd %5 %44 %46
+%42 = OpAccessChain %41 %22 %45
+%49 = OpAccessChain %35 %8 %37
+%50 = OpLoad %5 %49
+%48 = OpAccessChain %47 %15 %50
+%52 = OpAccessChain %35 %8 %37
+%53 = OpLoad %5 %52
+%51 = OpAccessChain %41 %22 %53
+%55 = OpAccessChain %35 %8 %37
+%56 = OpLoad %5 %55
+%54 = OpAccessChain %33 %29 %56
+%59 = OpAccessChain %58 %51 %57 %57
+%60 = OpLoad %16 %59
+%61 = OpAccessChain %58 %51 %57 %46
+%62 = OpLoad %16 %61
+%63 = OpAccessChain %58 %51 %57 %40
+%64 = OpLoad %16 %63
+%66 = OpAccessChain %58 %51 %57 %65
+%67 = OpLoad %16 %66
+%70 = OpAccessChain %69 %48 %57 %68
+%71 = OpLoad %9 %70
+%73 = OpAccessChain %69 %48 %57 %72
+%74 = OpLoad %9 %73
+%76 = OpAccessChain %69 %48 %57 %75
+%77 = OpLoad %9 %76
+%79 = OpAccessChain %69 %48 %57 %78
+%80 = OpLoad %9 %79
+%81 = OpFConvert %16 %71
+%82 = OpFConvert %16 %74
+%83 = OpFConvert %16 %77
+%84 = OpFConvert %16 %80
+%85 = OpFAdd %16 %81 %60
+%86 = OpFAdd %16 %82 %62
+%87 = OpFAdd %16 %83 %64
+%88 = OpFAdd %16 %84 %67
+%92 = OpAccessChain %91 %54 %57 %90
+%93 = OpLoad %23 %92
+%94 = OpBitcast %89 %93
+%95 = OpAccessChain %91 %54 %57 %37
+%96 = OpLoad %23 %95
+%97 = OpBitcast %89 %96
+%99 = OpAccessChain %91 %54 %57 %98
+%100 = OpLoad %23 %99
+%101 = OpBitcast %89 %100
+%103 = OpAccessChain %91 %54 %57 %102
+%104 = OpLoad %23 %103
+%105 = OpBitcast %89 %104
+%106 = OpConvertSToF %16 %94
+%107 = OpConvertSToF %16 %97
+%108 = OpConvertSToF %16 %101
+%109 = OpConvertSToF %16 %105
+%110 = OpFAdd %16 %85 %106
+%111 = OpFAdd %16 %86 %107
+%112 = OpFAdd %16 %87 %108
+%113 = OpFAdd %16 %88 %109
+%114 = OpAccessChain %58 %42 %57 %57
+%115 = OpLoad %16 %114
+%116 = OpAccessChain %58 %42 %57 %46
+%117 = OpLoad %16 %116
+%118 = OpAccessChain %58 %42 %57 %40
+%119 = OpLoad %16 %118
+%120 = OpAccessChain %58 %42 %57 %65
+%121 = OpLoad %16 %120
+%122 = OpFAdd %16 %110 %115
+%123 = OpFAdd %16 %111 %117
+%124 = OpFAdd %16 %112 %119
+%125 = OpFAdd %16 %113 %121
+%126 = OpAccessChain %91 %34 %57 %57
+%127 = OpLoad %23 %126
+%128 = OpAccessChain %91 %34 %57 %46
+%129 = OpLoad %23 %128
+%130 = OpAccessChain %91 %34 %57 %40
+%131 = OpLoad %23 %130
+%132 = OpAccessChain %91 %34 %57 %65
+%133 = OpLoad %23 %132
+%134 = OpFConvert %16 %127
+%135 = OpFConvert %16 %129
+%136 = OpFConvert %16 %131
+%137 = OpFConvert %16 %133
+%138 = OpFAdd %16 %122 %134
+%139 = OpFAdd %16 %123 %135
+%140 = OpFAdd %16 %124 %136
+%141 = OpFAdd %16 %125 %137
+%143 = OpAccessChain %142 %32 %57
+OpStore %143 %138
+%144 = OpAccessChain %142 %32 %46
+OpStore %144 %139
+%145 = OpAccessChain %142 %32 %40
+OpStore %145 %140
+%146 = OpAccessChain %142 %32 %65
+OpStore %146 %141
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag
new file mode 100644
index 0000000..eea884f
--- /dev/null
+++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag
@@ -0,0 +1,161 @@
+#version 460
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 0, binding = 0, scalar) uniform _9_11
+{
+ float _m0[1024];
+} _11;
+
+layout(location = 0) flat in uint INDEX;
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ uint _18 = INDEX << 4u;
+ uint _20 = INDEX * 4u;
+ float _38 = _11._m0[(INDEX * 4u) + 256u] + _11._m0[_20];
+ float _39 = _11._m0[(INDEX * 4u) + 257u] + _11._m0[_20];
+ uint _43 = (INDEX * 4u) + 512u;
+ SV_Target.x = (_38 + _11._m0[_43]) + _11._m0[(INDEX * 4u) + 768u];
+ SV_Target.y = (_39 + _11._m0[(INDEX * 4u) + 513u]) + _11._m0[(INDEX * 4u) + 769u];
+ SV_Target.z = (_38 + _11._m0[(INDEX * 4u) + 514u]) + _11._m0[(INDEX * 4u) + 770u];
+ SV_Target.w = (_39 + _11._m0[_43]) + _11._m0[(INDEX * 4u) + 771u];
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 107
+; Schema: 0
+OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %13 %16
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %9 ""
+OpName %13 "INDEX"
+OpName %16 "SV_Target"
+OpDecorate %8 ArrayStride 4
+OpMemberDecorate %9 0 Offset 0
+OpDecorate %9 Block
+OpDecorate %11 DescriptorSet 0
+OpDecorate %11 Binding 0
+OpDecorate %13 Flat
+OpDecorate %13 Location 0
+OpDecorate %16 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 1024
+%7 = OpTypeFloat 32
+%8 = OpTypeArray %7 %6
+%9 = OpTypeStruct %8
+%10 = OpTypePointer Uniform %9
+%11 = OpVariable %10 Uniform
+%12 = OpTypePointer Input %5
+%13 = OpVariable %12 Input
+%14 = OpTypeVector %7 4
+%15 = OpTypePointer Output %14
+%16 = OpVariable %15 Output
+%19 = OpConstant %5 4
+%21 = OpTypePointer Uniform %7
+%23 = OpConstant %5 0
+%28 = OpConstant %5 256
+%32 = OpConstant %5 1028
+%35 = OpConstant %5 257
+%41 = OpConstant %5 2048
+%44 = OpConstant %5 512
+%48 = OpConstant %5 2052
+%51 = OpConstant %5 513
+%55 = OpConstant %5 2056
+%58 = OpConstant %5 514
+%66 = OpConstant %5 3072
+%69 = OpConstant %5 768
+%73 = OpConstant %5 3076
+%76 = OpConstant %5 769
+%80 = OpConstant %5 3080
+%83 = OpConstant %5 770
+%87 = OpConstant %5 3084
+%90 = OpConstant %5 771
+%97 = OpTypePointer Output %7
+%100 = OpConstant %5 1
+%102 = OpConstant %5 2
+%104 = OpConstant %5 3
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %105
+%105 = OpLabel
+%17 = OpLoad %5 %13
+%18 = OpShiftLeftLogical %5 %17 %19
+%20 = OpIMul %5 %17 %19
+%22 = OpAccessChain %21 %11 %23 %20
+%24 = OpLoad %7 %22
+%25 = OpIAdd %5 %18 %6
+%26 = OpIMul %5 %17 %19
+%27 = OpIAdd %5 %26 %28
+%29 = OpAccessChain %21 %11 %23 %27
+%30 = OpLoad %7 %29
+%31 = OpIAdd %5 %18 %32
+%33 = OpIMul %5 %17 %19
+%34 = OpIAdd %5 %33 %35
+%36 = OpAccessChain %21 %11 %23 %34
+%37 = OpLoad %7 %36
+%38 = OpFAdd %7 %30 %24
+%39 = OpFAdd %7 %37 %24
+%40 = OpIAdd %5 %18 %41
+%42 = OpIMul %5 %17 %19
+%43 = OpIAdd %5 %42 %44
+%45 = OpAccessChain %21 %11 %23 %43
+%46 = OpLoad %7 %45
+%47 = OpIAdd %5 %18 %48
+%49 = OpIMul %5 %17 %19
+%50 = OpIAdd %5 %49 %51
+%52 = OpAccessChain %21 %11 %23 %50
+%53 = OpLoad %7 %52
+%54 = OpIAdd %5 %18 %55
+%56 = OpIMul %5 %17 %19
+%57 = OpIAdd %5 %56 %58
+%59 = OpAccessChain %21 %11 %23 %57
+%60 = OpLoad %7 %59
+%61 = OpFAdd %7 %38 %46
+%62 = OpFAdd %7 %39 %53
+%63 = OpFAdd %7 %38 %60
+%64 = OpFAdd %7 %39 %46
+%65 = OpIAdd %5 %18 %66
+%67 = OpIMul %5 %17 %19
+%68 = OpIAdd %5 %67 %69
+%70 = OpAccessChain %21 %11 %23 %68
+%71 = OpLoad %7 %70
+%72 = OpIAdd %5 %18 %73
+%74 = OpIMul %5 %17 %19
+%75 = OpIAdd %5 %74 %76
+%77 = OpAccessChain %21 %11 %23 %75
+%78 = OpLoad %7 %77
+%79 = OpIAdd %5 %18 %80
+%81 = OpIMul %5 %17 %19
+%82 = OpIAdd %5 %81 %83
+%84 = OpAccessChain %21 %11 %23 %82
+%85 = OpLoad %7 %84
+%86 = OpIAdd %5 %18 %87
+%88 = OpIMul %5 %17 %19
+%89 = OpIAdd %5 %88 %90
+%91 = OpAccessChain %21 %11 %23 %89
+%92 = OpLoad %7 %91
+%93 = OpFAdd %7 %61 %71
+%94 = OpFAdd %7 %62 %78
+%95 = OpFAdd %7 %63 %85
+%96 = OpFAdd %7 %64 %92
+%98 = OpAccessChain %97 %16 %23
+OpStore %98 %93
+%99 = OpAccessChain %97 %16 %100
+OpStore %99 %94
+%101 = OpAccessChain %97 %16 %102
+OpStore %101 %95
+%103 = OpAccessChain %97 %16 %104
+OpStore %103 %96
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
new file mode 100644
index 0000000..5c816f7
--- /dev/null
+++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
@@ -0,0 +1,162 @@
+#version 460
+#extension GL_EXT_ray_tracing : require
+#extension GL_EXT_nonuniform_qualifier : require
+
+struct _17
+{
+ vec4 _m0;
+ uvec4 _m1;
+};
+
+layout(shaderRecordEXT, std430) buffer SBTBlock
+{
+ uint _m0[5];
+ uint _m1[6];
+ uvec2 _m2;
+ uvec2 _m3;
+ uvec2 _m4;
+ uvec2 _m5;
+ uvec2 _m6;
+ uvec2 _m7;
+ uvec2 _m8;
+ uvec2 _m9;
+ uvec2 _m10;
+} SBT;
+
+layout(location = 0) rayPayloadInEXT _17 payload;
+
+vec4 _38;
+uvec4 _54;
+
+void main()
+{
+ vec4 _37 = _38;
+ _37.x = uintBitsToFloat(SBT._m0[0u]);
+ vec4 _39 = _37;
+ _39.y = float(SBT._m0[1u]);
+ vec4 _40 = _39;
+ _40.z = float(int(SBT._m0[2u]));
+ vec4 _41 = _40;
+ _41.w = 1.0;
+ uvec4 _53 = _54;
+ _53.x = uint(int(uintBitsToFloat(SBT._m1[0u])));
+ uvec4 _55 = _53;
+ _55.y = uint(int(uintBitsToFloat(SBT._m1[1u])));
+ uvec4 _56 = _55;
+ _56.z = SBT._m1[2u];
+ uvec4 _57 = _56;
+ _57.w = SBT._m0[2u];
+ payload._m0 = _41;
+ payload._m1 = _57;
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.4
+; Generator: Unknown(30017); 21022
+; Bound: 64
+; Schema: 0
+OpCapability Shader
+OpCapability UniformBufferArrayDynamicIndexing
+OpCapability SampledImageArrayDynamicIndexing
+OpCapability StorageBufferArrayDynamicIndexing
+OpCapability StorageImageArrayDynamicIndexing
+OpCapability RayTracingKHR
+OpCapability RuntimeDescriptorArray
+OpCapability UniformBufferArrayNonUniformIndexing
+OpCapability SampledImageArrayNonUniformIndexing
+OpCapability StorageBufferArrayNonUniformIndexing
+OpCapability StorageImageArrayNonUniformIndexing
+OpExtension "SPV_EXT_descriptor_indexing"
+OpExtension "SPV_KHR_ray_tracing"
+OpMemoryModel Logical GLSL450
+OpEntryPoint MissNV %3 "main" %13 %19
+OpName %3 "main"
+OpName %11 "SBTBlock"
+OpName %13 "SBT"
+OpName %17 ""
+OpName %19 "payload"
+OpDecorate %7 ArrayStride 4
+OpDecorate %9 ArrayStride 4
+OpDecorate %11 Block
+OpMemberDecorate %11 0 Offset 0
+OpMemberDecorate %11 1 Offset 20
+OpMemberDecorate %11 2 Offset 48
+OpMemberDecorate %11 3 Offset 56
+OpMemberDecorate %11 4 Offset 64
+OpMemberDecorate %11 5 Offset 72
+OpMemberDecorate %11 6 Offset 80
+OpMemberDecorate %11 7 Offset 88
+OpMemberDecorate %11 8 Offset 96
+OpMemberDecorate %11 9 Offset 104
+OpMemberDecorate %11 10 Offset 112
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 5
+%7 = OpTypeArray %5 %6
+%8 = OpConstant %5 6
+%9 = OpTypeArray %5 %8
+%10 = OpTypeVector %5 2
+%11 = OpTypeStruct %7 %9 %10 %10 %10 %10 %10 %10 %10 %10 %10
+%12 = OpTypePointer ShaderRecordBufferNV %11
+%13 = OpVariable %12 ShaderRecordBufferNV
+%14 = OpTypeFloat 32
+%15 = OpTypeVector %14 4
+%16 = OpTypeVector %5 4
+%17 = OpTypeStruct %15 %16
+%18 = OpTypePointer IncomingRayPayloadNV %17
+%19 = OpVariable %18 IncomingRayPayloadNV
+%20 = OpTypePointer ShaderRecordBufferNV %9
+%22 = OpConstant %5 1
+%23 = OpTypePointer ShaderRecordBufferNV %7
+%25 = OpConstant %5 0
+%26 = OpTypePointer ShaderRecordBufferNV %5
+%34 = OpConstant %5 2
+%42 = OpConstant %14 1
+%58 = OpTypePointer IncomingRayPayloadNV %15
+%60 = OpTypePointer IncomingRayPayloadNV %16
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+%38 = OpUndef %15
+%54 = OpUndef %16
+OpBranch %62
+%62 = OpLabel
+%21 = OpAccessChain %20 %13 %22
+%24 = OpAccessChain %23 %13 %25
+%27 = OpAccessChain %26 %24 %25
+%28 = OpLoad %5 %27
+%29 = OpBitcast %14 %28
+%30 = OpAccessChain %26 %24 %22
+%31 = OpLoad %5 %30
+%32 = OpConvertUToF %14 %31
+%33 = OpAccessChain %26 %24 %34
+%35 = OpLoad %5 %33
+%36 = OpConvertSToF %14 %35
+%37 = OpCompositeInsert %15 %29 %38 0
+%39 = OpCompositeInsert %15 %32 %37 1
+%40 = OpCompositeInsert %15 %36 %39 2
+%41 = OpCompositeInsert %15 %42 %40 3
+%43 = OpAccessChain %26 %21 %25
+%44 = OpLoad %5 %43
+%45 = OpBitcast %14 %44
+%46 = OpAccessChain %26 %21 %22
+%47 = OpLoad %5 %46
+%48 = OpBitcast %14 %47
+%49 = OpAccessChain %26 %21 %34
+%50 = OpLoad %5 %49
+%51 = OpConvertFToS %5 %45
+%52 = OpConvertFToS %5 %48
+%53 = OpCompositeInsert %16 %51 %54 0
+%55 = OpCompositeInsert %16 %52 %53 1
+%56 = OpCompositeInsert %16 %50 %55 2
+%57 = OpCompositeInsert %16 %35 %56 3
+%59 = OpInBoundsAccessChain %58 %19 %25
+OpStore %59 %41
+%61 = OpInBoundsAccessChain %60 %19 %22
+OpStore %61 %57
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
new file mode 100644
index 0000000..567eb93
--- /dev/null
+++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
@@ -0,0 +1,179 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 0, binding = 0, scalar) uniform _9_11
+{
+ float _m0[20];
+} _11;
+
+layout(set = 0, binding = 0, scalar) uniform _15_17
+{
+ double _m0[10];
+} _17;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ SV_Target.x = ((float(float16_t(_11._m0[4u])) + _11._m0[0u]) + float(float16_t(_11._m0[8u]))) + float(int64_t(doubleBitsToUint64(_17._m0[6u])));
+ SV_Target.y = ((float(float16_t(_11._m0[5u])) + _11._m0[1u]) + float(float16_t(_11._m0[9u]))) + float(int64_t(doubleBitsToUint64(_17._m0[7u])));
+ SV_Target.z = ((float(float16_t(_11._m0[6u])) + _11._m0[2u]) + float(float16_t(_11._m0[10u]))) + float(int64_t(doubleBitsToUint64(_17._m0[8u])));
+ SV_Target.w = ((float(float16_t(_11._m0[7u])) + _11._m0[3u]) + float(float16_t(_11._m0[11u]))) + float(int64_t(doubleBitsToUint64(_17._m0[9u])));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 111
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %20
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %9 ""
+OpName %15 ""
+OpName %20 "SV_Target"
+OpDecorate %8 ArrayStride 4
+OpMemberDecorate %9 0 Offset 0
+OpDecorate %9 Block
+OpDecorate %14 ArrayStride 8
+OpMemberDecorate %15 0 Offset 0
+OpDecorate %15 Block
+OpDecorate %11 DescriptorSet 0
+OpDecorate %11 Binding 0
+OpDecorate %17 DescriptorSet 0
+OpDecorate %17 Binding 0
+OpDecorate %20 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 20
+%7 = OpTypeFloat 32
+%8 = OpTypeArray %7 %6
+%9 = OpTypeStruct %8
+%10 = OpTypePointer Uniform %9
+%11 = OpVariable %10 Uniform
+%12 = OpConstant %5 10
+%13 = OpTypeFloat 64
+%14 = OpTypeArray %13 %12
+%15 = OpTypeStruct %14
+%16 = OpTypePointer Uniform %15
+%17 = OpVariable %16 Uniform
+%18 = OpTypeVector %7 4
+%19 = OpTypePointer Output %18
+%20 = OpVariable %19 Output
+%21 = OpConstant %5 0
+%22 = OpTypePointer Uniform %7
+%25 = OpConstant %5 1
+%28 = OpConstant %5 2
+%31 = OpConstant %5 3
+%34 = OpConstant %5 4
+%37 = OpTypeFloat 16
+%39 = OpConstant %5 5
+%43 = OpConstant %5 6
+%47 = OpConstant %5 7
+%59 = OpConstant %5 8
+%63 = OpConstant %5 9
+%70 = OpConstant %5 11
+%82 = OpTypeInt 64 0
+%83 = OpTypePointer Uniform %13
+%104 = OpTypePointer Output %7
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %109
+%109 = OpLabel
+%23 = OpAccessChain %22 %11 %21 %21
+%24 = OpLoad %7 %23
+%26 = OpAccessChain %22 %11 %21 %25
+%27 = OpLoad %7 %26
+%29 = OpAccessChain %22 %11 %21 %28
+%30 = OpLoad %7 %29
+%32 = OpAccessChain %22 %11 %21 %31
+%33 = OpLoad %7 %32
+%35 = OpAccessChain %22 %11 %21 %34
+%36 = OpLoad %7 %35
+%38 = OpFConvert %37 %36
+%40 = OpAccessChain %22 %11 %21 %39
+%41 = OpLoad %7 %40
+%42 = OpFConvert %37 %41
+%44 = OpAccessChain %22 %11 %21 %43
+%45 = OpLoad %7 %44
+%46 = OpFConvert %37 %45
+%48 = OpAccessChain %22 %11 %21 %47
+%49 = OpLoad %7 %48
+%50 = OpFConvert %37 %49
+%51 = OpFConvert %7 %38
+%52 = OpFConvert %7 %42
+%53 = OpFConvert %7 %46
+%54 = OpFConvert %7 %50
+%55 = OpFAdd %7 %51 %24
+%56 = OpFAdd %7 %52 %27
+%57 = OpFAdd %7 %53 %30
+%58 = OpFAdd %7 %54 %33
+%60 = OpAccessChain %22 %11 %21 %59
+%61 = OpLoad %7 %60
+%62 = OpFConvert %37 %61
+%64 = OpAccessChain %22 %11 %21 %63
+%65 = OpLoad %7 %64
+%66 = OpFConvert %37 %65
+%67 = OpAccessChain %22 %11 %21 %12
+%68 = OpLoad %7 %67
+%69 = OpFConvert %37 %68
+%71 = OpAccessChain %22 %11 %21 %70
+%72 = OpLoad %7 %71
+%73 = OpFConvert %37 %72
+%74 = OpFConvert %7 %62
+%75 = OpFConvert %7 %66
+%76 = OpFConvert %7 %69
+%77 = OpFConvert %7 %73
+%78 = OpFAdd %7 %55 %74
+%79 = OpFAdd %7 %56 %75
+%80 = OpFAdd %7 %57 %76
+%81 = OpFAdd %7 %58 %77
+%84 = OpAccessChain %83 %17 %21 %43
+%85 = OpLoad %13 %84
+%86 = OpBitcast %82 %85
+%87 = OpAccessChain %83 %17 %21 %47
+%88 = OpLoad %13 %87
+%89 = OpBitcast %82 %88
+%90 = OpAccessChain %83 %17 %21 %59
+%91 = OpLoad %13 %90
+%92 = OpBitcast %82 %91
+%93 = OpAccessChain %83 %17 %21 %63
+%94 = OpLoad %13 %93
+%95 = OpBitcast %82 %94
+%96 = OpConvertSToF %7 %86
+%97 = OpConvertSToF %7 %89
+%98 = OpConvertSToF %7 %92
+%99 = OpConvertSToF %7 %95
+%100 = OpFAdd %7 %78 %96
+%101 = OpFAdd %7 %79 %97
+%102 = OpFAdd %7 %80 %98
+%103 = OpFAdd %7 %81 %99
+%105 = OpAccessChain %104 %20 %21
+OpStore %105 %100
+%106 = OpAccessChain %104 %20 %25
+OpStore %106 %101
+%107 = OpAccessChain %104 %20 %28
+OpStore %107 %102
+%108 = OpAccessChain %104 %20 %31
+OpStore %108 %103
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag
new file mode 100644
index 0000000..af4eb5b
--- /dev/null
+++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag
@@ -0,0 +1,124 @@
+#version 460
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uint _m0;
+ uint _m1;
+ uint _m2;
+ uint _m3;
+ uint _m4;
+ uint _m5;
+ uint _m6;
+ uint _m7;
+ uint _m8;
+ uint _m9;
+ uint _m10;
+ uint _m11;
+ uint _m12;
+ uint _m13;
+ uint _m14;
+ uint _m15;
+} registers;
+
+layout(location = 0) out vec2 SV_Target;
+
+void main()
+{
+ float _18 = uintBitsToFloat(registers._m4);
+ float _35 = float(registers._m2 + registers._m5);
+ float _45 = float(int(registers._m3 + registers._m6));
+ SV_Target.x = ((uintBitsToFloat(registers._m0) + _18) + _35) + _45;
+ SV_Target.y = ((uintBitsToFloat(registers._m1) + _18) + _35) + _45;
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 53
+; Schema: 0
+OpCapability Shader
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %12
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %6 "RootConstants"
+OpName %8 "registers"
+OpName %12 "SV_Target"
+OpDecorate %6 Block
+OpMemberDecorate %6 0 Offset 0
+OpMemberDecorate %6 1 Offset 4
+OpMemberDecorate %6 2 Offset 8
+OpMemberDecorate %6 3 Offset 12
+OpMemberDecorate %6 4 Offset 16
+OpMemberDecorate %6 5 Offset 20
+OpMemberDecorate %6 6 Offset 24
+OpMemberDecorate %6 7 Offset 28
+OpMemberDecorate %6 8 Offset 32
+OpMemberDecorate %6 9 Offset 36
+OpMemberDecorate %6 10 Offset 40
+OpMemberDecorate %6 11 Offset 44
+OpMemberDecorate %6 12 Offset 48
+OpMemberDecorate %6 13 Offset 52
+OpMemberDecorate %6 14 Offset 56
+OpMemberDecorate %6 15 Offset 60
+OpDecorate %12 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5 %5
+%7 = OpTypePointer PushConstant %6
+%8 = OpVariable %7 PushConstant
+%9 = OpTypeFloat 32
+%10 = OpTypeVector %9 2
+%11 = OpTypePointer Output %10
+%12 = OpVariable %11 Output
+%13 = OpTypePointer PushConstant %5
+%15 = OpConstant %5 4
+%17 = OpConstant %5 0
+%23 = OpConstant %5 1
+%29 = OpConstant %5 5
+%32 = OpConstant %5 2
+%39 = OpConstant %5 6
+%42 = OpConstant %5 3
+%48 = OpTypePointer Output %9
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %51
+%51 = OpLabel
+%14 = OpAccessChain %13 %8 %15
+%16 = OpLoad %5 %14
+%18 = OpBitcast %9 %16
+%19 = OpAccessChain %13 %8 %17
+%20 = OpLoad %5 %19
+%21 = OpBitcast %9 %20
+%22 = OpAccessChain %13 %8 %23
+%24 = OpLoad %5 %22
+%25 = OpBitcast %9 %24
+%26 = OpFAdd %9 %21 %18
+%27 = OpFAdd %9 %25 %18
+%28 = OpAccessChain %13 %8 %29
+%30 = OpLoad %5 %28
+%31 = OpAccessChain %13 %8 %32
+%33 = OpLoad %5 %31
+%34 = OpIAdd %5 %33 %30
+%35 = OpConvertUToF %9 %34
+%36 = OpFAdd %9 %26 %35
+%37 = OpFAdd %9 %27 %35
+%38 = OpAccessChain %13 %8 %39
+%40 = OpLoad %5 %38
+%41 = OpAccessChain %13 %8 %42
+%43 = OpLoad %5 %41
+%44 = OpIAdd %5 %43 %40
+%45 = OpConvertSToF %9 %44
+%46 = OpFAdd %9 %36 %45
+%47 = OpFAdd %9 %37 %45
+%49 = OpAccessChain %48 %12 %17
+OpStore %49 %46
+%50 = OpAccessChain %48 %12 %23
+OpStore %50 %47
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag
new file mode 100644
index 0000000..8eb931e
--- /dev/null
+++ b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag
@@ -0,0 +1,313 @@
+#version 460
+#extension GL_ARB_gpu_shader_int64 : require
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_buffer_reference : require
+
+struct AddCarry
+{
+ uint _m0;
+ uint _m1;
+};
+
+layout(buffer_reference) buffer PhysicalPointerFloatNonWrite;
+layout(buffer_reference) buffer PhysicalPointerUint64NonWrite;
+layout(buffer_reference) buffer PhysicalPointerHalfNonWrite;
+layout(buffer_reference, std430) readonly buffer PhysicalPointerFloatNonWrite
+{
+ float value;
+};
+
+layout(buffer_reference, std430) readonly buffer PhysicalPointerUint64NonWrite
+{
+ uint64_t value;
+};
+
+layout(buffer_reference, std430) readonly buffer PhysicalPointerHalfNonWrite
+{
+ float16_t value;
+};
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uvec2 _m0;
+ uvec2 _m1;
+ uvec2 _m2;
+ uvec2 _m3;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ AddCarry _21;
+ _21._m0 = uaddCarry(registers._m0.x, 0u, _21._m1);
+ AddCarry _35;
+ _35._m0 = uaddCarry(registers._m0.x, 4u, _35._m1);
+ AddCarry _46;
+ _46._m0 = uaddCarry(registers._m0.x, 8u, _46._m1);
+ AddCarry _57;
+ _57._m0 = uaddCarry(registers._m0.x, 12u, _57._m1);
+ AddCarry _69;
+ _69._m0 = uaddCarry(registers._m0.x, 32u, _69._m1);
+ AddCarry _83;
+ _83._m0 = uaddCarry(registers._m0.x, 40u, _83._m1);
+ AddCarry _94;
+ _94._m0 = uaddCarry(registers._m0.x, 48u, _94._m1);
+ AddCarry _105;
+ _105._m0 = uaddCarry(registers._m0.x, 56u, _105._m1);
+ AddCarry _125;
+ _125._m0 = uaddCarry(registers._m0.x, 16u, _125._m1);
+ AddCarry _139;
+ _139._m0 = uaddCarry(registers._m0.x, 20u, _139._m1);
+ AddCarry _150;
+ _150._m0 = uaddCarry(registers._m0.x, 24u, _150._m1);
+ AddCarry _161;
+ _161._m0 = uaddCarry(registers._m0.x, 28u, _161._m1);
+ SV_Target.x = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_21._m0, registers._m0.y + _21._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_125._m0, registers._m0.y + _125._m1)).value);
+ SV_Target.y = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_35._m0, registers._m0.y + _35._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_139._m0, registers._m0.y + _139._m1)).value);
+ SV_Target.z = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_94._m0, registers._m0.y + _94._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_46._m0, registers._m0.y + _46._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_150._m0, registers._m0.y + _150._m1)).value);
+ SV_Target.w = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_105._m0, registers._m0.y + _105._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_57._m0, registers._m0.y + _57._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_161._m0, registers._m0.y + _161._m1)).value);
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 187
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Int64
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %13
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %7 "RootConstants"
+OpName %9 "registers"
+OpName %13 "SV_Target"
+OpName %20 "AddCarry"
+OpName %26 "PhysicalPointerFloatNonWrite"
+OpMemberName %26 0 "value"
+OpName %74 "PhysicalPointerUint64NonWrite"
+OpMemberName %74 0 "value"
+OpName %130 "PhysicalPointerHalfNonWrite"
+OpMemberName %130 0 "value"
+OpDecorate %7 Block
+OpMemberDecorate %7 0 Offset 0
+OpMemberDecorate %7 1 Offset 8
+OpMemberDecorate %7 2 Offset 16
+OpMemberDecorate %7 3 Offset 24
+OpDecorate %13 Location 0
+OpMemberDecorate %26 0 Offset 0
+OpDecorate %26 Block
+OpMemberDecorate %26 0 NonWritable
+OpMemberDecorate %74 0 Offset 0
+OpDecorate %74 Block
+OpMemberDecorate %74 0 NonWritable
+OpMemberDecorate %130 0 Offset 0
+OpDecorate %130 Block
+OpMemberDecorate %130 0 NonWritable
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeVector %5 2
+%7 = OpTypeStruct %6 %6 %6 %6
+%8 = OpTypePointer PushConstant %7
+%9 = OpVariable %8 PushConstant
+%10 = OpTypeFloat 32
+%11 = OpTypeVector %10 4
+%12 = OpTypePointer Output %11
+%13 = OpVariable %12 Output
+%14 = OpTypePointer PushConstant %6
+%16 = OpConstant %5 0
+%20 = OpTypeStruct %5 %5
+%26 = OpTypeStruct %10
+%27 = OpTypePointer PhysicalStorageBuffer %26
+%29 = OpTypePointer PhysicalStorageBuffer %10
+%32 = OpConstant %5 4
+%43 = OpConstant %5 8
+%54 = OpConstant %5 12
+%65 = OpConstant %5 32
+%66 = OpTypeInt 64 0
+%74 = OpTypeStruct %66
+%75 = OpTypePointer PhysicalStorageBuffer %74
+%77 = OpTypePointer PhysicalStorageBuffer %66
+%80 = OpConstant %5 40
+%91 = OpConstant %5 48
+%102 = OpConstant %5 56
+%121 = OpConstant %5 16
+%122 = OpTypeFloat 16
+%130 = OpTypeStruct %122
+%131 = OpTypePointer PhysicalStorageBuffer %130
+%133 = OpTypePointer PhysicalStorageBuffer %122
+%136 = OpConstant %5 20
+%147 = OpConstant %5 24
+%158 = OpConstant %5 28
+%177 = OpTypePointer Output %10
+%180 = OpConstant %5 1
+%182 = OpConstant %5 2
+%184 = OpConstant %5 3
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %185
+%185 = OpLabel
+%15 = OpAccessChain %14 %9 %16
+%17 = OpLoad %6 %15
+%18 = OpCompositeExtract %5 %17 0
+%19 = OpCompositeExtract %5 %17 1
+%21 = OpIAddCarry %20 %18 %16
+%22 = OpCompositeExtract %5 %21 0
+%23 = OpCompositeExtract %5 %21 1
+%24 = OpIAdd %5 %19 %23
+%25 = OpCompositeConstruct %6 %22 %24
+%28 = OpBitcast %27 %25
+%30 = OpAccessChain %29 %28 %16
+%31 = OpLoad %10 %30 Aligned 4
+%33 = OpCompositeExtract %5 %17 0
+%34 = OpCompositeExtract %5 %17 1
+%35 = OpIAddCarry %20 %33 %32
+%36 = OpCompositeExtract %5 %35 0
+%37 = OpCompositeExtract %5 %35 1
+%38 = OpIAdd %5 %34 %37
+%39 = OpCompositeConstruct %6 %36 %38
+%40 = OpBitcast %27 %39
+%41 = OpAccessChain %29 %40 %16
+%42 = OpLoad %10 %41 Aligned 4
+%44 = OpCompositeExtract %5 %17 0
+%45 = OpCompositeExtract %5 %17 1
+%46 = OpIAddCarry %20 %44 %43
+%47 = OpCompositeExtract %5 %46 0
+%48 = OpCompositeExtract %5 %46 1
+%49 = OpIAdd %5 %45 %48
+%50 = OpCompositeConstruct %6 %47 %49
+%51 = OpBitcast %27 %50
+%52 = OpAccessChain %29 %51 %16
+%53 = OpLoad %10 %52 Aligned 4
+%55 = OpCompositeExtract %5 %17 0
+%56 = OpCompositeExtract %5 %17 1
+%57 = OpIAddCarry %20 %55 %54
+%58 = OpCompositeExtract %5 %57 0
+%59 = OpCompositeExtract %5 %57 1
+%60 = OpIAdd %5 %56 %59
+%61 = OpCompositeConstruct %6 %58 %60
+%62 = OpBitcast %27 %61
+%63 = OpAccessChain %29 %62 %16
+%64 = OpLoad %10 %63 Aligned 4
+%67 = OpCompositeExtract %5 %17 0
+%68 = OpCompositeExtract %5 %17 1
+%69 = OpIAddCarry %20 %67 %65
+%70 = OpCompositeExtract %5 %69 0
+%71 = OpCompositeExtract %5 %69 1
+%72 = OpIAdd %5 %68 %71
+%73 = OpCompositeConstruct %6 %70 %72
+%76 = OpBitcast %75 %73
+%78 = OpAccessChain %77 %76 %16
+%79 = OpLoad %66 %78 Aligned 8
+%81 = OpCompositeExtract %5 %17 0
+%82 = OpCompositeExtract %5 %17 1
+%83 = OpIAddCarry %20 %81 %80
+%84 = OpCompositeExtract %5 %83 0
+%85 = OpCompositeExtract %5 %83 1
+%86 = OpIAdd %5 %82 %85
+%87 = OpCompositeConstruct %6 %84 %86
+%88 = OpBitcast %75 %87
+%89 = OpAccessChain %77 %88 %16
+%90 = OpLoad %66 %89 Aligned 8
+%92 = OpCompositeExtract %5 %17 0
+%93 = OpCompositeExtract %5 %17 1
+%94 = OpIAddCarry %20 %92 %91
+%95 = OpCompositeExtract %5 %94 0
+%96 = OpCompositeExtract %5 %94 1
+%97 = OpIAdd %5 %93 %96
+%98 = OpCompositeConstruct %6 %95 %97
+%99 = OpBitcast %75 %98
+%100 = OpAccessChain %77 %99 %16
+%101 = OpLoad %66 %100 Aligned 8
+%103 = OpCompositeExtract %5 %17 0
+%104 = OpCompositeExtract %5 %17 1
+%105 = OpIAddCarry %20 %103 %102
+%106 = OpCompositeExtract %5 %105 0
+%107 = OpCompositeExtract %5 %105 1
+%108 = OpIAdd %5 %104 %107
+%109 = OpCompositeConstruct %6 %106 %108
+%110 = OpBitcast %75 %109
+%111 = OpAccessChain %77 %110 %16
+%112 = OpLoad %66 %111 Aligned 8
+%113 = OpConvertSToF %10 %79
+%114 = OpConvertSToF %10 %90
+%115 = OpConvertSToF %10 %101
+%116 = OpConvertSToF %10 %112
+%117 = OpFAdd %10 %113 %31
+%118 = OpFAdd %10 %114 %42
+%119 = OpFAdd %10 %115 %53
+%120 = OpFAdd %10 %116 %64
+%123 = OpCompositeExtract %5 %17 0
+%124 = OpCompositeExtract %5 %17 1
+%125 = OpIAddCarry %20 %123 %121
+%126 = OpCompositeExtract %5 %125 0
+%127 = OpCompositeExtract %5 %125 1
+%128 = OpIAdd %5 %124 %127
+%129 = OpCompositeConstruct %6 %126 %128
+%132 = OpBitcast %131 %129
+%134 = OpAccessChain %133 %132 %16
+%135 = OpLoad %122 %134 Aligned 2
+%137 = OpCompositeExtract %5 %17 0
+%138 = OpCompositeExtract %5 %17 1
+%139 = OpIAddCarry %20 %137 %136
+%140 = OpCompositeExtract %5 %139 0
+%141 = OpCompositeExtract %5 %139 1
+%142 = OpIAdd %5 %138 %141
+%143 = OpCompositeConstruct %6 %140 %142
+%144 = OpBitcast %131 %143
+%145 = OpAccessChain %133 %144 %16
+%146 = OpLoad %122 %145 Aligned 2
+%148 = OpCompositeExtract %5 %17 0
+%149 = OpCompositeExtract %5 %17 1
+%150 = OpIAddCarry %20 %148 %147
+%151 = OpCompositeExtract %5 %150 0
+%152 = OpCompositeExtract %5 %150 1
+%153 = OpIAdd %5 %149 %152
+%154 = OpCompositeConstruct %6 %151 %153
+%155 = OpBitcast %131 %154
+%156 = OpAccessChain %133 %155 %16
+%157 = OpLoad %122 %156 Aligned 2
+%159 = OpCompositeExtract %5 %17 0
+%160 = OpCompositeExtract %5 %17 1
+%161 = OpIAddCarry %20 %159 %158
+%162 = OpCompositeExtract %5 %161 0
+%163 = OpCompositeExtract %5 %161 1
+%164 = OpIAdd %5 %160 %163
+%165 = OpCompositeConstruct %6 %162 %164
+%166 = OpBitcast %131 %165
+%167 = OpAccessChain %133 %166 %16
+%168 = OpLoad %122 %167 Aligned 2
+%169 = OpFConvert %10 %135
+%170 = OpFConvert %10 %146
+%171 = OpFConvert %10 %157
+%172 = OpFConvert %10 %168
+%173 = OpFAdd %10 %117 %169
+%174 = OpFAdd %10 %118 %170
+%175 = OpFAdd %10 %119 %171
+%176 = OpFAdd %10 %120 %172
+%178 = OpAccessChain %177 %13 %16
+OpStore %178 %173
+%179 = OpAccessChain %177 %13 %180
+OpStore %179 %174
+%181 = OpAccessChain %177 %13 %182
+OpStore %181 %175
+%183 = OpAccessChain %177 %13 %184
+OpStore %183 %176
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen b/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen
index 9312a4f..d8abf60 100644
--- a/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen
+++ b/reference/shaders/resources/min16float-ssbo-dxr.ssbo.rgen
@@ -36,6 +36,7 @@ OpCapability UniformBufferArrayDynamicIndexing
OpCapability SampledImageArrayDynamicIndexing
OpCapability StorageBufferArrayDynamicIndexing
OpCapability StorageImageArrayDynamicIndexing
+OpCapability StorageBuffer16BitAccess
OpCapability RayTracingKHR
OpCapability RuntimeDescriptorArray
OpCapability UniformBufferArrayNonUniformIndexing
diff --git a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss
index 7ebdc2c..e4815b2 100644
--- a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss
+++ b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss
@@ -105,8 +105,8 @@ void main()
uint _59 = _58 & 1u;
vec4 _67 = texelFetch(_21[registers._m0 + _59], ivec2(uvec2(0u)), int(0u));
vec4 _80 = texelFetch(_21[registers._m0 + _58], ivec2(uvec2(0u)), int(0u));
- vec4 _99 = texelFetch(_21[((SBT._m7.x >> 6u) + 17u) + _58], ivec2(uvec2(0u)), int(0u));
- vec4 _119 = imageLoad(_25[((SBT._m8.x >> 6u) + 18u) + _58], ivec2(uvec2(0u)));
+ vec4 _99 = texelFetch(_21[nonuniformEXT(((SBT._m7.x >> 6u) + 17u) + _58)], ivec2(uvec2(0u)), int(0u));
+ vec4 _119 = imageLoad(_25[nonuniformEXT(((SBT._m8.x >> 6u) + 18u) + _58)], ivec2(uvec2(0u)));
uint _146 = ((SBT._m9.x >> 6u) + 13u) + _58;
vec4 _169 = uintBitsToFloat(uvec4(SBT._m0[0u], SBT._m0[1u], SBT._m0[2u], SBT._m0[3u]));
vec4 _182 = uintBitsToFloat(uvec4(SBT._m0[4u], 0u, 0u, 0u));
@@ -114,7 +114,7 @@ void main()
_196._m0 = uaddCarry(SBT._m6.x, 1u * 16u, _196._m1);
PhysicalPointerFloat4NonWrite _203 = PhysicalPointerFloat4NonWrite(uvec2(_196._m0, SBT._m6.y + _196._m1));
vec4 _232 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _59], _36[(SBT._m10.x >> 5u) + 13u])), vec2(0.5), 0.0);
- vec4 _258 = textureLod(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)]), vec2(0.5), 0.0);
+ vec4 _258 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)])), vec2(0.5), 0.0);
AddCarry _274;
_274._m0 = uaddCarry(SBT._m2.x, (_58 * 16u) + 0u, _274._m1);
PhysicalPointerFloat4NonWrite _279 = PhysicalPointerFloat4NonWrite(uvec2(_274._m0, SBT._m2.y + _274._m1));
@@ -243,7 +243,12 @@ OpDecorate %32 Binding 0
OpDecorate %36 DescriptorSet 2
OpDecorate %36 Binding 0
OpDecorate %47 NonUniform
+OpDecorate %97 NonUniform
+OpDecorate %98 NonUniform
+OpDecorate %117 NonUniform
+OpDecorate %118 NonUniform
OpDecorate %130 NonUniform
+OpDecorate %146 NonUniform
OpDecorate %140 NonUniform
OpDecorate %147 NonUniform
OpMemberDecorate %201 0 Offset 0
@@ -251,6 +256,9 @@ OpDecorate %201 Block
OpMemberDecorate %201 0 NonWritable
OpDecorate %227 NonUniform
OpDecorate %229 NonUniform
+OpDecorate %255 NonUniform
+OpDecorate %256 NonUniform
+OpDecorate %257 NonUniform
OpMemberDecorate %293 0 Offset 0
OpDecorate %293 Block
OpMemberDecorate %293 0 NonWritable
diff --git a/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag
new file mode 100644
index 0000000..ba78e81
--- /dev/null
+++ b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag
@@ -0,0 +1,260 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_nonuniform_qualifier : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 5, binding = 0, scalar) uniform BindlessCBV
+{
+ float16_t _m0[32768];
+} _15[];
+
+layout(set = 5, binding = 0, scalar) uniform _19_22
+{
+ float _m0[16384];
+} _22[];
+
+layout(set = 5, binding = 0, scalar) uniform _26_29
+{
+ double _m0[8192];
+} _29[];
+
+layout(push_constant, std430) uniform RootConstants
+{
+ uint _m0;
+ uint _m1;
+ uint _m2;
+ uint _m3;
+ uint _m4;
+ uint _m5;
+ uint _m6;
+ uint _m7;
+} registers;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ uint _39 = registers._m5 + 2u;
+ uint _45 = registers._m5 + 1u;
+ SV_Target.x = (((float(_15[registers._m5]._m0[8u]) + _22[registers._m5]._m0[0u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[4u])))) + _22[_45]._m0[0u]) + float(_29[_39]._m0[0u]);
+ SV_Target.y = (((float(_15[registers._m5]._m0[10u]) + _22[registers._m5]._m0[1u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[5u])))) + _22[_45]._m0[1u]) + float(_29[_39]._m0[1u]);
+ SV_Target.z = (((float(_15[registers._m5]._m0[12u]) + _22[registers._m5]._m0[2u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[6u])))) + _22[_45]._m0[2u]) + float(_29[_39]._m0[2u]);
+ SV_Target.w = (((float(_15[registers._m5]._m0[14u]) + _22[registers._m5]._m0[3u]) + float(int64_t(doubleBitsToUint64(_29[registers._m5]._m0[7u])))) + _22[_45]._m0[3u]) + float(_29[_39]._m0[3u]);
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 149
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability RuntimeDescriptorArray
+OpCapability PhysicalStorageBufferAddresses
+OpExtension "SPV_EXT_descriptor_indexing"
+OpExtension "SPV_KHR_physical_storage_buffer"
+OpMemoryModel PhysicalStorageBuffer64 GLSL450
+OpEntryPoint Fragment %3 "main" %32
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %6 "RootConstants"
+OpName %8 "registers"
+OpName %12 "BindlessCBV"
+OpName %19 "BindlessCBV"
+OpName %26 "BindlessCBV"
+OpName %32 "SV_Target"
+OpDecorate %6 Block
+OpMemberDecorate %6 0 Offset 0
+OpMemberDecorate %6 1 Offset 4
+OpMemberDecorate %6 2 Offset 8
+OpMemberDecorate %6 3 Offset 12
+OpMemberDecorate %6 4 Offset 16
+OpMemberDecorate %6 5 Offset 20
+OpMemberDecorate %6 6 Offset 24
+OpMemberDecorate %6 7 Offset 28
+OpDecorate %11 ArrayStride 2
+OpDecorate %12 Block
+OpMemberDecorate %12 0 Offset 0
+OpDecorate %15 DescriptorSet 5
+OpDecorate %15 Binding 0
+OpDecorate %18 ArrayStride 4
+OpDecorate %19 Block
+OpMemberDecorate %19 0 Offset 0
+OpDecorate %22 DescriptorSet 5
+OpDecorate %22 Binding 0
+OpDecorate %25 ArrayStride 8
+OpDecorate %26 Block
+OpMemberDecorate %26 0 Offset 0
+OpDecorate %29 DescriptorSet 5
+OpDecorate %29 Binding 0
+OpDecorate %32 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeStruct %5 %5 %5 %5 %5 %5 %5 %5
+%7 = OpTypePointer PushConstant %6
+%8 = OpVariable %7 PushConstant
+%9 = OpTypeFloat 16
+%10 = OpConstant %5 32768
+%11 = OpTypeArray %9 %10
+%12 = OpTypeStruct %11
+%13 = OpTypeRuntimeArray %12
+%14 = OpTypePointer Uniform %13
+%15 = OpVariable %14 Uniform
+%16 = OpTypeFloat 32
+%17 = OpConstant %5 16384
+%18 = OpTypeArray %16 %17
+%19 = OpTypeStruct %18
+%20 = OpTypeRuntimeArray %19
+%21 = OpTypePointer Uniform %20
+%22 = OpVariable %21 Uniform
+%23 = OpTypeFloat 64
+%24 = OpConstant %5 8192
+%25 = OpTypeArray %23 %24
+%26 = OpTypeStruct %25
+%27 = OpTypeRuntimeArray %26
+%28 = OpTypePointer Uniform %27
+%29 = OpVariable %28 Uniform
+%30 = OpTypeVector %16 4
+%31 = OpTypePointer Output %30
+%32 = OpVariable %31 Output
+%33 = OpTypePointer Uniform %26
+%35 = OpTypePointer PushConstant %5
+%37 = OpConstant %5 5
+%40 = OpConstant %5 2
+%41 = OpTypePointer Uniform %19
+%46 = OpConstant %5 1
+%47 = OpTypePointer Uniform %12
+%57 = OpConstant %5 0
+%58 = OpTypePointer Uniform %16
+%65 = OpConstant %5 3
+%68 = OpConstant %5 8
+%69 = OpTypePointer Uniform %9
+%72 = OpConstant %5 10
+%75 = OpConstant %5 12
+%78 = OpConstant %5 14
+%89 = OpTypeInt 64 0
+%90 = OpConstant %5 4
+%91 = OpTypePointer Uniform %23
+%98 = OpConstant %5 6
+%102 = OpConstant %5 7
+%142 = OpTypePointer Output %16
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %147
+%147 = OpLabel
+%36 = OpAccessChain %35 %8 %37
+%38 = OpLoad %5 %36
+%39 = OpIAdd %5 %38 %40
+%34 = OpAccessChain %33 %29 %39
+%43 = OpAccessChain %35 %8 %37
+%44 = OpLoad %5 %43
+%45 = OpIAdd %5 %44 %46
+%42 = OpAccessChain %41 %22 %45
+%49 = OpAccessChain %35 %8 %37
+%50 = OpLoad %5 %49
+%48 = OpAccessChain %47 %15 %50
+%52 = OpAccessChain %35 %8 %37
+%53 = OpLoad %5 %52
+%51 = OpAccessChain %41 %22 %53
+%55 = OpAccessChain %35 %8 %37
+%56 = OpLoad %5 %55
+%54 = OpAccessChain %33 %29 %56
+%59 = OpAccessChain %58 %51 %57 %57
+%60 = OpLoad %16 %59
+%61 = OpAccessChain %58 %51 %57 %46
+%62 = OpLoad %16 %61
+%63 = OpAccessChain %58 %51 %57 %40
+%64 = OpLoad %16 %63
+%66 = OpAccessChain %58 %51 %57 %65
+%67 = OpLoad %16 %66
+%70 = OpAccessChain %69 %48 %57 %68
+%71 = OpLoad %9 %70
+%73 = OpAccessChain %69 %48 %57 %72
+%74 = OpLoad %9 %73
+%76 = OpAccessChain %69 %48 %57 %75
+%77 = OpLoad %9 %76
+%79 = OpAccessChain %69 %48 %57 %78
+%80 = OpLoad %9 %79
+%81 = OpFConvert %16 %71
+%82 = OpFConvert %16 %74
+%83 = OpFConvert %16 %77
+%84 = OpFConvert %16 %80
+%85 = OpFAdd %16 %81 %60
+%86 = OpFAdd %16 %82 %62
+%87 = OpFAdd %16 %83 %64
+%88 = OpFAdd %16 %84 %67
+%92 = OpAccessChain %91 %54 %57 %90
+%93 = OpLoad %23 %92
+%94 = OpBitcast %89 %93
+%95 = OpAccessChain %91 %54 %57 %37
+%96 = OpLoad %23 %95
+%97 = OpBitcast %89 %96
+%99 = OpAccessChain %91 %54 %57 %98
+%100 = OpLoad %23 %99
+%101 = OpBitcast %89 %100
+%103 = OpAccessChain %91 %54 %57 %102
+%104 = OpLoad %23 %103
+%105 = OpBitcast %89 %104
+%106 = OpConvertSToF %16 %94
+%107 = OpConvertSToF %16 %97
+%108 = OpConvertSToF %16 %101
+%109 = OpConvertSToF %16 %105
+%110 = OpFAdd %16 %85 %106
+%111 = OpFAdd %16 %86 %107
+%112 = OpFAdd %16 %87 %108
+%113 = OpFAdd %16 %88 %109
+%114 = OpAccessChain %58 %42 %57 %57
+%115 = OpLoad %16 %114
+%116 = OpAccessChain %58 %42 %57 %46
+%117 = OpLoad %16 %116
+%118 = OpAccessChain %58 %42 %57 %40
+%119 = OpLoad %16 %118
+%120 = OpAccessChain %58 %42 %57 %65
+%121 = OpLoad %16 %120
+%122 = OpFAdd %16 %110 %115
+%123 = OpFAdd %16 %111 %117
+%124 = OpFAdd %16 %112 %119
+%125 = OpFAdd %16 %113 %121
+%126 = OpAccessChain %91 %34 %57 %57
+%127 = OpLoad %23 %126
+%128 = OpAccessChain %91 %34 %57 %46
+%129 = OpLoad %23 %128
+%130 = OpAccessChain %91 %34 %57 %40
+%131 = OpLoad %23 %130
+%132 = OpAccessChain %91 %34 %57 %65
+%133 = OpLoad %23 %132
+%134 = OpFConvert %16 %127
+%135 = OpFConvert %16 %129
+%136 = OpFConvert %16 %131
+%137 = OpFConvert %16 %133
+%138 = OpFAdd %16 %122 %134
+%139 = OpFAdd %16 %123 %135
+%140 = OpFAdd %16 %124 %136
+%141 = OpFAdd %16 %125 %137
+%143 = OpAccessChain %142 %32 %57
+OpStore %143 %138
+%144 = OpAccessChain %142 %32 %46
+OpStore %144 %139
+%145 = OpAccessChain %142 %32 %40
+OpStore %145 %140
+%146 = OpAccessChain %142 %32 %65
+OpStore %146 %141
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag
new file mode 100644
index 0000000..32d7dcf
--- /dev/null
+++ b/reference/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag
@@ -0,0 +1,231 @@
+#version 460
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_EXT_scalar_block_layout : require
+
+layout(set = 0, binding = 0, scalar) uniform _9_11
+{
+ float16_t _m0[32];
+} _11;
+
+layout(set = 0, binding = 0, scalar) uniform _15_17
+{
+ float _m0[16];
+} _17;
+
+layout(set = 0, binding = 0, scalar) uniform _21_23
+{
+ double _m0[8];
+} _23;
+
+layout(set = 0, binding = 1, scalar) uniform _26_28
+{
+ float _m0[4];
+} _28;
+
+layout(set = 0, binding = 2, scalar) uniform _30_32
+{
+ double _m0[4];
+} _32;
+
+layout(location = 0) out vec4 SV_Target;
+
+void main()
+{
+ SV_Target.x = (((float(_11._m0[8u]) + _17._m0[0u]) + float(int64_t(doubleBitsToUint64(_23._m0[4u])))) + _28._m0[0u]) + float(_32._m0[0u]);
+ SV_Target.y = (((float(_11._m0[10u]) + _17._m0[1u]) + float(int64_t(doubleBitsToUint64(_23._m0[5u])))) + _28._m0[1u]) + float(_32._m0[1u]);
+ SV_Target.z = (((float(_11._m0[12u]) + _17._m0[2u]) + float(int64_t(doubleBitsToUint64(_23._m0[6u])))) + _28._m0[2u]) + float(_32._m0[2u]);
+ SV_Target.w = (((float(_11._m0[14u]) + _17._m0[3u]) + float(int64_t(doubleBitsToUint64(_23._m0[7u])))) + _28._m0[3u]) + float(_32._m0[3u]);
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 129
+; Schema: 0
+OpCapability Shader
+OpCapability Float16
+OpCapability Float64
+OpCapability Int64
+OpCapability UniformAndStorageBuffer16BitAccess
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %3 "main" %35
+OpExecutionMode %3 OriginUpperLeft
+OpName %3 "main"
+OpName %9 ""
+OpName %15 ""
+OpName %21 ""
+OpName %26 ""
+OpName %30 ""
+OpName %35 "SV_Target"
+OpDecorate %8 ArrayStride 2
+OpMemberDecorate %9 0 Offset 0
+OpDecorate %9 Block
+OpDecorate %14 ArrayStride 4
+OpMemberDecorate %15 0 Offset 0
+OpDecorate %15 Block
+OpDecorate %20 ArrayStride 8
+OpMemberDecorate %21 0 Offset 0
+OpDecorate %21 Block
+OpDecorate %11 DescriptorSet 0
+OpDecorate %11 Binding 0
+OpDecorate %17 DescriptorSet 0
+OpDecorate %17 Binding 0
+OpDecorate %23 DescriptorSet 0
+OpDecorate %23 Binding 0
+OpDecorate %25 ArrayStride 4
+OpMemberDecorate %26 0 Offset 0
+OpDecorate %26 Block
+OpDecorate %28 DescriptorSet 0
+OpDecorate %28 Binding 1
+OpDecorate %29 ArrayStride 8
+OpMemberDecorate %30 0 Offset 0
+OpDecorate %30 Block
+OpDecorate %32 DescriptorSet 0
+OpDecorate %32 Binding 2
+OpDecorate %35 Location 0
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpConstant %5 32
+%7 = OpTypeFloat 16
+%8 = OpTypeArray %7 %6
+%9 = OpTypeStruct %8
+%10 = OpTypePointer Uniform %9
+%11 = OpVariable %10 Uniform
+%12 = OpConstant %5 16
+%13 = OpTypeFloat 32
+%14 = OpTypeArray %13 %12
+%15 = OpTypeStruct %14
+%16 = OpTypePointer Uniform %15
+%17 = OpVariable %16 Uniform
+%18 = OpConstant %5 8
+%19 = OpTypeFloat 64
+%20 = OpTypeArray %19 %18
+%21 = OpTypeStruct %20
+%22 = OpTypePointer Uniform %21
+%23 = OpVariable %22 Uniform
+%24 = OpConstant %5 4
+%25 = OpTypeArray %13 %24
+%26 = OpTypeStruct %25
+%27 = OpTypePointer Uniform %26
+%28 = OpVariable %27 Uniform
+%29 = OpTypeArray %19 %24
+%30 = OpTypeStruct %29
+%31 = OpTypePointer Uniform %30
+%32 = OpVariable %31 Uniform
+%33 = OpTypeVector %13 4
+%34 = OpTypePointer Output %33
+%35 = OpVariable %34 Output
+%36 = OpConstant %5 0
+%37 = OpTypePointer Uniform %13
+%40 = OpConstant %5 1
+%43 = OpConstant %5 2
+%46 = OpConstant %5 3
+%49 = OpTypePointer Uniform %7
+%52 = OpConstant %5 10
+%55 = OpConstant %5 12
+%58 = OpConstant %5 14
+%69 = OpTypeInt 64 0
+%70 = OpTypePointer Uniform %19
+%74 = OpConstant %5 5
+%78 = OpConstant %5 6
+%82 = OpConstant %5 7
+%122 = OpTypePointer Output %13
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %127
+%127 = OpLabel
+%38 = OpAccessChain %37 %17 %36 %36
+%39 = OpLoad %13 %38
+%41 = OpAccessChain %37 %17 %36 %40
+%42 = OpLoad %13 %41
+%44 = OpAccessChain %37 %17 %36 %43
+%45 = OpLoad %13 %44
+%47 = OpAccessChain %37 %17 %36 %46
+%48 = OpLoad %13 %47
+%50 = OpAccessChain %49 %11 %36 %18
+%51 = OpLoad %7 %50
+%53 = OpAccessChain %49 %11 %36 %52
+%54 = OpLoad %7 %53
+%56 = OpAccessChain %49 %11 %36 %55
+%57 = OpLoad %7 %56
+%59 = OpAccessChain %49 %11 %36 %58
+%60 = OpLoad %7 %59
+%61 = OpFConvert %13 %51
+%62 = OpFConvert %13 %54
+%63 = OpFConvert %13 %57
+%64 = OpFConvert %13 %60
+%65 = OpFAdd %13 %61 %39
+%66 = OpFAdd %13 %62 %42
+%67 = OpFAdd %13 %63 %45
+%68 = OpFAdd %13 %64 %48
+%71 = OpAccessChain %70 %23 %36 %24
+%72 = OpLoad %19 %71
+%73 = OpBitcast %69 %72
+%75 = OpAccessChain %70 %23 %36 %74
+%76 = OpLoad %19 %75
+%77 = OpBitcast %69 %76
+%79 = OpAccessChain %70 %23 %36 %78
+%80 = OpLoad %19 %79
+%81 = OpBitcast %69 %80
+%83 = OpAccessChain %70 %23 %36 %82
+%84 = OpLoad %19 %83
+%85 = OpBitcast %69 %84
+%86 = OpConvertSToF %13 %73
+%87 = OpConvertSToF %13 %77
+%88 = OpConvertSToF %13 %81
+%89 = OpConvertSToF %13 %85
+%90 = OpFAdd %13 %65 %86
+%91 = OpFAdd %13 %66 %87
+%92 = OpFAdd %13 %67 %88
+%93 = OpFAdd %13 %68 %89
+%94 = OpAccessChain %37 %28 %36 %36
+%95 = OpLoad %13 %94
+%96 = OpAccessChain %37 %28 %36 %40
+%97 = OpLoad %13 %96
+%98 = OpAccessChain %37 %28 %36 %43
+%99 = OpLoad %13 %98
+%100 = OpAccessChain %37 %28 %36 %46
+%101 = OpLoad %13 %100
+%102 = OpFAdd %13 %90 %95
+%103 = OpFAdd %13 %91 %97
+%104 = OpFAdd %13 %92 %99
+%105 = OpFAdd %13 %93 %101
+%106 = OpAccessChain %70 %32 %36 %36
+%107 = OpLoad %19 %106
+%108 = OpAccessChain %70 %32 %36 %40
+%109 = OpLoad %19 %108
+%110 = OpAccessChain %70 %32 %36 %43
+%111 = OpLoad %19 %110
+%112 = OpAccessChain %70 %32 %36 %46
+%113 = OpLoad %19 %112
+%114 = OpFConvert %13 %107
+%115 = OpFConvert %13 %109
+%116 = OpFConvert %13 %111
+%117 = OpFConvert %13 %113
+%118 = OpFAdd %13 %102 %114
+%119 = OpFAdd %13 %103 %115
+%120 = OpFAdd %13 %104 %116
+%121 = OpFAdd %13 %105 %117
+%123 = OpAccessChain %122 %35 %36
+OpStore %123 %118
+%124 = OpAccessChain %122 %35 %40
+OpStore %124 %119
+%125 = OpAccessChain %122 %35 %43
+OpStore %125 %120
+%126 = OpAccessChain %122 %35 %46
+OpStore %126 %121
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag
index f494f31..e5ee27b 100644
--- a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag
+++ b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.sm66.frag
@@ -99,6 +99,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag
index d70287c..b9c3f99 100644
--- a/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag
+++ b/reference/shaders/resources/sm66/raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag
@@ -125,6 +125,7 @@ OpCapability Shader
OpCapability Float16
OpCapability Int16
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag
index 941d6dc..6bc6537 100644
--- a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag
+++ b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.sm66.frag
@@ -88,6 +88,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpExtension "SPV_EXT_descriptor_indexing"
OpMemoryModel Logical GLSL450
diff --git a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag
index 088e381..8c122b8 100644
--- a/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag
+++ b/reference/shaders/resources/sm66/structured-16bit-heap.ssbo.ssbo-align.sm66.frag
@@ -112,6 +112,7 @@ OpCapability Shader
OpCapability Float16
OpCapability Int16
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpExtension "SPV_EXT_descriptor_indexing"
OpMemoryModel Logical GLSL450
diff --git a/reference/shaders/semantics/clip-distance-flatten.vert b/reference/shaders/semantics/clip-distance-flatten.vert
index f80f694..5a53683 100644
--- a/reference/shaders/semantics/clip-distance-flatten.vert
+++ b/reference/shaders/semantics/clip-distance-flatten.vert
@@ -59,8 +59,8 @@ OpDecorate %20 BuiltIn ClipDistance
%23 = OpConstant %14 0
%26 = OpConstant %14 1
%35 = OpConstant %14 3
-%37 = OpTypePointer Output %5
-%45 = OpConstant %5 1
+%38 = OpConstant %5 1
+%40 = OpTypePointer Output %5
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %49
@@ -77,24 +77,24 @@ OpBranch %49
%33 = OpLoad %5 %32
%34 = OpAccessChain %21 %8 %35
%36 = OpLoad %5 %34
-%38 = OpAccessChain %37 %13 %23
-OpStore %38 %29
-%39 = OpAccessChain %37 %13 %26
-OpStore %39 %31
-%40 = OpAccessChain %37 %13 %15
-OpStore %40 %33
-%41 = OpAccessChain %37 %13 %35
-OpStore %41 %36
-%42 = OpAccessChain %37 %20 %23
-OpStore %42 %24
-%43 = OpAccessChain %37 %20 %26
-OpStore %43 %27
-%44 = OpFAdd %5 %24 %45
-%46 = OpFAdd %5 %27 %45
-%47 = OpAccessChain %37 %20 %15
-OpStore %47 %44
-%48 = OpAccessChain %37 %20 %35
-OpStore %48 %46
+%37 = OpFAdd %5 %24 %38
+%39 = OpFAdd %5 %27 %38
+%41 = OpAccessChain %40 %13 %23
+OpStore %41 %29
+%42 = OpAccessChain %40 %13 %26
+OpStore %42 %31
+%43 = OpAccessChain %40 %13 %15
+OpStore %43 %33
+%44 = OpAccessChain %40 %13 %35
+OpStore %44 %36
+%45 = OpAccessChain %40 %20 %23
+OpStore %45 %24
+%46 = OpAccessChain %40 %20 %26
+OpStore %46 %27
+%47 = OpAccessChain %40 %20 %15
+OpStore %47 %37
+%48 = OpAccessChain %40 %20 %35
+OpStore %48 %39
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/semantics/clip-distance-rows.vert b/reference/shaders/semantics/clip-distance-rows.vert
index ec89d09..e57eb95 100644
--- a/reference/shaders/semantics/clip-distance-rows.vert
+++ b/reference/shaders/semantics/clip-distance-rows.vert
@@ -53,8 +53,8 @@ OpDecorate %17 BuiltIn ClipDistance
%20 = OpConstant %13 0
%23 = OpConstant %13 1
%28 = OpConstant %13 3
-%30 = OpTypePointer Output %5
-%37 = OpConstant %5 1
+%31 = OpConstant %5 1
+%32 = OpTypePointer Output %5
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %39
@@ -68,19 +68,19 @@ OpBranch %39
%26 = OpLoad %5 %25
%27 = OpAccessChain %9 %8 %28
%29 = OpLoad %5 %27
-%31 = OpAccessChain %30 %12 %20
-OpStore %31 %21
-%32 = OpAccessChain %30 %12 %23
-OpStore %32 %24
-%33 = OpAccessChain %30 %12 %14
-OpStore %33 %26
-%34 = OpAccessChain %30 %12 %28
-OpStore %34 %29
-%35 = OpAccessChain %30 %17 %20
-OpStore %35 %18
-%36 = OpFAdd %5 %18 %37
-%38 = OpAccessChain %30 %17 %23
-OpStore %38 %36
+%30 = OpFAdd %5 %18 %31
+%33 = OpAccessChain %32 %12 %20
+OpStore %33 %21
+%34 = OpAccessChain %32 %12 %23
+OpStore %34 %24
+%35 = OpAccessChain %32 %12 %14
+OpStore %35 %26
+%36 = OpAccessChain %32 %12 %28
+OpStore %36 %29
+%37 = OpAccessChain %32 %17 %20
+OpStore %37 %18
+%38 = OpAccessChain %32 %17 %23
+OpStore %38 %30
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/semantics/coverage.frag b/reference/shaders/semantics/coverage.frag
index b41b0f6..1b8066d 100644
--- a/reference/shaders/semantics/coverage.frag
+++ b/reference/shaders/semantics/coverage.frag
@@ -44,10 +44,10 @@ OpDecorate %15 BuiltIn SampleMask
%15 = OpVariable %14 Input
%16 = OpTypePointer Input %9
%18 = OpConstant %9 0
-%20 = OpTypePointer Output %5
-%22 = OpConstant %5 1
-%25 = OpConstant %9 2
-%27 = OpConstant %9 3
+%21 = OpConstant %9 3
+%22 = OpTypePointer Output %5
+%24 = OpConstant %5 1
+%27 = OpConstant %9 2
%29 = OpTypePointer Output %9
%3 = OpFunction %1 None %2
%4 = OpLabel
@@ -55,17 +55,17 @@ OpBranch %31
%31 = OpLabel
%17 = OpAccessChain %16 %15 %18
%19 = OpLoad %9 %17
-%21 = OpAccessChain %20 %8 %18
-OpStore %21 %22
-%23 = OpAccessChain %20 %8 %10
-OpStore %23 %22
-%24 = OpAccessChain %20 %8 %25
-OpStore %24 %22
-%26 = OpAccessChain %20 %8 %27
-OpStore %26 %22
-%28 = OpBitwiseAnd %9 %19 %27
+%20 = OpBitwiseAnd %9 %19 %21
+%23 = OpAccessChain %22 %8 %18
+OpStore %23 %24
+%25 = OpAccessChain %22 %8 %10
+OpStore %25 %24
+%26 = OpAccessChain %22 %8 %27
+OpStore %26 %24
+%28 = OpAccessChain %22 %8 %21
+OpStore %28 %24
%30 = OpAccessChain %29 %13 %18
-OpStore %30 %28
+OpStore %30 %20
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/semantics/inner-coverage.noglsl.frag b/reference/shaders/semantics/inner-coverage.noglsl.frag
index 27f0f81..2d574d7 100644
--- a/reference/shaders/semantics/inner-coverage.noglsl.frag
+++ b/reference/shaders/semantics/inner-coverage.noglsl.frag
@@ -1,71 +1,62 @@
; SPIR-V
; Version: 1.3
; Generator: Unknown(30017); 21022
-; Bound: 41
+; Bound: 33
; Schema: 0
OpCapability Shader
OpCapability FragmentFullyCoveredEXT
OpExtension "SPV_EXT_fragment_fully_covered"
OpMemoryModel Logical GLSL450
-OpEntryPoint Fragment %3 "main" %7 %12 %21
+OpEntryPoint Fragment %3 "main" %7 %10
OpExecutionMode %3 OriginUpperLeft
OpName %3 "main"
OpName %7 "SV_Target"
-OpName %26 "discard_state"
-OpName %33 "discard_exit"
+OpName %18 "discard_state"
+OpName %25 "discard_exit"
OpDecorate %7 Location 0
-OpDecorate %12 BuiltIn SampleMask
-OpDecorate %21 BuiltIn FullyCoveredEXT
+OpDecorate %10 BuiltIn FullyCoveredEXT
%1 = OpTypeVoid
%2 = OpTypeFunction %1
%5 = OpTypeFloat 32
%6 = OpTypePointer Output %5
%7 = OpVariable %6 Output
-%8 = OpTypeInt 32 0
-%9 = OpConstant %8 1
-%10 = OpTypeArray %8 %9
-%11 = OpTypePointer Input %10
-%12 = OpVariable %11 Input
-%13 = OpTypePointer Input %8
-%15 = OpConstant %8 0
-%17 = OpTypeBool
-%20 = OpTypePointer Input %17
-%21 = OpVariable %20 Input
-%25 = OpTypePointer Private %17
-%26 = OpVariable %25 Private
-%27 = OpConstantFalse %17
-%28 = OpConstant %5 1
-%32 = OpConstantTrue %17
+%8 = OpTypeBool
+%9 = OpTypePointer Input %8
+%10 = OpVariable %9 Input
+%12 = OpTypeInt 32 0
+%14 = OpConstant %12 1
+%15 = OpConstant %12 0
+%17 = OpTypePointer Private %8
+%18 = OpVariable %17 Private
+%19 = OpConstantFalse %8
+%20 = OpConstant %5 1
+%24 = OpConstantTrue %8
%3 = OpFunction %1 None %2
%4 = OpLabel
-OpStore %26 %27
-OpBranch %29
-%29 = OpLabel
-%14 = OpAccessChain %13 %12 %15
-%16 = OpLoad %8 %14
-%18 = OpIEqual %17 %15 %16
-%19 = OpSelect %8 %18 %9 %15
-%22 = OpLoad %17 %21
-%23 = OpSelect %8 %22 %9 %15
-%24 = OpIEqual %17 %23 %15
-OpSelectionMerge %31 None
-OpBranchConditional %24 %30 %31
-%30 = OpLabel
-OpStore %26 %32
-OpBranch %31
-%31 = OpLabel
-OpStore %7 %28
-%39 = OpFunctionCall %1 %33
+OpStore %18 %19
+OpBranch %21
+%21 = OpLabel
+%11 = OpLoad %8 %10
+%13 = OpSelect %12 %11 %14 %15
+%16 = OpIEqual %8 %13 %15
+OpSelectionMerge %23 None
+OpBranchConditional %16 %22 %23
+%22 = OpLabel
+OpStore %18 %24
+OpBranch %23
+%23 = OpLabel
+OpStore %7 %20
+%31 = OpFunctionCall %1 %25
OpReturn
OpFunctionEnd
-%33 = OpFunction %1 None %2
-%34 = OpLabel
-%37 = OpLoad %17 %26
-OpSelectionMerge %36 None
-OpBranchConditional %37 %35 %36
-%35 = OpLabel
+%25 = OpFunction %1 None %2
+%26 = OpLabel
+%29 = OpLoad %8 %18
+OpSelectionMerge %28 None
+OpBranchConditional %29 %27 %28
+%27 = OpLabel
OpKill
-%36 = OpLabel
+%28 = OpLabel
OpReturn
OpFunctionEnd
diff --git a/reference/shaders/semantics/stencil-ref.frag b/reference/shaders/semantics/stencil-ref.frag
index 222cbba..5d86f7e 100644
--- a/reference/shaders/semantics/stencil-ref.frag
+++ b/reference/shaders/semantics/stencil-ref.frag
@@ -46,26 +46,26 @@ OpDecorate %13 BuiltIn FragStencilRefEXT
%11 = OpTypeInt 32 0
%12 = OpTypePointer Output %11
%13 = OpVariable %12 Output
-%15 = OpTypePointer Output %5
-%17 = OpConstant %11 0
-%19 = OpConstant %11 1
-%21 = OpConstant %11 2
-%23 = OpConstant %11 3
+%16 = OpTypePointer Output %5
+%18 = OpConstant %11 0
+%20 = OpConstant %11 1
+%22 = OpConstant %11 2
+%24 = OpConstant %11 3
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %25
%25 = OpLabel
%14 = OpLoad %5 %7
-%16 = OpAccessChain %15 %10 %17
-OpStore %16 %14
-%18 = OpAccessChain %15 %10 %19
-OpStore %18 %14
-%20 = OpAccessChain %15 %10 %21
-OpStore %20 %14
-%22 = OpAccessChain %15 %10 %23
-OpStore %22 %14
-%24 = OpConvertFToU %11 %14
-OpStore %13 %24
+%15 = OpConvertFToU %11 %14
+%17 = OpAccessChain %16 %10 %18
+OpStore %17 %14
+%19 = OpAccessChain %16 %10 %20
+OpStore %19 %14
+%21 = OpAccessChain %16 %10 %22
+OpStore %21 %14
+%23 = OpAccessChain %16 %10 %24
+OpStore %23 %14
+OpStore %13 %15
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/stages/hull-arrays.tesc b/reference/shaders/stages/hull-arrays.tesc
index 7c4f171..ecc21e4 100644
--- a/reference/shaders/stages/hull-arrays.tesc
+++ b/reference/shaders/stages/hull-arrays.tesc
@@ -15,10 +15,10 @@ void hull_main()
void patch_main()
{
- gl_TessLevelInner[0u] = HSValue[1u][2u] + HSValue[0u][1u];
gl_TessLevelOuter[0u] = VSValue[0u][2u];
gl_TessLevelOuter[1u] = VSValue[1u][1u];
gl_TessLevelOuter[2u] = VSValue[2u][0u];
+ gl_TessLevelInner[0u] = HSValue[1u][2u] + HSValue[0u][1u];
PATCH[0u] = VSValue[3u][2u];
PATCH[1u] = VSValue[4u][1u];
}
@@ -147,28 +147,28 @@ OpBranch %85
%55 = OpAccessChain %44 %16 %34 %13
%56 = OpLoad %5 %55
%57 = OpFAdd %5 %56 %54
-%58 = OpAccessChain %44 %23 %31
-OpStore %58 %57
-%59 = OpAccessChain %29 %12 %31 %13
-%60 = OpLoad %5 %59
-%61 = OpAccessChain %44 %20 %31
-OpStore %61 %60
-%62 = OpAccessChain %29 %12 %34 %34
+%58 = OpAccessChain %29 %12 %31 %13
+%59 = OpLoad %5 %58
+%60 = OpAccessChain %29 %12 %34 %34
+%61 = OpLoad %5 %60
+%62 = OpAccessChain %29 %12 %13 %31
%63 = OpLoad %5 %62
-%64 = OpAccessChain %44 %20 %34
-OpStore %64 %63
-%65 = OpAccessChain %29 %12 %13 %31
-%66 = OpLoad %5 %65
-%67 = OpAccessChain %44 %20 %13
-OpStore %67 %66
-%68 = OpAccessChain %29 %12 %7 %13
-%69 = OpLoad %5 %68
-%70 = OpAccessChain %44 %24 %31
-OpStore %70 %69
-%71 = OpAccessChain %29 %12 %17 %34
-%72 = OpLoad %5 %71
+%64 = OpAccessChain %29 %12 %7 %13
+%65 = OpLoad %5 %64
+%66 = OpAccessChain %29 %12 %17 %34
+%67 = OpLoad %5 %66
+%68 = OpAccessChain %44 %20 %31
+OpStore %68 %59
+%69 = OpAccessChain %44 %20 %34
+OpStore %69 %61
+%70 = OpAccessChain %44 %20 %13
+OpStore %70 %63
+%71 = OpAccessChain %44 %23 %31
+OpStore %71 %57
+%72 = OpAccessChain %44 %24 %31
+OpStore %72 %65
%73 = OpAccessChain %44 %24 %34
-OpStore %73 %72
+OpStore %73 %67
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/stages/hull-single-cp.tesc b/reference/shaders/stages/hull-single-cp.tesc
index b7aa73f..c6669e3 100644
--- a/reference/shaders/stages/hull-single-cp.tesc
+++ b/reference/shaders/stages/hull-single-cp.tesc
@@ -12,10 +12,10 @@ void hull_main()
void patch_main()
{
- gl_TessLevelInner[0u] = HSValue[0u];
gl_TessLevelOuter[0u] = VSValue[0u];
gl_TessLevelOuter[1u] = VSValue[1u];
gl_TessLevelOuter[2u] = VSValue[2u];
+ gl_TessLevelInner[0u] = HSValue[0u];
PATCH = VSValue[3u];
}
@@ -84,7 +84,7 @@ OpDecorate %41 BuiltIn InvocationId
%31 = OpConstant %6 0
%40 = OpTypePointer Input %6
%41 = OpVariable %40 Input
-%56 = OpConstant %6 3
+%52 = OpConstant %6 3
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %60
@@ -116,23 +116,23 @@ OpBranch %64
%64 = OpLabel
%43 = OpAccessChain %23 %14 %31
%44 = OpLoad %5 %43
-%45 = OpAccessChain %23 %22 %31
-OpStore %45 %44
-%46 = OpAccessChain %29 %10 %31
-%47 = OpLoad %5 %46
-%48 = OpAccessChain %23 %18 %31
-OpStore %48 %47
-%49 = OpAccessChain %29 %10 %11
+%45 = OpAccessChain %29 %10 %31
+%46 = OpLoad %5 %45
+%47 = OpAccessChain %29 %10 %11
+%48 = OpLoad %5 %47
+%49 = OpAccessChain %29 %10 %19
%50 = OpLoad %5 %49
-%51 = OpAccessChain %23 %18 %11
-OpStore %51 %50
-%52 = OpAccessChain %29 %10 %19
-%53 = OpLoad %5 %52
-%54 = OpAccessChain %23 %18 %19
-OpStore %54 %53
-%55 = OpAccessChain %29 %10 %56
-%57 = OpLoad %5 %55
-OpStore %24 %57
+%51 = OpAccessChain %29 %10 %52
+%53 = OpLoad %5 %51
+%54 = OpAccessChain %23 %18 %31
+OpStore %54 %46
+%55 = OpAccessChain %23 %18 %11
+OpStore %55 %48
+%56 = OpAccessChain %23 %18 %19
+OpStore %56 %50
+%57 = OpAccessChain %23 %22 %31
+OpStore %57 %44
+OpStore %24 %53
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/stages/hull.tesc b/reference/shaders/stages/hull.tesc
index e4fac52..d0e5f68 100644
--- a/reference/shaders/stages/hull.tesc
+++ b/reference/shaders/stages/hull.tesc
@@ -31,12 +31,12 @@ void patch_main()
_66[3u] = VSValue[3u];
uint _79 = uint(int(VSValue[0u]));
_66[_79] += 40.0;
- gl_TessLevelInner[0u] = HSValue[0u];
- gl_TessLevelInner[1u] = HSValue[1u];
gl_TessLevelOuter[0u] = VSValue[0u];
gl_TessLevelOuter[1u] = VSValue[1u];
gl_TessLevelOuter[2u] = VSValue[2u];
gl_TessLevelOuter[3u] = HSValue[0u] + VSValue[0u];
+ gl_TessLevelInner[0u] = HSValue[0u];
+ gl_TessLevelInner[1u] = HSValue[1u];
PATCH = _12._m0[0u].x + _66[3u];
}
@@ -122,7 +122,7 @@ OpDecorate %33 BuiltIn InvocationId
%41 = OpTypePointer Function %7
%50 = OpConstant %5 3
%57 = OpConstant %7 40
-%95 = OpTypePointer Uniform %8
+%89 = OpTypePointer Uniform %8
%102 = OpTypeBool
%3 = OpFunction %1 None %2
%4 = OpLabel
@@ -206,27 +206,27 @@ OpStore %78 %77
OpStore %80 %82
%83 = OpAccessChain %26 %20 %39
%84 = OpLoad %7 %83
-%85 = OpAccessChain %26 %25 %39
-OpStore %85 %84
-%86 = OpAccessChain %26 %20 %6
-%87 = OpLoad %7 %86
-%88 = OpAccessChain %26 %25 %6
-OpStore %88 %87
-%89 = OpAccessChain %26 %21 %39
-OpStore %89 %68
-%90 = OpAccessChain %26 %21 %6
-OpStore %90 %71
-%91 = OpAccessChain %26 %21 %22
-OpStore %91 %74
-%92 = OpFAdd %7 %84 %68
-%93 = OpAccessChain %26 %21 %50
-OpStore %93 %92
-%94 = OpLoad %7 %78
-%96 = OpAccessChain %95 %12 %39 %39
-%97 = OpLoad %8 %96
-%98 = OpCompositeExtract %7 %97 0
-%99 = OpFAdd %7 %98 %94
-OpStore %27 %99
+%85 = OpAccessChain %26 %20 %6
+%86 = OpLoad %7 %85
+%87 = OpFAdd %7 %84 %68
+%88 = OpLoad %7 %78
+%90 = OpAccessChain %89 %12 %39 %39
+%91 = OpLoad %8 %90
+%92 = OpCompositeExtract %7 %91 0
+%93 = OpFAdd %7 %92 %88
+%94 = OpAccessChain %26 %21 %39
+OpStore %94 %68
+%95 = OpAccessChain %26 %21 %6
+OpStore %95 %71
+%96 = OpAccessChain %26 %21 %22
+OpStore %96 %74
+%97 = OpAccessChain %26 %21 %50
+OpStore %97 %87
+%98 = OpAccessChain %26 %25 %39
+OpStore %98 %84
+%99 = OpAccessChain %26 %25 %6
+OpStore %99 %86
+OpStore %27 %93
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/stages/stage-input-output.16bit-io.frag b/reference/shaders/stages/stage-input-output.16bit-io.frag
index 06852c4..7229252 100644
--- a/reference/shaders/stages/stage-input-output.16bit-io.frag
+++ b/reference/shaders/stages/stage-input-output.16bit-io.frag
@@ -81,10 +81,10 @@ OpDecorate %21 Location 2
%30 = OpConstant %25 1
%33 = OpTypePointer Input %5
%39 = OpConstant %5 0x1p+3
-%41 = OpTypePointer Output %5
-%45 = OpConstant %13 65528
-%47 = OpTypePointer Output %9
-%53 = OpConstant %13 2
+%42 = OpConstant %13 65528
+%45 = OpConstant %13 2
+%46 = OpTypePointer Output %5
+%49 = OpTypePointer Output %9
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %54
@@ -102,20 +102,20 @@ OpBranch %54
%37 = OpLoad %5 %36
%38 = OpFMul %5 %35 %39
%40 = OpFMul %5 %37 %39
-%42 = OpAccessChain %41 %17 %26
-OpStore %42 %38
-%43 = OpAccessChain %41 %17 %30
-OpStore %43 %40
-%44 = OpIMul %13 %28 %45
-%46 = OpIMul %13 %32 %45
-%48 = OpAccessChain %47 %19 %26
-%49 = OpBitcast %9 %44
-OpStore %48 %49
-%50 = OpAccessChain %47 %19 %30
-%51 = OpBitcast %9 %46
+%41 = OpIMul %13 %28 %42
+%43 = OpIMul %13 %32 %42
+%44 = OpShiftLeftLogical %13 %22 %45
+%47 = OpAccessChain %46 %17 %26
+OpStore %47 %38
+%48 = OpAccessChain %46 %17 %30
+OpStore %48 %40
+%50 = OpAccessChain %49 %19 %26
+%51 = OpBitcast %9 %41
OpStore %50 %51
-%52 = OpShiftLeftLogical %13 %22 %53
-OpStore %21 %52
+%52 = OpAccessChain %49 %19 %30
+%53 = OpBitcast %9 %43
+OpStore %52 %53
+OpStore %21 %44
OpReturn
OpFunctionEnd
#endif
diff --git a/reference/shaders/stages/stage-input-output.frag b/reference/shaders/stages/stage-input-output.frag
index 98d5992..1d2acfd 100644
--- a/reference/shaders/stages/stage-input-output.frag
+++ b/reference/shaders/stages/stage-input-output.frag
@@ -87,10 +87,10 @@ OpDecorate %21 Location 2
%34 = OpTypePointer Input %5
%37 = OpTypeFloat 16
%43 = OpConstant %37 0x1p+3
-%45 = OpTypePointer Output %5
-%51 = OpConstant %23 65528
-%53 = OpTypePointer Output %9
-%59 = OpConstant %23 2
+%46 = OpConstant %23 65528
+%49 = OpConstant %23 2
+%50 = OpTypePointer Output %5
+%55 = OpTypePointer Output %9
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %61
@@ -111,22 +111,22 @@ OpBranch %61
%41 = OpFConvert %37 %40
%42 = OpFMul %37 %38 %43
%44 = OpFMul %37 %41 %43
-%46 = OpAccessChain %45 %17 %27
-%47 = OpFConvert %5 %42
-OpStore %46 %47
-%48 = OpAccessChain %45 %17 %31
-%49 = OpFConvert %5 %44
-OpStore %48 %49
-%50 = OpIMul %23 %29 %51
-%52 = OpIMul %23 %33 %51
-%54 = OpAccessChain %53 %19 %27
-%55 = OpSConvert %9 %50
-OpStore %54 %55
-%56 = OpAccessChain %53 %19 %31
-%57 = OpSConvert %9 %52
+%45 = OpIMul %23 %29 %46
+%47 = OpIMul %23 %33 %46
+%48 = OpShiftLeftLogical %23 %24 %49
+%51 = OpAccessChain %50 %17 %27
+%52 = OpFConvert %5 %42
+OpStore %51 %52
+%53 = OpAccessChain %50 %17 %31
+%54 = OpFConvert %5 %44
+OpStore %53 %54
+%56 = OpAccessChain %55 %19 %27
+%57 = OpSConvert %9 %45
OpStore %56 %57
-%58 = OpShiftLeftLogical %23 %24 %59
-%60 = OpUConvert %13 %58
+%58 = OpAccessChain %55 %19 %31
+%59 = OpSConvert %9 %47
+OpStore %58 %59
+%60 = OpUConvert %13 %48
OpStore %21 %60
OpReturn
OpFunctionEnd
diff --git a/reference/shaders/stages/vertex-array-output.vert b/reference/shaders/stages/vertex-array-output.vert
index 1995650..5d146ad 100644
--- a/reference/shaders/stages/vertex-array-output.vert
+++ b/reference/shaders/stages/vertex-array-output.vert
@@ -73,10 +73,10 @@ OpDecorate %18 Location 5
%24 = OpConstant %11 1
%27 = OpConstant %11 2
%30 = OpConstant %11 3
-%32 = OpTypePointer Output %5
-%42 = OpConstant %5 1
-%51 = OpConstant %5 2
-%60 = OpConstant %5 3
+%33 = OpConstant %5 1
+%38 = OpConstant %5 2
+%43 = OpConstant %5 3
+%47 = OpTypePointer Output %5
%3 = OpFunction %1 None %2
%4 = OpLabel
OpBranch %72
@@ -89,65 +89,65 @@ OpBranch %72
%28 = OpLoad %5 %26
%29 = OpAccessChain %19 %8 %30
%31 = OpLoad %5 %29
-%33 = OpAccessChain %32 %10 %21
-OpStore %33 %22
-%34 = OpAccessChain %32 %10 %24
-OpStore %34 %25
-%35 = OpAccessChain %32 %10 %27
-OpStore %35 %28
-%36 = OpAccessChain %32 %10 %30
-OpStore %36 %31
-%37 = OpAccessChain %32 %15 %21 %21
-OpStore %37 %22
-%38 = OpAccessChain %32 %15 %21 %24
-OpStore %38 %25
-%39 = OpAccessChain %32 %15 %21 %27
-OpStore %39 %28
-%40 = OpAccessChain %32 %15 %21 %30
-OpStore %40 %31
-%41 = OpFAdd %5 %22 %42
-%43 = OpFAdd %5 %25 %42
-%44 = OpFAdd %5 %28 %42
-%45 = OpFAdd %5 %31 %42
-%46 = OpAccessChain %32 %15 %24 %21
-OpStore %46 %41
-%47 = OpAccessChain %32 %15 %24 %24
-OpStore %47 %43
-%48 = OpAccessChain %32 %15 %24 %27
-OpStore %48 %44
-%49 = OpAccessChain %32 %15 %24 %30
-OpStore %49 %45
-%50 = OpFAdd %5 %22 %51
-%52 = OpFAdd %5 %25 %51
-%53 = OpFAdd %5 %28 %51
-%54 = OpFAdd %5 %31 %51
-%55 = OpAccessChain %32 %15 %27 %21
-OpStore %55 %50
-%56 = OpAccessChain %32 %15 %27 %24
-OpStore %56 %52
-%57 = OpAccessChain %32 %15 %27 %27
-OpStore %57 %53
-%58 = OpAccessChain %32 %15 %27 %30
-OpStore %58 %54
-%59 = OpFAdd %5 %22 %60
-%61 = OpFAdd %5 %25 %60
-%62 = OpFAdd %5 %28 %60
-%63 = OpFAdd %5 %31 %60
-%64 = OpAccessChain %32 %15 %30 %21
-OpStore %64 %59
-%65 = OpAccessChain %32 %15 %30 %24
-OpStore %65 %61
-%66 = OpAccessChain %32 %15 %30 %27
-OpStore %66 %62
-%67 = OpAccessChain %32 %15 %30 %30
-OpStore %67 %63
-%68 = OpAccessChain %32 %18 %21
+%32 = OpFAdd %5 %22 %33
+%34 = OpFAdd %5 %25 %33
+%35 = OpFAdd %5 %28 %33
+%36 = OpFAdd %5 %31 %33
+%37 = OpFAdd %5 %22 %38
+%39 = OpFAdd %5 %25 %38
+%40 = OpFAdd %5 %28 %38
+%41 = OpFAdd %5 %31 %38
+%42 = OpFAdd %5 %22 %43
+%44 = OpFAdd %5 %25 %43
+%45 = OpFAdd %5 %28 %43
+%46 = OpFAdd %5 %31 %43
+%48 = OpAccessChain %47 %10 %21
+OpStore %48 %22
+%49 = OpAccessChain %47 %10 %24
+OpStore %49 %25
+%50 = OpAccessChain %47 %10 %27
+OpStore %50 %28
+%51 = OpAccessChain %47 %10 %30
+OpStore %51 %31
+%52 = OpAccessChain %47 %15 %21 %21
+OpStore %52 %22
+%53 = OpAccessChain %47 %15 %21 %24
+OpStore %53 %25
+%54 = OpAccessChain %47 %15 %21 %27
+OpStore %54 %28
+%55 = OpAccessChain %47 %15 %21 %30
+OpStore %55 %31
+%56 = OpAccessChain %47 %15 %24 %21
+OpStore %56 %32
+%57 = OpAccessChain %47 %15 %24 %24
+OpStore %57 %34
+%58 = OpAccessChain %47 %15 %24 %27
+OpStore %58 %35
+%59 = OpAccessChain %47 %15 %24 %30
+OpStore %59 %36
+%60 = OpAccessChain %47 %15 %27 %21
+OpStore %60 %37
+%61 = OpAccessChain %47 %15 %27 %24
+OpStore %61 %39
+%62 = OpAccessChain %47 %15 %27 %27
+OpStore %62 %40
+%63 = OpAccessChain %47 %15 %27 %30
+OpStore %63 %41
+%64 = OpAccessChain %47 %15 %30 %21
+OpStore %64 %42
+%65 = OpAccessChain %47 %15 %30 %24
+OpStore %65 %44
+%66 = OpAccessChain %47 %15 %30 %27
+OpStore %66 %45
+%67 = OpAccessChain %47 %15 %30 %30
+OpStore %67 %46
+%68 = OpAccessChain %47 %18 %21
OpStore %68 %22
-%69 = OpAccessChain %32 %18 %24
+%69 = OpAccessChain %47 %18 %24
OpStore %69 %25
-%70 = OpAccessChain %32 %18 %27
+%70 = OpAccessChain %47 %18 %27
OpStore %70 %28
-%71 = OpAccessChain %32 %18 %30
+%71 = OpAccessChain %47 %18 %30
OpStore %71 %31
OpReturn
OpFunctionEnd
diff --git a/reference/shaders/vectorization/copy-byte-address.ssbo.comp b/reference/shaders/vectorization/copy-byte-address.ssbo.comp
index 4ff513d..2a2b9eb 100644
--- a/reference/shaders/vectorization/copy-byte-address.ssbo.comp
+++ b/reference/shaders/vectorization/copy-byte-address.ssbo.comp
@@ -183,6 +183,7 @@ OpCapability Float16
OpCapability Float64
OpCapability Int64
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %114 %119
OpExecutionMode %3 LocalSize 2 1 1
diff --git a/reference/shaders/vectorization/copy-half2.ssbo.comp b/reference/shaders/vectorization/copy-half2.ssbo.comp
index 59f9cf3..42704dd 100644
--- a/reference/shaders/vectorization/copy-half2.ssbo.comp
+++ b/reference/shaders/vectorization/copy-half2.ssbo.comp
@@ -37,6 +37,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %18
OpExecutionMode %3 LocalSize 2 1 1
diff --git a/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp
index 02f759d..6f04b21 100644
--- a/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp
+++ b/reference/shaders/vectorization/copy-half2.ssbo.ssbo-align.bindless.comp
@@ -61,6 +61,7 @@ OpCapability Shader
OpCapability Float16
OpCapability Int16
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/reference/shaders/vectorization/copy-half3.ssbo.comp b/reference/shaders/vectorization/copy-half3.ssbo.comp
index 72b6c4a..bda36cb 100644
--- a/reference/shaders/vectorization/copy-half3.ssbo.comp
+++ b/reference/shaders/vectorization/copy-half3.ssbo.comp
@@ -38,6 +38,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %18
OpExecutionMode %3 LocalSize 2 1 1
diff --git a/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp
index 82224d6..48b394e 100644
--- a/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp
+++ b/reference/shaders/vectorization/copy-half3.ssbo.ssbo-align.bindless.comp
@@ -70,6 +70,7 @@ OpCapability Shader
OpCapability Float16
OpCapability Int16
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/reference/shaders/vectorization/copy-half4.ssbo.comp b/reference/shaders/vectorization/copy-half4.ssbo.comp
index cd0f4cd..ba2e329 100644
--- a/reference/shaders/vectorization/copy-half4.ssbo.comp
+++ b/reference/shaders/vectorization/copy-half4.ssbo.comp
@@ -37,6 +37,7 @@ void main()
OpCapability Shader
OpCapability Float16
OpCapability Int16
+OpCapability StorageBuffer16BitAccess
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %3 "main" %18
OpExecutionMode %3 LocalSize 2 1 1
diff --git a/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp b/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp
index 145e9c4..7661e74 100644
--- a/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp
+++ b/reference/shaders/vectorization/copy-half4.ssbo.ssbo-align.bindless.comp
@@ -61,6 +61,7 @@ OpCapability Shader
OpCapability Float16
OpCapability Int16
OpCapability GroupNonUniformBallot
+OpCapability StorageBuffer16BitAccess
OpCapability RuntimeDescriptorArray
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_EXT_descriptor_indexing"
diff --git a/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
new file mode 100644
index 0000000..dc55350
--- /dev/null
+++ b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
Binary files differ
diff --git a/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
new file mode 100644
index 0000000..0739985
--- /dev/null
+++ b/shaders/asm/cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
Binary files differ
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.frag b/shaders/resources/cbv-legacy-fp16-fp64.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag
new file mode 100644
index 0000000..7bc5a19
--- /dev/null
+++ b/shaders/resources/cbv.no-legacy-cbuf-layout.bindless.frag
@@ -0,0 +1,21 @@
+cbuffer Cbuf
+{
+ float4 a;
+ half4 b;
+ int64_t4 c;
+};
+
+cbuffer Cbuf1 : register(b1)
+{
+ float4 d;
+};
+
+cbuffer Cbuf2 : register(b2)
+{
+ double4 e;
+};
+
+float4 main() : SV_Target
+{
+ return a + float4(b) + float4(c) + d + float4(e);
+}
diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag
new file mode 100644
index 0000000..2ae7750
--- /dev/null
+++ b/shaders/resources/cbv.no-legacy-cbuf-layout.index-divider.frag
@@ -0,0 +1,12 @@
+cbuffer Cbuf
+{
+ float a[64];
+ float2 b[64];
+ float3 c[64];
+ float4 d[64];
+};
+
+float4 main(nointerpolation uint index : INDEX) : SV_Target
+{
+ return a[index].xxxx + b[index].xyxy + c[index].xyzx + d[index];
+}
diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss b/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
new file mode 100644
index 0000000..3ce3a8e
--- /dev/null
+++ b/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
@@ -0,0 +1,26 @@
+cbuffer A : register(b0, space15)
+{
+ float a;
+ uint b;
+ int c;
+};
+
+cbuffer B : register(b1, space15)
+{
+ float2 a2;
+ uint b2;
+ int c2;
+};
+
+struct Payload
+{
+ float4 f;
+ int4 i;
+};
+
+[shader("miss")]
+void main(inout Payload payload)
+{
+ payload.f = float4(a, b, c, 1.0);
+ payload.i = int4(a2, b2, c);
+}
diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
new file mode 100644
index 0000000..039e61c
--- /dev/null
+++ b/shaders/resources/cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
@@ -0,0 +1,14 @@
+struct Half8 { min16float4 lo; min16float4 hi; };
+
+cbuffer Cbuf
+{
+ float4 a;
+ Half8 b;
+ int64_t4 c;
+};
+
+float4 main() : SV_Target
+{
+ Half8 half8 = b;
+ return a + float4(half8.lo) + float4(half8.hi) + float4(c);
+}
diff --git a/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag b/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag
new file mode 100644
index 0000000..fbc0db9
--- /dev/null
+++ b/shaders/resources/cbv.no-legacy-cbuf-layout.root-constant.frag
@@ -0,0 +1,18 @@
+cbuffer A : register(b0, space0)
+{
+ float a;
+ uint b;
+ int c;
+};
+
+cbuffer B : register(b0, space1)
+{
+ float2 a2;
+ uint b2;
+ int c2;
+};
+
+float2 main() : SV_Target
+{
+ return (a + a2) + (b + b2) + (c + c2);
+}
diff --git a/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag b/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag
new file mode 100644
index 0000000..39cc769
--- /dev/null
+++ b/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag
@@ -0,0 +1,15 @@
+cbuffer Buf : register(b0)
+{
+ float4 a;
+ half4 c;
+ int64_t4 b;
+};
+
+float4 main() : SV_Target
+{
+ float4 res = 0.0.xxxx;
+ res += a;
+ res += float4(b);
+ res += float4(c);
+ return res;
+}
diff --git a/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag
new file mode 100644
index 0000000..7bc5a19
--- /dev/null
+++ b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.bindless.sm66.frag
@@ -0,0 +1,21 @@
+cbuffer Cbuf
+{
+ float4 a;
+ half4 b;
+ int64_t4 c;
+};
+
+cbuffer Cbuf1 : register(b1)
+{
+ float4 d;
+};
+
+cbuffer Cbuf2 : register(b2)
+{
+ double4 e;
+};
+
+float4 main() : SV_Target
+{
+ return a + float4(b) + float4(c) + d + float4(e);
+}
diff --git a/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag
new file mode 100644
index 0000000..7bc5a19
--- /dev/null
+++ b/shaders/resources/sm66/cbv.no-legacy-cbuf-layout.sm66.frag
@@ -0,0 +1,21 @@
+cbuffer Cbuf
+{
+ float4 a;
+ half4 b;
+ int64_t4 c;
+};
+
+cbuffer Cbuf1 : register(b1)
+{
+ float4 d;
+};
+
+cbuffer Cbuf2 : register(b2)
+{
+ double4 e;
+};
+
+float4 main() : SV_Target
+{
+ return a + float4(b) + float4(c) + d + float4(e);
+}
diff --git a/test_shaders.py b/test_shaders.py
index 5afc278..8cb46d6 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -97,6 +97,8 @@ def cross_compile_dxil(shader, args, paths, is_asm):
dxil_cmd += ['-denorm', 'ftz']
if '.denorm-preserve.' in shader:
dxil_cmd += ['-denorm', 'preserve']
+ if '.no-legacy-cbuf-layout.' in shader:
+ dxil_cmd += ['-no-legacy-cbuf-layout']
subprocess.check_call(dxil_cmd)
else:
dxil_path = shader
@@ -343,7 +345,7 @@ def main():
action = 'store_true',
help = 'Execute tests in parallel. Useful for doing regression quickly, but bad for debugging and stat output.')
parser.add_argument('--dxc',
- default = './external/dxc-build/output/bin/dxc',
+ default = './external/dxc-build/bin/dxc',
help = 'Explicit path to DXC')
parser.add_argument('--dxil-spirv',
default = './dxil-spirv',