diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-05-25 13:21:03 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-25 13:21:03 +0300 |
commit | 24eb1dbbd769e95d630045650b81c82a57d51200 (patch) | |
tree | bddd07260c75009c525ea6ad95f40a1a2867d5bd | |
parent | 3a258858c3ae1a41143499599f107dbda9c73066 (diff) | |
parent | 03f619778c5afebcdb3b7f3e6b38f500a9cc93af (diff) |
Merge pull request #114 from HansKristian-Work/physical-srv-uav-access-chain
Generate more straightforward code for BDA descriptors.
67 files changed, 1593 insertions, 1998 deletions
diff --git a/dxil_converter.cpp b/dxil_converter.cpp index 5a14eba..cf61fd0 100644 --- a/dxil_converter.cpp +++ b/dxil_converter.cpp @@ -700,6 +700,7 @@ spv::Id Converter::Impl::get_physical_pointer_block_type(spv::Id base_type_id, c return entry.meta.coherent == meta.coherent && entry.meta.nonreadable == meta.nonreadable && entry.meta.nonwritable == meta.nonwritable && + entry.meta.size == meta.size && entry.meta.stride == meta.stride && entry.base_type_id == base_type_id; }); @@ -753,9 +754,18 @@ spv::Id Converter::Impl::get_physical_pointer_block_type(spv::Id base_type_id, c if (meta.stride > 0) { - type_id = builder().makeRuntimeArray(type_id); + if (meta.size == 0) + { + type_id = builder().makeRuntimeArray(type_id); + type += "Array"; + } + else + { + type_id = builder().makeArrayType(type_id, builder().makeUintConstant(meta.size / meta.stride), + meta.stride); + type += "CBVArray"; + } builder().addDecoration(type_id, spv::DecorationArrayStride, meta.stride); - type += "Array"; } spv::Id block_type_id = builder().makeStructType({ type_id }, type.c_str()); diff --git a/opcodes/converter_impl.hpp b/opcodes/converter_impl.hpp index 353efdf..dde1c85 100644 --- a/opcodes/converter_impl.hpp +++ b/opcodes/converter_impl.hpp @@ -341,6 +341,7 @@ struct Converter::Impl bool nonreadable; bool coherent; uint8_t stride; + uint32_t size; }; struct ResourceMeta diff --git a/opcodes/dxil/dxil_buffer.cpp b/opcodes/dxil/dxil_buffer.cpp index 9bc9c46..442b77d 100644 --- a/opcodes/dxil/dxil_buffer.cpp +++ b/opcodes/dxil/dxil_buffer.cpp @@ -53,10 +53,10 @@ bool raw_access_byte_address_can_vectorize(Converter::Impl &impl, const llvm::Ty } bool raw_access_structured_can_vectorize( - Converter::Impl &impl, const llvm::Type *type, - const llvm::Value *index, const llvm::Value *byte_offset, - unsigned stride, - unsigned vecsize) + Converter::Impl &impl, const llvm::Type *type, + const llvm::Value *index, unsigned stride, + const llvm::Value *byte_offset, + unsigned vecsize) { // vec3 
vectorization requires scalar block layout always. if (!impl.options.scalar_block_layout && vecsize == 3) @@ -101,11 +101,11 @@ RawVecSize raw_access_structured_vectorize( const llvm::Value *byte_offset, uint32_t mask) { - if (mask == 0xfu && raw_access_structured_can_vectorize(impl, type, index, byte_offset, stride, 4)) + if (mask == 0xfu && raw_access_structured_can_vectorize(impl, type, index, stride, byte_offset, 4)) return RawVecSize::V4; - else if (mask == 0x7u && raw_access_structured_can_vectorize(impl, type, index, byte_offset, stride, 3)) + else if (mask == 0x7u && raw_access_structured_can_vectorize(impl, type, index, stride, byte_offset, 3)) return RawVecSize::V3; - else if (mask == 0x3u && raw_access_structured_can_vectorize(impl, type, index, byte_offset, stride, 2)) + else if (mask == 0x3u && raw_access_structured_can_vectorize(impl, type, index, stride, byte_offset, 2)) return RawVecSize::V2; else return RawVecSize::V1; @@ -381,6 +381,41 @@ static spv::Id build_physical_pointer_address_for_raw_load_store(Converter::Impl return emit_u32x2_u32_add(impl, ptr_id, byte_offset_id); } +static spv::Id build_vectorized_physical_load_store_access(Converter::Impl &impl, const llvm::CallInst *instruction, + unsigned vecsize, const llvm::Type *element_type) +{ + spv::Id ptr_id = impl.get_id_for_value(instruction->getOperand(1)); + const auto &meta = impl.handle_to_resource_meta[ptr_id]; + unsigned mask = (1u << vecsize) - 1u; + + // If we can express this as a plain access chain, do so for clarity and ideally better perf. + // If we cannot do it trivially, fallback to raw pointer arithmetic. 
+ bool can_vectorize = false; + + if (meta.stride) + { + if (raw_access_structured_can_vectorize(impl, element_type, + instruction->getOperand(2), meta.stride, + instruction->getOperand(3), vecsize)) + { + can_vectorize = true; + } + } + else if (raw_access_byte_address_can_vectorize(impl, element_type, + instruction->getOperand(2), vecsize)) + { + can_vectorize = true; + } + + if (can_vectorize) + { + auto access = build_buffer_access(impl, instruction, 0, 0, element_type, mask); + return access.index_id; + } + else + return 0; +} + static bool emit_physical_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *instruction, const Converter::Impl::PhysicalPointerMeta &ptr_meta, uint32_t mask = 0, uint32_t alignment = 0) @@ -408,15 +443,26 @@ static bool emit_physical_buffer_load_instruction(Converter::Impl &impl, const l } auto *element_type = instruction->getType()->getStructElementType(0); + // If we can express this as a plain access chain, do so for clarity and ideally better perf. + // If we cannot do it trivially, fallback to raw pointer arithmetic. + spv::Id array_id = build_vectorized_physical_load_store_access(impl, instruction, vecsize, element_type); + spv::Id physical_type_id; spv::Op value_cast_op; get_physical_load_store_cast_info(impl, element_type, physical_type_id, value_cast_op); if (vecsize > 1) physical_type_id = builder.makeVectorType(physical_type_id, vecsize); - spv::Id ptr_type_id = impl.get_physical_pointer_block_type(physical_type_id, ptr_meta); - spv::Id u64_ptr_id = build_physical_pointer_address_for_raw_load_store(impl, instruction); + auto tmp_ptr_meta = ptr_meta; + tmp_ptr_meta.stride = array_id ? 
vecsize * get_type_scalar_alignment(impl, element_type) : 0; + spv::Id ptr_type_id = impl.get_physical_pointer_block_type(physical_type_id, tmp_ptr_meta); + + spv::Id u64_ptr_id; + if (array_id) + u64_ptr_id = impl.get_id_for_value(instruction->getOperand(1)); + else + u64_ptr_id = build_physical_pointer_address_for_raw_load_store(impl, instruction); auto *ptr_bitcast_op = impl.allocate(spv::OpBitcast, ptr_type_id); ptr_bitcast_op->add_id(u64_ptr_id); @@ -425,6 +471,8 @@ static bool emit_physical_buffer_load_instruction(Converter::Impl &impl, const l auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, physical_type_id)); chain_op->add_id(ptr_bitcast_op->id); chain_op->add_id(builder.makeUintConstant(0)); + if (array_id) + chain_op->add_id(array_id); impl.add(chain_op); auto *load_op = impl.allocate(spv::OpLoad, physical_type_id); @@ -797,6 +845,11 @@ static bool emit_physical_buffer_store_instruction(Converter::Impl &impl, const } auto *element_type = instruction->getOperand(4)->getType(); + + // If we can express this as a plain access chain, do so for clarity and ideally better perf. + // If we cannot do it trivially, fallback to raw pointer arithmetic. + spv::Id array_id = build_vectorized_physical_load_store_access(impl, instruction, vecsize, element_type); + spv::Id physical_type_id; spv::Op value_cast_op; get_physical_load_store_cast_info(impl, element_type, physical_type_id, value_cast_op); @@ -804,9 +857,16 @@ static bool emit_physical_buffer_store_instruction(Converter::Impl &impl, const spv::Id vec_type_id = physical_type_id; if (vecsize > 1) vec_type_id = builder.makeVectorType(physical_type_id, vecsize); - spv::Id ptr_type_id = impl.get_physical_pointer_block_type(vec_type_id, ptr_meta); - spv::Id u64_ptr_id = build_physical_pointer_address_for_raw_load_store(impl, instruction); + auto tmp_ptr_meta = ptr_meta; + tmp_ptr_meta.stride = array_id ? 
vecsize * get_type_scalar_alignment(impl, element_type) : 0; + spv::Id ptr_type_id = impl.get_physical_pointer_block_type(vec_type_id, tmp_ptr_meta); + + spv::Id u64_ptr_id; + if (array_id) + u64_ptr_id = impl.get_id_for_value(instruction->getOperand(1)); + else + u64_ptr_id = build_physical_pointer_address_for_raw_load_store(impl, instruction); auto *ptr_bitcast_op = impl.allocate(spv::OpBitcast, ptr_type_id); ptr_bitcast_op->add_id(u64_ptr_id); @@ -815,6 +875,8 @@ static bool emit_physical_buffer_store_instruction(Converter::Impl &impl, const auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, vec_type_id)); chain_op->add_id(ptr_bitcast_op->id); chain_op->add_id(builder.makeUintConstant(0)); + if (array_id) + chain_op->add_id(array_id); impl.add(chain_op); spv::Id elems[4] = {}; diff --git a/opcodes/dxil/dxil_resources.cpp b/opcodes/dxil/dxil_resources.cpp index bb8be9d..0ae5c98 100644 --- a/opcodes/dxil/dxil_resources.cpp +++ b/opcodes/dxil/dxil_resources.cpp @@ -27,6 +27,7 @@ #include "logging.hpp" #include "opcodes/converter_impl.hpp" #include "spirv_module.hpp" +#include "dxil_buffer.hpp" namespace dxil_spv { @@ -1597,28 +1598,20 @@ static bool emit_cbuffer_load_physical_pointer(Converter::Impl &impl, const llvm { auto &builder = impl.builder(); - spv::Id member_index = impl.get_id_for_value(instruction->getOperand(2)); bool scalar_load = instruction->getType()->getTypeID() != llvm::Type::TypeID::StructTyID; unsigned scalar_alignment; - spv::Id byteaddr_id; uint32_t alignment; const llvm::Type *result_component_type; if (!scalar_load) { - auto *mul_op = impl.allocate(spv::OpIMul, builder.makeUintType(32)); - mul_op->add_id(member_index); - mul_op->add_id(builder.makeUintConstant(16)); - impl.add(mul_op); - byteaddr_id = mul_op->id; result_component_type = instruction->getType()->getStructElementType(0); scalar_alignment = get_type_scalar_alignment(impl, result_component_type); alignment = 16; } else { 
- byteaddr_id = member_index; // DXIL emits the alignment, but we cannot trust it, DXC is completely buggy here and emits // obviously bogus alignment values. // Use scalar alignment. @@ -1632,7 +1625,17 @@ static bool emit_cbuffer_load_physical_pointer(Converter::Impl &impl, const llvm spv::Id physical_type_id = 0; get_physical_load_store_cast_info(impl, result_component_type, physical_type_id, value_cast_op); - spv::Id addr_vec = emit_u32x2_u32_add(impl, impl.get_id_for_value(instruction->getOperand(1)), byteaddr_id); + spv::Id index_id; + + if (!scalar_load) + { + index_id = impl.get_id_for_value(instruction->getOperand(2)); + } + else + { + unsigned addr_shift_log2 = raw_buffer_data_type_to_addr_shift_log2(impl, instruction->getType()); + index_id = build_index_divider(impl, instruction->getOperand(2), addr_shift_log2, 1); + } auto *result_type = instruction->getType(); unsigned physical_vecsize; @@ -1659,15 +1662,20 @@ static bool emit_cbuffer_load_physical_pointer(Converter::Impl &impl, const llvm Converter::Impl::PhysicalPointerMeta ptr_meta = {}; ptr_meta.nonwritable = true; + ptr_meta.stride = alignment; + ptr_meta.size = 64 * 1024; spv::Id ptr_type_id = impl.get_physical_pointer_block_type(result_type_id, ptr_meta); auto *ptr_bitcast_op = impl.allocate(spv::OpBitcast, ptr_type_id); - ptr_bitcast_op->add_id(addr_vec); + ptr_bitcast_op->add_id(impl.get_id_for_value(instruction->getOperand(1))); impl.add(ptr_bitcast_op); - auto *chain_op = impl.allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, result_type_id)); + // Out of bounds is undefined behavior for root descriptors. + // Allows a compiler to assume that the index is unsigned and multiplying by stride does not overflow 32-bit space. 
+ auto *chain_op = impl.allocate(spv::OpInBoundsAccessChain, builder.makePointer(spv::StorageClassPhysicalStorageBuffer, result_type_id)); chain_op->add_id(ptr_bitcast_op->id); chain_op->add_id(builder.makeUintConstant(0)); + chain_op->add_id(index_id); impl.add(chain_op); auto *load_op = impl.allocate(spv::OpLoad, instruction, result_type_id); diff --git a/reference/shaders/descriptor_qa/descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp b/reference/shaders/descriptor_qa/descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp index 8e95bd3..d31cad4 100644 --- a/reference/shaders/descriptor_qa/descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp +++ b/reference/shaders/descriptor_qa/descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp @@ -5,7 +5,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.comp b/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.comp index 7445ea7..fcd47f7 100644 --- a/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.comp +++ b/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.comp @@ -5,7 +5,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.sm66.comp b/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.sm66.comp index 0a1d0ab..8157210 100644 --- a/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.sm66.comp +++ 
b/reference/shaders/descriptor_qa/descriptor_qa.bindless.descriptor-qa.sm66.comp @@ -5,7 +5,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/reference/shaders/descriptor_qa/descriptor_qa.bindless.ssbo.descriptor-qa.comp b/reference/shaders/descriptor_qa/descriptor_qa.bindless.ssbo.descriptor-qa.comp index 049c910..5695290 100644 --- a/reference/shaders/descriptor_qa/descriptor_qa.bindless.ssbo.descriptor-qa.comp +++ b/reference/shaders/descriptor_qa/descriptor_qa.bindless.ssbo.descriptor-qa.comp @@ -5,7 +5,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/reference/shaders/dxil-builtin/atomic-bin-op.root-descriptor.comp b/reference/shaders/dxil-builtin/atomic-bin-op.root-descriptor.comp index 08825b2..a501c13 100644 --- a/reference/shaders/dxil-builtin/atomic-bin-op.root-descriptor.comp +++ b/reference/shaders/dxil-builtin/atomic-bin-op.root-descriptor.comp @@ -1,9 +1,10 @@ #version 460 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PhysicalPointerUintArray; -layout(buffer_reference, std430) buffer PhysicalPointerUintArray +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerUintArray { uint value[]; }; diff --git a/reference/shaders/dxil-builtin/atomic-compare-exchange.root-descriptor.comp b/reference/shaders/dxil-builtin/atomic-compare-exchange.root-descriptor.comp index addde1a..4429c57 100644 --- 
a/reference/shaders/dxil-builtin/atomic-compare-exchange.root-descriptor.comp +++ b/reference/shaders/dxil-builtin/atomic-compare-exchange.root-descriptor.comp @@ -1,9 +1,10 @@ #version 460 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PhysicalPointerUintArray; -layout(buffer_reference, std430) buffer PhysicalPointerUintArray +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerUintArray { uint value[]; }; diff --git a/reference/shaders/dxil-builtin/barycentrics-2.frag b/reference/shaders/dxil-builtin/barycentrics-2.frag index 302d676..d846596 100644 --- a/reference/shaders/dxil-builtin/barycentrics-2.frag +++ b/reference/shaders/dxil-builtin/barycentrics-2.frag @@ -22,7 +22,7 @@ void main() ; Schema: 0 OpCapability Shader OpCapability SampleRateShading -OpCapability FragmentBarycentricNV +OpCapability FragmentBarycentricKHR OpExtension "SPV_NV_fragment_shader_barycentric" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %10 %11 %12 %15 %17 %19 @@ -34,15 +34,15 @@ OpName %12 "ATTRIB_2" OpName %15 "SV_Barycentrics" OpName %17 "FOO" OpName %19 "SV_Target" -OpDecorate %10 PerVertexNV +OpDecorate %10 PerVertexKHR OpDecorate %10 Location 0 -OpDecorate %11 PerVertexNV +OpDecorate %11 PerVertexKHR OpDecorate %11 Location 0 OpDecorate %11 Component 1 -OpDecorate %12 PerVertexNV +OpDecorate %12 PerVertexKHR OpDecorate %12 Location 0 OpDecorate %12 Component 2 -OpDecorate %15 BuiltIn BaryCoordNV +OpDecorate %15 BuiltIn BaryCoordKHR OpDecorate %15 Sample OpDecorate %17 Centroid OpDecorate %17 Location 1 diff --git a/reference/shaders/dxil-builtin/barycentrics.frag b/reference/shaders/dxil-builtin/barycentrics.frag index f7eabcd..e32cfd7 100644 --- a/reference/shaders/dxil-builtin/barycentrics.frag +++ b/reference/shaders/dxil-builtin/barycentrics.frag @@ -23,7 +23,7 @@ void main() ; 
Bound: 162 ; Schema: 0 OpCapability Shader -OpCapability FragmentBarycentricNV +OpCapability FragmentBarycentricKHR OpExtension "SPV_NV_fragment_shader_barycentric" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %11 %16 %19 %20 %22 %24 @@ -35,12 +35,12 @@ OpName %19 "SV_Barycentrics" OpName %20 "SV_Barycentrics_1" OpName %22 "INDEX" OpName %24 "SV_Target" -OpDecorate %11 PerVertexNV +OpDecorate %11 PerVertexKHR OpDecorate %11 Location 0 -OpDecorate %16 PerVertexNV +OpDecorate %16 PerVertexKHR OpDecorate %16 Location 1 -OpDecorate %19 BuiltIn BaryCoordNV -OpDecorate %20 BuiltIn BaryCoordNoPerspNV +OpDecorate %19 BuiltIn BaryCoordKHR +OpDecorate %20 BuiltIn BaryCoordNoPerspKHR OpDecorate %20 Centroid OpDecorate %22 Flat OpDecorate %22 Location 3 diff --git a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag index a52b58b..b126c34 100644 --- a/reference/shaders/dxil-builtin/clip.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/clip.demote-to-helper.frag @@ -26,7 +26,7 @@ void main() ; Bound: 36 ; Schema: 0 OpCapability Shader -OpCapability DemoteToHelperInvocationEXT +OpCapability DemoteToHelperInvocation OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %8 @@ -72,7 +72,7 @@ OpFunctionEnd OpSelectionMerge %31 None OpBranchConditional %27 %30 %31 %30 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %31 %31 = OpLabel OpReturn diff --git a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag index 717a476..89b0195 100644 --- a/reference/shaders/dxil-builtin/discard.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/discard.demote-to-helper.frag @@ -27,7 +27,7 @@ void main() ; Bound: 29 ; Schema: 0 OpCapability Shader -OpCapability DemoteToHelperInvocationEXT +OpCapability DemoteToHelperInvocation OpExtension 
"SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %8 @@ -58,7 +58,7 @@ OpBranch %22 OpSelectionMerge %27 None OpBranchConditional %15 %26 %23 %26 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %27 %23 = OpLabel %17 = OpAccessChain %9 %8 %18 @@ -67,7 +67,7 @@ OpBranch %27 OpSelectionMerge %25 None OpBranchConditional %20 %24 %25 %24 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %25 %25 = OpLabel OpBranch %27 diff --git a/reference/shaders/dxil-builtin/get-dimensions-xyz-only.frag b/reference/shaders/dxil-builtin/get-dimensions-xyz-only.frag index aa90694..5539174 100644 --- a/reference/shaders/dxil-builtin/get-dimensions-xyz-only.frag +++ b/reference/shaders/dxil-builtin/get-dimensions-xyz-only.frag @@ -46,7 +46,8 @@ void main() uint _144 = uint(imageSize(_61)) / 4u; uint _147 = uint(textureSize(_40)) * 4u; uint _150 = uint(imageSize(_62)) * 4u; - SV_Target = (((((((((((((((((((((((((((((((((((((uint(textureSize(_8, int(LEVEL))) + 32u) + _90.y) + _90.x) + _93.y) + _93.x) + _97.y) + _97.x) + _97.z) + _101.y) + _101.x) + _104.y) + _104.x) + _104.z) + _108.y) + _108.x) + _108.z) + _112.y) + _112.x) + _115.y) + _115.x) + _115.z) + (_119 * _119)) + (_121 * _121)) + _123.y) + _123.x) + _126.y) + _126.x) + _129.y) + _129.x) + _129.z) + _133.y) + _133.x) + _133.z) + (_137 * _137)) + (_140 * _140)) + (_144 * _144)) + (_147 * _147)) + (_150 * _150); + uint _180 = (((((((((((((((((((((((((((uint(textureSize(_8, int(LEVEL))) + 32u) + _90.y) + _90.x) + _93.y) + _93.x) + _97.y) + _97.x) + _97.z) + _101.y) + _101.x) + _104.y) + _104.x) + _104.z) + _108.y) + _108.x) + _108.z) + _112.y) + _112.x) + _115.y) + _115.x) + _115.z) + (_119 * _119)) + (_121 * _121)) + _123.y) + _123.x) + _126.y) + _126.x) + _129.y; + SV_Target = (((((((((_180 + _129.x) + _129.z) + _133.y) + _133.x) + _133.z) + (_137 * _137)) + (_140 * _140)) + (_144 * _144)) + (_147 * _147)) + (_150 * _150); } 
diff --git a/reference/shaders/dxil-builtin/get-dimensions.frag b/reference/shaders/dxil-builtin/get-dimensions.frag index c4c01fa..33b5e6d 100644 --- a/reference/shaders/dxil-builtin/get-dimensions.frag +++ b/reference/shaders/dxil-builtin/get-dimensions.frag @@ -54,7 +54,8 @@ void main() uint _179 = uint(imageSize(_61)) / 4u; uint _182 = uint(textureSize(_40)) * 4u; uint _185 = uint(imageSize(_62)) * 4u; - SV_Target = ((((((((((((((((((((((((((((((((((((((((((((((_91.w + 32u) + _91.x) + _99.y) + _99.x) + _99.w) + _106.y) + _106.x) + _106.w) + _114.y) + _114.x) + _114.z) + _114.w) + _121.y) + _121.x) + _121.w) + _128.y) + _128.x) + _128.z) + _128.w) + _135.y) + _135.x) + _135.z) + _135.w) + _142.y) + _142.x) + _142.w) + _149.y) + _149.x) + _149.z) + _149.w) + (_154 * _154)) + (_156 * _156)) + _158.y) + _158.x) + _161.y) + _161.x) + _164.y) + _164.x) + _164.z) + _168.y) + _168.x) + _168.z) + (_172 * _172)) + (_175 * _175)) + (_179 * _179)) + (_182 * _182)) + (_185 * _185); + uint _216 = ((((((((((((((((((((((((((((_91.w + 32u) + _91.x) + _99.y) + _99.x) + _99.w) + _106.y) + _106.x) + _106.w) + _114.y) + _114.x) + _114.z) + _114.w) + _121.y) + _121.x) + _121.w) + _128.y) + _128.x) + _128.z) + _128.w) + _135.y) + _135.x) + _135.z) + _135.w) + _142.y) + _142.x) + _142.w) + _149.y) + _149.x) + _149.z; + SV_Target = (((((((((((((((((_216 + _149.w) + (_154 * _154)) + (_156 * _156)) + _158.y) + _158.x) + _161.y) + _161.x) + _164.y) + _164.x) + _164.z) + _168.y) + _168.x) + _168.z) + (_172 * _172)) + (_175 * _175)) + (_179 * _179)) + (_182 * _182)) + (_185 * _185); } diff --git a/reference/shaders/dxil-builtin/is-helper-lane.demote-to-helper.sm66.frag b/reference/shaders/dxil-builtin/is-helper-lane.demote-to-helper.sm66.frag index 740be89..f4a2a0b 100644 --- a/reference/shaders/dxil-builtin/is-helper-lane.demote-to-helper.sm66.frag +++ b/reference/shaders/dxil-builtin/is-helper-lane.demote-to-helper.sm66.frag @@ -53,7 +53,7 @@ void main() ; Schema: 0 OpCapability Shader 
OpCapability GroupNonUniformArithmetic -OpCapability DemoteToHelperInvocationEXT +OpCapability DemoteToHelperInvocation OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %8 %11 @@ -95,14 +95,14 @@ OpBranch %44 OpSelectionMerge %49 None OpBranchConditional %21 %48 %45 %48 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %49 %45 = OpLabel %23 = OpFOrdGreaterThan %20 %19 %24 OpSelectionMerge %47 None OpBranchConditional %23 %46 %47 %46 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %47 %47 = OpLabel OpBranch %49 diff --git a/reference/shaders/dxil-builtin/object-ray-direction.rany b/reference/shaders/dxil-builtin/object-ray-direction.rany index faf9a26..b09cbca 100644 --- a/reference/shaders/dxil-builtin/object-ray-direction.rany +++ b/reference/shaders/dxil-builtin/object-ray-direction.rany @@ -19,13 +19,11 @@ vec3 _22; void main() { - vec3 _21 = _22; + vec3 _21; _21.x = gl_ObjectRayDirectionEXT.x; - vec3 _26 = _21; - _26.y = gl_ObjectRayDirectionEXT.y; - vec3 _30 = _26; - _30.z = gl_ObjectRayDirectionEXT.z; - payload._m0 = _30; + _21.y = gl_ObjectRayDirectionEXT.y; + _21.z = gl_ObjectRayDirectionEXT.z; + payload._m0 = _21; } diff --git a/reference/shaders/dxil-builtin/object-ray-origin.rany b/reference/shaders/dxil-builtin/object-ray-origin.rany index b3298ce..cca449b 100644 --- a/reference/shaders/dxil-builtin/object-ray-origin.rany +++ b/reference/shaders/dxil-builtin/object-ray-origin.rany @@ -19,13 +19,11 @@ vec3 _22; void main() { - vec3 _21 = _22; + vec3 _21; _21.x = gl_ObjectRayOriginEXT.x; - vec3 _26 = _21; - _26.y = gl_ObjectRayOriginEXT.y; - vec3 _30 = _26; - _30.z = gl_ObjectRayOriginEXT.z; - payload._m0 = _30; + _21.y = gl_ObjectRayOriginEXT.y; + _21.z = gl_ObjectRayOriginEXT.z; + payload._m0 = _21; } diff --git a/reference/shaders/dxil-builtin/object-to-world-3x4.rany b/reference/shaders/dxil-builtin/object-to-world-3x4.rany index 
26813db..d257807 100644 --- a/reference/shaders/dxil-builtin/object-to-world-3x4.rany +++ b/reference/shaders/dxil-builtin/object-to-world-3x4.rany @@ -19,13 +19,11 @@ vec3 _59; void main() { - vec3 _58 = _59; + vec3 _58; _58.x = fma(gl_ObjectToWorldEXT[2u].x, payload._m0.z, fma(gl_ObjectToWorldEXT[1u].x, payload._m0.y, gl_ObjectToWorldEXT[0u].x * payload._m0.x)) + gl_ObjectToWorldEXT[3u].x; - vec3 _64 = _58; - _64.y = fma(gl_ObjectToWorldEXT[2u].y, payload._m0.z, fma(gl_ObjectToWorldEXT[1u].y, payload._m0.y, gl_ObjectToWorldEXT[0u].y * payload._m0.x)) + gl_ObjectToWorldEXT[3u].y; - vec3 _69 = _64; - _69.z = fma(gl_ObjectToWorldEXT[2u].z, payload._m0.z, fma(gl_ObjectToWorldEXT[1u].z, payload._m0.y, gl_ObjectToWorldEXT[0u].z * payload._m0.x)) + gl_ObjectToWorldEXT[3u].z; - payload._m0 = _69; + _58.y = fma(gl_ObjectToWorldEXT[2u].y, payload._m0.z, fma(gl_ObjectToWorldEXT[1u].y, payload._m0.y, gl_ObjectToWorldEXT[0u].y * payload._m0.x)) + gl_ObjectToWorldEXT[3u].y; + _58.z = fma(gl_ObjectToWorldEXT[2u].z, payload._m0.z, fma(gl_ObjectToWorldEXT[1u].z, payload._m0.y, gl_ObjectToWorldEXT[0u].z * payload._m0.x)) + gl_ObjectToWorldEXT[3u].z; + payload._m0 = _58; } diff --git a/reference/shaders/dxil-builtin/object-to-world-4x3.rany b/reference/shaders/dxil-builtin/object-to-world-4x3.rany index c42f1b4..c4acdf1 100644 --- a/reference/shaders/dxil-builtin/object-to-world-4x3.rany +++ b/reference/shaders/dxil-builtin/object-to-world-4x3.rany @@ -19,13 +19,11 @@ vec3 _59; void main() { - vec3 _58 = _59; + vec3 _58; _58.x = fma(payload._m0.z, gl_ObjectToWorldEXT[2u].x, fma(payload._m0.y, gl_ObjectToWorldEXT[1u].x, payload._m0.x * gl_ObjectToWorldEXT[0u].x)) + gl_ObjectToWorldEXT[3u].x; - vec3 _64 = _58; - _64.y = fma(payload._m0.z, gl_ObjectToWorldEXT[2u].y, fma(payload._m0.y, gl_ObjectToWorldEXT[1u].y, payload._m0.x * gl_ObjectToWorldEXT[0u].y)) + gl_ObjectToWorldEXT[3u].y; - vec3 _69 = _64; - _69.z = fma(payload._m0.z, gl_ObjectToWorldEXT[2u].z, fma(payload._m0.y, 
gl_ObjectToWorldEXT[1u].z, payload._m0.x * gl_ObjectToWorldEXT[0u].z)) + gl_ObjectToWorldEXT[3u].z; - payload._m0 = _69; + _58.y = fma(payload._m0.z, gl_ObjectToWorldEXT[2u].y, fma(payload._m0.y, gl_ObjectToWorldEXT[1u].y, payload._m0.x * gl_ObjectToWorldEXT[0u].y)) + gl_ObjectToWorldEXT[3u].y; + _58.z = fma(payload._m0.z, gl_ObjectToWorldEXT[2u].z, fma(payload._m0.y, gl_ObjectToWorldEXT[1u].z, payload._m0.x * gl_ObjectToWorldEXT[0u].z)) + gl_ObjectToWorldEXT[3u].z; + payload._m0 = _58; } diff --git a/reference/shaders/dxil-builtin/ray-t-current.rany b/reference/shaders/dxil-builtin/ray-t-current.rany index f4c336a..18f9f5e 100644 --- a/reference/shaders/dxil-builtin/ray-t-current.rany +++ b/reference/shaders/dxil-builtin/ray-t-current.rany @@ -19,13 +19,11 @@ vec3 _18; void main() { - vec3 _17 = _18; + vec3 _17; _17.x = gl_RayTmaxEXT; - vec3 _19 = _17; - _19.y = gl_RayTmaxEXT; - vec3 _20 = _19; - _20.z = gl_RayTmaxEXT; - payload._m0 = _20; + _17.y = gl_RayTmaxEXT; + _17.z = gl_RayTmaxEXT; + payload._m0 = _17; } diff --git a/reference/shaders/dxil-builtin/ray-t-min.rany b/reference/shaders/dxil-builtin/ray-t-min.rany index c94cdcb..500bba3 100644 --- a/reference/shaders/dxil-builtin/ray-t-min.rany +++ b/reference/shaders/dxil-builtin/ray-t-min.rany @@ -19,13 +19,11 @@ vec3 _18; void main() { - vec3 _17 = _18; + vec3 _17; _17.x = gl_RayTminEXT; - vec3 _19 = _17; - _19.y = gl_RayTminEXT; - vec3 _20 = _19; - _20.z = gl_RayTminEXT; - payload._m0 = _20; + _17.y = gl_RayTminEXT; + _17.z = gl_RayTminEXT; + payload._m0 = _17; } diff --git a/reference/shaders/dxil-builtin/render-target-sample-position.frag b/reference/shaders/dxil-builtin/render-target-sample-position.frag index 1dac314..b4ab6a4 100644 --- a/reference/shaders/dxil-builtin/render-target-sample-position.frag +++ b/reference/shaders/dxil-builtin/render-target-sample-position.frag @@ -7,7 +7,8 @@ layout(location = 0) out vec2 SV_Target; void main() { - uint _74 = ((uint(gl_SampleID) < _13) && (_13 <= 16u)) ? 
((_13 - 1u) + uint(gl_SampleID)) : 0u; + uint _66 = _13 - 1u; + uint _74 = ((uint(gl_SampleID) < _13) && (_13 <= 16u)) ? (_66 + uint(gl_SampleID)) : 0u; SV_Target.x = _63[_74].x; SV_Target.y = _63[_74].y; } diff --git a/reference/shaders/dxil-builtin/report-hit.rint b/reference/shaders/dxil-builtin/report-hit.rint index 1d229ce..3688505 100644 --- a/reference/shaders/dxil-builtin/report-hit.rint +++ b/reference/shaders/dxil-builtin/report-hit.rint @@ -11,8 +11,9 @@ hitAttributeEXT _6 hit; void main() { + float _14 = gl_RayTmaxEXT; _6 _10; - _10._m0 = gl_RayTmaxEXT; + _10._m0 = _14; _6 _11; _11._m0 = gl_RayTminEXT; hit = _10; diff --git a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp index 6fd75c1..4f1ff22 100644 --- a/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp +++ b/reference/shaders/dxil-builtin/sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp @@ -7,8 +7,8 @@ OpCapability Shader OpCapability Float16 OpCapability Int16 OpCapability StorageBuffer16BitAccess -OpCapability DotProductInput4x8BitPackedKHR -OpCapability DotProductKHR +OpCapability DotProductInput4x8BitPacked +OpCapability DotProduct OpExtension "SPV_KHR_integer_dot_product" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "main" %34 @@ -122,7 +122,7 @@ OpBranch %121 %43 = OpLoad %5 %42 %44 = OpAccessChain %39 %9 %37 %38 %45 = OpLoad %5 %44 -%46 = OpUDotKHR %5 %45 %43 PackedVectorFormat4x8BitKHR +%46 = OpUDot %5 %45 %43 PackedVectorFormat4x8Bit %47 = OpIAdd %5 %41 %46 %48 = OpAccessChain %39 %27 %37 %38 OpStore %48 %47 @@ -132,7 +132,7 @@ OpStore %48 %47 %52 = OpLoad %5 %51 %53 = OpAccessChain %39 %9 %37 %38 %54 = OpLoad %5 %53 -%55 = OpSDotKHR %5 %54 %52 PackedVectorFormat4x8BitKHR +%55 = OpSDot %5 %54 %52 PackedVectorFormat4x8Bit %56 = OpIAdd %5 %50 %55 %57 = OpAccessChain %39 %27 %37 %38 OpStore %57 %56 diff --git 
a/reference/shaders/dxil-builtin/texture-load-feedback.frag b/reference/shaders/dxil-builtin/texture-load-feedback.frag index 2eeaff8..71c2dc8 100644 --- a/reference/shaders/dxil-builtin/texture-load-feedback.frag +++ b/reference/shaders/dxil-builtin/texture-load-feedback.frag @@ -72,14 +72,14 @@ void main() float _166 = float(sparseTexelsResidentARB(int(_159._m4))); uint _309; vec4 _310; - _309 = sparseTexelFetchARB(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), TEXCOORD.z, _310); + _309 = sparseTexelFetchARB(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z), _310); SparseTexel _169 = SparseTexel(_309, _310); vec4 _172 = _169._m1; _83 _177 = _83(_172.x, _172.y, _172.z, _172.w, _169._m0); float _184 = float(sparseTexelsResidentARB(int(_177._m4))); uint _311; vec4 _312; - _311 = sparseTexelFetchARB(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), TEXCOORD.w, _312); + _311 = sparseTexelFetchARB(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w), _312); SparseTexel _187 = SparseTexel(_311, _312); vec4 _190 = _187._m1; _83 _195 = _83(_190.x, _190.y, _190.z, _190.w, _187._m0); diff --git a/reference/shaders/dxil-builtin/texture-load-offset.frag b/reference/shaders/dxil-builtin/texture-load-offset.frag index 4efadce..8c680c8 100644 --- a/reference/shaders/dxil-builtin/texture-load-offset.frag +++ b/reference/shaders/dxil-builtin/texture-load-offset.frag @@ -19,8 +19,8 @@ void main() vec4 _70 = texelFetchOffset(_14, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z), ivec2(3, 4)); vec4 _80 = texelFetchOffset(_17, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w), ivec2(-4, -3)); vec4 _88 = texelFetchOffset(_20, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w), ivec3(-4, 2, 3)); - vec4 _96 = texelFetchOffset(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), ivec2(2, 3), TEXCOORD.z); - vec4 _104 = texelFetchOffset(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), ivec2(4, 5), TEXCOORD.w); + vec4 _96 = 
texelFetchOffset(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), ivec2(2, 3), int(TEXCOORD.z)); + vec4 _104 = texelFetchOffset(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), ivec2(4, 5), int(TEXCOORD.w)); SV_Target.x = (((((_61.x + _57.x) + _70.x) + _80.x) + _88.x) + _96.x) + _104.x; SV_Target.y = (((((_61.y + _57.y) + _70.y) + _80.y) + _88.y) + _96.y) + _104.y; } diff --git a/reference/shaders/dxil-builtin/texture-load-signed.frag b/reference/shaders/dxil-builtin/texture-load-signed.frag index 96e158c..d54dfea 100644 --- a/reference/shaders/dxil-builtin/texture-load-signed.frag +++ b/reference/shaders/dxil-builtin/texture-load-signed.frag @@ -24,8 +24,8 @@ void main() uvec4 _89 = uvec4(texelFetch(_14, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z))); uvec4 _97 = uvec4(texelFetch(_17, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w))); uvec4 _104 = uvec4(texelFetch(_20, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w))); - uvec4 _111 = uvec4(texelFetch(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), TEXCOORD.z)); - uvec4 _118 = uvec4(texelFetch(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), TEXCOORD.w)); + uvec4 _111 = uvec4(texelFetch(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z))); + uvec4 _118 = uvec4(texelFetch(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w))); uvec4 _124 = uvec4(imageLoad(_29, int(TEXCOORD.x))); uvec4 _131 = uvec4(imageLoad(_32, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)))); uvec4 _138 = uvec4(imageLoad(_35, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)))); diff --git a/reference/shaders/dxil-builtin/texture-load.frag b/reference/shaders/dxil-builtin/texture-load.frag index b80a89f..0338a9d 100644 --- a/reference/shaders/dxil-builtin/texture-load.frag +++ b/reference/shaders/dxil-builtin/texture-load.frag @@ -24,8 +24,8 @@ void main() vec4 _85 = texelFetch(_14, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z)); vec4 _91 = texelFetch(_17, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, 
TEXCOORD.z)), int(TEXCOORD.w)); vec4 _98 = texelFetch(_20, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w)); - vec4 _104 = texelFetch(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), TEXCOORD.z); - vec4 _110 = texelFetch(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), TEXCOORD.w); + vec4 _104 = texelFetch(_23, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y)), int(TEXCOORD.z)); + vec4 _110 = texelFetch(_26, ivec3(uvec3(TEXCOORD.x, TEXCOORD.y, TEXCOORD.z)), int(TEXCOORD.w)); vec4 _116 = imageLoad(_29, int(TEXCOORD.x)); vec4 _121 = imageLoad(_32, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y))); vec4 _127 = imageLoad(_35, ivec2(uvec2(TEXCOORD.x, TEXCOORD.y))); diff --git a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag index aa21585..5c22543 100644 --- a/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag +++ b/reference/shaders/dxil-builtin/wave-active-ballot-discard.demote-to-helper.frag @@ -28,7 +28,7 @@ void main() ; Schema: 0 OpCapability Shader OpCapability GroupNonUniformBallot -OpCapability DemoteToHelperInvocationEXT +OpCapability DemoteToHelperInvocation OpExtension "SPV_EXT_demote_to_helper_invocation" OpMemoryModel Logical GLSL450 OpEntryPoint Fragment %3 "main" %7 %10 @@ -64,7 +64,7 @@ OpBranch %31 OpSelectionMerge %33 None OpBranchConditional %13 %32 %33 %32 = OpLabel -OpDemoteToHelperInvocationEXT +OpDemoteToHelperInvocation OpBranch %33 %33 = OpLabel %15 = OpULessThan %12 %11 %16 diff --git a/reference/shaders/dxil-builtin/world-ray-direction.rany b/reference/shaders/dxil-builtin/world-ray-direction.rany index 43c623b..660c9e4 100644 --- a/reference/shaders/dxil-builtin/world-ray-direction.rany +++ b/reference/shaders/dxil-builtin/world-ray-direction.rany @@ -19,13 +19,11 @@ vec3 _22; void main() { - vec3 _21 = _22; + vec3 _21; _21.x = gl_WorldRayDirectionEXT.x; - vec3 _26 = _21; - _26.y = gl_WorldRayDirectionEXT.y; 
- vec3 _30 = _26; - _30.z = gl_WorldRayDirectionEXT.z; - payload._m0 = _30; + _21.y = gl_WorldRayDirectionEXT.y; + _21.z = gl_WorldRayDirectionEXT.z; + payload._m0 = _21; } diff --git a/reference/shaders/dxil-builtin/world-ray-origin.rany b/reference/shaders/dxil-builtin/world-ray-origin.rany index 67e1e0f..e3109ce 100644 --- a/reference/shaders/dxil-builtin/world-ray-origin.rany +++ b/reference/shaders/dxil-builtin/world-ray-origin.rany @@ -19,13 +19,11 @@ vec3 _22; void main() { - vec3 _21 = _22; + vec3 _21; _21.x = gl_WorldRayOriginEXT.x; - vec3 _26 = _21; - _26.y = gl_WorldRayOriginEXT.y; - vec3 _30 = _26; - _30.z = gl_WorldRayOriginEXT.z; - payload._m0 = _30; + _21.y = gl_WorldRayOriginEXT.y; + _21.z = gl_WorldRayOriginEXT.z; + payload._m0 = _21; } diff --git a/reference/shaders/dxil-builtin/world-to-object-3x4.rany b/reference/shaders/dxil-builtin/world-to-object-3x4.rany index a748e97..14177b8 100644 --- a/reference/shaders/dxil-builtin/world-to-object-3x4.rany +++ b/reference/shaders/dxil-builtin/world-to-object-3x4.rany @@ -19,13 +19,11 @@ vec3 _59; void main() { - vec3 _58 = _59; + vec3 _58; _58.x = fma(gl_WorldToObjectEXT[2u].x, payload._m0.z, fma(gl_WorldToObjectEXT[1u].x, payload._m0.y, gl_WorldToObjectEXT[0u].x * payload._m0.x)) + gl_WorldToObjectEXT[3u].x; - vec3 _64 = _58; - _64.y = fma(gl_WorldToObjectEXT[2u].y, payload._m0.z, fma(gl_WorldToObjectEXT[1u].y, payload._m0.y, gl_WorldToObjectEXT[0u].y * payload._m0.x)) + gl_WorldToObjectEXT[3u].y; - vec3 _69 = _64; - _69.z = fma(gl_WorldToObjectEXT[2u].z, payload._m0.z, fma(gl_WorldToObjectEXT[1u].z, payload._m0.y, gl_WorldToObjectEXT[0u].z * payload._m0.x)) + gl_WorldToObjectEXT[3u].z; - payload._m0 = _69; + _58.y = fma(gl_WorldToObjectEXT[2u].y, payload._m0.z, fma(gl_WorldToObjectEXT[1u].y, payload._m0.y, gl_WorldToObjectEXT[0u].y * payload._m0.x)) + gl_WorldToObjectEXT[3u].y; + _58.z = fma(gl_WorldToObjectEXT[2u].z, payload._m0.z, fma(gl_WorldToObjectEXT[1u].z, payload._m0.y, 
gl_WorldToObjectEXT[0u].z * payload._m0.x)) + gl_WorldToObjectEXT[3u].z; + payload._m0 = _58; } diff --git a/reference/shaders/dxil-builtin/world-to-object-4x3.rany b/reference/shaders/dxil-builtin/world-to-object-4x3.rany index 160f113..7fe5ec7 100644 --- a/reference/shaders/dxil-builtin/world-to-object-4x3.rany +++ b/reference/shaders/dxil-builtin/world-to-object-4x3.rany @@ -19,13 +19,11 @@ vec3 _59; void main() { - vec3 _58 = _59; + vec3 _58; _58.x = fma(payload._m0.z, gl_WorldToObjectEXT[2u].x, fma(payload._m0.y, gl_WorldToObjectEXT[1u].x, payload._m0.x * gl_WorldToObjectEXT[0u].x)) + gl_WorldToObjectEXT[3u].x; - vec3 _64 = _58; - _64.y = fma(payload._m0.z, gl_WorldToObjectEXT[2u].y, fma(payload._m0.y, gl_WorldToObjectEXT[1u].y, payload._m0.x * gl_WorldToObjectEXT[0u].y)) + gl_WorldToObjectEXT[3u].y; - vec3 _69 = _64; - _69.z = fma(payload._m0.z, gl_WorldToObjectEXT[2u].z, fma(payload._m0.y, gl_WorldToObjectEXT[1u].z, payload._m0.x * gl_WorldToObjectEXT[0u].z)) + gl_WorldToObjectEXT[3u].z; - payload._m0 = _69; + _58.y = fma(payload._m0.z, gl_WorldToObjectEXT[2u].y, fma(payload._m0.y, gl_WorldToObjectEXT[1u].y, payload._m0.x * gl_WorldToObjectEXT[0u].y)) + gl_WorldToObjectEXT[3u].y; + _58.z = fma(payload._m0.z, gl_WorldToObjectEXT[2u].z, fma(payload._m0.y, gl_WorldToObjectEXT[1u].z, payload._m0.x * gl_WorldToObjectEXT[0u].z)) + gl_WorldToObjectEXT[3u].z; + payload._m0 = _58; } diff --git a/reference/shaders/fp16/saturate.sm60.frag b/reference/shaders/fp16/saturate.sm60.frag index a4bfecc..5ed9216 100644 --- a/reference/shaders/fp16/saturate.sm60.frag +++ b/reference/shaders/fp16/saturate.sm60.frag @@ -5,14 +5,22 @@ layout(location = 0) out mediump vec4 SV_Target; void main() { - float _39 = isnan(0.0) ? V.x : (isnan(V.x) ? 0.0 : max(V.x, 0.0)); - float _50 = isnan(0.0) ? V.y : (isnan(V.y) ? 0.0 : max(V.y, 0.0)); - float _61 = isnan(0.0) ? V.z : (isnan(V.z) ? 0.0 : max(V.z, 0.0)); - float _72 = isnan(0.0) ? V.w : (isnan(V.w) ? 
0.0 : max(V.w, 0.0)); - SV_Target.x = isnan(1.0) ? _39 : (isnan(_39) ? 1.0 : min(_39, 1.0)); - SV_Target.y = isnan(1.0) ? _50 : (isnan(_50) ? 1.0 : min(_50, 1.0)); - SV_Target.z = isnan(1.0) ? _61 : (isnan(_61) ? 1.0 : min(_61, 1.0)); - SV_Target.w = isnan(1.0) ? _72 : (isnan(_72) ? 1.0 : min(_72, 1.0)); + mediump float _15 = V.x; + float hp_copy_15 = _15; + mediump float _18 = V.y; + float hp_copy_18 = _18; + mediump float _21 = V.z; + float hp_copy_21 = _21; + mediump float _24 = V.w; + float hp_copy_24 = _24; + float _40 = isnan(0.0) ? hp_copy_15 : (isnan(hp_copy_15) ? 0.0 : max(hp_copy_15, 0.0)); + float _52 = isnan(0.0) ? hp_copy_18 : (isnan(hp_copy_18) ? 0.0 : max(hp_copy_18, 0.0)); + float _64 = isnan(0.0) ? hp_copy_21 : (isnan(hp_copy_21) ? 0.0 : max(hp_copy_21, 0.0)); + float _76 = isnan(0.0) ? hp_copy_24 : (isnan(hp_copy_24) ? 0.0 : max(hp_copy_24, 0.0)); + SV_Target.x = isnan(1.0) ? _40 : (isnan(_40) ? 1.0 : min(_40, 1.0)); + SV_Target.y = isnan(1.0) ? _52 : (isnan(_52) ? 1.0 : min(_52, 1.0)); + SV_Target.z = isnan(1.0) ? _64 : (isnan(_64) ? 1.0 : min(_64, 1.0)); + SV_Target.w = isnan(1.0) ? _76 : (isnan(_76) ? 1.0 : min(_76, 1.0)); } diff --git a/reference/shaders/llvm-builtin/fcmp_ne.frag b/reference/shaders/llvm-builtin/fcmp_ne.frag index f6fe33d..f5adc87 100644 --- a/reference/shaders/llvm-builtin/fcmp_ne.frag +++ b/reference/shaders/llvm-builtin/fcmp_ne.frag @@ -5,7 +5,7 @@ layout(location = 0) out float SV_Target; void main() { - SV_Target = (!(A.x == A.y)) ? A.z : A.w; + SV_Target = (A.x != A.y) ? 
A.z : A.w; } diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag index 996b3f0..f2ccf13 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.frag @@ -9,12 +9,7 @@ #extension GL_EXT_shader_16bit_storage : require #extension GL_ARB_gpu_shader_int64 : require #extension GL_EXT_buffer_reference : require - -struct AddCarry -{ - uint _m0; - uint _m1; -}; +#extension GL_EXT_buffer_reference_uvec2 : require struct CBVComposite16x8 { @@ -28,16 +23,16 @@ struct CBVComposite16x8 float16_t _m7; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerUint642NonWriteCBVArray; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerFloat4NonWriteCBVArray { - vec4 value; + vec4 value[4096]; }; -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerUint642NonWriteCBVArray { - u64vec2 value; + u64vec2 value[4096]; }; layout(push_constant, std430) uniform RootConstants @@ -52,27 +47,19 @@ layout(location = 0) out vec4 SV_Target; void main() { - AddCarry _23; - _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); - PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); - AddCarry _43; - _43._m0 = uaddCarry(registers._m0.x, 1u * 16u, _43._m1); - PhysicalPointerFloat4NonWrite _48 = PhysicalPointerFloat4NonWrite(uvec2(_43._m0, registers._m0.y + _43._m1)); - f16vec2 _56 = 
unpackFloat2x16(floatBitsToUint(_48.value.x)); - f16vec2 _59 = unpackFloat2x16(floatBitsToUint(_48.value.y)); - f16vec2 _62 = unpackFloat2x16(floatBitsToUint(_48.value.z)); - f16vec2 _65 = unpackFloat2x16(floatBitsToUint(_48.value.w)); - CBVComposite16x8 _69 = CBVComposite16x8(_56.x, _56.y, _59.x, _59.y, _62.x, _62.y, _65.x, _65.y); - AddCarry _99; - _99._m0 = uaddCarry(registers._m0.x, 2u * 16u, _99._m1); - PhysicalPointerUint642NonWrite _107 = PhysicalPointerUint642NonWrite(uvec2(_99._m0, registers._m0.y + _99._m1)); - AddCarry _117; - _117._m0 = uaddCarry(registers._m0.x, 3u * 16u, _117._m1); - PhysicalPointerUint642NonWrite _122 = PhysicalPointerUint642NonWrite(uvec2(_117._m0, registers._m0.y + _117._m1)); - SV_Target.x = ((float(_69._m0) + _30.value.x) + float(_69._m4)) + float(int64_t(_107.value.x)); - SV_Target.y = ((float(_69._m1) + _30.value.y) + float(_69._m5)) + float(int64_t(_107.value.y)); - SV_Target.z = ((float(_69._m2) + _30.value.z) + float(_69._m6)) + float(int64_t(_122.value.x)); - SV_Target.w = ((float(_69._m3) + _30.value.w) + float(_69._m7)) + float(int64_t(_122.value.y)); + PhysicalPointerFloat4NonWriteCBVArray _22 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerFloat4NonWriteCBVArray _32 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + f16vec2 _40 = unpackFloat2x16(floatBitsToUint(_32.value[1u].x)); + f16vec2 _43 = unpackFloat2x16(floatBitsToUint(_32.value[1u].y)); + f16vec2 _46 = unpackFloat2x16(floatBitsToUint(_32.value[1u].z)); + f16vec2 _49 = unpackFloat2x16(floatBitsToUint(_32.value[1u].w)); + CBVComposite16x8 _53 = CBVComposite16x8(_40.x, _40.y, _43.x, _43.y, _46.x, _46.y, _49.x, _49.y); + PhysicalPointerUint642NonWriteCBVArray _84 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + PhysicalPointerUint642NonWriteCBVArray _91 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + SV_Target.x = ((float(_53._m0) + _22.value[0u].x) + float(_53._m4)) + float(int64_t(_84.value[2u].x)); + 
SV_Target.y = ((float(_53._m1) + _22.value[0u].y) + float(_53._m5)) + float(int64_t(_84.value[2u].y)); + SV_Target.z = ((float(_53._m2) + _22.value[0u].z) + float(_53._m6)) + float(int64_t(_91.value[3u].x)); + SV_Target.w = ((float(_53._m3) + _22.value[0u].w) + float(_53._m7)) + float(int64_t(_91.value[3u].y)); } @@ -81,7 +68,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 142 +; Bound: 111 ; Schema: 0 OpCapability Shader OpCapability Float16 @@ -95,24 +82,25 @@ OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" OpName %13 "SV_Target" -OpName %22 "AddCarry" -OpName %28 "PhysicalPointerFloat4NonWrite" -OpMemberName %28 0 "value" -OpName %68 "CBVComposite16x8" -OpName %105 "PhysicalPointerUint642NonWrite" -OpMemberName %105 0 "value" +OpName %20 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %20 0 "value" +OpName %52 "CBVComposite16x8" +OpName %82 "PhysicalPointerUint642NonWriteCBVArray" +OpMemberName %82 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %13 Location 0 -OpMemberDecorate %28 0 Offset 0 -OpDecorate %28 Block -OpMemberDecorate %28 0 NonWritable -OpMemberDecorate %105 0 Offset 0 -OpDecorate %105 Block -OpMemberDecorate %105 0 NonWritable +OpDecorate %19 ArrayStride 16 +OpMemberDecorate %20 0 Offset 0 +OpDecorate %20 Block +OpMemberDecorate %20 0 NonWritable +OpDecorate %81 ArrayStride 16 +OpMemberDecorate %82 0 Offset 0 +OpDecorate %82 Block +OpMemberDecorate %82 0 NonWritable %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -126,138 +114,107 @@ OpMemberDecorate %105 0 NonWritable %13 = OpVariable %12 Output %14 = OpTypePointer PushConstant %6 %16 = OpConstant %5 0 -%19 = OpConstant %5 16 -%22 = OpTypeStruct %5 %5 -%28 = OpTypeStruct %11 -%29 = OpTypePointer PhysicalStorageBuffer %28 -%31 = OpTypePointer PhysicalStorageBuffer %11 -%38 = OpConstant %5 1 -%40 = 
OpTypeFloat 16 -%51 = OpTypeVector %40 2 -%68 = OpTypeStruct %40 %40 %40 %40 %40 %40 %40 %40 -%94 = OpConstant %5 2 -%96 = OpTypeInt 64 0 -%104 = OpTypeVector %96 2 -%105 = OpTypeStruct %104 -%106 = OpTypePointer PhysicalStorageBuffer %105 -%108 = OpTypePointer PhysicalStorageBuffer %104 -%113 = OpConstant %5 3 -%135 = OpTypePointer Output %10 +%18 = OpConstant %5 4096 +%19 = OpTypeArray %11 %18 +%20 = OpTypeStruct %19 +%21 = OpTypePointer PhysicalStorageBuffer %20 +%23 = OpTypePointer PhysicalStorageBuffer %11 +%30 = OpTypeFloat 16 +%31 = OpConstant %5 1 +%35 = OpTypeVector %30 2 +%52 = OpTypeStruct %30 %30 %30 %30 %30 %30 %30 %30 +%78 = OpTypeInt 64 0 +%79 = OpConstant %5 2 +%80 = OpTypeVector %78 2 +%81 = OpTypeArray %80 %18 +%82 = OpTypeStruct %81 +%83 = OpTypePointer PhysicalStorageBuffer %82 +%85 = OpTypePointer PhysicalStorageBuffer %80 +%90 = OpConstant %5 3 +%104 = OpTypePointer Output %10 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %140 -%140 = OpLabel +OpBranch %109 +%109 = OpLabel %15 = OpAccessChain %14 %9 %16 %17 = OpLoad %6 %15 -%18 = OpIMul %5 %16 %19 -%20 = OpCompositeExtract %5 %17 0 -%21 = OpCompositeExtract %5 %17 1 -%23 = OpIAddCarry %22 %20 %18 -%24 = OpCompositeExtract %5 %23 0 -%25 = OpCompositeExtract %5 %23 1 -%26 = OpIAdd %5 %21 %25 -%27 = OpCompositeConstruct %6 %24 %26 -%30 = OpBitcast %29 %27 -%32 = OpAccessChain %31 %30 %16 -%33 = OpLoad %11 %32 Aligned 16 -%34 = OpCompositeExtract %10 %33 0 -%35 = OpCompositeExtract %10 %33 1 -%36 = OpCompositeExtract %10 %33 2 -%37 = OpCompositeExtract %10 %33 3 -%39 = OpIMul %5 %38 %19 -%41 = OpCompositeExtract %5 %17 0 -%42 = OpCompositeExtract %5 %17 1 -%43 = OpIAddCarry %22 %41 %39 -%44 = OpCompositeExtract %5 %43 0 -%45 = OpCompositeExtract %5 %43 1 -%46 = OpIAdd %5 %42 %45 -%47 = OpCompositeConstruct %6 %44 %46 -%48 = OpBitcast %29 %47 -%49 = OpAccessChain %31 %48 %16 -%50 = OpLoad %11 %49 Aligned 16 -%52 = OpCompositeExtract %10 %50 0 -%53 = OpCompositeExtract %10 %50 1 -%54 = 
OpCompositeExtract %10 %50 2 -%55 = OpCompositeExtract %10 %50 3 -%56 = OpBitcast %51 %52 -%57 = OpCompositeExtract %40 %56 0 -%58 = OpCompositeExtract %40 %56 1 -%59 = OpBitcast %51 %53 -%60 = OpCompositeExtract %40 %59 0 -%61 = OpCompositeExtract %40 %59 1 -%62 = OpBitcast %51 %54 -%63 = OpCompositeExtract %40 %62 0 -%64 = OpCompositeExtract %40 %62 1 -%65 = OpBitcast %51 %55 -%66 = OpCompositeExtract %40 %65 0 -%67 = OpCompositeExtract %40 %65 1 -%69 = OpCompositeConstruct %68 %57 %58 %60 %61 %63 %64 %66 %67 -%70 = OpCompositeExtract %40 %69 0 -%71 = OpCompositeExtract %40 %69 1 -%72 = OpCompositeExtract %40 %69 2 -%73 = OpCompositeExtract %40 %69 3 -%74 = OpFConvert %10 %70 -%75 = OpFConvert %10 %71 -%76 = OpFConvert %10 %72 -%77 = OpFConvert %10 %73 -%78 = OpFAdd %10 %74 %34 -%79 = OpFAdd %10 %75 %35 -%80 = OpFAdd %10 %76 %36 -%81 = OpFAdd %10 %77 %37 -%82 = OpCompositeExtract %40 %69 4 -%83 = OpCompositeExtract %40 %69 5 -%84 = OpCompositeExtract %40 %69 6 -%85 = OpCompositeExtract %40 %69 7 -%86 = OpFConvert %10 %82 -%87 = OpFConvert %10 %83 -%88 = OpFConvert %10 %84 -%89 = OpFConvert %10 %85 -%90 = OpFAdd %10 %78 %86 -%91 = OpFAdd %10 %79 %87 -%92 = OpFAdd %10 %80 %88 -%93 = OpFAdd %10 %81 %89 -%95 = OpIMul %5 %94 %19 -%97 = OpCompositeExtract %5 %17 0 -%98 = OpCompositeExtract %5 %17 1 -%99 = OpIAddCarry %22 %97 %95 -%100 = OpCompositeExtract %5 %99 0 -%101 = OpCompositeExtract %5 %99 1 -%102 = OpIAdd %5 %98 %101 -%103 = OpCompositeConstruct %6 %100 %102 -%107 = OpBitcast %106 %103 -%109 = OpAccessChain %108 %107 %16 -%110 = OpLoad %104 %109 Aligned 16 -%111 = OpCompositeExtract %96 %110 0 -%112 = OpCompositeExtract %96 %110 1 -%114 = OpIMul %5 %113 %19 -%115 = OpCompositeExtract %5 %17 0 -%116 = OpCompositeExtract %5 %17 1 -%117 = OpIAddCarry %22 %115 %114 -%118 = OpCompositeExtract %5 %117 0 -%119 = OpCompositeExtract %5 %117 1 -%120 = OpIAdd %5 %116 %119 -%121 = OpCompositeConstruct %6 %118 %120 -%122 = OpBitcast %106 %121 -%123 = OpAccessChain %108 
%122 %16 -%124 = OpLoad %104 %123 Aligned 16 -%125 = OpCompositeExtract %96 %124 0 -%126 = OpCompositeExtract %96 %124 1 -%127 = OpConvertSToF %10 %111 -%128 = OpConvertSToF %10 %112 -%129 = OpConvertSToF %10 %125 -%130 = OpConvertSToF %10 %126 -%131 = OpFAdd %10 %90 %127 -%132 = OpFAdd %10 %91 %128 -%133 = OpFAdd %10 %92 %129 -%134 = OpFAdd %10 %93 %130 -%136 = OpAccessChain %135 %13 %16 -OpStore %136 %131 -%137 = OpAccessChain %135 %13 %38 -OpStore %137 %132 -%138 = OpAccessChain %135 %13 %94 -OpStore %138 %133 -%139 = OpAccessChain %135 %13 %113 -OpStore %139 %134 +%22 = OpBitcast %21 %17 +%24 = OpInBoundsAccessChain %23 %22 %16 %16 +%25 = OpLoad %11 %24 Aligned 16 +%26 = OpCompositeExtract %10 %25 0 +%27 = OpCompositeExtract %10 %25 1 +%28 = OpCompositeExtract %10 %25 2 +%29 = OpCompositeExtract %10 %25 3 +%32 = OpBitcast %21 %17 +%33 = OpInBoundsAccessChain %23 %32 %16 %31 +%34 = OpLoad %11 %33 Aligned 16 +%36 = OpCompositeExtract %10 %34 0 +%37 = OpCompositeExtract %10 %34 1 +%38 = OpCompositeExtract %10 %34 2 +%39 = OpCompositeExtract %10 %34 3 +%40 = OpBitcast %35 %36 +%41 = OpCompositeExtract %30 %40 0 +%42 = OpCompositeExtract %30 %40 1 +%43 = OpBitcast %35 %37 +%44 = OpCompositeExtract %30 %43 0 +%45 = OpCompositeExtract %30 %43 1 +%46 = OpBitcast %35 %38 +%47 = OpCompositeExtract %30 %46 0 +%48 = OpCompositeExtract %30 %46 1 +%49 = OpBitcast %35 %39 +%50 = OpCompositeExtract %30 %49 0 +%51 = OpCompositeExtract %30 %49 1 +%53 = OpCompositeConstruct %52 %41 %42 %44 %45 %47 %48 %50 %51 +%54 = OpCompositeExtract %30 %53 0 +%55 = OpCompositeExtract %30 %53 1 +%56 = OpCompositeExtract %30 %53 2 +%57 = OpCompositeExtract %30 %53 3 +%58 = OpFConvert %10 %54 +%59 = OpFConvert %10 %55 +%60 = OpFConvert %10 %56 +%61 = OpFConvert %10 %57 +%62 = OpFAdd %10 %58 %26 +%63 = OpFAdd %10 %59 %27 +%64 = OpFAdd %10 %60 %28 +%65 = OpFAdd %10 %61 %29 +%66 = OpCompositeExtract %30 %53 4 +%67 = OpCompositeExtract %30 %53 5 +%68 = OpCompositeExtract %30 %53 6 +%69 = 
OpCompositeExtract %30 %53 7 +%70 = OpFConvert %10 %66 +%71 = OpFConvert %10 %67 +%72 = OpFConvert %10 %68 +%73 = OpFConvert %10 %69 +%74 = OpFAdd %10 %62 %70 +%75 = OpFAdd %10 %63 %71 +%76 = OpFAdd %10 %64 %72 +%77 = OpFAdd %10 %65 %73 +%84 = OpBitcast %83 %17 +%86 = OpInBoundsAccessChain %85 %84 %16 %79 +%87 = OpLoad %80 %86 Aligned 16 +%88 = OpCompositeExtract %78 %87 0 +%89 = OpCompositeExtract %78 %87 1 +%91 = OpBitcast %83 %17 +%92 = OpInBoundsAccessChain %85 %91 %16 %90 +%93 = OpLoad %80 %92 Aligned 16 +%94 = OpCompositeExtract %78 %93 0 +%95 = OpCompositeExtract %78 %93 1 +%96 = OpConvertSToF %10 %88 +%97 = OpConvertSToF %10 %89 +%98 = OpConvertSToF %10 %94 +%99 = OpConvertSToF %10 %95 +%100 = OpFAdd %10 %74 %96 +%101 = OpFAdd %10 %75 %97 +%102 = OpFAdd %10 %76 %98 +%103 = OpFAdd %10 %77 %99 +%105 = OpAccessChain %104 %13 %16 +OpStore %105 %100 +%106 = OpAccessChain %104 %13 %31 +OpStore %106 %101 +%107 = OpAccessChain %104 %13 %79 +OpStore %107 %102 +%108 = OpAccessChain %104 %13 %90 +OpStore %108 %103 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag index ec79c5f..8b2179b 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag @@ -1,23 +1,18 @@ #version 460 #extension GL_ARB_gpu_shader_int64 : require #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerUint642NonWriteCBVArray; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerFloat4NonWriteCBVArray { - uint _m0; - uint _m1; + vec4 value[4096]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; 
-layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerUint642NonWriteCBVArray { - vec4 value; -}; - -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite -{ - u64vec2 value; + u64vec2 value[4096]; }; layout(push_constant, std430) uniform RootConstants @@ -32,25 +27,15 @@ layout(location = 0) out vec4 SV_Target; void main() { - AddCarry _23; - _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); - PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); - AddCarry _42; - _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1); - PhysicalPointerFloat4NonWrite _47 = PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1)); - AddCarry _62; - _62._m0 = uaddCarry(registers._m0.x, 2u * 16u, _62._m1); - PhysicalPointerFloat4NonWrite _67 = PhysicalPointerFloat4NonWrite(uvec2(_62._m0, registers._m0.y + _62._m1)); - AddCarry _83; - _83._m0 = uaddCarry(registers._m0.x, 3u * 16u, _83._m1); - PhysicalPointerUint642NonWrite _91 = PhysicalPointerUint642NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1)); - AddCarry _101; - _101._m0 = uaddCarry(registers._m0.x, 4u * 16u, _101._m1); - PhysicalPointerUint642NonWrite _106 = PhysicalPointerUint642NonWrite(uvec2(_101._m0, registers._m0.y + _101._m1)); - SV_Target.x = ((_47.value.x + _30.value.x) + _67.value.x) + float(int64_t(_91.value.x)); - SV_Target.y = ((_47.value.y + _30.value.y) + _67.value.y) + float(int64_t(_91.value.y)); - SV_Target.z = ((_47.value.z + _30.value.z) + _67.value.z) + float(int64_t(_106.value.x)); - SV_Target.w = ((_47.value.w + _30.value.w) + _67.value.w) + float(int64_t(_106.value.y)); + PhysicalPointerFloat4NonWriteCBVArray _22 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerFloat4NonWriteCBVArray _31 = 
PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerFloat4NonWriteCBVArray _43 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerUint642NonWriteCBVArray _60 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + PhysicalPointerUint642NonWriteCBVArray _67 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + SV_Target.x = ((_31.value[1u].x + _22.value[0u].x) + _43.value[2u].x) + float(int64_t(_60.value[3u].x)); + SV_Target.y = ((_31.value[1u].y + _22.value[0u].y) + _43.value[2u].y) + float(int64_t(_60.value[3u].y)); + SV_Target.z = ((_31.value[1u].z + _22.value[0u].z) + _43.value[2u].z) + float(int64_t(_67.value[4u].x)); + SV_Target.w = ((_31.value[1u].w + _22.value[0u].w) + _43.value[2u].w) + float(int64_t(_67.value[4u].y)); } @@ -59,7 +44,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 126 +; Bound: 87 ; Schema: 0 OpCapability Shader OpCapability Int64 @@ -72,23 +57,24 @@ OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" OpName %13 "SV_Target" -OpName %22 "AddCarry" -OpName %28 "PhysicalPointerFloat4NonWrite" -OpMemberName %28 0 "value" -OpName %89 "PhysicalPointerUint642NonWrite" -OpMemberName %89 0 "value" +OpName %20 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %20 0 "value" +OpName %58 "PhysicalPointerUint642NonWriteCBVArray" +OpMemberName %58 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %13 Location 0 -OpMemberDecorate %28 0 Offset 0 -OpDecorate %28 Block -OpMemberDecorate %28 0 NonWritable -OpMemberDecorate %89 0 Offset 0 -OpDecorate %89 Block -OpMemberDecorate %89 0 NonWritable +OpDecorate %19 ArrayStride 16 +OpMemberDecorate %20 0 Offset 0 +OpDecorate %20 Block +OpMemberDecorate %20 0 NonWritable +OpDecorate %57 ArrayStride 16 +OpMemberDecorate %58 0 Offset 0 +OpDecorate %58 Block +OpMemberDecorate %58 0 NonWritable %1 = 
OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -102,122 +88,83 @@ OpMemberDecorate %89 0 NonWritable %13 = OpVariable %12 Output %14 = OpTypePointer PushConstant %6 %16 = OpConstant %5 0 -%19 = OpConstant %5 16 -%22 = OpTypeStruct %5 %5 -%28 = OpTypeStruct %11 -%29 = OpTypePointer PhysicalStorageBuffer %28 -%31 = OpTypePointer PhysicalStorageBuffer %11 -%38 = OpConstant %5 1 -%58 = OpConstant %5 2 -%78 = OpConstant %5 3 -%80 = OpTypeInt 64 0 -%88 = OpTypeVector %80 2 -%89 = OpTypeStruct %88 -%90 = OpTypePointer PhysicalStorageBuffer %89 -%92 = OpTypePointer PhysicalStorageBuffer %88 -%97 = OpConstant %5 4 -%119 = OpTypePointer Output %10 +%18 = OpConstant %5 4096 +%19 = OpTypeArray %11 %18 +%20 = OpTypeStruct %19 +%21 = OpTypePointer PhysicalStorageBuffer %20 +%23 = OpTypePointer PhysicalStorageBuffer %11 +%30 = OpConstant %5 1 +%42 = OpConstant %5 2 +%54 = OpTypeInt 64 0 +%55 = OpConstant %5 3 +%56 = OpTypeVector %54 2 +%57 = OpTypeArray %56 %18 +%58 = OpTypeStruct %57 +%59 = OpTypePointer PhysicalStorageBuffer %58 +%61 = OpTypePointer PhysicalStorageBuffer %56 +%66 = OpConstant %5 4 +%80 = OpTypePointer Output %10 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %124 -%124 = OpLabel +OpBranch %85 +%85 = OpLabel %15 = OpAccessChain %14 %9 %16 %17 = OpLoad %6 %15 -%18 = OpIMul %5 %16 %19 -%20 = OpCompositeExtract %5 %17 0 -%21 = OpCompositeExtract %5 %17 1 -%23 = OpIAddCarry %22 %20 %18 -%24 = OpCompositeExtract %5 %23 0 -%25 = OpCompositeExtract %5 %23 1 -%26 = OpIAdd %5 %21 %25 -%27 = OpCompositeConstruct %6 %24 %26 -%30 = OpBitcast %29 %27 -%32 = OpAccessChain %31 %30 %16 +%22 = OpBitcast %21 %17 +%24 = OpInBoundsAccessChain %23 %22 %16 %16 +%25 = OpLoad %11 %24 Aligned 16 +%26 = OpCompositeExtract %10 %25 0 +%27 = OpCompositeExtract %10 %25 1 +%28 = OpCompositeExtract %10 %25 2 +%29 = OpCompositeExtract %10 %25 3 +%31 = OpBitcast %21 %17 +%32 = OpInBoundsAccessChain %23 %31 %16 %30 %33 = OpLoad %11 %32 Aligned 16 %34 = OpCompositeExtract %10 %33 0 
%35 = OpCompositeExtract %10 %33 1 %36 = OpCompositeExtract %10 %33 2 %37 = OpCompositeExtract %10 %33 3 -%39 = OpIMul %5 %38 %19 -%40 = OpCompositeExtract %5 %17 0 -%41 = OpCompositeExtract %5 %17 1 -%42 = OpIAddCarry %22 %40 %39 -%43 = OpCompositeExtract %5 %42 0 -%44 = OpCompositeExtract %5 %42 1 -%45 = OpIAdd %5 %41 %44 -%46 = OpCompositeConstruct %6 %43 %45 -%47 = OpBitcast %29 %46 -%48 = OpAccessChain %31 %47 %16 -%49 = OpLoad %11 %48 Aligned 16 -%50 = OpCompositeExtract %10 %49 0 -%51 = OpCompositeExtract %10 %49 1 -%52 = OpCompositeExtract %10 %49 2 -%53 = OpCompositeExtract %10 %49 3 -%54 = OpFAdd %10 %50 %34 -%55 = OpFAdd %10 %51 %35 -%56 = OpFAdd %10 %52 %36 -%57 = OpFAdd %10 %53 %37 -%59 = OpIMul %5 %58 %19 -%60 = OpCompositeExtract %5 %17 0 -%61 = OpCompositeExtract %5 %17 1 -%62 = OpIAddCarry %22 %60 %59 -%63 = OpCompositeExtract %5 %62 0 -%64 = OpCompositeExtract %5 %62 1 -%65 = OpIAdd %5 %61 %64 -%66 = OpCompositeConstruct %6 %63 %65 -%67 = OpBitcast %29 %66 -%68 = OpAccessChain %31 %67 %16 -%69 = OpLoad %11 %68 Aligned 16 -%70 = OpCompositeExtract %10 %69 0 -%71 = OpCompositeExtract %10 %69 1 -%72 = OpCompositeExtract %10 %69 2 -%73 = OpCompositeExtract %10 %69 3 -%74 = OpFAdd %10 %54 %70 -%75 = OpFAdd %10 %55 %71 -%76 = OpFAdd %10 %56 %72 -%77 = OpFAdd %10 %57 %73 -%79 = OpIMul %5 %78 %19 -%81 = OpCompositeExtract %5 %17 0 -%82 = OpCompositeExtract %5 %17 1 -%83 = OpIAddCarry %22 %81 %79 -%84 = OpCompositeExtract %5 %83 0 -%85 = OpCompositeExtract %5 %83 1 -%86 = OpIAdd %5 %82 %85 -%87 = OpCompositeConstruct %6 %84 %86 -%91 = OpBitcast %90 %87 -%93 = OpAccessChain %92 %91 %16 -%94 = OpLoad %88 %93 Aligned 16 -%95 = OpCompositeExtract %80 %94 0 -%96 = OpCompositeExtract %80 %94 1 -%98 = OpIMul %5 %97 %19 -%99 = OpCompositeExtract %5 %17 0 -%100 = OpCompositeExtract %5 %17 1 -%101 = OpIAddCarry %22 %99 %98 -%102 = OpCompositeExtract %5 %101 0 -%103 = OpCompositeExtract %5 %101 1 -%104 = OpIAdd %5 %100 %103 -%105 = OpCompositeConstruct %6 %102 %104 
-%106 = OpBitcast %90 %105 -%107 = OpAccessChain %92 %106 %16 -%108 = OpLoad %88 %107 Aligned 16 -%109 = OpCompositeExtract %80 %108 0 -%110 = OpCompositeExtract %80 %108 1 -%111 = OpConvertSToF %10 %95 -%112 = OpConvertSToF %10 %96 -%113 = OpConvertSToF %10 %109 -%114 = OpConvertSToF %10 %110 -%115 = OpFAdd %10 %74 %111 -%116 = OpFAdd %10 %75 %112 -%117 = OpFAdd %10 %76 %113 -%118 = OpFAdd %10 %77 %114 -%120 = OpAccessChain %119 %13 %16 -OpStore %120 %115 -%121 = OpAccessChain %119 %13 %38 -OpStore %121 %116 -%122 = OpAccessChain %119 %13 %58 -OpStore %122 %117 -%123 = OpAccessChain %119 %13 %78 -OpStore %123 %118 +%38 = OpFAdd %10 %34 %26 +%39 = OpFAdd %10 %35 %27 +%40 = OpFAdd %10 %36 %28 +%41 = OpFAdd %10 %37 %29 +%43 = OpBitcast %21 %17 +%44 = OpInBoundsAccessChain %23 %43 %16 %42 +%45 = OpLoad %11 %44 Aligned 16 +%46 = OpCompositeExtract %10 %45 0 +%47 = OpCompositeExtract %10 %45 1 +%48 = OpCompositeExtract %10 %45 2 +%49 = OpCompositeExtract %10 %45 3 +%50 = OpFAdd %10 %38 %46 +%51 = OpFAdd %10 %39 %47 +%52 = OpFAdd %10 %40 %48 +%53 = OpFAdd %10 %41 %49 +%60 = OpBitcast %59 %17 +%62 = OpInBoundsAccessChain %61 %60 %16 %55 +%63 = OpLoad %56 %62 Aligned 16 +%64 = OpCompositeExtract %54 %63 0 +%65 = OpCompositeExtract %54 %63 1 +%67 = OpBitcast %59 %17 +%68 = OpInBoundsAccessChain %61 %67 %16 %66 +%69 = OpLoad %56 %68 Aligned 16 +%70 = OpCompositeExtract %54 %69 0 +%71 = OpCompositeExtract %54 %69 1 +%72 = OpConvertSToF %10 %64 +%73 = OpConvertSToF %10 %65 +%74 = OpConvertSToF %10 %70 +%75 = OpConvertSToF %10 %71 +%76 = OpFAdd %10 %50 %72 +%77 = OpFAdd %10 %51 %73 +%78 = OpFAdd %10 %52 %74 +%79 = OpFAdd %10 %53 %75 +%81 = OpAccessChain %80 %13 %16 +OpStore %81 %76 +%82 = OpAccessChain %80 %13 %30 +OpStore %82 %77 +%83 = OpAccessChain %80 %13 %42 +OpStore %83 %78 +%84 = OpAccessChain %80 %13 %55 +OpStore %84 %79 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag 
b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag index 4f60fc9..6ff9f96 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag @@ -9,23 +9,18 @@ #extension GL_EXT_shader_16bit_storage : require #extension GL_ARB_gpu_shader_int64 : require #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerUint642NonWriteCBVArray; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerFloat4NonWriteCBVArray { - uint _m0; - uint _m1; + vec4 value[4096]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerUint642NonWrite; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerUint642NonWriteCBVArray { - vec4 value; -}; - -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint642NonWrite -{ - u64vec2 value; + u64vec2 value[4096]; }; layout(push_constant, std430) uniform RootConstants @@ -40,25 +35,15 @@ layout(location = 0) out vec4 SV_Target; void main() { - AddCarry _23; - _23._m0 = uaddCarry(registers._m0.x, 0u * 16u, _23._m1); - PhysicalPointerFloat4NonWrite _30 = PhysicalPointerFloat4NonWrite(uvec2(_23._m0, registers._m0.y + _23._m1)); - AddCarry _42; - _42._m0 = uaddCarry(registers._m0.x, 1u * 16u, _42._m1); - f16vec4 _52 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_42._m0, registers._m0.y + _42._m1)).value); - AddCarry _69; - _69._m0 = uaddCarry(registers._m0.x, 2u * 16u, _69._m1); - f16vec4 _77 = f16vec4(PhysicalPointerFloat4NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value); - 
AddCarry _95; - _95._m0 = uaddCarry(registers._m0.x, 3u * 16u, _95._m1); - PhysicalPointerUint642NonWrite _103 = PhysicalPointerUint642NonWrite(uvec2(_95._m0, registers._m0.y + _95._m1)); - AddCarry _113; - _113._m0 = uaddCarry(registers._m0.x, 4u * 16u, _113._m1); - PhysicalPointerUint642NonWrite _118 = PhysicalPointerUint642NonWrite(uvec2(_113._m0, registers._m0.y + _113._m1)); - SV_Target.x = ((float(_52.x) + _30.value.x) + float(_77.x)) + float(int64_t(_103.value.x)); - SV_Target.y = ((float(_52.y) + _30.value.y) + float(_77.y)) + float(int64_t(_103.value.y)); - SV_Target.z = ((float(_52.z) + _30.value.z) + float(_77.z)) + float(int64_t(_118.value.x)); - SV_Target.w = ((float(_52.w) + _30.value.w) + float(_77.w)) + float(int64_t(_118.value.y)); + PhysicalPointerFloat4NonWriteCBVArray _22 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + f16vec4 _36 = f16vec4(PhysicalPointerFloat4NonWriteCBVArray(registers._m0).value[1u]); + f16vec4 _53 = f16vec4(PhysicalPointerFloat4NonWriteCBVArray(registers._m0).value[2u]); + PhysicalPointerUint642NonWriteCBVArray _72 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + PhysicalPointerUint642NonWriteCBVArray _79 = PhysicalPointerUint642NonWriteCBVArray(registers._m0); + SV_Target.x = ((float(_36.x) + _22.value[0u].x) + float(_53.x)) + float(int64_t(_72.value[3u].x)); + SV_Target.y = ((float(_36.y) + _22.value[0u].y) + float(_53.y)) + float(int64_t(_72.value[3u].y)); + SV_Target.z = ((float(_36.z) + _22.value[0u].z) + float(_53.z)) + float(int64_t(_79.value[4u].x)); + SV_Target.w = ((float(_36.w) + _22.value[0u].w) + float(_53.w)) + float(int64_t(_79.value[4u].y)); } @@ -67,7 +52,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 138 +; Bound: 99 ; Schema: 0 OpCapability Shader OpCapability Float16 @@ -81,23 +66,24 @@ OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" OpName %13 "SV_Target" -OpName %22 "AddCarry" -OpName %28 "PhysicalPointerFloat4NonWrite" 
-OpMemberName %28 0 "value" -OpName %101 "PhysicalPointerUint642NonWrite" -OpMemberName %101 0 "value" +OpName %20 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %20 0 "value" +OpName %70 "PhysicalPointerUint642NonWriteCBVArray" +OpMemberName %70 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %13 Location 0 -OpMemberDecorate %28 0 Offset 0 -OpDecorate %28 Block -OpMemberDecorate %28 0 NonWritable -OpMemberDecorate %101 0 Offset 0 -OpDecorate %101 Block -OpMemberDecorate %101 0 NonWritable +OpDecorate %19 ArrayStride 16 +OpMemberDecorate %20 0 Offset 0 +OpDecorate %20 Block +OpMemberDecorate %20 0 NonWritable +OpDecorate %69 ArrayStride 16 +OpMemberDecorate %70 0 Offset 0 +OpDecorate %70 Block +OpMemberDecorate %70 0 NonWritable %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -111,134 +97,95 @@ OpMemberDecorate %101 0 NonWritable %13 = OpVariable %12 Output %14 = OpTypePointer PushConstant %6 %16 = OpConstant %5 0 -%19 = OpConstant %5 16 -%22 = OpTypeStruct %5 %5 -%28 = OpTypeStruct %11 -%29 = OpTypePointer PhysicalStorageBuffer %28 -%31 = OpTypePointer PhysicalStorageBuffer %11 -%38 = OpConstant %5 1 -%50 = OpTypeFloat 16 -%51 = OpTypeVector %50 4 -%65 = OpConstant %5 2 -%90 = OpConstant %5 3 -%92 = OpTypeInt 64 0 -%100 = OpTypeVector %92 2 -%101 = OpTypeStruct %100 -%102 = OpTypePointer PhysicalStorageBuffer %101 -%104 = OpTypePointer PhysicalStorageBuffer %100 -%109 = OpConstant %5 4 -%131 = OpTypePointer Output %10 +%18 = OpConstant %5 4096 +%19 = OpTypeArray %11 %18 +%20 = OpTypeStruct %19 +%21 = OpTypePointer PhysicalStorageBuffer %20 +%23 = OpTypePointer PhysicalStorageBuffer %11 +%30 = OpConstant %5 1 +%34 = OpTypeFloat 16 +%35 = OpTypeVector %34 4 +%49 = OpConstant %5 2 +%66 = OpTypeInt 64 0 +%67 = OpConstant %5 3 +%68 = OpTypeVector %66 2 +%69 = OpTypeArray %68 %18 +%70 = OpTypeStruct %69 +%71 = OpTypePointer 
PhysicalStorageBuffer %70 +%73 = OpTypePointer PhysicalStorageBuffer %68 +%78 = OpConstant %5 4 +%92 = OpTypePointer Output %10 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %136 -%136 = OpLabel +OpBranch %97 +%97 = OpLabel %15 = OpAccessChain %14 %9 %16 %17 = OpLoad %6 %15 -%18 = OpIMul %5 %16 %19 -%20 = OpCompositeExtract %5 %17 0 -%21 = OpCompositeExtract %5 %17 1 -%23 = OpIAddCarry %22 %20 %18 -%24 = OpCompositeExtract %5 %23 0 -%25 = OpCompositeExtract %5 %23 1 -%26 = OpIAdd %5 %21 %25 -%27 = OpCompositeConstruct %6 %24 %26 -%30 = OpBitcast %29 %27 -%32 = OpAccessChain %31 %30 %16 +%22 = OpBitcast %21 %17 +%24 = OpInBoundsAccessChain %23 %22 %16 %16 +%25 = OpLoad %11 %24 Aligned 16 +%26 = OpCompositeExtract %10 %25 0 +%27 = OpCompositeExtract %10 %25 1 +%28 = OpCompositeExtract %10 %25 2 +%29 = OpCompositeExtract %10 %25 3 +%31 = OpBitcast %21 %17 +%32 = OpInBoundsAccessChain %23 %31 %16 %30 %33 = OpLoad %11 %32 Aligned 16 -%34 = OpCompositeExtract %10 %33 0 -%35 = OpCompositeExtract %10 %33 1 -%36 = OpCompositeExtract %10 %33 2 -%37 = OpCompositeExtract %10 %33 3 -%39 = OpIMul %5 %38 %19 -%40 = OpCompositeExtract %5 %17 0 -%41 = OpCompositeExtract %5 %17 1 -%42 = OpIAddCarry %22 %40 %39 -%43 = OpCompositeExtract %5 %42 0 -%44 = OpCompositeExtract %5 %42 1 -%45 = OpIAdd %5 %41 %44 -%46 = OpCompositeConstruct %6 %43 %45 -%47 = OpBitcast %29 %46 -%48 = OpAccessChain %31 %47 %16 -%49 = OpLoad %11 %48 Aligned 16 -%52 = OpFConvert %51 %49 -%53 = OpCompositeExtract %50 %52 0 -%54 = OpCompositeExtract %50 %52 1 -%55 = OpCompositeExtract %50 %52 2 -%56 = OpCompositeExtract %50 %52 3 -%57 = OpFConvert %10 %53 +%36 = OpFConvert %35 %33 +%37 = OpCompositeExtract %34 %36 0 +%38 = OpCompositeExtract %34 %36 1 +%39 = OpCompositeExtract %34 %36 2 +%40 = OpCompositeExtract %34 %36 3 +%41 = OpFConvert %10 %37 +%42 = OpFConvert %10 %38 +%43 = OpFConvert %10 %39 +%44 = OpFConvert %10 %40 +%45 = OpFAdd %10 %41 %26 +%46 = OpFAdd %10 %42 %27 +%47 = OpFAdd %10 %43 %28 +%48 = 
OpFAdd %10 %44 %29 +%50 = OpBitcast %21 %17 +%51 = OpInBoundsAccessChain %23 %50 %16 %49 +%52 = OpLoad %11 %51 Aligned 16 +%53 = OpFConvert %35 %52 +%54 = OpCompositeExtract %34 %53 0 +%55 = OpCompositeExtract %34 %53 1 +%56 = OpCompositeExtract %34 %53 2 +%57 = OpCompositeExtract %34 %53 3 %58 = OpFConvert %10 %54 %59 = OpFConvert %10 %55 %60 = OpFConvert %10 %56 -%61 = OpFAdd %10 %57 %34 -%62 = OpFAdd %10 %58 %35 -%63 = OpFAdd %10 %59 %36 -%64 = OpFAdd %10 %60 %37 -%66 = OpIMul %5 %65 %19 -%67 = OpCompositeExtract %5 %17 0 -%68 = OpCompositeExtract %5 %17 1 -%69 = OpIAddCarry %22 %67 %66 -%70 = OpCompositeExtract %5 %69 0 -%71 = OpCompositeExtract %5 %69 1 -%72 = OpIAdd %5 %68 %71 -%73 = OpCompositeConstruct %6 %70 %72 -%74 = OpBitcast %29 %73 -%75 = OpAccessChain %31 %74 %16 -%76 = OpLoad %11 %75 Aligned 16 -%77 = OpFConvert %51 %76 -%78 = OpCompositeExtract %50 %77 0 -%79 = OpCompositeExtract %50 %77 1 -%80 = OpCompositeExtract %50 %77 2 -%81 = OpCompositeExtract %50 %77 3 -%82 = OpFConvert %10 %78 -%83 = OpFConvert %10 %79 -%84 = OpFConvert %10 %80 -%85 = OpFConvert %10 %81 -%86 = OpFAdd %10 %61 %82 -%87 = OpFAdd %10 %62 %83 -%88 = OpFAdd %10 %63 %84 -%89 = OpFAdd %10 %64 %85 -%91 = OpIMul %5 %90 %19 -%93 = OpCompositeExtract %5 %17 0 -%94 = OpCompositeExtract %5 %17 1 -%95 = OpIAddCarry %22 %93 %91 -%96 = OpCompositeExtract %5 %95 0 -%97 = OpCompositeExtract %5 %95 1 -%98 = OpIAdd %5 %94 %97 -%99 = OpCompositeConstruct %6 %96 %98 -%103 = OpBitcast %102 %99 -%105 = OpAccessChain %104 %103 %16 -%106 = OpLoad %100 %105 Aligned 16 -%107 = OpCompositeExtract %92 %106 0 -%108 = OpCompositeExtract %92 %106 1 -%110 = OpIMul %5 %109 %19 -%111 = OpCompositeExtract %5 %17 0 -%112 = OpCompositeExtract %5 %17 1 -%113 = OpIAddCarry %22 %111 %110 -%114 = OpCompositeExtract %5 %113 0 -%115 = OpCompositeExtract %5 %113 1 -%116 = OpIAdd %5 %112 %115 -%117 = OpCompositeConstruct %6 %114 %116 -%118 = OpBitcast %102 %117 -%119 = OpAccessChain %104 %118 %16 -%120 = OpLoad %100 
%119 Aligned 16 -%121 = OpCompositeExtract %92 %120 0 -%122 = OpCompositeExtract %92 %120 1 -%123 = OpConvertSToF %10 %107 -%124 = OpConvertSToF %10 %108 -%125 = OpConvertSToF %10 %121 -%126 = OpConvertSToF %10 %122 -%127 = OpFAdd %10 %86 %123 -%128 = OpFAdd %10 %87 %124 -%129 = OpFAdd %10 %88 %125 -%130 = OpFAdd %10 %89 %126 -%132 = OpAccessChain %131 %13 %16 -OpStore %132 %127 -%133 = OpAccessChain %131 %13 %38 -OpStore %133 %128 -%134 = OpAccessChain %131 %13 %65 -OpStore %134 %129 -%135 = OpAccessChain %131 %13 %90 -OpStore %135 %130 +%61 = OpFConvert %10 %57 +%62 = OpFAdd %10 %45 %58 +%63 = OpFAdd %10 %46 %59 +%64 = OpFAdd %10 %47 %60 +%65 = OpFAdd %10 %48 %61 +%72 = OpBitcast %71 %17 +%74 = OpInBoundsAccessChain %73 %72 %16 %67 +%75 = OpLoad %68 %74 Aligned 16 +%76 = OpCompositeExtract %66 %75 0 +%77 = OpCompositeExtract %66 %75 1 +%79 = OpBitcast %71 %17 +%80 = OpInBoundsAccessChain %73 %79 %16 %78 +%81 = OpLoad %68 %80 Aligned 16 +%82 = OpCompositeExtract %66 %81 0 +%83 = OpCompositeExtract %66 %81 1 +%84 = OpConvertSToF %10 %76 +%85 = OpConvertSToF %10 %77 +%86 = OpConvertSToF %10 %82 +%87 = OpConvertSToF %10 %83 +%88 = OpFAdd %10 %62 %84 +%89 = OpFAdd %10 %63 %85 +%90 = OpFAdd %10 %64 %86 +%91 = OpFAdd %10 %65 %87 +%93 = OpAccessChain %92 %13 %16 +OpStore %93 %88 +%94 = OpAccessChain %92 %13 %30 +OpStore %94 %89 +%95 = OpAccessChain %92 %13 %49 +OpStore %95 %90 +%96 = OpAccessChain %92 %13 %67 +OpStore %96 %91 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss index 5c816f7..0d79ef9 100644 --- a/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss +++ b/reference/shaders/resources/cbv.no-legacy-cbuf-layout.local-root-signature.rmiss @@ -30,24 +30,18 @@ uvec4 _54; void main() { - vec4 _37 = _38; + vec4 _37; _37.x = uintBitsToFloat(SBT._m0[0u]); - vec4 _39 = _37; - _39.y = 
float(SBT._m0[1u]); - vec4 _40 = _39; - _40.z = float(int(SBT._m0[2u])); - vec4 _41 = _40; - _41.w = 1.0; - uvec4 _53 = _54; + _37.y = float(SBT._m0[1u]); + _37.z = float(int(SBT._m0[2u])); + _37.w = 1.0; + uvec4 _53; _53.x = uint(int(uintBitsToFloat(SBT._m1[0u]))); - uvec4 _55 = _53; - _55.y = uint(int(uintBitsToFloat(SBT._m1[1u]))); - uvec4 _56 = _55; - _56.z = SBT._m1[2u]; - uvec4 _57 = _56; - _57.w = SBT._m0[2u]; - payload._m0 = _41; - payload._m1 = _57; + _53.y = uint(int(uintBitsToFloat(SBT._m1[1u]))); + _53.z = SBT._m1[2u]; + _53.w = SBT._m0[2u]; + payload._m0 = _37; + payload._m1 = _53; } diff --git a/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag index 8eb931e..fb6a0ab 100644 --- a/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag +++ b/reference/shaders/resources/cbv.root-descriptor.no-legacy-cbuf-layout.frag @@ -9,29 +9,24 @@ #endif #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloatNonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerUint64NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerHalfNonWriteCBVArray; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloatNonWriteCBVArray { - uint _m0; - uint _m1; + float value[16384]; }; -layout(buffer_reference) buffer PhysicalPointerFloatNonWrite; -layout(buffer_reference) buffer PhysicalPointerUint64NonWrite; -layout(buffer_reference) buffer PhysicalPointerHalfNonWrite; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloatNonWrite +layout(buffer_reference, buffer_reference_align = 8, std430) readonly buffer PhysicalPointerUint64NonWriteCBVArray { - float value; + uint64_t value[8192]; }; -layout(buffer_reference, std430) readonly 
buffer PhysicalPointerUint64NonWrite +layout(buffer_reference, buffer_reference_align = 2, std430) readonly buffer PhysicalPointerHalfNonWriteCBVArray { - uint64_t value; -}; - -layout(buffer_reference, std430) readonly buffer PhysicalPointerHalfNonWrite -{ - float16_t value; + float16_t value[32768]; }; layout(push_constant, std430) uniform RootConstants @@ -46,34 +41,10 @@ layout(location = 0) out vec4 SV_Target; void main() { - AddCarry _21; - _21._m0 = uaddCarry(registers._m0.x, 0u, _21._m1); - AddCarry _35; - _35._m0 = uaddCarry(registers._m0.x, 4u, _35._m1); - AddCarry _46; - _46._m0 = uaddCarry(registers._m0.x, 8u, _46._m1); - AddCarry _57; - _57._m0 = uaddCarry(registers._m0.x, 12u, _57._m1); - AddCarry _69; - _69._m0 = uaddCarry(registers._m0.x, 32u, _69._m1); - AddCarry _83; - _83._m0 = uaddCarry(registers._m0.x, 40u, _83._m1); - AddCarry _94; - _94._m0 = uaddCarry(registers._m0.x, 48u, _94._m1); - AddCarry _105; - _105._m0 = uaddCarry(registers._m0.x, 56u, _105._m1); - AddCarry _125; - _125._m0 = uaddCarry(registers._m0.x, 16u, _125._m1); - AddCarry _139; - _139._m0 = uaddCarry(registers._m0.x, 20u, _139._m1); - AddCarry _150; - _150._m0 = uaddCarry(registers._m0.x, 24u, _150._m1); - AddCarry _161; - _161._m0 = uaddCarry(registers._m0.x, 28u, _161._m1); - SV_Target.x = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_69._m0, registers._m0.y + _69._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_21._m0, registers._m0.y + _21._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_125._m0, registers._m0.y + _125._m1)).value); - SV_Target.y = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_83._m0, registers._m0.y + _83._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_35._m0, registers._m0.y + _35._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_139._m0, registers._m0.y + _139._m1)).value); - SV_Target.z = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_94._m0, registers._m0.y + _94._m1)).value)) + 
PhysicalPointerFloatNonWrite(uvec2(_46._m0, registers._m0.y + _46._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_150._m0, registers._m0.y + _150._m1)).value); - SV_Target.w = (float(int64_t(PhysicalPointerUint64NonWrite(uvec2(_105._m0, registers._m0.y + _105._m1)).value)) + PhysicalPointerFloatNonWrite(uvec2(_57._m0, registers._m0.y + _57._m1)).value) + float(PhysicalPointerHalfNonWrite(uvec2(_161._m0, registers._m0.y + _161._m1)).value); + SV_Target.x = (float(int64_t(PhysicalPointerUint64NonWriteCBVArray(registers._m0).value[4u])) + PhysicalPointerFloatNonWriteCBVArray(registers._m0).value[0u]) + float(PhysicalPointerHalfNonWriteCBVArray(registers._m0).value[8u]); + SV_Target.y = (float(int64_t(PhysicalPointerUint64NonWriteCBVArray(registers._m0).value[5u])) + PhysicalPointerFloatNonWriteCBVArray(registers._m0).value[1u]) + float(PhysicalPointerHalfNonWriteCBVArray(registers._m0).value[10u]); + SV_Target.z = (float(int64_t(PhysicalPointerUint64NonWriteCBVArray(registers._m0).value[6u])) + PhysicalPointerFloatNonWriteCBVArray(registers._m0).value[2u]) + float(PhysicalPointerHalfNonWriteCBVArray(registers._m0).value[12u]); + SV_Target.w = (float(int64_t(PhysicalPointerUint64NonWriteCBVArray(registers._m0).value[7u])) + PhysicalPointerFloatNonWriteCBVArray(registers._m0).value[3u]) + float(PhysicalPointerHalfNonWriteCBVArray(registers._m0).value[14u]); } @@ -82,7 +53,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 187 +; Bound: 105 ; Schema: 0 OpCapability Shader OpCapability Float16 @@ -96,28 +67,30 @@ OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" OpName %13 "SV_Target" -OpName %20 "AddCarry" -OpName %26 "PhysicalPointerFloatNonWrite" -OpMemberName %26 0 "value" -OpName %74 "PhysicalPointerUint64NonWrite" -OpMemberName %74 0 "value" -OpName %130 "PhysicalPointerHalfNonWrite" -OpMemberName %130 0 "value" +OpName %20 "PhysicalPointerFloatNonWriteCBVArray" +OpMemberName %20 0 "value" +OpName %42 
"PhysicalPointerUint64NonWriteCBVArray" +OpMemberName %42 0 "value" +OpName %72 "PhysicalPointerHalfNonWriteCBVArray" +OpMemberName %72 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %13 Location 0 -OpMemberDecorate %26 0 Offset 0 -OpDecorate %26 Block -OpMemberDecorate %26 0 NonWritable -OpMemberDecorate %74 0 Offset 0 -OpDecorate %74 Block -OpMemberDecorate %74 0 NonWritable -OpMemberDecorate %130 0 Offset 0 -OpDecorate %130 Block -OpMemberDecorate %130 0 NonWritable +OpDecorate %19 ArrayStride 4 +OpMemberDecorate %20 0 Offset 0 +OpDecorate %20 Block +OpMemberDecorate %20 0 NonWritable +OpDecorate %41 ArrayStride 8 +OpMemberDecorate %42 0 Offset 0 +OpDecorate %42 Block +OpMemberDecorate %42 0 NonWritable +OpDecorate %71 ArrayStride 2 +OpMemberDecorate %72 0 Offset 0 +OpDecorate %72 Block +OpMemberDecorate %72 0 NonWritable %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -131,183 +104,101 @@ OpMemberDecorate %130 0 NonWritable %13 = OpVariable %12 Output %14 = OpTypePointer PushConstant %6 %16 = OpConstant %5 0 -%20 = OpTypeStruct %5 %5 -%26 = OpTypeStruct %10 -%27 = OpTypePointer PhysicalStorageBuffer %26 -%29 = OpTypePointer PhysicalStorageBuffer %10 -%32 = OpConstant %5 4 -%43 = OpConstant %5 8 -%54 = OpConstant %5 12 -%65 = OpConstant %5 32 -%66 = OpTypeInt 64 0 -%74 = OpTypeStruct %66 -%75 = OpTypePointer PhysicalStorageBuffer %74 -%77 = OpTypePointer PhysicalStorageBuffer %66 -%80 = OpConstant %5 40 -%91 = OpConstant %5 48 -%102 = OpConstant %5 56 -%121 = OpConstant %5 16 -%122 = OpTypeFloat 16 -%130 = OpTypeStruct %122 -%131 = OpTypePointer PhysicalStorageBuffer %130 -%133 = OpTypePointer PhysicalStorageBuffer %122 -%136 = OpConstant %5 20 -%147 = OpConstant %5 24 -%158 = OpConstant %5 28 -%177 = OpTypePointer Output %10 -%180 = OpConstant %5 1 -%182 = OpConstant %5 2 -%184 = OpConstant %5 3 +%18 = OpConstant %5 16384 +%19 = 
OpTypeArray %10 %18 +%20 = OpTypeStruct %19 +%21 = OpTypePointer PhysicalStorageBuffer %20 +%23 = OpTypePointer PhysicalStorageBuffer %10 +%26 = OpConstant %5 1 +%30 = OpConstant %5 2 +%34 = OpConstant %5 3 +%38 = OpTypeInt 64 0 +%39 = OpConstant %5 4 +%40 = OpConstant %5 8192 +%41 = OpTypeArray %38 %40 +%42 = OpTypeStruct %41 +%43 = OpTypePointer PhysicalStorageBuffer %42 +%45 = OpTypePointer PhysicalStorageBuffer %38 +%48 = OpConstant %5 5 +%52 = OpConstant %5 6 +%56 = OpConstant %5 7 +%68 = OpTypeFloat 16 +%69 = OpConstant %5 8 +%70 = OpConstant %5 32768 +%71 = OpTypeArray %68 %70 +%72 = OpTypeStruct %71 +%73 = OpTypePointer PhysicalStorageBuffer %72 +%75 = OpTypePointer PhysicalStorageBuffer %68 +%78 = OpConstant %5 10 +%82 = OpConstant %5 12 +%86 = OpConstant %5 14 +%98 = OpTypePointer Output %10 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %185 -%185 = OpLabel +OpBranch %103 +%103 = OpLabel %15 = OpAccessChain %14 %9 %16 %17 = OpLoad %6 %15 -%18 = OpCompositeExtract %5 %17 0 -%19 = OpCompositeExtract %5 %17 1 -%21 = OpIAddCarry %20 %18 %16 -%22 = OpCompositeExtract %5 %21 0 -%23 = OpCompositeExtract %5 %21 1 -%24 = OpIAdd %5 %19 %23 -%25 = OpCompositeConstruct %6 %22 %24 -%28 = OpBitcast %27 %25 -%30 = OpAccessChain %29 %28 %16 -%31 = OpLoad %10 %30 Aligned 4 -%33 = OpCompositeExtract %5 %17 0 -%34 = OpCompositeExtract %5 %17 1 -%35 = OpIAddCarry %20 %33 %32 -%36 = OpCompositeExtract %5 %35 0 -%37 = OpCompositeExtract %5 %35 1 -%38 = OpIAdd %5 %34 %37 -%39 = OpCompositeConstruct %6 %36 %38 -%40 = OpBitcast %27 %39 -%41 = OpAccessChain %29 %40 %16 -%42 = OpLoad %10 %41 Aligned 4 -%44 = OpCompositeExtract %5 %17 0 -%45 = OpCompositeExtract %5 %17 1 -%46 = OpIAddCarry %20 %44 %43 -%47 = OpCompositeExtract %5 %46 0 -%48 = OpCompositeExtract %5 %46 1 -%49 = OpIAdd %5 %45 %48 -%50 = OpCompositeConstruct %6 %47 %49 -%51 = OpBitcast %27 %50 -%52 = OpAccessChain %29 %51 %16 -%53 = OpLoad %10 %52 Aligned 4 -%55 = OpCompositeExtract %5 %17 0 -%56 = 
OpCompositeExtract %5 %17 1 -%57 = OpIAddCarry %20 %55 %54 -%58 = OpCompositeExtract %5 %57 0 -%59 = OpCompositeExtract %5 %57 1 -%60 = OpIAdd %5 %56 %59 -%61 = OpCompositeConstruct %6 %58 %60 -%62 = OpBitcast %27 %61 -%63 = OpAccessChain %29 %62 %16 -%64 = OpLoad %10 %63 Aligned 4 -%67 = OpCompositeExtract %5 %17 0 -%68 = OpCompositeExtract %5 %17 1 -%69 = OpIAddCarry %20 %67 %65 -%70 = OpCompositeExtract %5 %69 0 -%71 = OpCompositeExtract %5 %69 1 -%72 = OpIAdd %5 %68 %71 -%73 = OpCompositeConstruct %6 %70 %72 -%76 = OpBitcast %75 %73 -%78 = OpAccessChain %77 %76 %16 -%79 = OpLoad %66 %78 Aligned 8 -%81 = OpCompositeExtract %5 %17 0 -%82 = OpCompositeExtract %5 %17 1 -%83 = OpIAddCarry %20 %81 %80 -%84 = OpCompositeExtract %5 %83 0 -%85 = OpCompositeExtract %5 %83 1 -%86 = OpIAdd %5 %82 %85 -%87 = OpCompositeConstruct %6 %84 %86 -%88 = OpBitcast %75 %87 -%89 = OpAccessChain %77 %88 %16 -%90 = OpLoad %66 %89 Aligned 8 -%92 = OpCompositeExtract %5 %17 0 -%93 = OpCompositeExtract %5 %17 1 -%94 = OpIAddCarry %20 %92 %91 -%95 = OpCompositeExtract %5 %94 0 -%96 = OpCompositeExtract %5 %94 1 -%97 = OpIAdd %5 %93 %96 -%98 = OpCompositeConstruct %6 %95 %97 -%99 = OpBitcast %75 %98 -%100 = OpAccessChain %77 %99 %16 -%101 = OpLoad %66 %100 Aligned 8 -%103 = OpCompositeExtract %5 %17 0 -%104 = OpCompositeExtract %5 %17 1 -%105 = OpIAddCarry %20 %103 %102 -%106 = OpCompositeExtract %5 %105 0 -%107 = OpCompositeExtract %5 %105 1 -%108 = OpIAdd %5 %104 %107 -%109 = OpCompositeConstruct %6 %106 %108 -%110 = OpBitcast %75 %109 -%111 = OpAccessChain %77 %110 %16 -%112 = OpLoad %66 %111 Aligned 8 -%113 = OpConvertSToF %10 %79 -%114 = OpConvertSToF %10 %90 -%115 = OpConvertSToF %10 %101 -%116 = OpConvertSToF %10 %112 -%117 = OpFAdd %10 %113 %31 -%118 = OpFAdd %10 %114 %42 -%119 = OpFAdd %10 %115 %53 -%120 = OpFAdd %10 %116 %64 -%123 = OpCompositeExtract %5 %17 0 -%124 = OpCompositeExtract %5 %17 1 -%125 = OpIAddCarry %20 %123 %121 -%126 = OpCompositeExtract %5 %125 0 -%127 = 
OpCompositeExtract %5 %125 1 -%128 = OpIAdd %5 %124 %127 -%129 = OpCompositeConstruct %6 %126 %128 -%132 = OpBitcast %131 %129 -%134 = OpAccessChain %133 %132 %16 -%135 = OpLoad %122 %134 Aligned 2 -%137 = OpCompositeExtract %5 %17 0 -%138 = OpCompositeExtract %5 %17 1 -%139 = OpIAddCarry %20 %137 %136 -%140 = OpCompositeExtract %5 %139 0 -%141 = OpCompositeExtract %5 %139 1 -%142 = OpIAdd %5 %138 %141 -%143 = OpCompositeConstruct %6 %140 %142 -%144 = OpBitcast %131 %143 -%145 = OpAccessChain %133 %144 %16 -%146 = OpLoad %122 %145 Aligned 2 -%148 = OpCompositeExtract %5 %17 0 -%149 = OpCompositeExtract %5 %17 1 -%150 = OpIAddCarry %20 %148 %147 -%151 = OpCompositeExtract %5 %150 0 -%152 = OpCompositeExtract %5 %150 1 -%153 = OpIAdd %5 %149 %152 -%154 = OpCompositeConstruct %6 %151 %153 -%155 = OpBitcast %131 %154 -%156 = OpAccessChain %133 %155 %16 -%157 = OpLoad %122 %156 Aligned 2 -%159 = OpCompositeExtract %5 %17 0 -%160 = OpCompositeExtract %5 %17 1 -%161 = OpIAddCarry %20 %159 %158 -%162 = OpCompositeExtract %5 %161 0 -%163 = OpCompositeExtract %5 %161 1 -%164 = OpIAdd %5 %160 %163 -%165 = OpCompositeConstruct %6 %162 %164 -%166 = OpBitcast %131 %165 -%167 = OpAccessChain %133 %166 %16 -%168 = OpLoad %122 %167 Aligned 2 -%169 = OpFConvert %10 %135 -%170 = OpFConvert %10 %146 -%171 = OpFConvert %10 %157 -%172 = OpFConvert %10 %168 -%173 = OpFAdd %10 %117 %169 -%174 = OpFAdd %10 %118 %170 -%175 = OpFAdd %10 %119 %171 -%176 = OpFAdd %10 %120 %172 -%178 = OpAccessChain %177 %13 %16 -OpStore %178 %173 -%179 = OpAccessChain %177 %13 %180 -OpStore %179 %174 -%181 = OpAccessChain %177 %13 %182 -OpStore %181 %175 -%183 = OpAccessChain %177 %13 %184 -OpStore %183 %176 +%22 = OpBitcast %21 %17 +%24 = OpInBoundsAccessChain %23 %22 %16 %16 +%25 = OpLoad %10 %24 Aligned 4 +%27 = OpBitcast %21 %17 +%28 = OpInBoundsAccessChain %23 %27 %16 %26 +%29 = OpLoad %10 %28 Aligned 4 +%31 = OpBitcast %21 %17 +%32 = OpInBoundsAccessChain %23 %31 %16 %30 +%33 = OpLoad %10 %32 Aligned 4 
+%35 = OpBitcast %21 %17 +%36 = OpInBoundsAccessChain %23 %35 %16 %34 +%37 = OpLoad %10 %36 Aligned 4 +%44 = OpBitcast %43 %17 +%46 = OpInBoundsAccessChain %45 %44 %16 %39 +%47 = OpLoad %38 %46 Aligned 8 +%49 = OpBitcast %43 %17 +%50 = OpInBoundsAccessChain %45 %49 %16 %48 +%51 = OpLoad %38 %50 Aligned 8 +%53 = OpBitcast %43 %17 +%54 = OpInBoundsAccessChain %45 %53 %16 %52 +%55 = OpLoad %38 %54 Aligned 8 +%57 = OpBitcast %43 %17 +%58 = OpInBoundsAccessChain %45 %57 %16 %56 +%59 = OpLoad %38 %58 Aligned 8 +%60 = OpConvertSToF %10 %47 +%61 = OpConvertSToF %10 %51 +%62 = OpConvertSToF %10 %55 +%63 = OpConvertSToF %10 %59 +%64 = OpFAdd %10 %60 %25 +%65 = OpFAdd %10 %61 %29 +%66 = OpFAdd %10 %62 %33 +%67 = OpFAdd %10 %63 %37 +%74 = OpBitcast %73 %17 +%76 = OpInBoundsAccessChain %75 %74 %16 %69 +%77 = OpLoad %68 %76 Aligned 2 +%79 = OpBitcast %73 %17 +%80 = OpInBoundsAccessChain %75 %79 %16 %78 +%81 = OpLoad %68 %80 Aligned 2 +%83 = OpBitcast %73 %17 +%84 = OpInBoundsAccessChain %75 %83 %16 %82 +%85 = OpLoad %68 %84 Aligned 2 +%87 = OpBitcast %73 %17 +%88 = OpInBoundsAccessChain %75 %87 %16 %86 +%89 = OpLoad %68 %88 Aligned 2 +%90 = OpFConvert %10 %77 +%91 = OpFConvert %10 %81 +%92 = OpFConvert %10 %85 +%93 = OpFConvert %10 %89 +%94 = OpFAdd %10 %64 %90 +%95 = OpFAdd %10 %65 %91 +%96 = OpFAdd %10 %66 %92 +%97 = OpFAdd %10 %67 %93 +%99 = OpAccessChain %98 %13 %16 +OpStore %99 %94 +%100 = OpAccessChain %98 %13 %26 +OpStore %100 %95 +%101 = OpAccessChain %98 %13 %30 +OpStore %101 %96 +%102 = OpAccessChain %98 %13 %34 +OpStore %102 %97 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/root-bda.root-descriptor.comp b/reference/shaders/resources/root-bda.root-descriptor.comp index 547a2cf..c527517 100644 --- a/reference/shaders/resources/root-bda.root-descriptor.comp +++ b/reference/shaders/resources/root-bda.root-descriptor.comp @@ -8,54 +8,56 @@ #endif #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_buffer_reference : require 
+#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerFloat4CoherentArray; +layout(buffer_reference) buffer PhysicalPointerHalfArray; +layout(buffer_reference) buffer PhysicalPointerHalf2Array; +layout(buffer_reference) buffer PhysicalPointerHalf3Array; +layout(buffer_reference) buffer PhysicalPointerHalf4Array; +layout(buffer_reference) buffer PhysicalPointerUint2Array; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloat4NonWriteArray { - uint _m0; - uint _m1; + vec4 value[]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat4Coherent; -layout(buffer_reference) buffer PhysicalPointerHalf; -layout(buffer_reference) buffer PhysicalPointerHalf2; -layout(buffer_reference) buffer PhysicalPointerHalf3; -layout(buffer_reference) buffer PhysicalPointerHalf4; -layout(buffer_reference) buffer PhysicalPointerUint2; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer PhysicalPointerFloat4NonWriteCBVArray { - vec4 value; + vec4 value[4096]; }; -layout(buffer_reference, std430) coherent buffer PhysicalPointerFloat4Coherent +layout(buffer_reference, buffer_reference_align = 4, std430) coherent buffer PhysicalPointerFloat4CoherentArray { - vec4 value; + vec4 value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerHalf +layout(buffer_reference, buffer_reference_align = 2, std430) buffer PhysicalPointerHalfArray { - float16_t value; + float16_t value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerHalf2 
+layout(buffer_reference, buffer_reference_align = 2, std430) buffer PhysicalPointerHalf2Array { - f16vec2 value; + f16vec2 value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerHalf3 +layout(buffer_reference, buffer_reference_align = 2, scalar) buffer PhysicalPointerHalf3Array { - f16vec3 value; + f16vec3 value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerHalf4 +layout(buffer_reference, buffer_reference_align = 2, std430) buffer PhysicalPointerHalf4Array { - f16vec4 value; + f16vec4 value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerUint2 +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerUint2Array { - uvec2 value; + uvec2 value[]; }; layout(push_constant, std430) uniform RootConstants @@ -66,36 +68,19 @@ layout(push_constant, std430) uniform RootConstants uvec2 _m3; } registers; -float16_t _100; -uint _164; +float16_t _72; +uint _112; void main() { - AddCarry _39; - _39._m0 = uaddCarry(registers._m1.x, (gl_GlobalInvocationID.x * 16u) + 0u, _39._m1); - PhysicalPointerFloat4NonWrite _44 = PhysicalPointerFloat4NonWrite(uvec2(_39._m0, registers._m1.y + _39._m1)); - AddCarry _55; - _55._m0 = uaddCarry(registers._m0.x, gl_GlobalInvocationID.x * 16u, _55._m1); - PhysicalPointerFloat4NonWrite _60 = PhysicalPointerFloat4NonWrite(uvec2(_55._m0, registers._m0.y + _55._m1)); - AddCarry _77; - _77._m0 = uaddCarry(registers._m2.x, (gl_GlobalInvocationID.x * 16u) + 0u, _77._m1); - PhysicalPointerFloat4Coherent(uvec2(_77._m0, registers._m2.y + _77._m1)).value = vec4(_60.value.x + _44.value.x, _60.value.y + _44.value.y, _60.value.z + _44.value.z, _60.value.w + _44.value.w); - AddCarry _91; - _91._m0 = uaddCarry(registers._m3.x, gl_GlobalInvocationID.x << 1u, _91._m1); - PhysicalPointerHalf(uvec2(_91._m0, registers._m3.y + _91._m1)).value = float16_t(1.0); - AddCarry _107; - _107._m0 = uaddCarry(registers._m3.x, gl_GlobalInvocationID.x << 2u, _107._m1); - PhysicalPointerHalf2(uvec2(_107._m0, 
registers._m3.y + _107._m1)).value = f16vec2(float16_t(1.0), float16_t(2.0)); - AddCarry _124; - _124._m0 = uaddCarry(registers._m3.x, gl_GlobalInvocationID.x * 6u, _124._m1); - PhysicalPointerHalf3(uvec2(_124._m0, registers._m3.y + _124._m1)).value = f16vec3(float16_t(1.0), float16_t(2.0), float16_t(3.0)); - uint _134 = gl_GlobalInvocationID.x << 3u; - AddCarry _140; - _140._m0 = uaddCarry(registers._m3.x, _134, _140._m1); - PhysicalPointerHalf4(uvec2(_140._m0, registers._m3.y + _140._m1)).value = f16vec4(float16_t(1.0), float16_t(2.0), float16_t(3.0), float16_t(4.0)); - AddCarry _154; - _154._m0 = uaddCarry(registers._m3.x, _134, _154._m1); - PhysicalPointerUint2(uvec2(_154._m0, registers._m3.y + _154._m1)).value = uvec2(4u, 5u); + PhysicalPointerFloat4NonWriteArray _34 = PhysicalPointerFloat4NonWriteArray(registers._m1); + PhysicalPointerFloat4NonWriteCBVArray _46 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerFloat4CoherentArray(registers._m2).value[gl_GlobalInvocationID.x] = vec4(_46.value[gl_GlobalInvocationID.x].x + _34.value[gl_GlobalInvocationID.x].x, _46.value[gl_GlobalInvocationID.x].y + _34.value[gl_GlobalInvocationID.x].y, _46.value[gl_GlobalInvocationID.x].z + _34.value[gl_GlobalInvocationID.x].z, _46.value[gl_GlobalInvocationID.x].w + _34.value[gl_GlobalInvocationID.x].w); + PhysicalPointerHalfArray(registers._m3).value[gl_GlobalInvocationID.x] = float16_t(1.0); + PhysicalPointerHalf2Array(registers._m3).value[gl_GlobalInvocationID.x] = f16vec2(float16_t(1.0), float16_t(2.0)); + PhysicalPointerHalf3Array(registers._m3).value[gl_GlobalInvocationID.x] = f16vec3(float16_t(1.0), float16_t(2.0), float16_t(3.0)); + PhysicalPointerHalf4Array(registers._m3).value[gl_GlobalInvocationID.x] = f16vec4(float16_t(1.0), float16_t(2.0), float16_t(3.0), float16_t(4.0)); + PhysicalPointerUint2Array(registers._m3).value[gl_GlobalInvocationID.x] = uvec2(4u, 5u); } @@ -104,7 +89,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: 
Unknown(30017); 21022 -; Bound: 168 +; Bound: 116 ; Schema: 0 OpCapability Shader OpCapability Float16 @@ -116,43 +101,55 @@ OpExecutionMode %3 LocalSize 1 1 1 OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" -OpName %31 "PhysicalPointerFloat4NonWrite" -OpMemberName %31 0 "value" -OpName %38 "AddCarry" -OpName %71 "PhysicalPointerFloat4Coherent" -OpMemberName %71 0 "value" -OpName %87 "PhysicalPointerHalf" +OpName %32 "PhysicalPointerFloat4NonWriteArray" +OpMemberName %32 0 "value" +OpName %44 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %44 0 "value" +OpName %58 "PhysicalPointerFloat4CoherentArray" +OpMemberName %58 0 "value" +OpName %66 "PhysicalPointerHalfArray" +OpMemberName %66 0 "value" +OpName %76 "PhysicalPointerHalf2Array" +OpMemberName %76 0 "value" +OpName %87 "PhysicalPointerHalf3Array" OpMemberName %87 0 "value" -OpName %103 "PhysicalPointerHalf2" -OpMemberName %103 0 "value" -OpName %120 "PhysicalPointerHalf3" -OpMemberName %120 0 "value" -OpName %136 "PhysicalPointerHalf4" -OpMemberName %136 0 "value" -OpName %150 "PhysicalPointerUint2" -OpMemberName %150 0 "value" +OpName %97 "PhysicalPointerHalf4Array" +OpMemberName %97 0 "value" +OpName %105 "PhysicalPointerUint2Array" +OpMemberName %105 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %25 BuiltIn GlobalInvocationId -OpMemberDecorate %31 0 Offset 0 -OpDecorate %31 Block -OpMemberDecorate %31 0 NonWritable -OpMemberDecorate %71 0 Offset 0 -OpDecorate %71 Block -OpMemberDecorate %71 0 Coherent +OpDecorate %31 ArrayStride 16 +OpMemberDecorate %32 0 Offset 0 +OpDecorate %32 Block +OpMemberDecorate %32 0 NonWritable +OpDecorate %43 ArrayStride 16 +OpMemberDecorate %44 0 Offset 0 +OpDecorate %44 Block +OpMemberDecorate %44 0 NonWritable +OpDecorate %57 ArrayStride 16 +OpMemberDecorate %58 0 Offset 0 +OpDecorate %58 Block +OpMemberDecorate %58 0 Coherent 
+OpDecorate %65 ArrayStride 2 +OpMemberDecorate %66 0 Offset 0 +OpDecorate %66 Block +OpDecorate %75 ArrayStride 4 +OpMemberDecorate %76 0 Offset 0 +OpDecorate %76 Block +OpDecorate %86 ArrayStride 6 OpMemberDecorate %87 0 Offset 0 OpDecorate %87 Block -OpMemberDecorate %103 0 Offset 0 -OpDecorate %103 Block -OpMemberDecorate %120 0 Offset 0 -OpDecorate %120 Block -OpMemberDecorate %136 0 Offset 0 -OpDecorate %136 Block -OpMemberDecorate %150 0 Offset 0 -OpDecorate %150 Block +OpDecorate %96 ArrayStride 8 +OpMemberDecorate %97 0 Offset 0 +OpDecorate %97 Block +OpDecorate %104 ArrayStride 8 +OpMemberDecorate %105 0 Offset 0 +OpDecorate %105 Block %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -171,45 +168,54 @@ OpDecorate %150 Block %26 = OpTypePointer Input %5 %29 = OpTypeFloat 32 %30 = OpTypeVector %29 4 -%31 = OpTypeStruct %30 -%32 = OpTypePointer PhysicalStorageBuffer %31 -%34 = OpConstant %5 16 -%38 = OpTypeStruct %5 %5 -%45 = OpTypePointer PhysicalStorageBuffer %30 -%71 = OpTypeStruct %30 -%72 = OpTypePointer PhysicalStorageBuffer %71 -%86 = OpTypeFloat 16 +%31 = OpTypeRuntimeArray %30 +%32 = OpTypeStruct %31 +%33 = OpTypePointer PhysicalStorageBuffer %32 +%35 = OpTypePointer PhysicalStorageBuffer %30 +%42 = OpConstant %5 4096 +%43 = OpTypeArray %30 %42 +%44 = OpTypeStruct %43 +%45 = OpTypePointer PhysicalStorageBuffer %44 +%57 = OpTypeRuntimeArray %30 +%58 = OpTypeStruct %57 +%59 = OpTypePointer PhysicalStorageBuffer %58 +%64 = OpTypeFloat 16 +%65 = OpTypeRuntimeArray %64 +%66 = OpTypeStruct %65 +%67 = OpTypePointer PhysicalStorageBuffer %66 +%69 = OpTypePointer PhysicalStorageBuffer %64 +%71 = OpConstant %64 0x1p+0 +%74 = OpTypeVector %64 2 +%75 = OpTypeRuntimeArray %74 +%76 = OpTypeStruct %75 +%77 = OpTypePointer PhysicalStorageBuffer %76 +%79 = OpTypePointer PhysicalStorageBuffer %74 +%81 = OpConstant %64 0x1p+1 +%84 = OpConstant %5 6 +%85 = OpTypeVector %64 3 +%86 = OpTypeRuntimeArray %85 %87 = OpTypeStruct %86 %88 = OpTypePointer 
PhysicalStorageBuffer %87 -%97 = OpTypePointer PhysicalStorageBuffer %86 -%99 = OpConstant %86 0x1p+0 -%102 = OpTypeVector %86 2 -%103 = OpTypeStruct %102 -%104 = OpTypePointer PhysicalStorageBuffer %103 -%113 = OpTypePointer PhysicalStorageBuffer %102 -%115 = OpConstant %86 0x1p+1 -%118 = OpConstant %5 6 -%119 = OpTypeVector %86 3 -%120 = OpTypeStruct %119 -%121 = OpTypePointer PhysicalStorageBuffer %120 -%130 = OpTypePointer PhysicalStorageBuffer %119 -%132 = OpConstant %86 0x1.8p+1 -%135 = OpTypeVector %86 4 -%136 = OpTypeStruct %135 -%137 = OpTypePointer PhysicalStorageBuffer %136 -%146 = OpTypePointer PhysicalStorageBuffer %135 -%148 = OpConstant %86 0x1p+2 -%150 = OpTypeStruct %6 -%151 = OpTypePointer PhysicalStorageBuffer %150 -%160 = OpTypePointer PhysicalStorageBuffer %6 -%162 = OpConstant %5 4 -%163 = OpConstant %5 5 +%90 = OpTypePointer PhysicalStorageBuffer %85 +%92 = OpConstant %64 0x1.8p+1 +%95 = OpTypeVector %64 4 +%96 = OpTypeRuntimeArray %95 +%97 = OpTypeStruct %96 +%98 = OpTypePointer PhysicalStorageBuffer %97 +%100 = OpTypePointer PhysicalStorageBuffer %95 +%102 = OpConstant %64 0x1p+2 +%104 = OpTypeRuntimeArray %6 +%105 = OpTypeStruct %104 +%106 = OpTypePointer PhysicalStorageBuffer %105 +%108 = OpTypePointer PhysicalStorageBuffer %6 +%110 = OpConstant %5 4 +%111 = OpConstant %5 5 %3 = OpFunction %1 None %2 %4 = OpLabel -%100 = OpUndef %86 -%164 = OpUndef %5 -OpBranch %166 -%166 = OpLabel +%72 = OpUndef %64 +%112 = OpUndef %5 +OpBranch %114 +%114 = OpLabel %11 = OpAccessChain %10 %9 %12 %13 = OpLoad %6 %11 %14 = OpAccessChain %10 %9 %15 @@ -220,112 +226,51 @@ OpBranch %166 %22 = OpLoad %6 %20 %27 = OpAccessChain %26 %25 %21 %28 = OpLoad %5 %27 -%33 = OpIMul %5 %28 %34 -%35 = OpIAdd %5 %33 %21 -%36 = OpCompositeExtract %5 %19 0 -%37 = OpCompositeExtract %5 %19 1 -%39 = OpIAddCarry %38 %36 %35 -%40 = OpCompositeExtract %5 %39 0 -%41 = OpCompositeExtract %5 %39 1 -%42 = OpIAdd %5 %37 %41 -%43 = OpCompositeConstruct %6 %40 %42 -%44 = OpBitcast %32 
%43 -%46 = OpAccessChain %45 %44 %21 -%47 = OpLoad %30 %46 Aligned 4 -%48 = OpCompositeExtract %29 %47 0 -%49 = OpCompositeExtract %29 %47 1 -%50 = OpCompositeExtract %29 %47 2 -%51 = OpCompositeExtract %29 %47 3 -%52 = OpIMul %5 %28 %34 -%53 = OpCompositeExtract %5 %22 0 -%54 = OpCompositeExtract %5 %22 1 -%55 = OpIAddCarry %38 %53 %52 -%56 = OpCompositeExtract %5 %55 0 -%57 = OpCompositeExtract %5 %55 1 -%58 = OpIAdd %5 %54 %57 -%59 = OpCompositeConstruct %6 %56 %58 -%60 = OpBitcast %32 %59 -%61 = OpAccessChain %45 %60 %21 -%62 = OpLoad %30 %61 Aligned 16 -%63 = OpCompositeExtract %29 %62 0 -%64 = OpCompositeExtract %29 %62 1 -%65 = OpCompositeExtract %29 %62 2 -%66 = OpCompositeExtract %29 %62 3 -%67 = OpFAdd %29 %63 %48 -%68 = OpFAdd %29 %64 %49 -%69 = OpFAdd %29 %65 %50 -%70 = OpFAdd %29 %66 %51 -%73 = OpIMul %5 %28 %34 -%74 = OpIAdd %5 %73 %21 -%75 = OpCompositeExtract %5 %16 0 -%76 = OpCompositeExtract %5 %16 1 -%77 = OpIAddCarry %38 %75 %74 -%78 = OpCompositeExtract %5 %77 0 -%79 = OpCompositeExtract %5 %77 1 -%80 = OpIAdd %5 %76 %79 -%81 = OpCompositeConstruct %6 %78 %80 -%82 = OpBitcast %72 %81 -%83 = OpAccessChain %45 %82 %21 -%84 = OpCompositeConstruct %30 %67 %68 %69 %70 -OpStore %83 %84 Aligned 4 -%85 = OpShiftLeftLogical %5 %28 %18 -%89 = OpCompositeExtract %5 %13 0 -%90 = OpCompositeExtract %5 %13 1 -%91 = OpIAddCarry %38 %89 %85 -%92 = OpCompositeExtract %5 %91 0 -%93 = OpCompositeExtract %5 %91 1 -%94 = OpIAdd %5 %90 %93 -%95 = OpCompositeConstruct %6 %92 %94 -%96 = OpBitcast %88 %95 -%98 = OpAccessChain %97 %96 %21 -OpStore %98 %99 Aligned 2 -%101 = OpShiftLeftLogical %5 %28 %15 -%105 = OpCompositeExtract %5 %13 0 -%106 = OpCompositeExtract %5 %13 1 -%107 = OpIAddCarry %38 %105 %101 -%108 = OpCompositeExtract %5 %107 0 -%109 = OpCompositeExtract %5 %107 1 -%110 = OpIAdd %5 %106 %109 -%111 = OpCompositeConstruct %6 %108 %110 -%112 = OpBitcast %104 %111 -%114 = OpAccessChain %113 %112 %21 -%116 = OpCompositeConstruct %102 %99 %115 -OpStore %114 
%116 Aligned 2 -%117 = OpIMul %5 %28 %118 -%122 = OpCompositeExtract %5 %13 0 -%123 = OpCompositeExtract %5 %13 1 -%124 = OpIAddCarry %38 %122 %117 -%125 = OpCompositeExtract %5 %124 0 -%126 = OpCompositeExtract %5 %124 1 -%127 = OpIAdd %5 %123 %126 -%128 = OpCompositeConstruct %6 %125 %127 -%129 = OpBitcast %121 %128 -%131 = OpAccessChain %130 %129 %21 -%133 = OpCompositeConstruct %119 %99 %115 %132 -OpStore %131 %133 Aligned 2 -%134 = OpShiftLeftLogical %5 %28 %12 -%138 = OpCompositeExtract %5 %13 0 -%139 = OpCompositeExtract %5 %13 1 -%140 = OpIAddCarry %38 %138 %134 -%141 = OpCompositeExtract %5 %140 0 -%142 = OpCompositeExtract %5 %140 1 -%143 = OpIAdd %5 %139 %142 -%144 = OpCompositeConstruct %6 %141 %143 -%145 = OpBitcast %137 %144 -%147 = OpAccessChain %146 %145 %21 -%149 = OpCompositeConstruct %135 %99 %115 %132 %148 -OpStore %147 %149 Aligned 2 -%152 = OpCompositeExtract %5 %13 0 -%153 = OpCompositeExtract %5 %13 1 -%154 = OpIAddCarry %38 %152 %134 -%155 = OpCompositeExtract %5 %154 0 -%156 = OpCompositeExtract %5 %154 1 -%157 = OpIAdd %5 %153 %156 -%158 = OpCompositeConstruct %6 %155 %157 -%159 = OpBitcast %151 %158 -%161 = OpAccessChain %160 %159 %21 -%165 = OpCompositeConstruct %6 %162 %163 -OpStore %161 %165 Aligned 4 +%34 = OpBitcast %33 %19 +%36 = OpAccessChain %35 %34 %21 %28 +%37 = OpLoad %30 %36 Aligned 4 +%38 = OpCompositeExtract %29 %37 0 +%39 = OpCompositeExtract %29 %37 1 +%40 = OpCompositeExtract %29 %37 2 +%41 = OpCompositeExtract %29 %37 3 +%46 = OpBitcast %45 %22 +%47 = OpInBoundsAccessChain %35 %46 %21 %28 +%48 = OpLoad %30 %47 Aligned 16 +%49 = OpCompositeExtract %29 %48 0 +%50 = OpCompositeExtract %29 %48 1 +%51 = OpCompositeExtract %29 %48 2 +%52 = OpCompositeExtract %29 %48 3 +%53 = OpFAdd %29 %49 %38 +%54 = OpFAdd %29 %50 %39 +%55 = OpFAdd %29 %51 %40 +%56 = OpFAdd %29 %52 %41 +%60 = OpBitcast %59 %16 +%61 = OpAccessChain %35 %60 %21 %28 +%62 = OpCompositeConstruct %30 %53 %54 %55 %56 +OpStore %61 %62 Aligned 4 +%63 = 
OpShiftLeftLogical %5 %28 %18 +%68 = OpBitcast %67 %13 +%70 = OpAccessChain %69 %68 %21 %28 +OpStore %70 %71 Aligned 2 +%73 = OpShiftLeftLogical %5 %28 %15 +%78 = OpBitcast %77 %13 +%80 = OpAccessChain %79 %78 %21 %28 +%82 = OpCompositeConstruct %74 %71 %81 +OpStore %80 %82 Aligned 2 +%83 = OpIMul %5 %28 %84 +%89 = OpBitcast %88 %13 +%91 = OpAccessChain %90 %89 %21 %28 +%93 = OpCompositeConstruct %85 %71 %81 %92 +OpStore %91 %93 Aligned 2 +%94 = OpShiftLeftLogical %5 %28 %12 +%99 = OpBitcast %98 %13 +%101 = OpAccessChain %100 %99 %21 %28 +%103 = OpCompositeConstruct %95 %71 %81 %92 %102 +OpStore %101 %103 Aligned 2 +%107 = OpBitcast %106 %13 +%109 = OpAccessChain %108 %107 %21 %28 +%113 = OpCompositeConstruct %6 %110 %111 +OpStore %109 %113 Aligned 4 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/root-bda.root-descriptor.sm60.comp b/reference/shaders/resources/root-bda.root-descriptor.sm60.comp index 1ebdb75..a2fbd63 100644 --- a/reference/shaders/resources/root-bda.root-descriptor.sm60.comp +++ b/reference/shaders/resources/root-bda.root-descriptor.sm60.comp @@ -1,23 +1,24 @@ #version 460 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerFloat4CoherentArray; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloat4NonWriteArray { - uint _m0; - uint _m1; + vec4 value[]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat4Coherent; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer 
PhysicalPointerFloat4NonWriteCBVArray { - vec4 value; + vec4 value[4096]; }; -layout(buffer_reference, std430) coherent buffer PhysicalPointerFloat4Coherent +layout(buffer_reference, buffer_reference_align = 4, std430) coherent buffer PhysicalPointerFloat4CoherentArray { - vec4 value; + vec4 value[]; }; layout(push_constant, std430) uniform RootConstants @@ -30,15 +31,9 @@ layout(push_constant, std430) uniform RootConstants void main() { - AddCarry _36; - _36._m0 = uaddCarry(registers._m1.x, (gl_GlobalInvocationID.x * 16u) + 0u, _36._m1); - PhysicalPointerFloat4NonWrite _41 = PhysicalPointerFloat4NonWrite(uvec2(_36._m0, registers._m1.y + _36._m1)); - AddCarry _52; - _52._m0 = uaddCarry(registers._m0.x, gl_GlobalInvocationID.x * 16u, _52._m1); - PhysicalPointerFloat4NonWrite _57 = PhysicalPointerFloat4NonWrite(uvec2(_52._m0, registers._m0.y + _52._m1)); - AddCarry _74; - _74._m0 = uaddCarry(registers._m2.x, (gl_GlobalInvocationID.x * 16u) + 0u, _74._m1); - PhysicalPointerFloat4Coherent(uvec2(_74._m0, registers._m2.y + _74._m1)).value = vec4(_57.value.x + _41.value.x, _57.value.y + _41.value.y, _57.value.z + _41.value.z, _57.value.w + _41.value.w); + PhysicalPointerFloat4NonWriteArray _31 = PhysicalPointerFloat4NonWriteArray(registers._m1); + PhysicalPointerFloat4NonWriteCBVArray _43 = PhysicalPointerFloat4NonWriteCBVArray(registers._m0); + PhysicalPointerFloat4CoherentArray(registers._m2).value[gl_GlobalInvocationID.x] = vec4(_43.value[gl_GlobalInvocationID.x].x + _31.value[gl_GlobalInvocationID.x].x, _43.value[gl_GlobalInvocationID.x].y + _31.value[gl_GlobalInvocationID.x].y, _43.value[gl_GlobalInvocationID.x].z + _31.value[gl_GlobalInvocationID.x].z, _43.value[gl_GlobalInvocationID.x].w + _31.value[gl_GlobalInvocationID.x].w); } @@ -47,7 +42,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 84 +; Bound: 62 ; Schema: 0 OpCapability Shader OpCapability PhysicalStorageBufferAddresses @@ -58,23 +53,30 @@ OpExecutionMode %3 LocalSize 
1 1 1 OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" -OpName %28 "PhysicalPointerFloat4NonWrite" -OpMemberName %28 0 "value" -OpName %35 "AddCarry" -OpName %68 "PhysicalPointerFloat4Coherent" -OpMemberName %68 0 "value" +OpName %29 "PhysicalPointerFloat4NonWriteArray" +OpMemberName %29 0 "value" +OpName %41 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %41 0 "value" +OpName %55 "PhysicalPointerFloat4CoherentArray" +OpMemberName %55 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 OpMemberDecorate %7 2 Offset 16 OpMemberDecorate %7 3 Offset 24 OpDecorate %22 BuiltIn GlobalInvocationId -OpMemberDecorate %28 0 Offset 0 -OpDecorate %28 Block -OpMemberDecorate %28 0 NonWritable -OpMemberDecorate %68 0 Offset 0 -OpDecorate %68 Block -OpMemberDecorate %68 0 Coherent +OpDecorate %28 ArrayStride 16 +OpMemberDecorate %29 0 Offset 0 +OpDecorate %29 Block +OpMemberDecorate %29 0 NonWritable +OpDecorate %40 ArrayStride 16 +OpMemberDecorate %41 0 Offset 0 +OpDecorate %41 Block +OpMemberDecorate %41 0 NonWritable +OpDecorate %54 ArrayStride 16 +OpMemberDecorate %55 0 Offset 0 +OpDecorate %55 Block +OpMemberDecorate %55 0 Coherent %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -92,17 +94,21 @@ OpMemberDecorate %68 0 Coherent %23 = OpTypePointer Input %5 %26 = OpTypeFloat 32 %27 = OpTypeVector %26 4 -%28 = OpTypeStruct %27 -%29 = OpTypePointer PhysicalStorageBuffer %28 -%31 = OpConstant %5 16 -%35 = OpTypeStruct %5 %5 -%42 = OpTypePointer PhysicalStorageBuffer %27 -%68 = OpTypeStruct %27 -%69 = OpTypePointer PhysicalStorageBuffer %68 +%28 = OpTypeRuntimeArray %27 +%29 = OpTypeStruct %28 +%30 = OpTypePointer PhysicalStorageBuffer %29 +%32 = OpTypePointer PhysicalStorageBuffer %27 +%39 = OpConstant %5 4096 +%40 = OpTypeArray %27 %39 +%41 = OpTypeStruct %40 +%42 = OpTypePointer PhysicalStorageBuffer %41 +%54 = OpTypeRuntimeArray %27 +%55 = OpTypeStruct %54 +%56 = OpTypePointer PhysicalStorageBuffer %55 
%3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %82 -%82 = OpLabel +OpBranch %60 +%60 = OpLabel %11 = OpAccessChain %10 %9 %12 %13 = OpLoad %6 %11 %14 = OpAccessChain %10 %9 %15 @@ -111,54 +117,28 @@ OpBranch %82 %19 = OpLoad %6 %17 %24 = OpAccessChain %23 %22 %18 %25 = OpLoad %5 %24 -%30 = OpIMul %5 %25 %31 -%32 = OpIAdd %5 %30 %18 -%33 = OpCompositeExtract %5 %16 0 -%34 = OpCompositeExtract %5 %16 1 -%36 = OpIAddCarry %35 %33 %32 -%37 = OpCompositeExtract %5 %36 0 -%38 = OpCompositeExtract %5 %36 1 -%39 = OpIAdd %5 %34 %38 -%40 = OpCompositeConstruct %6 %37 %39 -%41 = OpBitcast %29 %40 -%43 = OpAccessChain %42 %41 %18 -%44 = OpLoad %27 %43 Aligned 4 -%45 = OpCompositeExtract %26 %44 0 -%46 = OpCompositeExtract %26 %44 1 -%47 = OpCompositeExtract %26 %44 2 -%48 = OpCompositeExtract %26 %44 3 -%49 = OpIMul %5 %25 %31 -%50 = OpCompositeExtract %5 %19 0 -%51 = OpCompositeExtract %5 %19 1 -%52 = OpIAddCarry %35 %50 %49 -%53 = OpCompositeExtract %5 %52 0 -%54 = OpCompositeExtract %5 %52 1 -%55 = OpIAdd %5 %51 %54 -%56 = OpCompositeConstruct %6 %53 %55 -%57 = OpBitcast %29 %56 -%58 = OpAccessChain %42 %57 %18 -%59 = OpLoad %27 %58 Aligned 16 -%60 = OpCompositeExtract %26 %59 0 -%61 = OpCompositeExtract %26 %59 1 -%62 = OpCompositeExtract %26 %59 2 -%63 = OpCompositeExtract %26 %59 3 -%64 = OpFAdd %26 %60 %45 -%65 = OpFAdd %26 %61 %46 -%66 = OpFAdd %26 %62 %47 -%67 = OpFAdd %26 %63 %48 -%70 = OpIMul %5 %25 %31 -%71 = OpIAdd %5 %70 %18 -%72 = OpCompositeExtract %5 %13 0 -%73 = OpCompositeExtract %5 %13 1 -%74 = OpIAddCarry %35 %72 %71 -%75 = OpCompositeExtract %5 %74 0 -%76 = OpCompositeExtract %5 %74 1 -%77 = OpIAdd %5 %73 %76 -%78 = OpCompositeConstruct %6 %75 %77 -%79 = OpBitcast %69 %78 -%80 = OpAccessChain %42 %79 %18 -%81 = OpCompositeConstruct %27 %64 %65 %66 %67 -OpStore %80 %81 Aligned 4 +%31 = OpBitcast %30 %16 +%33 = OpAccessChain %32 %31 %18 %25 +%34 = OpLoad %27 %33 Aligned 4 +%35 = OpCompositeExtract %26 %34 0 +%36 = OpCompositeExtract %26 %34 1 +%37 = 
OpCompositeExtract %26 %34 2 +%38 = OpCompositeExtract %26 %34 3 +%43 = OpBitcast %42 %19 +%44 = OpInBoundsAccessChain %32 %43 %18 %25 +%45 = OpLoad %27 %44 Aligned 16 +%46 = OpCompositeExtract %26 %45 0 +%47 = OpCompositeExtract %26 %45 1 +%48 = OpCompositeExtract %26 %45 2 +%49 = OpCompositeExtract %26 %45 3 +%50 = OpFAdd %26 %46 %35 +%51 = OpFAdd %26 %47 %36 +%52 = OpFAdd %26 %48 %37 +%53 = OpFAdd %26 %49 %38 +%57 = OpBitcast %56 %13 +%58 = OpAccessChain %32 %57 %18 %25 +%59 = OpCompositeConstruct %27 %50 %51 %52 %53 +OpStore %58 %59 Aligned 4 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/root-constant-with-bda.root-descriptor.root-constant.comp b/reference/shaders/resources/root-constant-with-bda.root-descriptor.root-constant.comp index 0b0c145..68722cb 100644 --- a/reference/shaders/resources/root-constant-with-bda.root-descriptor.root-constant.comp +++ b/reference/shaders/resources/root-constant-with-bda.root-descriptor.root-constant.comp @@ -1,23 +1,18 @@ #version 460 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloat4CoherentArray; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloat4NonWriteArray { - uint _m0; - uint _m1; + vec4 value[]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat4Coherent; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) coherent buffer PhysicalPointerFloat4CoherentArray { - vec4 value; -}; - -layout(buffer_reference, std430) coherent buffer PhysicalPointerFloat4Coherent -{ - vec4 value; + vec4 value[]; }; layout(push_constant, std430) uniform 
RootConstants @@ -46,18 +41,14 @@ layout(push_constant, std430) uniform RootConstants void main() { - AddCarry _34; - _34._m0 = uaddCarry(registers._m1.x, (gl_GlobalInvocationID.x * 16u) + 0u, _34._m1); - PhysicalPointerFloat4NonWrite _39 = PhysicalPointerFloat4NonWrite(uvec2(_34._m0, registers._m1.y + _34._m1)); - vec4 _62 = uintBitsToFloat(uvec4(registers._m4, registers._m5, registers._m6, registers._m7)); - float _63 = _62.x; - vec4 _81 = uintBitsToFloat(uvec4(registers._m8, registers._m9, registers._m10, registers._m11)); - float _82 = _81.y; - vec4 _100 = uintBitsToFloat(uvec4(registers._m12, registers._m13, registers._m14, registers._m15)); - float _101 = _100.z; - AddCarry _112; - _112._m0 = uaddCarry(registers._m2.x, (gl_GlobalInvocationID.x * 16u) + 0u, _112._m1); - PhysicalPointerFloat4Coherent(uvec2(_112._m0, registers._m2.y + _112._m1)).value = vec4(((_63 + _39.value.x) + _82) + _101, ((_63 + _39.value.y) + _82) + _101, ((_63 + _39.value.z) + _82) + _101, ((_63 + _39.value.w) + _82) + _101); + PhysicalPointerFloat4NonWriteArray _29 = PhysicalPointerFloat4NonWriteArray(registers._m1); + vec4 _52 = uintBitsToFloat(uvec4(registers._m4, registers._m5, registers._m6, registers._m7)); + float _53 = _52.x; + vec4 _71 = uintBitsToFloat(uvec4(registers._m8, registers._m9, registers._m10, registers._m11)); + float _72 = _71.y; + vec4 _90 = uintBitsToFloat(uvec4(registers._m12, registers._m13, registers._m14, registers._m15)); + float _91 = _90.z; + PhysicalPointerFloat4CoherentArray(registers._m2).value[gl_GlobalInvocationID.x] = vec4(((_53 + _29.value[gl_GlobalInvocationID.x].x) + _72) + _91, ((_53 + _29.value[gl_GlobalInvocationID.x].y) + _72) + _91, ((_53 + _29.value[gl_GlobalInvocationID.x].z) + _72) + _91, ((_53 + _29.value[gl_GlobalInvocationID.x].w) + _72) + _91); } @@ -66,7 +57,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 122 +; Bound: 104 ; Schema: 0 OpCapability Shader OpCapability PhysicalStorageBufferAddresses 
@@ -77,11 +68,10 @@ OpExecutionMode %3 LocalSize 1 1 1 OpName %3 "main" OpName %7 "RootConstants" OpName %9 "registers" -OpName %26 "PhysicalPointerFloat4NonWrite" -OpMemberName %26 0 "value" -OpName %33 "AddCarry" -OpName %106 "PhysicalPointerFloat4Coherent" -OpMemberName %106 0 "value" +OpName %27 "PhysicalPointerFloat4NonWriteArray" +OpMemberName %27 0 "value" +OpName %97 "PhysicalPointerFloat4CoherentArray" +OpMemberName %97 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 @@ -104,12 +94,14 @@ OpMemberDecorate %7 17 Offset 84 OpMemberDecorate %7 18 Offset 88 OpMemberDecorate %7 19 Offset 92 OpDecorate %19 BuiltIn GlobalInvocationId -OpMemberDecorate %26 0 Offset 0 -OpDecorate %26 Block -OpMemberDecorate %26 0 NonWritable -OpMemberDecorate %106 0 Offset 0 -OpDecorate %106 Block -OpMemberDecorate %106 0 Coherent +OpDecorate %26 ArrayStride 16 +OpMemberDecorate %27 0 Offset 0 +OpDecorate %27 Block +OpMemberDecorate %27 0 NonWritable +OpDecorate %96 ArrayStride 16 +OpMemberDecorate %97 0 Offset 0 +OpDecorate %97 Block +OpMemberDecorate %97 0 Coherent %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -127,111 +119,93 @@ OpMemberDecorate %106 0 Coherent %22 = OpConstant %5 0 %24 = OpTypeFloat 32 %25 = OpTypeVector %24 4 -%26 = OpTypeStruct %25 -%27 = OpTypePointer PhysicalStorageBuffer %26 -%29 = OpConstant %5 16 -%33 = OpTypeStruct %5 %5 -%40 = OpTypePointer PhysicalStorageBuffer %25 -%47 = OpTypePointer PushConstant %5 -%49 = OpConstant %5 4 -%52 = OpConstant %5 5 -%55 = OpConstant %5 6 -%58 = OpConstant %5 7 -%60 = OpTypeVector %5 4 -%69 = OpConstant %5 8 -%72 = OpConstant %5 9 -%75 = OpConstant %5 10 -%78 = OpConstant %5 11 -%88 = OpConstant %5 12 -%91 = OpConstant %5 13 -%94 = OpConstant %5 14 -%97 = OpConstant %5 15 -%106 = OpTypeStruct %25 -%107 = OpTypePointer PhysicalStorageBuffer %106 +%26 = OpTypeRuntimeArray %25 +%27 = OpTypeStruct %26 +%28 = OpTypePointer PhysicalStorageBuffer %27 +%30 = 
OpTypePointer PhysicalStorageBuffer %25 +%37 = OpTypePointer PushConstant %5 +%39 = OpConstant %5 4 +%42 = OpConstant %5 5 +%45 = OpConstant %5 6 +%48 = OpConstant %5 7 +%50 = OpTypeVector %5 4 +%59 = OpConstant %5 8 +%62 = OpConstant %5 9 +%65 = OpConstant %5 10 +%68 = OpConstant %5 11 +%78 = OpConstant %5 12 +%81 = OpConstant %5 13 +%84 = OpConstant %5 14 +%87 = OpConstant %5 15 +%96 = OpTypeRuntimeArray %25 +%97 = OpTypeStruct %96 +%98 = OpTypePointer PhysicalStorageBuffer %97 %3 = OpFunction %1 None %2 %4 = OpLabel -OpBranch %120 -%120 = OpLabel +OpBranch %102 +%102 = OpLabel %11 = OpAccessChain %10 %9 %12 %13 = OpLoad %6 %11 %14 = OpAccessChain %10 %9 %15 %16 = OpLoad %6 %14 %21 = OpAccessChain %20 %19 %22 %23 = OpLoad %5 %21 -%28 = OpIMul %5 %23 %29 -%30 = OpIAdd %5 %28 %22 -%31 = OpCompositeExtract %5 %16 0 -%32 = OpCompositeExtract %5 %16 1 -%34 = OpIAddCarry %33 %31 %30 -%35 = OpCompositeExtract %5 %34 0 -%36 = OpCompositeExtract %5 %34 1 -%37 = OpIAdd %5 %32 %36 -%38 = OpCompositeConstruct %6 %35 %37 -%39 = OpBitcast %27 %38 -%41 = OpAccessChain %40 %39 %22 -%42 = OpLoad %25 %41 Aligned 4 -%43 = OpCompositeExtract %24 %42 0 -%44 = OpCompositeExtract %24 %42 1 -%45 = OpCompositeExtract %24 %42 2 -%46 = OpCompositeExtract %24 %42 3 -%48 = OpAccessChain %47 %9 %49 -%50 = OpLoad %5 %48 -%51 = OpAccessChain %47 %9 %52 -%53 = OpLoad %5 %51 -%54 = OpAccessChain %47 %9 %55 -%56 = OpLoad %5 %54 -%57 = OpAccessChain %47 %9 %58 -%59 = OpLoad %5 %57 -%61 = OpCompositeConstruct %60 %50 %53 %56 %59 -%62 = OpBitcast %25 %61 -%63 = OpCompositeExtract %24 %62 0 -%64 = OpFAdd %24 %63 %43 -%65 = OpFAdd %24 %63 %44 -%66 = OpFAdd %24 %63 %45 -%67 = OpFAdd %24 %63 %46 -%68 = OpAccessChain %47 %9 %69 -%70 = OpLoad %5 %68 -%71 = OpAccessChain %47 %9 %72 -%73 = OpLoad %5 %71 -%74 = OpAccessChain %47 %9 %75 -%76 = OpLoad %5 %74 -%77 = OpAccessChain %47 %9 %78 +%29 = OpBitcast %28 %16 +%31 = OpAccessChain %30 %29 %22 %23 +%32 = OpLoad %25 %31 Aligned 4 +%33 = OpCompositeExtract %24 
%32 0 +%34 = OpCompositeExtract %24 %32 1 +%35 = OpCompositeExtract %24 %32 2 +%36 = OpCompositeExtract %24 %32 3 +%38 = OpAccessChain %37 %9 %39 +%40 = OpLoad %5 %38 +%41 = OpAccessChain %37 %9 %42 +%43 = OpLoad %5 %41 +%44 = OpAccessChain %37 %9 %45 +%46 = OpLoad %5 %44 +%47 = OpAccessChain %37 %9 %48 +%49 = OpLoad %5 %47 +%51 = OpCompositeConstruct %50 %40 %43 %46 %49 +%52 = OpBitcast %25 %51 +%53 = OpCompositeExtract %24 %52 0 +%54 = OpFAdd %24 %53 %33 +%55 = OpFAdd %24 %53 %34 +%56 = OpFAdd %24 %53 %35 +%57 = OpFAdd %24 %53 %36 +%58 = OpAccessChain %37 %9 %59 +%60 = OpLoad %5 %58 +%61 = OpAccessChain %37 %9 %62 +%63 = OpLoad %5 %61 +%64 = OpAccessChain %37 %9 %65 +%66 = OpLoad %5 %64 +%67 = OpAccessChain %37 %9 %68 +%69 = OpLoad %5 %67 +%70 = OpCompositeConstruct %50 %60 %63 %66 %69 +%71 = OpBitcast %25 %70 +%72 = OpCompositeExtract %24 %71 1 +%73 = OpFAdd %24 %54 %72 +%74 = OpFAdd %24 %55 %72 +%75 = OpFAdd %24 %56 %72 +%76 = OpFAdd %24 %57 %72 +%77 = OpAccessChain %37 %9 %78 %79 = OpLoad %5 %77 -%80 = OpCompositeConstruct %60 %70 %73 %76 %79 -%81 = OpBitcast %25 %80 -%82 = OpCompositeExtract %24 %81 1 -%83 = OpFAdd %24 %64 %82 -%84 = OpFAdd %24 %65 %82 -%85 = OpFAdd %24 %66 %82 -%86 = OpFAdd %24 %67 %82 -%87 = OpAccessChain %47 %9 %88 -%89 = OpLoad %5 %87 -%90 = OpAccessChain %47 %9 %91 -%92 = OpLoad %5 %90 -%93 = OpAccessChain %47 %9 %94 -%95 = OpLoad %5 %93 -%96 = OpAccessChain %47 %9 %97 -%98 = OpLoad %5 %96 -%99 = OpCompositeConstruct %60 %89 %92 %95 %98 -%100 = OpBitcast %25 %99 -%101 = OpCompositeExtract %24 %100 2 -%102 = OpFAdd %24 %83 %101 -%103 = OpFAdd %24 %84 %101 -%104 = OpFAdd %24 %85 %101 -%105 = OpFAdd %24 %86 %101 -%108 = OpIMul %5 %23 %29 -%109 = OpIAdd %5 %108 %22 -%110 = OpCompositeExtract %5 %13 0 -%111 = OpCompositeExtract %5 %13 1 -%112 = OpIAddCarry %33 %110 %109 -%113 = OpCompositeExtract %5 %112 0 -%114 = OpCompositeExtract %5 %112 1 -%115 = OpIAdd %5 %111 %114 -%116 = OpCompositeConstruct %6 %113 %115 -%117 = OpBitcast %107 %116 
-%118 = OpAccessChain %40 %117 %22 -%119 = OpCompositeConstruct %25 %102 %103 %104 %105 -OpStore %118 %119 Aligned 4 +%80 = OpAccessChain %37 %9 %81 +%82 = OpLoad %5 %80 +%83 = OpAccessChain %37 %9 %84 +%85 = OpLoad %5 %83 +%86 = OpAccessChain %37 %9 %87 +%88 = OpLoad %5 %86 +%89 = OpCompositeConstruct %50 %79 %82 %85 %88 +%90 = OpBitcast %25 %89 +%91 = OpCompositeExtract %24 %90 2 +%92 = OpFAdd %24 %73 %91 +%93 = OpFAdd %24 %74 %91 +%94 = OpFAdd %24 %75 %91 +%95 = OpFAdd %24 %76 %91 +%99 = OpBitcast %98 %13 +%100 = OpAccessChain %30 %99 %22 %23 +%101 = OpCompositeConstruct %25 %92 %93 %94 %95 +OpStore %100 %101 Aligned 4 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss index e4815b2..04cc0ab 100644 --- a/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss +++ b/reference/shaders/resources/rt-resources.bindless.local-root-signature.rmiss @@ -2,7 +2,9 @@ #extension GL_EXT_ray_tracing : require #extension GL_EXT_buffer_reference : require #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_scalar_block_layout : require #extension GL_EXT_samplerless_texture_functions : require +#extension GL_EXT_buffer_reference_uvec2 : require struct _37 { @@ -10,52 +12,52 @@ struct _37 uint _m1; }; -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteCBVArray; +layout(buffer_reference) buffer PhysicalPointerFloat4NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerUintNonWriteArray; +layout(buffer_reference) buffer PhysicalPointerUint2NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerUint3NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerUint4NonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloatArray; +layout(buffer_reference) buffer PhysicalPointerUintArray; +layout(buffer_reference, buffer_reference_align = 16, 
std430) readonly buffer PhysicalPointerFloat4NonWriteCBVArray { - uint _m0; - uint _m1; + vec4 value[4096]; }; -layout(buffer_reference) buffer PhysicalPointerFloat4NonWrite; -layout(buffer_reference) buffer PhysicalPointerUintNonWrite; -layout(buffer_reference) buffer PhysicalPointerUint2NonWrite; -layout(buffer_reference) buffer PhysicalPointerUint3NonWrite; -layout(buffer_reference) buffer PhysicalPointerUint4NonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat; -layout(buffer_reference) buffer PhysicalPointerUint; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloat4NonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloat4NonWriteArray { - vec4 value; + vec4 value[]; }; -layout(buffer_reference, std430) readonly buffer PhysicalPointerUintNonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerUintNonWriteArray { - uint value; + uint value[]; }; -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint2NonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerUint2NonWriteArray { - uvec2 value; + uvec2 value[]; }; -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint3NonWrite +layout(buffer_reference, buffer_reference_align = 4, scalar) readonly buffer PhysicalPointerUint3NonWriteArray { - uvec3 value; + uvec3 value[]; }; -layout(buffer_reference, std430) readonly buffer PhysicalPointerUint4NonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerUint4NonWriteArray { - uvec4 value; + uvec4 value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerFloat +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerFloatArray { - float value; + float value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerUint +layout(buffer_reference, buffer_reference_align = 4, std430) buffer 
PhysicalPointerUintArray { - uint value; + uint value[]; }; layout(shaderRecordEXT, std430) buffer SBTBlock @@ -95,8 +97,8 @@ layout(set = 3, binding = 0, r32f) uniform readonly image2D _25[]; layout(set = 2, binding = 0) uniform sampler _36[]; layout(location = 0) rayPayloadInEXT _37 payload; -vec4 _425; -float _440; +vec4 _372; +float _378; void main() { @@ -110,53 +112,29 @@ void main() uint _146 = ((SBT._m9.x >> 6u) + 13u) + _58; vec4 _169 = uintBitsToFloat(uvec4(SBT._m0[0u], SBT._m0[1u], SBT._m0[2u], SBT._m0[3u])); vec4 _182 = uintBitsToFloat(uvec4(SBT._m0[4u], 0u, 0u, 0u)); - AddCarry _196; - _196._m0 = uaddCarry(SBT._m6.x, 1u * 16u, _196._m1); - PhysicalPointerFloat4NonWrite _203 = PhysicalPointerFloat4NonWrite(uvec2(_196._m0, SBT._m6.y + _196._m1)); - vec4 _232 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _59], _36[(SBT._m10.x >> 5u) + 13u])), vec2(0.5), 0.0); - vec4 _258 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)])), vec2(0.5), 0.0); - AddCarry _274; - _274._m0 = uaddCarry(SBT._m2.x, (_58 * 16u) + 0u, _274._m1); - PhysicalPointerFloat4NonWrite _279 = PhysicalPointerFloat4NonWrite(uvec2(_274._m0, SBT._m2.y + _274._m1)); - uint _290 = _58 << 2u; - AddCarry _297; - _297._m0 = uaddCarry(SBT._m4.x, _290, _297._m1); - float _306 = uintBitsToFloat(PhysicalPointerUintNonWrite(uvec2(_297._m0, SBT._m4.y + _297._m1)).value); - AddCarry _316; - _316._m0 = uaddCarry(SBT._m4.x, _58 << 3u, _316._m1); - PhysicalPointerUint2NonWrite _321 = PhysicalPointerUint2NonWrite(uvec2(_316._m0, SBT._m4.y + _316._m1)); - float _327 = uintBitsToFloat(_321.value.x); - float _328 = uintBitsToFloat(_321.value.y); - AddCarry _339; - _339._m0 = uaddCarry(SBT._m4.x, _58 * 12u, _339._m1); - PhysicalPointerUint3NonWrite _344 = PhysicalPointerUint3NonWrite(uvec2(_339._m0, SBT._m4.y + _339._m1)); - float _353 = uintBitsToFloat(_344.value.z); - AddCarry _363; - _363._m0 = uaddCarry(SBT._m4.x, _58 << 4u, _363._m1); - 
PhysicalPointerUint4NonWrite _368 = PhysicalPointerUint4NonWrite(uvec2(_363._m0, SBT._m4.y + _363._m1)); - AddCarry _392; - _392._m0 = uaddCarry(SBT._m3.x, (_58 * 4u) + 0u, _392._m1); - PhysicalPointerFloat _397 = PhysicalPointerFloat(uvec2(_392._m0, SBT._m3.y + _392._m1)); - AddCarry _411; - _411._m0 = uaddCarry(SBT._m5.x, _290, _411._m1); - float _419 = uintBitsToFloat(PhysicalPointerUint(uvec2(_411._m0, SBT._m5.y + _411._m1)).value); - float _420 = ((((((((((((((((_67.x + _80.x) + _99.x) + _119.x) + _32[nonuniformEXT(_53)]._m0[0u].x) + _32[nonuniformEXT(_146)]._m0[0u].x) + _169.x) + _182.x) + _203.value.x) + _232.x) + _258.x) + _279.value.x) + _306) + _327) + uintBitsToFloat(_344.value.x)) + uintBitsToFloat(_368.value.x)) + _397.value) + _419; - float _421 = ((((((((((((((((_67.y + _80.y) + _99.y) + _119.y) + _32[nonuniformEXT(_53)]._m0[0u].y) + _32[nonuniformEXT(_146)]._m0[0u].y) + _169.y) + _182.y) + _203.value.y) + _232.y) + _258.y) + _279.value.y) + _306) + _328) + uintBitsToFloat(_344.value.y)) + uintBitsToFloat(_368.value.y)) + _397.value) + _419; - vec4 _424 = _425; - _424.x = _420; - vec4 _426 = _424; - _426.y = _421; - vec4 _427 = _426; - _427.z = ((((((((((((((((_67.z + _80.z) + _99.z) + _119.z) + _32[nonuniformEXT(_53)]._m0[0u].z) + _32[nonuniformEXT(_146)]._m0[0u].z) + _169.z) + _182.z) + _203.value.z) + _232.z) + _258.z) + _279.value.z) + _306) + _327) + _353) + uintBitsToFloat(_368.value.z)) + _397.value) + _419; - vec4 _428 = _427; - _428.w = ((((((((((((((((_67.w + _80.w) + _99.w) + _119.w) + _32[nonuniformEXT(_53)]._m0[0u].w) + _32[nonuniformEXT(_146)]._m0[0u].w) + _169.w) + _182.w) + _203.value.w) + _232.w) + _258.w) + _279.value.w) + _306) + _328) + _353) + uintBitsToFloat(_368.value.w)) + _397.value) + _419; - payload._m0 = _428; - AddCarry _433; - _433._m0 = uaddCarry(SBT._m3.x, (_58 * 4u) + 0u, _433._m1); - PhysicalPointerFloat(uvec2(_433._m0, SBT._m3.y + _433._m1)).value = _420; - AddCarry _445; - _445._m0 = uaddCarry(SBT._m5.x, _290, 
_445._m1); - PhysicalPointerFloat(uvec2(_445._m0, SBT._m5.y + _445._m1)).value = _421; + PhysicalPointerFloat4NonWriteCBVArray _194 = PhysicalPointerFloat4NonWriteCBVArray(SBT._m6); + vec4 _223 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _59], _36[(SBT._m10.x >> 5u) + 13u])), vec2(0.5), 0.0); + vec4 _249 = textureLod(nonuniformEXT(sampler2D(_21[registers._m0 + _58], _36[((SBT._m10.x >> 5u) + 14u) + (_58 ^ 1u)])), vec2(0.5), 0.0); + PhysicalPointerFloat4NonWriteArray _264 = PhysicalPointerFloat4NonWriteArray(SBT._m2); + float _285 = uintBitsToFloat(PhysicalPointerUintNonWriteArray(SBT._m4).value[_58]); + PhysicalPointerUint2NonWriteArray _294 = PhysicalPointerUint2NonWriteArray(SBT._m4); + float _300 = uintBitsToFloat(_294.value[_58].x); + float _301 = uintBitsToFloat(_294.value[_58].y); + PhysicalPointerUint3NonWriteArray _311 = PhysicalPointerUint3NonWriteArray(SBT._m4); + float _320 = uintBitsToFloat(_311.value[_58].z); + PhysicalPointerUint4NonWriteArray _329 = PhysicalPointerUint4NonWriteArray(SBT._m4); + PhysicalPointerFloatArray _350 = PhysicalPointerFloatArray(SBT._m3); + float _366 = uintBitsToFloat(PhysicalPointerUintArray(SBT._m5).value[_58]); + float _367 = ((((((((((((((((_67.x + _80.x) + _99.x) + _119.x) + _32[nonuniformEXT(_53)]._m0[0u].x) + _32[nonuniformEXT(_146)]._m0[0u].x) + _169.x) + _182.x) + _194.value[1u].x) + _223.x) + _249.x) + _264.value[_58].x) + _285) + _300) + uintBitsToFloat(_311.value[_58].x)) + uintBitsToFloat(_329.value[_58].x)) + _350.value[_58]) + _366; + float _368 = ((((((((((((((((_67.y + _80.y) + _99.y) + _119.y) + _32[nonuniformEXT(_53)]._m0[0u].y) + _32[nonuniformEXT(_146)]._m0[0u].y) + _169.y) + _182.y) + _194.value[1u].y) + _223.y) + _249.y) + _264.value[_58].y) + _285) + _301) + uintBitsToFloat(_311.value[_58].y)) + uintBitsToFloat(_329.value[_58].y)) + _350.value[_58]) + _366; + vec4 _371; + _371.x = _367; + _371.y = _368; + _371.z = ((((((((((((((((_67.z + _80.z) + _99.z) + _119.z) + 
_32[nonuniformEXT(_53)]._m0[0u].z) + _32[nonuniformEXT(_146)]._m0[0u].z) + _169.z) + _182.z) + _194.value[1u].z) + _223.z) + _249.z) + _264.value[_58].z) + _285) + _300) + _320) + uintBitsToFloat(_329.value[_58].z)) + _350.value[_58]) + _366; + _371.w = ((((((((((((((((_67.w + _80.w) + _99.w) + _119.w) + _32[nonuniformEXT(_53)]._m0[0u].w) + _32[nonuniformEXT(_146)]._m0[0u].w) + _169.w) + _182.w) + _194.value[1u].w) + _223.w) + _249.w) + _264.value[_58].w) + _285) + _301) + _320) + uintBitsToFloat(_329.value[_58].w)) + _350.value[_58]) + _366; + payload._m0 = _371; + PhysicalPointerFloatArray(SBT._m3).value[_58] = _367; + PhysicalPointerFloatArray(SBT._m5).value[_58] = _368; } @@ -165,7 +143,7 @@ void main() ; SPIR-V ; Version: 1.4 ; Generator: Unknown(30017); 21022 -; Bound: 454 +; Bound: 385 ; Schema: 0 OpCapability Shader OpCapability UniformBufferArrayDynamicIndexing @@ -192,21 +170,22 @@ OpName %16 "SBT" OpName %29 "BindlessCBV" OpName %37 "" OpName %39 "payload" -OpName %195 "AddCarry" -OpName %201 "PhysicalPointerFloat4NonWrite" -OpMemberName %201 0 "value" -OpName %293 "PhysicalPointerUintNonWrite" -OpMemberName %293 0 "value" -OpName %312 "PhysicalPointerUint2NonWrite" -OpMemberName %312 0 "value" -OpName %335 "PhysicalPointerUint3NonWrite" -OpMemberName %335 0 "value" -OpName %359 "PhysicalPointerUint4NonWrite" -OpMemberName %359 0 "value" -OpName %386 "PhysicalPointerFloat" -OpMemberName %386 0 "value" -OpName %407 "PhysicalPointerUint" -OpMemberName %407 0 "value" +OpName %192 "PhysicalPointerFloat4NonWriteCBVArray" +OpMemberName %192 0 "value" +OpName %262 "PhysicalPointerFloat4NonWriteArray" +OpMemberName %262 0 "value" +OpName %279 "PhysicalPointerUintNonWriteArray" +OpMemberName %279 0 "value" +OpName %292 "PhysicalPointerUint2NonWriteArray" +OpMemberName %292 0 "value" +OpName %309 "PhysicalPointerUint3NonWriteArray" +OpMemberName %309 0 "value" +OpName %327 "PhysicalPointerUint4NonWriteArray" +OpMemberName %327 0 "value" +OpName %348 
"PhysicalPointerFloatArray" +OpMemberName %348 0 "value" +OpName %361 "PhysicalPointerUintArray" +OpMemberName %361 0 "value" OpDecorate %6 Block OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 @@ -251,30 +230,41 @@ OpDecorate %130 NonUniform OpDecorate %146 NonUniform OpDecorate %140 NonUniform OpDecorate %147 NonUniform -OpMemberDecorate %201 0 Offset 0 -OpDecorate %201 Block -OpMemberDecorate %201 0 NonWritable -OpDecorate %227 NonUniform -OpDecorate %229 NonUniform -OpDecorate %255 NonUniform -OpDecorate %256 NonUniform -OpDecorate %257 NonUniform -OpMemberDecorate %293 0 Offset 0 -OpDecorate %293 Block -OpMemberDecorate %293 0 NonWritable -OpMemberDecorate %312 0 Offset 0 -OpDecorate %312 Block -OpMemberDecorate %312 0 NonWritable -OpMemberDecorate %335 0 Offset 0 -OpDecorate %335 Block -OpMemberDecorate %335 0 NonWritable -OpMemberDecorate %359 0 Offset 0 -OpDecorate %359 Block -OpMemberDecorate %359 0 NonWritable -OpMemberDecorate %386 0 Offset 0 -OpDecorate %386 Block -OpMemberDecorate %407 0 Offset 0 -OpDecorate %407 Block +OpDecorate %191 ArrayStride 16 +OpMemberDecorate %192 0 Offset 0 +OpDecorate %192 Block +OpMemberDecorate %192 0 NonWritable +OpDecorate %218 NonUniform +OpDecorate %220 NonUniform +OpDecorate %246 NonUniform +OpDecorate %247 NonUniform +OpDecorate %248 NonUniform +OpDecorate %261 ArrayStride 16 +OpMemberDecorate %262 0 Offset 0 +OpDecorate %262 Block +OpMemberDecorate %262 0 NonWritable +OpDecorate %278 ArrayStride 4 +OpMemberDecorate %279 0 Offset 0 +OpDecorate %279 Block +OpMemberDecorate %279 0 NonWritable +OpDecorate %291 ArrayStride 8 +OpMemberDecorate %292 0 Offset 0 +OpDecorate %292 Block +OpMemberDecorate %292 0 NonWritable +OpDecorate %308 ArrayStride 12 +OpMemberDecorate %309 0 Offset 0 +OpDecorate %309 Block +OpMemberDecorate %309 0 NonWritable +OpDecorate %326 ArrayStride 16 +OpMemberDecorate %327 0 Offset 0 +OpDecorate %327 Block +OpMemberDecorate %327 0 NonWritable +OpDecorate %347 ArrayStride 4 
+OpMemberDecorate %348 0 Offset 0 +OpDecorate %348 Block +OpDecorate %360 ArrayStride 4 +OpMemberDecorate %361 0 Offset 0 +OpDecorate %361 Block %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -335,42 +325,50 @@ OpDecorate %407 Block %165 = OpConstant %5 3 %167 = OpTypeVector %5 4 %179 = OpConstant %5 4 -%192 = OpConstant %5 16 -%195 = OpTypeStruct %5 %5 -%201 = OpTypeStruct %26 -%202 = OpTypePointer PhysicalStorageBuffer %201 -%204 = OpTypePointer PhysicalStorageBuffer %26 -%220 = OpTypePointer UniformConstant %33 -%223 = OpConstant %5 10 -%228 = OpTypeSampledImage %18 -%230 = OpConstant %17 0.5 -%231 = OpConstant %17 0 -%233 = OpTypeVector %17 2 -%254 = OpConstant %5 14 -%293 = OpTypeStruct %5 -%294 = OpTypePointer PhysicalStorageBuffer %293 -%303 = OpTypePointer PhysicalStorageBuffer %5 -%312 = OpTypeStruct %13 -%313 = OpTypePointer PhysicalStorageBuffer %312 -%322 = OpTypePointer PhysicalStorageBuffer %13 -%334 = OpTypeVector %5 3 -%335 = OpTypeStruct %334 -%336 = OpTypePointer PhysicalStorageBuffer %335 -%345 = OpTypePointer PhysicalStorageBuffer %334 -%359 = OpTypeStruct %167 -%360 = OpTypePointer PhysicalStorageBuffer %359 -%369 = OpTypePointer PhysicalStorageBuffer %167 -%386 = OpTypeStruct %17 -%387 = OpTypePointer PhysicalStorageBuffer %386 -%398 = OpTypePointer PhysicalStorageBuffer %17 -%407 = OpTypeStruct %5 -%408 = OpTypePointer PhysicalStorageBuffer %407 +%191 = OpTypeArray %26 %27 +%192 = OpTypeStruct %191 +%193 = OpTypePointer PhysicalStorageBuffer %192 +%195 = OpTypePointer PhysicalStorageBuffer %26 +%211 = OpTypePointer UniformConstant %33 +%214 = OpConstant %5 10 +%219 = OpTypeSampledImage %18 +%221 = OpConstant %17 0.5 +%222 = OpConstant %17 0 +%224 = OpTypeVector %17 2 +%245 = OpConstant %5 14 +%261 = OpTypeRuntimeArray %26 +%262 = OpTypeStruct %261 +%263 = OpTypePointer PhysicalStorageBuffer %262 +%278 = OpTypeRuntimeArray %5 +%279 = OpTypeStruct %278 +%280 = OpTypePointer PhysicalStorageBuffer %279 +%282 = OpTypePointer 
PhysicalStorageBuffer %5 +%291 = OpTypeRuntimeArray %13 +%292 = OpTypeStruct %291 +%293 = OpTypePointer PhysicalStorageBuffer %292 +%295 = OpTypePointer PhysicalStorageBuffer %13 +%307 = OpTypeVector %5 3 +%308 = OpTypeRuntimeArray %307 +%309 = OpTypeStruct %308 +%310 = OpTypePointer PhysicalStorageBuffer %309 +%312 = OpTypePointer PhysicalStorageBuffer %307 +%326 = OpTypeRuntimeArray %167 +%327 = OpTypeStruct %326 +%328 = OpTypePointer PhysicalStorageBuffer %327 +%330 = OpTypePointer PhysicalStorageBuffer %167 +%347 = OpTypeRuntimeArray %17 +%348 = OpTypeStruct %347 +%349 = OpTypePointer PhysicalStorageBuffer %348 +%351 = OpTypePointer PhysicalStorageBuffer %17 +%360 = OpTypeRuntimeArray %5 +%361 = OpTypeStruct %360 +%362 = OpTypePointer PhysicalStorageBuffer %361 %3 = OpFunction %1 None %2 %4 = OpLabel -%425 = OpUndef %26 -%440 = OpUndef %17 -OpBranch %452 -%452 = OpLabel +%372 = OpUndef %26 +%378 = OpUndef %17 +OpBranch %383 +%383 = OpLabel %41 = OpAccessChain %40 %16 %11 %42 = OpLoad %13 %41 %44 = OpAccessChain %43 %16 %45 @@ -499,238 +497,161 @@ OpBranch %452 %188 = OpFAdd %17 %175 %184 %189 = OpFAdd %17 %176 %185 %190 = OpFAdd %17 %177 %186 -%191 = OpIMul %5 %57 %192 -%193 = OpCompositeExtract %5 %42 0 -%194 = OpCompositeExtract %5 %42 1 -%196 = OpIAddCarry %195 %193 %191 -%197 = OpCompositeExtract %5 %196 0 -%198 = OpCompositeExtract %5 %196 1 -%199 = OpIAdd %5 %194 %198 -%200 = OpCompositeConstruct %13 %197 %199 -%203 = OpBitcast %202 %200 -%205 = OpAccessChain %204 %203 %45 -%206 = OpLoad %26 %205 Aligned 16 -%207 = OpCompositeExtract %17 %206 0 -%208 = OpCompositeExtract %17 %206 1 -%209 = OpCompositeExtract %17 %206 2 -%210 = OpCompositeExtract %17 %206 3 -%211 = OpFAdd %17 %187 %207 -%212 = OpFAdd %17 %188 %208 -%213 = OpFAdd %17 %189 %209 -%214 = OpFAdd %17 %190 %210 -%216 = OpAccessChain %62 %8 %45 -%217 = OpLoad %5 %216 -%218 = OpIAdd %5 %217 %59 -%215 = OpAccessChain %60 %21 %218 -%219 = OpLoad %18 %215 -%222 = OpAccessChain %48 %16 %223 %45 -%224 = 
OpLoad %5 %222 -%225 = OpShiftRightLogical %5 %224 %9 -%226 = OpIAdd %5 %225 %145 -%221 = OpAccessChain %220 %36 %226 -%227 = OpLoad %33 %221 -%229 = OpSampledImage %228 %219 %227 -%234 = OpCompositeConstruct %233 %230 %230 -%232 = OpImageSampleExplicitLod %26 %229 %234 Lod %231 -%235 = OpCompositeExtract %17 %232 0 -%236 = OpCompositeExtract %17 %232 1 -%237 = OpCompositeExtract %17 %232 2 -%238 = OpCompositeExtract %17 %232 3 -%239 = OpFAdd %17 %211 %235 -%240 = OpFAdd %17 %212 %236 -%241 = OpFAdd %17 %213 %237 -%242 = OpFAdd %17 %214 %238 -%243 = OpBitwiseXor %5 %58 %57 -%245 = OpAccessChain %62 %8 %45 -%246 = OpLoad %5 %245 -%247 = OpIAdd %5 %246 %58 -%244 = OpAccessChain %60 %21 %247 -%248 = OpLoad %18 %244 -%250 = OpAccessChain %48 %16 %223 %45 -%251 = OpLoad %5 %250 -%252 = OpShiftRightLogical %5 %251 %9 -%253 = OpIAdd %5 %252 %254 -%255 = OpIAdd %5 %253 %243 -%249 = OpAccessChain %220 %36 %255 -%256 = OpLoad %33 %249 -%257 = OpSampledImage %228 %248 %256 -%259 = OpCompositeConstruct %233 %230 %230 -%258 = OpImageSampleExplicitLod %26 %257 %259 Lod %231 -%260 = OpCompositeExtract %17 %258 0 -%261 = OpCompositeExtract %17 %258 1 -%262 = OpCompositeExtract %17 %258 2 -%263 = OpCompositeExtract %17 %258 3 -%264 = OpFAdd %17 %239 %260 -%265 = OpFAdd %17 %240 %261 -%266 = OpFAdd %17 %241 %262 -%267 = OpFAdd %17 %242 %263 -%268 = OpAccessChain %40 %16 %162 -%269 = OpLoad %13 %268 -%270 = OpIMul %5 %58 %192 -%271 = OpIAdd %5 %270 %45 -%272 = OpCompositeExtract %5 %269 0 -%273 = OpCompositeExtract %5 %269 1 -%274 = OpIAddCarry %195 %272 %271 -%275 = OpCompositeExtract %5 %274 0 -%276 = OpCompositeExtract %5 %274 1 -%277 = OpIAdd %5 %273 %276 -%278 = OpCompositeConstruct %13 %275 %277 -%279 = OpBitcast %202 %278 -%280 = OpAccessChain %204 %279 %45 -%281 = OpLoad %26 %280 Aligned 4 -%282 = OpCompositeExtract %17 %281 0 -%283 = OpCompositeExtract %17 %281 1 -%284 = OpCompositeExtract %17 %281 2 -%285 = OpCompositeExtract %17 %281 3 -%286 = OpFAdd %17 %264 %282 -%287 = 
OpFAdd %17 %265 %283 -%288 = OpFAdd %17 %266 %284 -%289 = OpFAdd %17 %267 %285 -%290 = OpShiftLeftLogical %5 %58 %162 -%291 = OpAccessChain %40 %16 %179 -%292 = OpLoad %13 %291 -%295 = OpCompositeExtract %5 %292 0 -%296 = OpCompositeExtract %5 %292 1 -%297 = OpIAddCarry %195 %295 %290 +%194 = OpBitcast %193 %42 +%196 = OpInBoundsAccessChain %195 %194 %45 %57 +%197 = OpLoad %26 %196 Aligned 16 +%198 = OpCompositeExtract %17 %197 0 +%199 = OpCompositeExtract %17 %197 1 +%200 = OpCompositeExtract %17 %197 2 +%201 = OpCompositeExtract %17 %197 3 +%202 = OpFAdd %17 %187 %198 +%203 = OpFAdd %17 %188 %199 +%204 = OpFAdd %17 %189 %200 +%205 = OpFAdd %17 %190 %201 +%207 = OpAccessChain %62 %8 %45 +%208 = OpLoad %5 %207 +%209 = OpIAdd %5 %208 %59 +%206 = OpAccessChain %60 %21 %209 +%210 = OpLoad %18 %206 +%213 = OpAccessChain %48 %16 %214 %45 +%215 = OpLoad %5 %213 +%216 = OpShiftRightLogical %5 %215 %9 +%217 = OpIAdd %5 %216 %145 +%212 = OpAccessChain %211 %36 %217 +%218 = OpLoad %33 %212 +%220 = OpSampledImage %219 %210 %218 +%225 = OpCompositeConstruct %224 %221 %221 +%223 = OpImageSampleExplicitLod %26 %220 %225 Lod %222 +%226 = OpCompositeExtract %17 %223 0 +%227 = OpCompositeExtract %17 %223 1 +%228 = OpCompositeExtract %17 %223 2 +%229 = OpCompositeExtract %17 %223 3 +%230 = OpFAdd %17 %202 %226 +%231 = OpFAdd %17 %203 %227 +%232 = OpFAdd %17 %204 %228 +%233 = OpFAdd %17 %205 %229 +%234 = OpBitwiseXor %5 %58 %57 +%236 = OpAccessChain %62 %8 %45 +%237 = OpLoad %5 %236 +%238 = OpIAdd %5 %237 %58 +%235 = OpAccessChain %60 %21 %238 +%239 = OpLoad %18 %235 +%241 = OpAccessChain %48 %16 %214 %45 +%242 = OpLoad %5 %241 +%243 = OpShiftRightLogical %5 %242 %9 +%244 = OpIAdd %5 %243 %245 +%246 = OpIAdd %5 %244 %234 +%240 = OpAccessChain %211 %36 %246 +%247 = OpLoad %33 %240 +%248 = OpSampledImage %219 %239 %247 +%250 = OpCompositeConstruct %224 %221 %221 +%249 = OpImageSampleExplicitLod %26 %248 %250 Lod %222 +%251 = OpCompositeExtract %17 %249 0 +%252 = OpCompositeExtract %17 
%249 1 +%253 = OpCompositeExtract %17 %249 2 +%254 = OpCompositeExtract %17 %249 3 +%255 = OpFAdd %17 %230 %251 +%256 = OpFAdd %17 %231 %252 +%257 = OpFAdd %17 %232 %253 +%258 = OpFAdd %17 %233 %254 +%259 = OpAccessChain %40 %16 %162 +%260 = OpLoad %13 %259 +%264 = OpBitcast %263 %260 +%265 = OpAccessChain %195 %264 %45 %58 +%266 = OpLoad %26 %265 Aligned 4 +%267 = OpCompositeExtract %17 %266 0 +%268 = OpCompositeExtract %17 %266 1 +%269 = OpCompositeExtract %17 %266 2 +%270 = OpCompositeExtract %17 %266 3 +%271 = OpFAdd %17 %255 %267 +%272 = OpFAdd %17 %256 %268 +%273 = OpFAdd %17 %257 %269 +%274 = OpFAdd %17 %258 %270 +%275 = OpShiftLeftLogical %5 %58 %162 +%276 = OpAccessChain %40 %16 %179 +%277 = OpLoad %13 %276 +%281 = OpBitcast %280 %277 +%283 = OpAccessChain %282 %281 %45 %58 +%284 = OpLoad %5 %283 Aligned 4 +%285 = OpBitcast %17 %284 +%286 = OpFAdd %17 %271 %285 +%287 = OpFAdd %17 %272 %285 +%288 = OpFAdd %17 %273 %285 +%289 = OpFAdd %17 %274 %285 +%290 = OpShiftLeftLogical %5 %58 %165 +%294 = OpBitcast %293 %277 +%296 = OpAccessChain %295 %294 %45 %58 +%297 = OpLoad %13 %296 Aligned 4 %298 = OpCompositeExtract %5 %297 0 %299 = OpCompositeExtract %5 %297 1 -%300 = OpIAdd %5 %296 %299 -%301 = OpCompositeConstruct %13 %298 %300 -%302 = OpBitcast %294 %301 -%304 = OpAccessChain %303 %302 %45 -%305 = OpLoad %5 %304 Aligned 4 -%306 = OpBitcast %17 %305 -%307 = OpFAdd %17 %286 %306 -%308 = OpFAdd %17 %287 %306 -%309 = OpFAdd %17 %288 %306 -%310 = OpFAdd %17 %289 %306 -%311 = OpShiftLeftLogical %5 %58 %165 -%314 = OpCompositeExtract %5 %292 0 -%315 = OpCompositeExtract %5 %292 1 -%316 = OpIAddCarry %195 %314 %311 -%317 = OpCompositeExtract %5 %316 0 -%318 = OpCompositeExtract %5 %316 1 -%319 = OpIAdd %5 %315 %318 -%320 = OpCompositeConstruct %13 %317 %319 -%321 = OpBitcast %313 %320 -%323 = OpAccessChain %322 %321 %45 -%324 = OpLoad %13 %323 Aligned 4 -%325 = OpCompositeExtract %5 %324 0 -%326 = OpCompositeExtract %5 %324 1 -%327 = OpBitcast %17 %325 -%328 = 
OpBitcast %17 %326 -%329 = OpFAdd %17 %307 %327 -%330 = OpFAdd %17 %308 %328 -%331 = OpFAdd %17 %309 %327 -%332 = OpFAdd %17 %310 %328 -%333 = OpIMul %5 %58 %54 -%337 = OpCompositeExtract %5 %292 0 -%338 = OpCompositeExtract %5 %292 1 -%339 = OpIAddCarry %195 %337 %333 -%340 = OpCompositeExtract %5 %339 0 -%341 = OpCompositeExtract %5 %339 1 -%342 = OpIAdd %5 %338 %341 -%343 = OpCompositeConstruct %13 %340 %342 -%344 = OpBitcast %336 %343 -%346 = OpAccessChain %345 %344 %45 -%347 = OpLoad %334 %346 Aligned 4 -%348 = OpCompositeExtract %5 %347 0 -%349 = OpCompositeExtract %5 %347 1 -%350 = OpCompositeExtract %5 %347 2 -%351 = OpBitcast %17 %348 -%352 = OpBitcast %17 %349 -%353 = OpBitcast %17 %350 -%354 = OpFAdd %17 %329 %351 -%355 = OpFAdd %17 %330 %352 -%356 = OpFAdd %17 %331 %353 -%357 = OpFAdd %17 %332 %353 -%358 = OpShiftLeftLogical %5 %58 %179 -%361 = OpCompositeExtract %5 %292 0 -%362 = OpCompositeExtract %5 %292 1 -%363 = OpIAddCarry %195 %361 %358 -%364 = OpCompositeExtract %5 %363 0 -%365 = OpCompositeExtract %5 %363 1 -%366 = OpIAdd %5 %362 %365 -%367 = OpCompositeConstruct %13 %364 %366 -%368 = OpBitcast %360 %367 -%370 = OpAccessChain %369 %368 %45 -%371 = OpLoad %167 %370 Aligned 4 -%372 = OpCompositeExtract %5 %371 0 -%373 = OpCompositeExtract %5 %371 1 -%374 = OpCompositeExtract %5 %371 2 -%375 = OpCompositeExtract %5 %371 3 -%376 = OpBitcast %17 %372 -%377 = OpBitcast %17 %373 -%378 = OpBitcast %17 %374 -%379 = OpBitcast %17 %375 -%380 = OpFAdd %17 %354 %376 -%381 = OpFAdd %17 %355 %377 -%382 = OpFAdd %17 %356 %378 -%383 = OpFAdd %17 %357 %379 -%384 = OpAccessChain %40 %16 %165 -%385 = OpLoad %13 %384 -%388 = OpIMul %5 %58 %179 -%389 = OpIAdd %5 %388 %45 -%390 = OpCompositeExtract %5 %385 0 -%391 = OpCompositeExtract %5 %385 1 -%392 = OpIAddCarry %195 %390 %389 -%393 = OpCompositeExtract %5 %392 0 -%394 = OpCompositeExtract %5 %392 1 -%395 = OpIAdd %5 %391 %394 -%396 = OpCompositeConstruct %13 %393 %395 -%397 = OpBitcast %387 %396 -%399 = 
OpAccessChain %398 %397 %45 -%400 = OpLoad %17 %399 Aligned 4 -%401 = OpFAdd %17 %380 %400 -%402 = OpFAdd %17 %381 %400 -%403 = OpFAdd %17 %382 %400 -%404 = OpFAdd %17 %383 %400 -%405 = OpAccessChain %40 %16 %9 -%406 = OpLoad %13 %405 -%409 = OpCompositeExtract %5 %406 0 -%410 = OpCompositeExtract %5 %406 1 -%411 = OpIAddCarry %195 %409 %290 -%412 = OpCompositeExtract %5 %411 0 -%413 = OpCompositeExtract %5 %411 1 -%414 = OpIAdd %5 %410 %413 -%415 = OpCompositeConstruct %13 %412 %414 -%416 = OpBitcast %408 %415 -%417 = OpAccessChain %303 %416 %45 -%418 = OpLoad %5 %417 Aligned 4 -%419 = OpBitcast %17 %418 -%420 = OpFAdd %17 %401 %419 -%421 = OpFAdd %17 %402 %419 -%422 = OpFAdd %17 %403 %419 -%423 = OpFAdd %17 %404 %419 -%424 = OpCompositeInsert %26 %420 %425 0 -%426 = OpCompositeInsert %26 %421 %424 1 -%427 = OpCompositeInsert %26 %422 %426 2 -%428 = OpCompositeInsert %26 %423 %427 3 -OpStore %74 %428 -%429 = OpIMul %5 %58 %179 -%430 = OpIAdd %5 %429 %45 -%431 = OpCompositeExtract %5 %385 0 -%432 = OpCompositeExtract %5 %385 1 -%433 = OpIAddCarry %195 %431 %430 -%434 = OpCompositeExtract %5 %433 0 -%435 = OpCompositeExtract %5 %433 1 -%436 = OpIAdd %5 %432 %435 -%437 = OpCompositeConstruct %13 %434 %436 -%438 = OpBitcast %387 %437 -%439 = OpAccessChain %398 %438 %45 -OpStore %439 %420 Aligned 4 -%441 = OpAccessChain %40 %16 %9 -%442 = OpLoad %13 %441 -%443 = OpCompositeExtract %5 %442 0 -%444 = OpCompositeExtract %5 %442 1 -%445 = OpIAddCarry %195 %443 %290 -%446 = OpCompositeExtract %5 %445 0 -%447 = OpCompositeExtract %5 %445 1 -%448 = OpIAdd %5 %444 %447 -%449 = OpCompositeConstruct %13 %446 %448 -%450 = OpBitcast %387 %449 -%451 = OpAccessChain %398 %450 %45 -OpStore %451 %421 Aligned 4 +%300 = OpBitcast %17 %298 +%301 = OpBitcast %17 %299 +%302 = OpFAdd %17 %286 %300 +%303 = OpFAdd %17 %287 %301 +%304 = OpFAdd %17 %288 %300 +%305 = OpFAdd %17 %289 %301 +%306 = OpIMul %5 %58 %54 +%311 = OpBitcast %310 %277 +%313 = OpAccessChain %312 %311 %45 %58 +%314 = OpLoad 
%307 %313 Aligned 4 +%315 = OpCompositeExtract %5 %314 0 +%316 = OpCompositeExtract %5 %314 1 +%317 = OpCompositeExtract %5 %314 2 +%318 = OpBitcast %17 %315 +%319 = OpBitcast %17 %316 +%320 = OpBitcast %17 %317 +%321 = OpFAdd %17 %302 %318 +%322 = OpFAdd %17 %303 %319 +%323 = OpFAdd %17 %304 %320 +%324 = OpFAdd %17 %305 %320 +%325 = OpShiftLeftLogical %5 %58 %179 +%329 = OpBitcast %328 %277 +%331 = OpAccessChain %330 %329 %45 %58 +%332 = OpLoad %167 %331 Aligned 4 +%333 = OpCompositeExtract %5 %332 0 +%334 = OpCompositeExtract %5 %332 1 +%335 = OpCompositeExtract %5 %332 2 +%336 = OpCompositeExtract %5 %332 3 +%337 = OpBitcast %17 %333 +%338 = OpBitcast %17 %334 +%339 = OpBitcast %17 %335 +%340 = OpBitcast %17 %336 +%341 = OpFAdd %17 %321 %337 +%342 = OpFAdd %17 %322 %338 +%343 = OpFAdd %17 %323 %339 +%344 = OpFAdd %17 %324 %340 +%345 = OpAccessChain %40 %16 %165 +%346 = OpLoad %13 %345 +%350 = OpBitcast %349 %346 +%352 = OpAccessChain %351 %350 %45 %58 +%353 = OpLoad %17 %352 Aligned 4 +%354 = OpFAdd %17 %341 %353 +%355 = OpFAdd %17 %342 %353 +%356 = OpFAdd %17 %343 %353 +%357 = OpFAdd %17 %344 %353 +%358 = OpAccessChain %40 %16 %9 +%359 = OpLoad %13 %358 +%363 = OpBitcast %362 %359 +%364 = OpAccessChain %282 %363 %45 %58 +%365 = OpLoad %5 %364 Aligned 4 +%366 = OpBitcast %17 %365 +%367 = OpFAdd %17 %354 %366 +%368 = OpFAdd %17 %355 %366 +%369 = OpFAdd %17 %356 %366 +%370 = OpFAdd %17 %357 %366 +%371 = OpCompositeInsert %26 %367 %372 0 +%373 = OpCompositeInsert %26 %368 %371 1 +%374 = OpCompositeInsert %26 %369 %373 2 +%375 = OpCompositeInsert %26 %370 %374 3 +OpStore %74 %375 +%376 = OpBitcast %349 %346 +%377 = OpAccessChain %351 %376 %45 %58 +OpStore %377 %367 Aligned 4 +%379 = OpAccessChain %40 %16 %9 +%380 = OpLoad %13 %379 +%381 = OpBitcast %349 %380 +%382 = OpAccessChain %351 %381 %45 %58 +OpStore %382 %368 Aligned 4 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/rt-resources.bindless.rmiss 
b/reference/shaders/resources/rt-resources.bindless.rmiss index 845746d..10f0d7c 100644 --- a/reference/shaders/resources/rt-resources.bindless.rmiss +++ b/reference/shaders/resources/rt-resources.bindless.rmiss @@ -32,15 +32,12 @@ void main() vec4 _31 = texelFetch(_13[registers._m0 + (payload._m1 & 1u)], ivec2(uvec2(0u)), int(0u)); vec4 _45 = texelFetch(_13[registers._m0 + payload._m1], ivec2(uvec2(0u)), int(0u)); vec4 _62 = texelFetch(_13[(registers._m0 + 10u) + payload._m1], ivec2(uvec2(0u)), int(0u)); - vec4 _72 = _73; + vec4 _72; _72.x = (_31.x + _45.x) + _62.x; - vec4 _74 = _72; - _74.y = (_31.y + _45.y) + _62.y; - vec4 _75 = _74; - _75.z = (_31.z + _45.z) + _62.z; - vec4 _76 = _75; - _76.w = (_31.w + _45.w) + _62.w; - payload._m0 = _76; + _72.y = (_31.y + _45.y) + _62.y; + _72.z = (_31.z + _45.z) + _62.z; + _72.w = (_31.w + _45.w) + _62.w; + payload._m0 = _72; } diff --git a/reference/shaders/resources/rt-resources.rmiss b/reference/shaders/resources/rt-resources.rmiss index 7dcb965..81dfb27 100644 --- a/reference/shaders/resources/rt-resources.rmiss +++ b/reference/shaders/resources/rt-resources.rmiss @@ -19,15 +19,12 @@ void main() { vec4 _28 = texelFetch(Tex[payload._m1 & 1u], ivec2(uvec2(0u)), int(0u)); vec4 _39 = texelFetch(TexUnsized[payload._m1], ivec2(uvec2(0u)), int(0u)); - vec4 _49 = _50; + vec4 _49; _49.x = _28.x + _39.x; - vec4 _51 = _49; - _51.y = _28.y + _39.y; - vec4 _52 = _51; - _52.z = _28.z + _39.z; - vec4 _53 = _52; - _53.w = _28.w + _39.w; - payload._m0 = _53; + _49.y = _28.y + _39.y; + _49.z = _28.z + _39.z; + _49.w = _28.w + _39.w; + payload._m0 = _49; } diff --git a/reference/shaders/resources/sm66/atomics-64bit.root-descriptor.sm66.comp b/reference/shaders/resources/sm66/atomics-64bit.root-descriptor.sm66.comp index a4660a7..6a97050 100644 --- a/reference/shaders/resources/sm66/atomics-64bit.root-descriptor.sm66.comp +++ b/reference/shaders/resources/sm66/atomics-64bit.root-descriptor.sm66.comp @@ -1,10 +1,11 @@ #version 460 
#extension GL_ARB_gpu_shader_int64 : require #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PhysicalPointerUint64Array; -layout(buffer_reference, std430) buffer PhysicalPointerUint64Array +layout(buffer_reference, buffer_reference_align = 8, std430) buffer PhysicalPointerUint64Array { uint64_t value[]; }; diff --git a/reference/shaders/resources/sm66/binding-range-selection.bindless.sm66.comp b/reference/shaders/resources/sm66/binding-range-selection.bindless.sm66.comp index 4d353ba..7cd4ed4 100644 --- a/reference/shaders/resources/sm66/binding-range-selection.bindless.sm66.comp +++ b/reference/shaders/resources/sm66/binding-range-selection.bindless.sm66.comp @@ -4,7 +4,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/reference/shaders/resources/sm66/structured-buffer-heap.sm66.frag b/reference/shaders/resources/sm66/structured-buffer-heap.sm66.frag index b5b2fa7..550183a 100644 --- a/reference/shaders/resources/sm66/structured-buffer-heap.sm66.frag +++ b/reference/shaders/resources/sm66/structured-buffer-heap.sm66.frag @@ -34,14 +34,9 @@ void main() vec4 _144 = uintBitsToFloat(uvec4(texelFetch(_9[_60], int(_131)).x, texelFetch(_9[_60], int(_131 + 1u)).x, texelFetch(_9[_60], int(_131 + 2u)).x, texelFetch(_9[_60], int(_131 + 3u)).x)); uvec4 _152 = imageLoad(_13[INDEX + 4u], int(_37)); uint _156 = _41 * 2u; - uvec4 _157 = imageLoad(_16[_68], int(_156)); - uvec4 _159 = imageLoad(_16[_68], int(_156 + 1u)); - vec2 _163 = uintBitsToFloat(uvec2(_157.x, _159.x)); + vec2 _163 = uintBitsToFloat(uvec2(imageLoad(_16[_68], int(_156)).x, imageLoad(_16[_68], int(_156 + 1u)).x)); uint _168 = _45 * 3u; - 
uvec4 _169 = imageLoad(_13[_72], int(_168)); - uvec4 _171 = imageLoad(_13[_72], int(_168 + 1u)); - uvec4 _174 = imageLoad(_13[_72], int(_168 + 2u)); - vec3 _178 = uintBitsToFloat(uvec3(_169.x, _171.x, _174.x)); + vec3 _178 = uintBitsToFloat(uvec3(imageLoad(_13[_72], int(_168)).x, imageLoad(_13[_72], int(_168 + 1u)).x, imageLoad(_13[_72], int(_168 + 2u)).x)); uint _185 = _49 * 4u; uvec4 _186 = imageLoad(_13[_76], int(_185)); uvec4 _188 = imageLoad(_13[_76], int(_185 + 1u)); diff --git a/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.sm66.frag b/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.sm66.frag index 6ec773e..4fdcdf0 100644 --- a/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.sm66.frag +++ b/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.sm66.frag @@ -78,8 +78,7 @@ void main() vec3 _156 = uintBitsToFloat(_22[INDEX + 2u]._m0[_91]); vec4 _165 = uintBitsToFloat(_28[INDEX + 3u]._m0[_95]); uint _174 = _33[INDEX + 4u]._m0[_83]; - uvec2 _178 = _38[_112]._m0[_87]; - vec2 _179 = uintBitsToFloat(_178); + vec2 _179 = uintBitsToFloat(_38[_112]._m0[_87]); uvec3 _185 = _43[INDEX + 6u]._m0[_91]; vec3 _186 = uintBitsToFloat(_185); uvec4 _194 = _48[INDEX + 7u]._m0[_95]; diff --git a/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.ssbo-align.sm66.frag b/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.ssbo-align.sm66.frag index c921f2b..992a3be 100644 --- a/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.ssbo-align.sm66.frag +++ b/reference/shaders/resources/sm66/structured-buffer-heap.ssbo.ssbo-align.sm66.frag @@ -96,14 +96,10 @@ void main() vec3 _224 = uintBitsToFloat(uvec3(_15[_106]._m0[_212], _15[_106]._m0[_212 + 1u], _15[_106]._m0[_212 + 2u])); vec4 _239 = uintBitsToFloat(_26[_112]._m0[(_88 < _118.y) ? (_88 + _118.x) : 268435455u]); uint _253 = _31[_121]._m0[(_76 < _127.y) ? (_76 + _127.x) : 1073741820u]; - uvec2 _262 = _36[_128]._m0[(_80 < _135.y) ? 
(_80 + _135.x) : 536870911u]; - vec2 _263 = uintBitsToFloat(_262); + vec2 _263 = uintBitsToFloat(_36[_128]._m0[(_80 < _135.y) ? (_80 + _135.x) : 536870911u]); uint _268 = _84 * 3u; uint _273 = (_268 < _142.y) ? (_268 + _142.x) : 1073741820u; - uint _275 = _31[_136]._m0[_273]; - uint _278 = _31[_136]._m0[_273 + 1u]; - uint _281 = _31[_136]._m0[_273 + 2u]; - vec3 _283 = uintBitsToFloat(uvec3(_275, _278, _281)); + vec3 _283 = uintBitsToFloat(uvec3(_31[_136]._m0[_273], _31[_136]._m0[_273 + 1u], _31[_136]._m0[_273 + 2u])); uvec4 _296 = _41[_143]._m0[(_88 < _150.y) ? (_88 + _150.x) : 268435455u]; vec4 _297 = uintBitsToFloat(_296); uint _312 = _46[_151]._m0[(_76 < _158.y) ? (_76 + _158.x) : 1073741820u]; diff --git a/reference/shaders/resources/sm66/structured-buffer-heap.typed-buffer-offset.sm66.frag b/reference/shaders/resources/sm66/structured-buffer-heap.typed-buffer-offset.sm66.frag index d4cb4a8..2a62d9d 100644 --- a/reference/shaders/resources/sm66/structured-buffer-heap.typed-buffer-offset.sm66.frag +++ b/reference/shaders/resources/sm66/structured-buffer-heap.typed-buffer-offset.sm66.frag @@ -71,9 +71,7 @@ void main() uvec4 _223 = imageLoad(_21[_84], int((_45 < _13._m0[_89].y) ? (_45 + _13._m0[_89].x) : 1073741820u)); uint _227 = _49 * 2u; uint _232 = (_227 < _13._m0[_96].y) ? (_227 + _13._m0[_96].x) : 1073741820u; - uvec4 _233 = imageLoad(_24[_92], int(_232)); - uvec4 _235 = imageLoad(_24[_92], int(_232 + 1u)); - vec2 _239 = uintBitsToFloat(uvec2(_233.x, _235.x)); + vec2 _239 = uintBitsToFloat(uvec2(imageLoad(_24[_92], int(_232)).x, imageLoad(_24[_92], int(_232 + 1u)).x)); uint _244 = _53 * 3u; uint _249 = (_244 < _13._m0[_103].y) ? 
(_244 + _13._m0[_103].x) : 1073741820u; uvec4 _250 = imageLoad(_21[_99], int(_249)); diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag index c886c51..53e1ea3 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag @@ -9,29 +9,24 @@ #error No extension available for FP16. #endif #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloatNonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloatArray; +layout(buffer_reference) buffer PhysicalPointerUintArray; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloatNonWriteArray { - uint _m0; - uint _m1; + float value[]; }; -layout(buffer_reference) buffer PhysicalPointerFloatNonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat; -layout(buffer_reference) buffer PhysicalPointerUint; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloatNonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerFloatArray { - float value; + float value[]; }; -layout(buffer_reference, std430) buffer PhysicalPointerFloat +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerUintArray { - float value; -}; - -layout(buffer_reference, std430) buffer PhysicalPointerUint -{ - uint value; + uint value[]; }; layout(set = 0, binding = 1, std430) restrict readonly buffer SSBO @@ -50,24 +45,16 @@ layout(push_constant, std430) uniform RootConstants layout(location = 0) flat in mediump int A; layout(location = 0) out int SV_Target; -float16_t _84; -uint16_t _108; +float16_t _57; +uint16_t _73; void main() { uint16_t _31 = 
uint16_t(A); uint _32 = uint(int16_t(_31)); - AddCarry _43; - _43._m0 = uaddCarry(registers._m1.x, (_32 * 4u) + 0u, _43._m1); - uint _56 = uint(int16_t(_31 + 1us)); - AddCarry _61; - _61._m0 = uaddCarry(registers._m1.x, (_56 * 4u) + 0u, _61._m1); - AddCarry _77; - _77._m0 = uaddCarry(registers._m2.x, (_32 * 4u) + 0u, _77._m1); - PhysicalPointerFloat(uvec2(_77._m0, registers._m2.y + _77._m1)).value = float(float16_t(PhysicalPointerFloatNonWrite(uvec2(_61._m0, registers._m1.y + _61._m1)).value) + float16_t(PhysicalPointerFloatNonWrite(uvec2(_43._m0, registers._m1.y + _43._m1)).value)); - AddCarry _100; - _100._m0 = uaddCarry(registers._m3.x, (_32 * 4u) + 0u, _100._m1); - PhysicalPointerUint(uvec2(_100._m0, registers._m3.y + _100._m1)).value = uint(uint16_t(_13._m0[_56]) + uint16_t(_13._m0[_32])); + uint _46 = uint(int16_t(_31 + 1us)); + PhysicalPointerFloatArray(registers._m2).value[_32] = float(float16_t(PhysicalPointerFloatNonWriteArray(registers._m1).value[_46]) + float16_t(PhysicalPointerFloatNonWriteArray(registers._m1).value[_32])); + PhysicalPointerUintArray(registers._m3).value[_32] = uint(uint16_t(_13._m0[_46]) + uint16_t(_13._m0[_32])); SV_Target = int(10u); } @@ -77,7 +64,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 114 +; Bound: 79 ; Schema: 0 OpCapability Shader OpCapability Float16 @@ -93,13 +80,12 @@ OpName %9 "registers" OpName %11 "SSBO" OpName %16 "A" OpName %18 "SV_Target" -OpName %34 "PhysicalPointerFloatNonWrite" -OpMemberName %34 0 "value" -OpName %42 "AddCarry" -OpName %71 "PhysicalPointerFloat" -OpMemberName %71 0 "value" -OpName %94 "PhysicalPointerUint" -OpMemberName %94 0 "value" +OpName %35 "PhysicalPointerFloatNonWriteArray" +OpMemberName %35 0 "value" +OpName %53 "PhysicalPointerFloatArray" +OpMemberName %53 0 "value" +OpName %68 "PhysicalPointerUintArray" +OpMemberName %68 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 @@ -116,13 +102,16 @@ OpDecorate 
%16 RelaxedPrecision OpDecorate %16 Flat OpDecorate %16 Location 0 OpDecorate %18 Location 0 -OpMemberDecorate %34 0 Offset 0 -OpDecorate %34 Block -OpMemberDecorate %34 0 NonWritable -OpMemberDecorate %71 0 Offset 0 -OpDecorate %71 Block -OpMemberDecorate %94 0 Offset 0 -OpDecorate %94 Block +OpDecorate %34 ArrayStride 4 +OpMemberDecorate %35 0 Offset 0 +OpDecorate %35 Block +OpMemberDecorate %35 0 NonWritable +OpDecorate %52 ArrayStride 4 +OpMemberDecorate %53 0 Offset 0 +OpDecorate %53 Block +OpDecorate %67 ArrayStride 4 +OpMemberDecorate %68 0 Offset 0 +OpDecorate %68 Block %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -145,27 +134,28 @@ OpDecorate %94 Block %27 = OpConstant %5 1 %30 = OpTypeInt 16 0 %33 = OpTypeFloat 32 -%34 = OpTypeStruct %33 -%35 = OpTypePointer PhysicalStorageBuffer %34 -%36 = OpConstant %5 0 -%38 = OpConstant %5 4 -%42 = OpTypeStruct %5 %5 -%49 = OpTypePointer PhysicalStorageBuffer %33 -%52 = OpTypeFloat 16 -%55 = OpConstant %30 1 -%71 = OpTypeStruct %33 -%72 = OpTypePointer PhysicalStorageBuffer %71 -%86 = OpTypePointer StorageBuffer %5 -%94 = OpTypeStruct %5 -%95 = OpTypePointer PhysicalStorageBuffer %94 -%106 = OpTypePointer PhysicalStorageBuffer %5 -%110 = OpConstant %5 10 +%34 = OpTypeRuntimeArray %33 +%35 = OpTypeStruct %34 +%36 = OpTypePointer PhysicalStorageBuffer %35 +%38 = OpTypePointer PhysicalStorageBuffer %33 +%40 = OpConstant %5 0 +%42 = OpTypeFloat 16 +%45 = OpConstant %30 1 +%52 = OpTypeRuntimeArray %33 +%53 = OpTypeStruct %52 +%54 = OpTypePointer PhysicalStorageBuffer %53 +%59 = OpTypePointer StorageBuffer %5 +%67 = OpTypeRuntimeArray %5 +%68 = OpTypeStruct %67 +%69 = OpTypePointer PhysicalStorageBuffer %68 +%71 = OpTypePointer PhysicalStorageBuffer %5 +%75 = OpConstant %5 10 %3 = OpFunction %1 None %2 %4 = OpLabel -%84 = OpUndef %52 -%108 = OpUndef %30 -OpBranch %112 -%112 = OpLabel +%57 = OpUndef %42 +%73 = OpUndef %30 +OpBranch %77 +%77 = OpLabel %20 = OpAccessChain %19 %9 %21 %22 = OpLoad %6 %20 %23 = 
OpAccessChain %19 %9 %24 @@ -175,70 +165,34 @@ OpBranch %112 %29 = OpLoad %14 %16 %31 = OpSConvert %30 %29 %32 = OpSConvert %5 %31 -%37 = OpIMul %5 %32 %38 -%39 = OpIAdd %5 %37 %36 -%40 = OpCompositeExtract %5 %28 0 -%41 = OpCompositeExtract %5 %28 1 -%43 = OpIAddCarry %42 %40 %39 -%44 = OpCompositeExtract %5 %43 0 -%45 = OpCompositeExtract %5 %43 1 -%46 = OpIAdd %5 %41 %45 -%47 = OpCompositeConstruct %6 %44 %46 -%48 = OpBitcast %35 %47 -%50 = OpAccessChain %49 %48 %36 -%51 = OpLoad %33 %50 Aligned 4 -%53 = OpFConvert %52 %51 -%54 = OpIAdd %30 %31 %55 -%56 = OpSConvert %5 %54 -%57 = OpIMul %5 %56 %38 -%58 = OpIAdd %5 %57 %36 -%59 = OpCompositeExtract %5 %28 0 -%60 = OpCompositeExtract %5 %28 1 -%61 = OpIAddCarry %42 %59 %58 -%62 = OpCompositeExtract %5 %61 0 -%63 = OpCompositeExtract %5 %61 1 -%64 = OpIAdd %5 %60 %63 -%65 = OpCompositeConstruct %6 %62 %64 -%66 = OpBitcast %35 %65 -%67 = OpAccessChain %49 %66 %36 -%68 = OpLoad %33 %67 Aligned 4 -%69 = OpFConvert %52 %68 -%70 = OpFAdd %52 %69 %53 -%73 = OpIMul %5 %32 %38 -%74 = OpIAdd %5 %73 %36 -%75 = OpCompositeExtract %5 %25 0 -%76 = OpCompositeExtract %5 %25 1 -%77 = OpIAddCarry %42 %75 %74 -%78 = OpCompositeExtract %5 %77 0 -%79 = OpCompositeExtract %5 %77 1 -%80 = OpIAdd %5 %76 %79 -%81 = OpCompositeConstruct %6 %78 %80 -%82 = OpBitcast %72 %81 -%83 = OpAccessChain %49 %82 %36 -%85 = OpFConvert %33 %70 -OpStore %83 %85 Aligned 4 -%87 = OpAccessChain %86 %13 %36 %32 -%88 = OpLoad %5 %87 -%89 = OpUConvert %30 %88 -%90 = OpAccessChain %86 %13 %36 %56 -%91 = OpLoad %5 %90 -%92 = OpUConvert %30 %91 -%93 = OpIAdd %30 %92 %89 -%96 = OpIMul %5 %32 %38 -%97 = OpIAdd %5 %96 %36 -%98 = OpCompositeExtract %5 %22 0 -%99 = OpCompositeExtract %5 %22 1 -%100 = OpIAddCarry %42 %98 %97 -%101 = OpCompositeExtract %5 %100 0 -%102 = OpCompositeExtract %5 %100 1 -%103 = OpIAdd %5 %99 %102 -%104 = OpCompositeConstruct %6 %101 %103 -%105 = OpBitcast %95 %104 -%107 = OpAccessChain %106 %105 %36 -%109 = OpUConvert %5 %93 -OpStore %107 
%109 Aligned 4 -%111 = OpBitcast %14 %110 -OpStore %18 %111 +%37 = OpBitcast %36 %28 +%39 = OpAccessChain %38 %37 %40 %32 +%41 = OpLoad %33 %39 Aligned 4 +%43 = OpFConvert %42 %41 +%44 = OpIAdd %30 %31 %45 +%46 = OpSConvert %5 %44 +%47 = OpBitcast %36 %28 +%48 = OpAccessChain %38 %47 %40 %46 +%49 = OpLoad %33 %48 Aligned 4 +%50 = OpFConvert %42 %49 +%51 = OpFAdd %42 %50 %43 +%55 = OpBitcast %54 %25 +%56 = OpAccessChain %38 %55 %40 %32 +%58 = OpFConvert %33 %51 +OpStore %56 %58 Aligned 4 +%60 = OpAccessChain %59 %13 %40 %32 +%61 = OpLoad %5 %60 +%62 = OpUConvert %30 %61 +%63 = OpAccessChain %59 %13 %40 %46 +%64 = OpLoad %5 %63 +%65 = OpUConvert %30 %64 +%66 = OpIAdd %30 %65 %62 +%70 = OpBitcast %69 %22 +%72 = OpAccessChain %71 %70 %40 %32 +%74 = OpUConvert %5 %66 +OpStore %72 %74 Aligned 4 +%76 = OpBitcast %14 %75 +OpStore %18 %76 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag index 8cb4a89..dd70b90 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag @@ -1,22 +1,17 @@ #version 460 #extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require -struct AddCarry +layout(buffer_reference) buffer PhysicalPointerFloatNonWriteArray; +layout(buffer_reference) buffer PhysicalPointerFloatArray; +layout(buffer_reference, buffer_reference_align = 4, std430) readonly buffer PhysicalPointerFloatNonWriteArray { - uint _m0; - uint _m1; + float value[]; }; -layout(buffer_reference) buffer PhysicalPointerFloatNonWrite; -layout(buffer_reference) buffer PhysicalPointerFloat; -layout(buffer_reference, std430) readonly buffer PhysicalPointerFloatNonWrite +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PhysicalPointerFloatArray { - float value; -}; - 
-layout(buffer_reference, std430) buffer PhysicalPointerFloat -{ - float value; + float value[]; }; layout(push_constant, std430) uniform RootConstants @@ -30,18 +25,12 @@ layout(push_constant, std430) uniform RootConstants layout(location = 0) flat in mediump int A; layout(location = 0) out int SV_Target; -float _70; +float _43; void main() { uint _23 = uint(A); - AddCarry _34; - _34._m0 = uaddCarry(registers._m1.x, (_23 * 4u) + 0u, _34._m1); - AddCarry _48; - _48._m0 = uaddCarry(registers._m1.x, ((_23 + 1u) * 4u) + 0u, _48._m1); - AddCarry _63; - _63._m0 = uaddCarry(registers._m2.x, (_23 * 4u) + 0u, _63._m1); - PhysicalPointerFloat(uvec2(_63._m0, registers._m2.y + _63._m1)).value = PhysicalPointerFloatNonWrite(uvec2(_48._m0, registers._m1.y + _48._m1)).value + PhysicalPointerFloatNonWrite(uvec2(_34._m0, registers._m1.y + _34._m1)).value; + PhysicalPointerFloatArray(registers._m2).value[_23] = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23 + 1u] + PhysicalPointerFloatNonWriteArray(registers._m1).value[_23]; SV_Target = int(10u); } @@ -51,7 +40,7 @@ void main() ; SPIR-V ; Version: 1.3 ; Generator: Unknown(30017); 21022 -; Bound: 75 +; Bound: 48 ; Schema: 0 OpCapability Shader OpCapability PhysicalStorageBufferAddresses @@ -64,11 +53,10 @@ OpName %7 "RootConstants" OpName %9 "registers" OpName %12 "A" OpName %14 "SV_Target" -OpName %25 "PhysicalPointerFloatNonWrite" -OpMemberName %25 0 "value" -OpName %33 "AddCarry" -OpName %57 "PhysicalPointerFloat" -OpMemberName %57 0 "value" +OpName %26 "PhysicalPointerFloatNonWriteArray" +OpMemberName %26 0 "value" +OpName %39 "PhysicalPointerFloatArray" +OpMemberName %39 0 "value" OpDecorate %7 Block OpMemberDecorate %7 0 Offset 0 OpMemberDecorate %7 1 Offset 8 @@ -78,11 +66,13 @@ OpDecorate %12 RelaxedPrecision OpDecorate %12 Flat OpDecorate %12 Location 0 OpDecorate %14 Location 0 -OpMemberDecorate %25 0 Offset 0 -OpDecorate %25 Block -OpMemberDecorate %25 0 NonWritable -OpMemberDecorate %57 0 Offset 0 -OpDecorate 
%57 Block +OpDecorate %25 ArrayStride 4 +OpMemberDecorate %26 0 Offset 0 +OpDecorate %26 Block +OpMemberDecorate %26 0 NonWritable +OpDecorate %38 ArrayStride 4 +OpMemberDecorate %39 0 Offset 0 +OpDecorate %39 Block %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 @@ -99,66 +89,39 @@ OpDecorate %57 Block %17 = OpConstant %5 2 %20 = OpConstant %5 1 %24 = OpTypeFloat 32 -%25 = OpTypeStruct %24 -%26 = OpTypePointer PhysicalStorageBuffer %25 -%27 = OpConstant %5 0 -%29 = OpConstant %5 4 -%33 = OpTypeStruct %5 %5 -%40 = OpTypePointer PhysicalStorageBuffer %24 -%57 = OpTypeStruct %24 -%58 = OpTypePointer PhysicalStorageBuffer %57 -%71 = OpConstant %5 10 +%25 = OpTypeRuntimeArray %24 +%26 = OpTypeStruct %25 +%27 = OpTypePointer PhysicalStorageBuffer %26 +%29 = OpTypePointer PhysicalStorageBuffer %24 +%31 = OpConstant %5 0 +%38 = OpTypeRuntimeArray %24 +%39 = OpTypeStruct %38 +%40 = OpTypePointer PhysicalStorageBuffer %39 +%44 = OpConstant %5 10 %3 = OpFunction %1 None %2 %4 = OpLabel -%70 = OpUndef %24 -OpBranch %73 -%73 = OpLabel +%43 = OpUndef %24 +OpBranch %46 +%46 = OpLabel %16 = OpAccessChain %15 %9 %17 %18 = OpLoad %6 %16 %19 = OpAccessChain %15 %9 %20 %21 = OpLoad %6 %19 %22 = OpLoad %10 %12 %23 = OpBitcast %5 %22 -%28 = OpIMul %5 %23 %29 -%30 = OpIAdd %5 %28 %27 -%31 = OpCompositeExtract %5 %21 0 -%32 = OpCompositeExtract %5 %21 1 -%34 = OpIAddCarry %33 %31 %30 -%35 = OpCompositeExtract %5 %34 0 -%36 = OpCompositeExtract %5 %34 1 -%37 = OpIAdd %5 %32 %36 -%38 = OpCompositeConstruct %6 %35 %37 -%39 = OpBitcast %26 %38 -%41 = OpAccessChain %40 %39 %27 -%42 = OpLoad %24 %41 Aligned 4 -%43 = OpIAdd %5 %23 %20 -%44 = OpIMul %5 %43 %29 -%45 = OpIAdd %5 %44 %27 -%46 = OpCompositeExtract %5 %21 0 -%47 = OpCompositeExtract %5 %21 1 -%48 = OpIAddCarry %33 %46 %45 -%49 = OpCompositeExtract %5 %48 0 -%50 = OpCompositeExtract %5 %48 1 -%51 = OpIAdd %5 %47 %50 -%52 = OpCompositeConstruct %6 %49 %51 -%53 = OpBitcast %26 %52 -%54 = OpAccessChain %40 %53 %27 -%55 = 
OpLoad %24 %54 Aligned 4 -%56 = OpFAdd %24 %55 %42 -%59 = OpIMul %5 %23 %29 -%60 = OpIAdd %5 %59 %27 -%61 = OpCompositeExtract %5 %18 0 -%62 = OpCompositeExtract %5 %18 1 -%63 = OpIAddCarry %33 %61 %60 -%64 = OpCompositeExtract %5 %63 0 -%65 = OpCompositeExtract %5 %63 1 -%66 = OpIAdd %5 %62 %65 -%67 = OpCompositeConstruct %6 %64 %66 -%68 = OpBitcast %58 %67 -%69 = OpAccessChain %40 %68 %27 -OpStore %69 %56 Aligned 4 -%72 = OpBitcast %10 %71 -OpStore %14 %72 +%28 = OpBitcast %27 %21 +%30 = OpAccessChain %29 %28 %31 %23 +%32 = OpLoad %24 %30 Aligned 4 +%33 = OpIAdd %5 %23 %20 +%34 = OpBitcast %27 %21 +%35 = OpAccessChain %29 %34 %31 %33 +%36 = OpLoad %24 %35 Aligned 4 +%37 = OpFAdd %24 %36 %32 +%41 = OpBitcast %40 %18 +%42 = OpAccessChain %29 %41 %31 %23 +OpStore %42 %37 Aligned 4 +%45 = OpBitcast %10 %44 +OpStore %14 %45 OpReturn OpFunctionEnd #endif diff --git a/reference/shaders/resources/typed-resources-16bit-sparse.frag b/reference/shaders/resources/typed-resources-16bit-sparse.frag index f172beb..5bb8c77 100644 --- a/reference/shaders/resources/typed-resources-16bit-sparse.frag +++ b/reference/shaders/resources/typed-resources-16bit-sparse.frag @@ -120,16 +120,16 @@ void main() float _435; _434 = sparseTextureClampARB(sampler2DShadow(_8, _29), vec3(vec2(UV.x, UV.y), 0.5), 0.0, _435); SparseTexel_3 _200 = SparseTexel_3(_434, _435); - float _203 = _200._m1; + mediump float _203 = _200._m1; _204 _205 = _204(_203, _203, _203, _203, _200._m0); - float _206 = _205._m0; + mediump float _206 = _205._m0; uint _436; float _437; _436 = sparseTextureLodARB(sampler2DShadow(_8, _29), vec3(vec2(UV.x, UV.y), 0.5), 0.0, _437); SparseTexel_3 _222 = SparseTexel_3(_436, _437); - float _225 = _222._m1; + mediump float _225 = _222._m1; _204 _226 = _204(_225, _225, _225, _225, _222._m0); - float _227 = _226._m0; + mediump float _227 = _226._m0; vec2 _243 = vec2(UV.x, UV.y); uint _438; vec4 _439; diff --git a/reference/shaders/resources/typed-resources-16bit.bindless.frag 
b/reference/shaders/resources/typed-resources-16bit.bindless.frag index 1c7618f..e6be43d 100644 --- a/reference/shaders/resources/typed-resources-16bit.bindless.frag +++ b/reference/shaders/resources/typed-resources-16bit.bindless.frag @@ -67,10 +67,10 @@ void main() f16vec4 _225 = f16vec4(textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y))); u16vec4 _238 = u16vec4(textureGather(isampler2D(_18[_145], _62[registers._m2]), vec2(UV.x, UV.y), int(1u))); u16vec4 _251 = u16vec4(textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u))); - vec4 _266 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); - float _267 = _266.x; - vec4 _282 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _283 = _282.x; + mediump vec4 _266 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _267 = _266.x; + mediump vec4 _282 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _283 = _282.x; vec2 _296 = vec2(UV.x, UV.y); f16vec4 _298 = f16vec4(textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _296, 0.5)); f16vec4 _309 = f16vec4(textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0)); diff --git a/reference/shaders/resources/typed-resources-16bit.frag b/reference/shaders/resources/typed-resources-16bit.frag index 63ab00a..649a4d3 100644 --- a/reference/shaders/resources/typed-resources-16bit.frag +++ b/reference/shaders/resources/typed-resources-16bit.frag @@ -52,10 +52,10 @@ void main() f16vec4 _143 = f16vec4(textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y))); u16vec4 _156 = u16vec4(textureGather(isampler2D(_12, _46), vec2(UV.x, UV.y), int(1u))); u16vec4 _169 = u16vec4(textureGather(usampler2D(_16, _46), 
vec2(UV.x, UV.y), int(2u))); - vec4 _184 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); - float _185 = _184.x; - vec4 _200 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _201 = _200.x; + mediump vec4 _184 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _185 = _184.x; + mediump vec4 _200 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _201 = _200.x; vec2 _214 = vec2(UV.x, UV.y); f16vec4 _216 = f16vec4(textureGather(sampler2DShadow(_8, _47), _214, 0.5)); f16vec4 _227 = f16vec4(textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0)); diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag index 75c9408..4a3719c 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag @@ -50,36 +50,68 @@ void main() imageStore(_50[registers._m4 + 3u], int(_167), vec4(8.0)); imageStore(_54[registers._m4 + 4u], int(_167), ivec4(uvec4(4294967276u))); imageStore(_58[registers._m4 + 5u], int(_167), uvec4(80u)); - vec4 _191 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); + mediump vec4 _191 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); + mediump float _193 = _191.x; + float hp_copy_193 = _193; + mediump float _194 = _191.y; + float hp_copy_194 = _194; + mediump float _195 = _191.z; + float hp_copy_195 = _195; + mediump float _196 = _191.w; + float hp_copy_196 = _196; uvec4 _199 = uvec4(texelFetch(_18[_145], ivec2(uvec2(1u, 2u)), int(3u))); - uvec4 _205 = texelFetch(_22[_139], ivec2(uvec2(4u, 5u)), int(6u)); - vec4 _212 = textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); + mediump uvec4 _205 = texelFetch(_22[_139], ivec2(uvec2(4u, 5u)), int(6u)); + 
mediump uint _207 = _205.x; + uint hp_copy_207 = _207; + mediump uint _208 = _205.y; + uint hp_copy_208 = _208; + mediump uint _209 = _205.z; + uint hp_copy_209 = _209; + mediump uint _210 = _205.w; + uint hp_copy_210 = _210; + mediump vec4 _212 = textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); + mediump float _213 = _212.x; + float hp_copy_213 = _213; + mediump float _214 = _212.y; + float hp_copy_214 = _214; + mediump float _215 = _212.z; + float hp_copy_215 = _215; + mediump float _216 = _212.w; + float hp_copy_216 = _216; uvec4 _225 = uvec4(textureGather(isampler2D(_18[_145], _62[registers._m2]), vec2(UV.x, UV.y), int(1u))); - uvec4 _237 = textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u)); - vec4 _252 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); - float _253 = _252.x; - vec4 _260 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _261 = _260.x; + mediump uvec4 _237 = textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u)); + mediump uint _238 = _237.x; + uint hp_copy_238 = _238; + mediump uint _239 = _237.y; + uint hp_copy_239 = _239; + mediump uint _240 = _237.z; + uint hp_copy_240 = _240; + mediump uint _241 = _237.w; + uint hp_copy_241 = _241; + mediump vec4 _252 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _253 = _252.x; + mediump vec4 _260 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _261 = _260.x; vec2 _266 = vec2(UV.x, UV.y); - vec4 _267 = textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _266, 0.5); - vec4 _276 = textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0); - vec4 _289 = 
textureGrad(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), vec2(0.20000000298023223876953125, 0.300000011920928955078125), vec2(0.4000000059604644775390625, 0.5)); - vec4 _301 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.5); - vec4 _311 = texelFetch(_26[registers._m1 + 3u], int(_167)); + mediump vec4 _267 = textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _266, 0.5); + mediump vec4 _276 = textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0); + mediump vec4 _289 = textureGrad(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), vec2(0.20000000298023223876953125, 0.300000011920928955078125), vec2(0.4000000059604644775390625, 0.5)); + mediump vec4 _301 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.5); + mediump vec4 _311 = texelFetch(_26[registers._m1 + 3u], int(_167)); uvec4 _321 = uvec4(texelFetch(_30[registers._m1 + 4u], int(_167))); - uvec4 _330 = texelFetch(_34[registers._m1 + 5u], int(_167)); - SV_Target.x = ((((_267.x + (_261 + (_253 + (_212.x + _191.x)))) + _276.x) + _289.x) + _301.x) + _311.x; - SV_Target.y = ((((_267.y + (_261 + (_253 + (_212.y + _191.y)))) + _276.y) + _289.y) + _301.y) + _311.y; - SV_Target.z = ((((_267.z + (_261 + (_253 + (_212.z + _191.z)))) + _276.z) + _289.z) + _301.z) + _311.z; - SV_Target.w = ((((_267.w + (_261 + (_253 + (_212.w + _191.w)))) + _276.w) + _289.w) + _301.w) + _311.w; + mediump uvec4 _330 = texelFetch(_34[registers._m1 + 5u], int(_167)); + SV_Target.x = ((((_267.x + (_261 + (_253 + (hp_copy_213 + hp_copy_193)))) + _276.x) + _289.x) + _301.x) + _311.x; + SV_Target.y = ((((_267.y + (_261 + (_253 + (hp_copy_214 + hp_copy_194)))) + _276.y) + _289.y) + _301.y) + _311.y; + SV_Target.z = ((((_267.z + (_261 + (_253 + (hp_copy_215 + hp_copy_195)))) + _276.z) + _289.z) + _301.z) + _311.z; + SV_Target.w = ((((_267.w + (_261 + (_253 + (hp_copy_216 + hp_copy_196)))) + 
_276.w) + _289.w) + _301.w) + _311.w; SV_Target_1.x = int((_225.x + _199.x) + _321.x); SV_Target_1.y = int((_225.y + _199.y) + _321.y); SV_Target_1.z = int((_225.z + _199.z) + _321.z); SV_Target_1.w = int((_225.w + _199.w) + _321.w); - SV_Target_2.x = (_237.x + _205.x) + _330.x; - SV_Target_2.y = (_237.y + _205.y) + _330.y; - SV_Target_2.z = (_237.z + _205.z) + _330.z; - SV_Target_2.w = (_237.w + _205.w) + _330.w; + SV_Target_2.x = (hp_copy_238 + hp_copy_207) + _330.x; + SV_Target_2.y = (hp_copy_239 + hp_copy_208) + _330.y; + SV_Target_2.z = (hp_copy_240 + hp_copy_209) + _330.z; + SV_Target_2.w = (hp_copy_241 + hp_copy_210) + _330.w; } diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.frag b/reference/shaders/resources/typed-resources-16bit.sm60.frag index c6b90d0..9305754 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.frag @@ -35,36 +35,68 @@ void main() imageStore(_37, int(_81), vec4(8.0)); imageStore(_40, int(_81), ivec4(uvec4(4294967276u))); imageStore(_43, int(_81), uvec4(80u)); - vec4 _105 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y)); + mediump vec4 _105 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y)); + mediump float _107 = _105.x; + float hp_copy_107 = _107; + mediump float _108 = _105.y; + float hp_copy_108 = _108; + mediump float _109 = _105.z; + float hp_copy_109 = _109; + mediump float _110 = _105.w; + float hp_copy_110 = _110; uvec4 _115 = uvec4(texelFetch(_12, ivec2(uvec2(1u, 2u)), int(3u))); - uvec4 _123 = texelFetch(_16, ivec2(uvec2(4u, 5u)), int(6u)); - vec4 _130 = textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y)); + mediump uvec4 _123 = texelFetch(_16, ivec2(uvec2(4u, 5u)), int(6u)); + mediump uint _125 = _123.x; + uint hp_copy_125 = _125; + mediump uint _126 = _123.y; + uint hp_copy_126 = _126; + mediump uint _127 = _123.z; + uint hp_copy_127 = _127; + mediump uint _128 = _123.w; + uint hp_copy_128 = _128; + mediump vec4 _130 = 
textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y)); + mediump float _131 = _130.x; + float hp_copy_131 = _131; + mediump float _132 = _130.y; + float hp_copy_132 = _132; + mediump float _133 = _130.z; + float hp_copy_133 = _133; + mediump float _134 = _130.w; + float hp_copy_134 = _134; uvec4 _143 = uvec4(textureGather(isampler2D(_12, _46), vec2(UV.x, UV.y), int(1u))); - uvec4 _155 = textureGather(usampler2D(_16, _46), vec2(UV.x, UV.y), int(2u)); - vec4 _170 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); - float _171 = _170.x; - vec4 _178 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _179 = _178.x; + mediump uvec4 _155 = textureGather(usampler2D(_16, _46), vec2(UV.x, UV.y), int(2u)); + mediump uint _156 = _155.x; + uint hp_copy_156 = _156; + mediump uint _157 = _155.y; + uint hp_copy_157 = _157; + mediump uint _158 = _155.z; + uint hp_copy_158 = _158; + mediump uint _159 = _155.w; + uint hp_copy_159 = _159; + mediump vec4 _170 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _171 = _170.x; + mediump vec4 _178 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _179 = _178.x; vec2 _184 = vec2(UV.x, UV.y); - vec4 _185 = textureGather(sampler2DShadow(_8, _47), _184, 0.5); - vec4 _194 = textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0); - vec4 _207 = textureGrad(sampler2D(_8, _46), vec2(UV.x, UV.y), vec2(0.20000000298023223876953125, 0.300000011920928955078125), vec2(0.4000000059604644775390625, 0.5)); - vec4 _219 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.5); - vec4 _229 = texelFetch(_19, int(_81)); + mediump vec4 _185 = textureGather(sampler2DShadow(_8, _47), _184, 0.5); + mediump vec4 _194 = textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0); + mediump vec4 _207 = textureGrad(sampler2D(_8, _46), vec2(UV.x, UV.y), vec2(0.20000000298023223876953125, 0.300000011920928955078125), 
vec2(0.4000000059604644775390625, 0.5)); + mediump vec4 _219 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.5); + mediump vec4 _229 = texelFetch(_19, int(_81)); uvec4 _239 = uvec4(texelFetch(_22, int(_81))); - uvec4 _248 = texelFetch(_25, int(_81)); - SV_Target.x = ((((_185.x + (_179 + (_171 + (_130.x + _105.x)))) + _194.x) + _207.x) + _219.x) + _229.x; - SV_Target.y = ((((_185.y + (_179 + (_171 + (_130.y + _105.y)))) + _194.y) + _207.y) + _219.y) + _229.y; - SV_Target.z = ((((_185.z + (_179 + (_171 + (_130.z + _105.z)))) + _194.z) + _207.z) + _219.z) + _229.z; - SV_Target.w = ((((_185.w + (_179 + (_171 + (_130.w + _105.w)))) + _194.w) + _207.w) + _219.w) + _229.w; + mediump uvec4 _248 = texelFetch(_25, int(_81)); + SV_Target.x = ((((_185.x + (_179 + (_171 + (hp_copy_131 + hp_copy_107)))) + _194.x) + _207.x) + _219.x) + _229.x; + SV_Target.y = ((((_185.y + (_179 + (_171 + (hp_copy_132 + hp_copy_108)))) + _194.y) + _207.y) + _219.y) + _229.y; + SV_Target.z = ((((_185.z + (_179 + (_171 + (hp_copy_133 + hp_copy_109)))) + _194.z) + _207.z) + _219.z) + _229.z; + SV_Target.w = ((((_185.w + (_179 + (_171 + (hp_copy_134 + hp_copy_110)))) + _194.w) + _207.w) + _219.w) + _229.w; SV_Target_1.x = int((_143.x + _115.x) + _239.x); SV_Target_1.y = int((_143.y + _115.y) + _239.y); SV_Target_1.z = int((_143.z + _115.z) + _239.z); SV_Target_1.w = int((_143.w + _115.w) + _239.w); - SV_Target_2.x = (_155.x + _123.x) + _248.x; - SV_Target_2.y = (_155.y + _123.y) + _248.y; - SV_Target_2.z = (_155.z + _123.z) + _248.z; - SV_Target_2.w = (_155.w + _123.w) + _248.w; + SV_Target_2.x = (hp_copy_156 + hp_copy_125) + _248.x; + SV_Target_2.y = (hp_copy_157 + hp_copy_126) + _248.y; + SV_Target_2.z = (hp_copy_158 + hp_copy_127) + _248.z; + SV_Target_2.w = (hp_copy_159 + hp_copy_128) + _248.w; } diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag index 1c7618f..e6be43d 
100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag @@ -67,10 +67,10 @@ void main() f16vec4 _225 = f16vec4(textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y))); u16vec4 _238 = u16vec4(textureGather(isampler2D(_18[_145], _62[registers._m2]), vec2(UV.x, UV.y), int(1u))); u16vec4 _251 = u16vec4(textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u))); - vec4 _266 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); - float _267 = _266.x; - vec4 _282 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _283 = _282.x; + mediump vec4 _266 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _267 = _266.x; + mediump vec4 _282 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _283 = _282.x; vec2 _296 = vec2(UV.x, UV.y); f16vec4 _298 = f16vec4(textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _296, 0.5)); f16vec4 _309 = f16vec4(textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0)); diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag index 63ab00a..649a4d3 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag @@ -52,10 +52,10 @@ void main() f16vec4 _143 = f16vec4(textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y))); u16vec4 _156 = u16vec4(textureGather(isampler2D(_12, _46), vec2(UV.x, UV.y), int(1u))); u16vec4 _169 = 
u16vec4(textureGather(usampler2D(_16, _46), vec2(UV.x, UV.y), int(2u))); - vec4 _184 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); - float _185 = _184.x; - vec4 _200 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); - float _201 = _200.x; + mediump vec4 _184 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); + mediump float _185 = _184.x; + mediump vec4 _200 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); + mediump float _201 = _200.x; vec2 _214 = vec2(UV.x, UV.y); f16vec4 _216 = f16vec4(textureGather(sampler2DShadow(_8, _47), _214, 0.5)); f16vec4 _227 = f16vec4(textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0)); diff --git a/reference/shaders/resources/uav-counter.bindless.root-constant.comp b/reference/shaders/resources/uav-counter.bindless.root-constant.comp index f56bafc..339c02c 100644 --- a/reference/shaders/resources/uav-counter.bindless.root-constant.comp +++ b/reference/shaders/resources/uav-counter.bindless.root-constant.comp @@ -4,7 +4,7 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer AtomicCounter; -layout(buffer_reference, std430) buffer AtomicCounter +layout(buffer_reference, buffer_reference_align = 4, std430) buffer AtomicCounter { uint _m0; }; diff --git a/third_party/SPIRV-Cross b/third_party/SPIRV-Cross -Subproject bab4e5911b1bfa5a86bc80006b7301ae4836384 +Subproject b3ff97d0feafd2b7ca72aec7215cfc3d0998fb7 diff --git a/third_party/SPIRV-Tools b/third_party/SPIRV-Tools -Subproject 2d12367ced2dd34444822340070b4545ae7c02f +Subproject 98340ec500e92a534dc8384d5c45d2f488e40f6 diff --git a/third_party/spirv-headers b/third_party/spirv-headers -Subproject ae217c17809fadb232ec94b29304b4afcd417bb +Subproject b765c355f488837ca4c77980ba69484f3ff277f |