diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2021-12-02 17:31:59 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-05-31 12:42:30 +0300 |
commit | 677148ed111a8a3ee9ce3b002f46032361a48069 (patch) | |
tree | 4820cb837a0e5264043b546b88c046c704ceaa31 | |
parent | d97bb340d8f79317fb7b8049717ff6ffd0dace0f (diff) |
Extend usage of relaxed precision.
Attempt to extend use of relaxed precision to variable storage and
arithmetic as well.
28 files changed, 943 insertions, 63 deletions
diff --git a/cfg_structurizer.cpp b/cfg_structurizer.cpp index aa9762e..e987080 100644 --- a/cfg_structurizer.cpp +++ b/cfg_structurizer.cpp @@ -1273,6 +1273,7 @@ void CFGStructurizer::insert_phi(PHINode &node) PHI frontier_phi; frontier_phi.id = module.allocate_id(); frontier_phi.type_id = phi.type_id; + frontier_phi.relaxed = phi.relaxed; module.get_builder().addName(frontier_phi.id, (String("frontier_phi_") + frontier->name).c_str()); assert(!frontier->pred_back_edge); @@ -1359,6 +1360,7 @@ void CFGStructurizer::insert_phi(PHINode &node) // If we came from such a block, // we should replace the incoming value of dominating_incoming rather than adding a new incoming value. PHI merge_phi = {}; + merge_phi.relaxed = phi.relaxed; // Here we need to figure out if we have a cross branch which functions as a ladder. // If we have such a special edge, the PHI value we find here will override any other value on this path. diff --git a/dxil_converter.cpp b/dxil_converter.cpp index 1ce8f76..bca7bae 100644 --- a/dxil_converter.cpp +++ b/dxil_converter.cpp @@ -4116,6 +4116,8 @@ bool Converter::Impl::emit_global_variables() spv::Id var_id = create_variable_with_initializer( address_space == DXIL::AddressSpace::GroupShared ? spv::StorageClassWorkgroup : spv::StorageClassPrivate, pointee_type_id, initializer_id); + + decorate_relaxed_precision(global.getType()->getPointerElementType(), var_id, false); rewrite_value(&global, var_id); } @@ -4735,6 +4737,7 @@ bool Converter::Impl::emit_phi_instruction(CFGNode *block, const llvm::PHINode & PHI phi; phi.id = get_id_for_value(&instruction); phi.type_id = get_type_id(instruction.getType()); + phi.relaxed = type_can_relax_precision(instruction.getType(), false); for (unsigned i = 0; i < count; i++) { @@ -5738,6 +5741,29 @@ spv::Id Converter::Impl::get_effective_input_output_type_id(DXIL::ComponentType return get_type_id(get_effective_input_output_type(type), 1, 1); } +bool Converter::Impl::type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const +{ + if (type->getTypeID() == llvm::Type::TypeID::ArrayTyID) + type = llvm::cast<llvm::ArrayType>(type)->getArrayElementType(); + if (type->getTypeID() == llvm::Type::TypeID::VectorTyID) + type = llvm::cast<llvm::VectorType>(type)->getElementType(); + + return !execution_mode_meta.native_16bit_operations && + (type->getTypeID() == llvm::Type::TypeID::HalfTyID || + (type->getTypeID() == llvm::Type::TypeID::IntegerTyID && type->getIntegerBitWidth() == 16 && + known_integer_sign)); +} + +void Converter::Impl::decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign) +{ + // Ignore RelaxedPrecision for integers since they are untyped in LLVM for the most part. + // For texture loading operations and similar, we load in the appropriate sign, so it's safe to use RelaxedPrecision, + // since RelaxedPrecision may sign-extend based on the OpTypeInt's signage. + // DXIL is kinda broken in this regard since min16int and min16uint lower to the same i16 type ... :( + if (type_can_relax_precision(type, known_integer_sign)) + builder().addDecoration(id, spv::DecorationRelaxedPrecision); +} + void Converter::Impl::set_option(const OptionBase &cap) { switch (cap.type) @@ -60,6 +60,7 @@ struct PHI { uint32_t id = 0; uint32_t type_id = 0; + bool relaxed = false; Vector<IncomingValue> incoming; }; diff --git a/opcodes/converter_impl.hpp b/opcodes/converter_impl.hpp index 8f9536d..2b3c5ad 100644 --- a/opcodes/converter_impl.hpp +++ b/opcodes/converter_impl.hpp @@ -606,5 +606,8 @@ struct Converter::Impl UnorderedSet<const llvm::CallInst *> resource_handles_needing_sink; UnorderedSet<const llvm::CallInst *> resource_handle_is_conservative; UnorderedMap<const llvm::BasicBlock *, Vector<const llvm::Instruction *>> bb_to_sinks; + + bool type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const; + void decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign); }; } // namespace dxil_spv diff --git a/opcodes/dxil/dxil_arithmetic.cpp b/opcodes/dxil/dxil_arithmetic.cpp index bdc2ebe..ec091b9 100644 --- a/opcodes/dxil/dxil_arithmetic.cpp +++ b/opcodes/dxil/dxil_arithmetic.cpp @@ -47,6 +47,7 @@ bool emit_imad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruction) { auto &builder = impl.builder(); + spv::Id result_id; if (instruction->getMetadata("dx.precise") != nullptr) { @@ -61,11 +62,15 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct impl.add(mul_op); builder.addDecoration(mul_op->id, spv::DecorationNoContraction); + impl.decorate_relaxed_precision(instruction->getType(), mul_op->id, false); + Operation *add_op = impl.allocate(spv::OpFAdd, instruction); add_op->add_id(mul_op->id); add_op->add_id(impl.get_id_for_value(instruction->getOperand(3))); impl.add(add_op); builder.addDecoration(add_op->id, spv::DecorationNoContraction); + + result_id = add_op->id; } else { @@ -78,8 +83,11 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct for (unsigned i = 1; i < 4; i++) op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); + + result_id = op->id; } + impl.decorate_relaxed_precision(instruction->getType(), result_id, false); return true; } @@ -152,6 +160,7 @@ bool emit_dxil_std450_binary_instruction(GLSLstd450 opcode, Converter::Impl &imp op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -169,6 +178,7 @@ bool emit_dxil_std450_trinary_instruction(GLSLstd450 opcode, Converter::Impl &im op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -184,6 +194,7 @@ bool emit_dxil_std450_unary_instruction(GLSLstd450 opcode, Converter::Impl &impl op->add_id(impl.get_id_for_value(instruction->getOperand(1))); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -192,6 +203,7 @@ bool emit_dxil_unary_instruction(spv::Op opcode, Converter::Impl &impl, const ll Operation *op = impl.allocate(opcode, instruction); op->add_id(impl.get_id_for_value(instruction->getOperand(1))); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -239,6 +251,7 @@ bool emit_saturate_instruction(Converter::Impl &impl, const llvm::CallInst *inst constant_0, constant_1 }); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -258,6 +271,7 @@ bool emit_dot_instruction(unsigned dimensions, Converter::Impl &impl, const llvm op->add_ids({ vec0, vec1 }); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -365,11 +379,19 @@ bool emit_legacy_f16_to_f32_instruction(Converter::Impl &impl, const llvm::CallI unpack_op->add_id(impl.get_id_for_value(instruction->getOperand(1))); impl.add(unpack_op); + // By construction, these are relaxed precision, but spams lots of unrelated shader changes, + // and doesn't make too much sense to add ... + //builder.addDecoration(unpack_op->id, spv::DecorationRelaxedPrecision); + Operation *op = impl.allocate(spv::OpCompositeExtract, instruction); op->add_id(unpack_op->id); op->add_literal(0); impl.add(op); + // By construction, these are relaxed precision, but spams lots of unrelated shader changes, + // and doesn't make too much sense to add ... + //builder.addDecoration(op->id, spv::DecorationRelaxedPrecision); + return true; } @@ -406,6 +428,7 @@ bool emit_legacy_f32_to_f16_instruction(Converter::Impl &impl, const llvm::CallI spv::Id inputs[2] = { input_id, builder.makeFloatConstant(0.0f) }; op->add_id(impl.build_vector(builder.makeFloatType(32), inputs, 2)); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -539,6 +562,8 @@ bool emit_dot2_add_half_instruction(Converter::Impl &impl, const llvm::CallInst if (precise) builder.addDecoration(acc_op->id, spv::DecorationNoContraction); + // This opcode requires native FP16, so RelaxedPrecision is useless. + return true; } diff --git a/opcodes/dxil/dxil_buffer.cpp b/opcodes/dxil/dxil_buffer.cpp index 442b77d..10feda7 100644 --- a/opcodes/dxil/dxil_buffer.cpp +++ b/opcodes/dxil/dxil_buffer.cpp @@ -804,6 +804,9 @@ bool emit_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *i Operation *op = impl.allocate(opcode, instruction, sample_type); + if (!sparse) + impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true); + op->add_ids({ image_id, access.index_id }); impl.add(op); diff --git a/opcodes/dxil/dxil_sampling.cpp b/opcodes/dxil/dxil_sampling.cpp index 164009c..3e714d6 100644 --- a/opcodes/dxil/dxil_sampling.cpp +++ b/opcodes/dxil/dxil_sampling.cpp @@ -262,6 +262,9 @@ bool emit_sample_instruction(DXIL::Op opcode, Converter::Impl &impl, const llvm: // Comparison sampling only returns a scalar, so we'll need to splat out result. Operation *op = impl.allocate(spv_op, instruction, sample_type); + if (!sparse) + impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true); + op->add_id(combined_image_sampler_id); op->add_id(impl.build_vector(builder.makeFloatType(32), coord, num_coords_full)); @@ -366,6 +369,9 @@ bool emit_sample_grad_instruction(Converter::Impl &impl, const llvm::CallInst *i impl.allocate(sparse ? spv::OpImageSparseSampleExplicitLod : spv::OpImageSampleExplicitLod, instruction, sample_type); + if (!sparse) + impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true); + op->add_ids({ combined_image_sampler_id, impl.build_vector(builder.makeFloatType(32), coord, num_coords_full), @@ -457,6 +463,8 @@ bool emit_texture_load_instruction(Converter::Impl &impl, const llvm::CallInst * opcode = sparse ? spv::OpImageSparseFetch : spv::OpImageFetch; Operation *op = impl.allocate(opcode, instruction, sample_type); + if (!sparse) + impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true); op->add_ids({ image_id, impl.build_vector(builder.makeUintType(32), coord, num_coords_full) }); op->add_literal(image_ops); @@ -733,6 +741,8 @@ bool emit_texture_gather_instruction(bool compare, Converter::Impl &impl, const opcode = sparse ? spv::OpImageSparseGather : spv::OpImageGather; Operation *op = impl.allocate(opcode, instruction, sample_type); + if (!sparse) + impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true); op->add_ids({ combined_image_sampler_id, coord_id, aux_id }); diff --git a/opcodes/opcodes_llvm_builtins.cpp b/opcodes/opcodes_llvm_builtins.cpp index 95cc0d8..4a51d1f 100644 --- a/opcodes/opcodes_llvm_builtins.cpp +++ b/opcodes/opcodes_llvm_builtins.cpp @@ -140,6 +140,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator * bool signed_input = false; bool is_width_sensitive = false; bool is_precision_sensitive = false; + bool can_relax_precision = false; spv::Op opcode; switch (instruction->getOpcode()) @@ -147,16 +148,19 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator * case llvm::BinaryOperator::BinaryOps::FAdd: opcode = spv::OpFAdd; is_precision_sensitive = true; + can_relax_precision = true; break; case llvm::BinaryOperator::BinaryOps::FSub: opcode = spv::OpFSub; is_precision_sensitive = true; + can_relax_precision = true; break; case llvm::BinaryOperator::BinaryOps::FMul: opcode = spv::OpFMul; is_precision_sensitive = true; + can_relax_precision = true; if (peephole_trivial_arithmetic_identity(impl, instruction, llvm::BinaryOperator::BinaryOps::FDiv, true)) return true; break; @@ -164,6 +168,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator * case llvm::BinaryOperator::BinaryOps::FDiv: opcode = spv::OpFDiv; is_precision_sensitive = true; + can_relax_precision = true; if (peephole_trivial_arithmetic_identity(impl, instruction, llvm::BinaryOperator::BinaryOps::FMul, false)) return true; break; @@ -215,6 +220,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator * case llvm::BinaryOperator::BinaryOps::FRem: opcode = spv::OpFRem; is_precision_sensitive = true; + can_relax_precision = true; break; case llvm::BinaryOperator::BinaryOps::URem: @@ -291,6 +297,10 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator * impl.add(op); if (is_precision_sensitive && !instruction->isFast()) impl.builder().addDecoration(op->id, spv::DecorationNoContraction); + + // Only bother relaxing FP, since Integers are murky w.r.t. signage in DXIL. + if (can_relax_precision) + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -311,6 +321,7 @@ bool emit_unary_instruction(Converter::Impl &impl, const llvm::UnaryOperator *in Operation *op = impl.allocate(opcode, instruction); op->add_id(impl.get_id_for_value(instruction->getOperand(0))); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); impl.add(op); return true; @@ -421,6 +432,7 @@ static spv::Id emit_boolean_convert_instruction(Converter::Impl &impl, const Ins op->add_id(impl.get_id_for_value(instruction->getOperand(0))); op->add_ids({ const_1, const_0 }); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return op->id; } @@ -509,8 +521,9 @@ static bool value_cast_is_noop(Converter::Impl &impl, const InstructionType *ins template <typename InstructionType> static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const InstructionType *instruction) { - spv::Op opcode; + bool can_relax_precision = false; bool signed_input = false; + spv::Op opcode; if (value_cast_is_noop(impl, instruction)) { @@ -553,6 +566,8 @@ static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const Instructi case llvm::Instruction::CastOps::FPTrunc: case llvm::Instruction::CastOps::FPExt: opcode = spv::OpFConvert; + // Relaxing precision on integers in DXIL is very sketchy, so don't bother. + can_relax_precision = true; break; case llvm::Instruction::CastOps::FPToUI: @@ -623,6 +638,8 @@ static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const Instructi Operation *op = impl.allocate(opcode, instruction); op->add_id(build_naturally_extended_value(impl, instruction->getOperand(0), signed_input)); impl.add(op); + if (can_relax_precision) + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return op->id; } } @@ -1053,6 +1070,7 @@ bool emit_extract_value_instruction(Converter::Impl &impl, const llvm::ExtractVa op->add_literal(instruction->getIndices()[i]); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); } return true; @@ -1091,6 +1109,7 @@ bool emit_alloca_instruction(Converter::Impl &impl, const llvm::AllocaInst *inst spv::Id var_id = impl.create_variable(storage, pointee_type_id); impl.rewrite_value(instruction, var_id); impl.handle_to_storage_class[instruction] = storage; + impl.decorate_relaxed_precision(element_type, var_id, false); return true; } @@ -1102,6 +1121,7 @@ bool emit_select_instruction(Converter::Impl &impl, const llvm::SelectInst *inst op->add_id(impl.get_id_for_value(instruction->getOperand(i))); impl.add(op); + impl.decorate_relaxed_precision(instruction->getType(), op->id, false); return true; } @@ -1224,12 +1244,14 @@ bool emit_shufflevector_instruction(Converter::Impl &impl, const llvm::ShuffleVe bool emit_extractelement_instruction(Converter::Impl &impl, const llvm::ExtractElementInst *inst) { + spv::Id id; if (auto *constant_int = llvm::dyn_cast<llvm::ConstantInt>(inst->getIndexOperand())) { Operation *op = impl.allocate(spv::OpCompositeExtract, inst); op->add_id(impl.get_id_for_value(inst->getVectorOperand())); op->add_literal(uint32_t(constant_int->getUniqueInteger().getZExtValue())); impl.add(op); + id = op->id; } else { @@ -1237,7 +1259,9 @@ bool emit_extractelement_instruction(Converter::Impl &impl, const llvm::ExtractE op->add_id(impl.get_id_for_value(inst->getVectorOperand())); op->add_id(impl.get_id_for_value(inst->getIndexOperand())); impl.add(op); + id = op->id; } + impl.decorate_relaxed_precision(inst->getType(), id, false); return true; } diff --git a/reference/shaders/fp16/saturate.sm60.frag b/reference/shaders/fp16/saturate.sm60.frag index 5ed9216..d954d13 100644 --- a/reference/shaders/fp16/saturate.sm60.frag +++ b/reference/shaders/fp16/saturate.sm60.frag @@ -5,22 +5,14 @@ layout(location = 0) out mediump vec4 SV_Target; void main() { - mediump float _15 = V.x; - float hp_copy_15 = _15; - mediump float _18 = V.y; - float hp_copy_18 = _18; - mediump float _21 = V.z; - float hp_copy_21 = _21; - mediump float _24 = V.w; - float hp_copy_24 = _24; - float _40 = isnan(0.0) ? hp_copy_15 : (isnan(hp_copy_15) ? 0.0 : max(hp_copy_15, 0.0)); - float _52 = isnan(0.0) ? hp_copy_18 : (isnan(hp_copy_18) ? 0.0 : max(hp_copy_18, 0.0)); - float _64 = isnan(0.0) ? hp_copy_21 : (isnan(hp_copy_21) ? 0.0 : max(hp_copy_21, 0.0)); - float _76 = isnan(0.0) ? hp_copy_24 : (isnan(hp_copy_24) ? 0.0 : max(hp_copy_24, 0.0)); - SV_Target.x = isnan(1.0) ? _40 : (isnan(_40) ? 1.0 : min(_40, 1.0)); - SV_Target.y = isnan(1.0) ? _52 : (isnan(_52) ? 1.0 : min(_52, 1.0)); - SV_Target.z = isnan(1.0) ? _64 : (isnan(_64) ? 1.0 : min(_64, 1.0)); - SV_Target.w = isnan(1.0) ? _76 : (isnan(_76) ? 1.0 : min(_76, 1.0)); + mediump float _39 = isnan(0.0) ? V.x : (isnan(V.x) ? 0.0 : max(V.x, 0.0)); + mediump float _50 = isnan(0.0) ? V.y : (isnan(V.y) ? 0.0 : max(V.y, 0.0)); + mediump float _61 = isnan(0.0) ? V.z : (isnan(V.z) ? 0.0 : max(V.z, 0.0)); + mediump float _72 = isnan(0.0) ? V.w : (isnan(V.w) ? 0.0 : max(V.w, 0.0)); + SV_Target.x = isnan(1.0) ? _39 : (isnan(_39) ? 1.0 : min(_39, 1.0)); + SV_Target.y = isnan(1.0) ? _50 : (isnan(_50) ? 1.0 : min(_50, 1.0)); + SV_Target.z = isnan(1.0) ? _61 : (isnan(_61) ? 1.0 : min(_61, 1.0)); + SV_Target.w = isnan(1.0) ? _72 : (isnan(_72) ? 1.0 : min(_72, 1.0)); } @@ -43,6 +35,10 @@ OpDecorate %8 RelaxedPrecision OpDecorate %8 Location 0 OpDecorate %10 RelaxedPrecision OpDecorate %10 Location 0 +OpDecorate %28 RelaxedPrecision +OpDecorate %29 RelaxedPrecision +OpDecorate %30 RelaxedPrecision +OpDecorate %31 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 diff --git a/reference/shaders/fp16/saturate.sm60.native-fp16.frag b/reference/shaders/fp16/saturate.sm60.native-fp16.frag index 9c91a9f..2455267 100644 --- a/reference/shaders/fp16/saturate.sm60.native-fp16.frag +++ b/reference/shaders/fp16/saturate.sm60.native-fp16.frag @@ -48,6 +48,10 @@ OpDecorate %8 RelaxedPrecision OpDecorate %8 Location 0 OpDecorate %10 RelaxedPrecision OpDecorate %10 Location 0 +OpDecorate %33 RelaxedPrecision +OpDecorate %34 RelaxedPrecision +OpDecorate %35 RelaxedPrecision +OpDecorate %36 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 diff --git a/reference/shaders/llvm-builtin/min16-phi.sm60.comp b/reference/shaders/llvm-builtin/min16-phi.sm60.comp new file mode 100644 index 0000000..60d79f5 --- /dev/null +++ b/reference/shaders/llvm-builtin/min16-phi.sm60.comp @@ -0,0 +1,213 @@ +#version 460 +#extension GL_EXT_samplerless_texture_functions : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform mediump texture2D _8; +layout(set = 0, binding = 1) uniform mediump texture2D _9; +layout(set = 0, binding = 2) uniform mediump texture2D _10; +layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _14; + +void main() +{ + mediump float _45; + mediump float _48; + mediump float _51; + mediump float _54; + if (gl_GlobalInvocationID.x < 20u) + { + mediump vec4 _36 = texelFetch(_8, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z)); + _45 = _36.x; + _48 = _36.y; + _51 = _36.z; + _54 = _36.w; + } + else + { + mediump float frontier_phi_3_2_ladder; + mediump float frontier_phi_3_2_ladder_1; + mediump float frontier_phi_3_2_ladder_2; + mediump float frontier_phi_3_2_ladder_3; + if (gl_GlobalInvocationID.y < 40u) + { + mediump vec4 _72 = texelFetch(_9, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z)); + frontier_phi_3_2_ladder = _72.x; + frontier_phi_3_2_ladder_1 = _72.y; + frontier_phi_3_2_ladder_2 = _72.z; + frontier_phi_3_2_ladder_3 = _72.w; + } + else + { + mediump vec4 _74 = texelFetch(_10, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z)); + frontier_phi_3_2_ladder = _74.x; + frontier_phi_3_2_ladder_1 = _74.y; + frontier_phi_3_2_ladder_2 = _74.z; + frontier_phi_3_2_ladder_3 = _74.w; + } + _45 = frontier_phi_3_2_ladder; + _48 = frontier_phi_3_2_ladder_1; + _51 = frontier_phi_3_2_ladder_2; + _54 = frontier_phi_3_2_ladder_3; + } + uint _57 = gl_GlobalInvocationID.x * 4u; + imageStore(_14, int(_57), uvec4(floatBitsToUint(_45))); + imageStore(_14, int(_57 + 1u), uvec4(floatBitsToUint(_48))); + imageStore(_14, int(_57 + 2u), uvec4(floatBitsToUint(_51))); + imageStore(_14, int(_57 + 3u), uvec4(floatBitsToUint(_54))); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 88 +; Schema: 0 +OpCapability Shader +OpCapability ImageBuffer +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %3 "main" %21 +OpExecutionMode %3 LocalSize 64 1 1 +OpName %3 "main" +OpName %76 "frontier_phi_3.2.ladder" +OpName %77 "frontier_phi_3.2.ladder" +OpName %78 "frontier_phi_3.2.ladder" +OpName %79 "frontier_phi_3.2.ladder" +OpDecorate %8 RelaxedPrecision +OpDecorate %8 DescriptorSet 0 +OpDecorate %8 Binding 0 +OpDecorate %9 RelaxedPrecision +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 1 +OpDecorate %10 RelaxedPrecision +OpDecorate %10 DescriptorSet 0 +OpDecorate %10 Binding 2 +OpDecorate %14 DescriptorSet 0 +OpDecorate %14 Binding 0 +OpDecorate %14 NonReadable +OpDecorate %21 BuiltIn GlobalInvocationId +OpDecorate %36 RelaxedPrecision +OpDecorate %39 RelaxedPrecision +OpDecorate %40 RelaxedPrecision +OpDecorate %41 RelaxedPrecision +OpDecorate %42 RelaxedPrecision +OpDecorate %72 RelaxedPrecision +OpDecorate %46 RelaxedPrecision +OpDecorate %49 RelaxedPrecision +OpDecorate %52 RelaxedPrecision +OpDecorate %55 RelaxedPrecision +OpDecorate %74 RelaxedPrecision +OpDecorate %47 RelaxedPrecision +OpDecorate %50 RelaxedPrecision +OpDecorate %53 RelaxedPrecision +OpDecorate %56 RelaxedPrecision +OpDecorate %76 RelaxedPrecision +OpDecorate %77 RelaxedPrecision +OpDecorate %78 RelaxedPrecision +OpDecorate %79 RelaxedPrecision +OpDecorate %45 RelaxedPrecision +OpDecorate %48 RelaxedPrecision +OpDecorate %51 RelaxedPrecision +OpDecorate %54 RelaxedPrecision +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeFloat 32 +%6 = OpTypeImage %5 2D 0 0 0 1 Unknown +%7 = OpTypePointer UniformConstant %6 +%8 = OpVariable %7 UniformConstant +%9 = OpVariable %7 UniformConstant +%10 = OpVariable %7 UniformConstant +%11 = OpTypeInt 32 0 +%12 = OpTypeImage %11 Buffer 0 0 0 2 R32ui +%13 = OpTypePointer UniformConstant %12 +%14 = OpVariable %13 UniformConstant +%19 = OpTypeVector %11 3 +%20 = OpTypePointer Input %19 +%21 = OpVariable %20 Input +%22 = OpTypePointer Input %11 +%24 = OpConstant %11 0 +%27 = OpConstant %11 1 +%30 = OpConstant %11 2 +%32 = OpTypeBool +%34 = OpConstant %11 20 +%35 = OpTypeVector %5 4 +%37 = OpTypeVector %11 2 +%44 = OpConstant %11 40 +%58 = OpConstant %11 4 +%63 = OpTypeVector %11 4 +%71 = OpConstant %11 3 +%3 = OpFunction %1 None %2 +%4 = OpLabel +OpBranch %80 +%80 = OpLabel +%15 = OpLoad %12 %14 +%16 = OpLoad %6 %10 +%17 = OpLoad %6 %9 +%18 = OpLoad %6 %8 +%23 = OpAccessChain %22 %21 %24 +%25 = OpLoad %11 %23 +%26 = OpAccessChain %22 %21 %27 +%28 = OpLoad %11 %26 +%29 = OpAccessChain %22 %21 %30 +%31 = OpLoad %11 %29 +%33 = OpULessThan %32 %25 %34 +OpSelectionMerge %86 None +OpBranchConditional %33 %85 %81 +%85 = OpLabel +%38 = OpCompositeConstruct %37 %25 %28 +%36 = OpImageFetch %35 %18 %38 Lod %31 +%39 = OpCompositeExtract %5 %36 0 +%40 = OpCompositeExtract %5 %36 1 +%41 = OpCompositeExtract %5 %36 2 +%42 = OpCompositeExtract %5 %36 3 +OpBranch %86 +%81 = OpLabel +%43 = OpULessThan %32 %28 %44 +OpSelectionMerge %84 None +OpBranchConditional %43 %83 %82 +%83 = OpLabel +%73 = OpCompositeConstruct %37 %25 %28 +%72 = OpImageFetch %35 %17 %73 Lod %31 +%46 = OpCompositeExtract %5 %72 0 +%49 = OpCompositeExtract %5 %72 1 +%52 = OpCompositeExtract %5 %72 2 +%55 = OpCompositeExtract %5 %72 3 +OpBranch %84 +%82 = OpLabel +%75 = OpCompositeConstruct %37 %25 %28 +%74 = OpImageFetch %35 %16 %75 Lod %31 +%47 = OpCompositeExtract %5 %74 0 +%50 = OpCompositeExtract %5 %74 1 +%53 = OpCompositeExtract %5 %74 2 +%56 = OpCompositeExtract %5 %74 3 +OpBranch %84 +%84 = OpLabel +%76 = OpPhi %5 %46 %83 %47 %82 +%77 = OpPhi %5 %49 %83 %50 %82 +%78 = OpPhi %5 %52 %83 %53 %82 +%79 = OpPhi %5 %55 %83 %56 %82 +OpBranch %86 +%86 = OpLabel +%45 = OpPhi %5 %39 %85 %76 %84 +%48 = OpPhi %5 %40 %85 %77 %84 +%51 = OpPhi %5 %41 %85 %78 %84 +%54 = OpPhi %5 %42 %85 %79 %84 +%57 = OpIMul %11 %25 %58 +%59 = OpBitcast %11 %45 +%60 = OpBitcast %11 %48 +%61 = OpBitcast %11 %51 +%62 = OpBitcast %11 %54 +%64 = OpCompositeConstruct %63 %59 %59 %59 %59 +OpImageWrite %15 %57 %64 +%65 = OpCompositeConstruct %63 %60 %60 %60 %60 +%66 = OpIAdd %11 %57 %27 +OpImageWrite %15 %66 %65 +%67 = OpCompositeConstruct %63 %61 %61 %61 %61 +%68 = OpIAdd %11 %57 %30 +OpImageWrite %15 %68 %67 +%69 = OpCompositeConstruct %63 %62 %62 %62 %62 +%70 = OpIAdd %11 %57 %71 +OpImageWrite %15 %70 %69 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag index 8b2179b..e8ae469 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag @@ -71,6 +71,14 @@ OpDecorate %19 ArrayStride 16 OpMemberDecorate %20 0 Offset 0 OpDecorate %20 Block OpMemberDecorate %20 0 NonWritable +OpDecorate %34 RelaxedPrecision +OpDecorate %35 RelaxedPrecision +OpDecorate %36 RelaxedPrecision +OpDecorate %37 RelaxedPrecision +OpDecorate %46 RelaxedPrecision +OpDecorate %47 RelaxedPrecision +OpDecorate %48 RelaxedPrecision +OpDecorate %49 RelaxedPrecision OpDecorate %57 ArrayStride 16 OpMemberDecorate %58 0 Offset 0 OpDecorate %58 Block diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag index 6ff9f96..cad0869 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag @@ -80,6 +80,14 @@ OpDecorate %19 ArrayStride 16 OpMemberDecorate %20 0 Offset 0 OpDecorate %20 Block OpMemberDecorate %20 0 NonWritable +OpDecorate %37 RelaxedPrecision +OpDecorate %38 RelaxedPrecision +OpDecorate %39 RelaxedPrecision +OpDecorate %40 RelaxedPrecision +OpDecorate %54 RelaxedPrecision +OpDecorate %55 RelaxedPrecision +OpDecorate %56 RelaxedPrecision +OpDecorate %57 RelaxedPrecision OpDecorate %69 ArrayStride 16 OpMemberDecorate %70 0 Offset 0 OpDecorate %70 Block diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag index 0e8c333..99d1a3b 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag @@ -52,6 +52,14 @@ OpDecorate %12 Binding 0 OpDecorate %18 DescriptorSet 0 OpDecorate %18 Binding 0 OpDecorate %20 Location 0 +OpDecorate %32 RelaxedPrecision +OpDecorate %33 RelaxedPrecision +OpDecorate %34 RelaxedPrecision +OpDecorate %35 RelaxedPrecision +OpDecorate %43 RelaxedPrecision +OpDecorate %44 RelaxedPrecision +OpDecorate %45 RelaxedPrecision +OpDecorate %46 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag index 1a732b3..996fc55 100644 --- a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag +++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag @@ -63,6 +63,14 @@ OpDecorate %12 Binding 0 OpDecorate %18 DescriptorSet 0 OpDecorate %18 Binding 0 OpDecorate %20 Location 0 +OpDecorate %35 RelaxedPrecision +OpDecorate %36 RelaxedPrecision +OpDecorate %37 RelaxedPrecision +OpDecorate %38 RelaxedPrecision +OpDecorate %51 RelaxedPrecision +OpDecorate %52 RelaxedPrecision +OpDecorate %53 RelaxedPrecision +OpDecorate %54 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/min16-alloca-groupshared.sm60.comp b/reference/shaders/resources/min16-alloca-groupshared.sm60.comp new file mode 100644 index 0000000..98c1f90 --- /dev/null +++ b/reference/shaders/resources/min16-alloca-groupshared.sm60.comp @@ -0,0 +1,210 @@ +#version 460 +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _8; + +shared mediump float _13[64]; +shared mediump float _17[256]; + +void main() +{ + mediump float _31[4]; + _31[0u] = 0.0; + _31[1u] = 0.0; + _31[2u] = 0.0; + _31[3u] = 0.0; + float _41 = float(gl_LocalInvocationIndex); + mediump float mp_copy_41 = _41; + _13[gl_LocalInvocationIndex] = _41; + _17[0u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 1.0; + _17[1u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 2.0; + _17[2u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 3.0; + _17[3u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 4.0; + barrier(); + _31[gl_LocalInvocationIndex & 3u] = _13[gl_LocalInvocationIndex ^ 5u]; + _31[(gl_LocalInvocationIndex + 1u) & 3u] = _13[gl_LocalInvocationIndex ^ 4u]; + uint _77 = gl_LocalInvocationIndex ^ 1u; + uint _80 = gl_LocalInvocationIndex ^ 3u; + uint _109 = gl_GlobalInvocationID.x * 4u; + imageStore(_8, int(_109), uvec4(floatBitsToUint((_17[0u + (_80 * 4u)] + _13[_77]) + _31[0u]))); + imageStore(_8, int(_109 + 1u), uvec4(floatBitsToUint((_17[1u + (_80 * 4u)] + _13[_77]) + _31[1u]))); + imageStore(_8, int(_109 + 2u), uvec4(floatBitsToUint((_17[2u + (_80 * 4u)] + _13[_77]) + _31[2u]))); + imageStore(_8, int(_109 + 3u), uvec4(floatBitsToUint((_17[3u + (_80 * 4u)] + _13[_77]) + _31[3u]))); +} + + +#if 0 +// SPIR-V disassembly +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 124 +; Schema: 0 +OpCapability Shader +OpCapability ImageBuffer +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %3 "main" %21 %26 +OpExecutionMode %3 LocalSize 64 1 1 +OpName %3 "main" +OpDecorate %8 DescriptorSet 0 +OpDecorate %8 Binding 0 +OpDecorate %8 NonReadable +OpDecorate %13 RelaxedPrecision +OpDecorate %17 RelaxedPrecision +OpDecorate %21 BuiltIn GlobalInvocationId +OpDecorate %26 BuiltIn LocalInvocationIndex +OpDecorate %31 RelaxedPrecision +OpDecorate %44 RelaxedPrecision +OpDecorate %46 RelaxedPrecision +OpDecorate %48 RelaxedPrecision +OpDecorate %50 RelaxedPrecision +OpDecorate %97 RelaxedPrecision +OpDecorate %98 RelaxedPrecision +OpDecorate %99 RelaxedPrecision +OpDecorate %100 RelaxedPrecision +OpDecorate %105 RelaxedPrecision +OpDecorate %106 RelaxedPrecision +OpDecorate %107 RelaxedPrecision +OpDecorate %108 RelaxedPrecision +%1 = OpTypeVoid +%2 = OpTypeFunction %1 +%5 = OpTypeInt 32 0 +%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui +%7 = OpTypePointer UniformConstant %6 +%8 = OpVariable %7 UniformConstant +%9 = OpConstant %5 64 +%10 = OpTypeFloat 32 +%11 = OpTypeArray %10 %9 +%12 = OpTypePointer Workgroup %11 +%13 = OpVariable %12 Workgroup +%14 = OpConstant %5 256 +%15 = OpTypeArray %10 %14 +%16 = OpTypePointer Workgroup %15 +%17 = OpVariable %16 Workgroup +%19 = OpTypeVector %5 3 +%20 = OpTypePointer Input %19 +%21 = OpVariable %20 Input +%22 = OpTypePointer Input %5 +%24 = OpConstant %5 0 +%26 = OpVariable %22 Input +%28 = OpConstant %5 4 +%29 = OpTypeArray %10 %28 +%30 = OpTypePointer Function %29 +%32 = OpTypePointer Function %10 +%35 = OpConstant %5 1 +%37 = OpConstant %5 2 +%39 = OpConstant %5 3 +%40 = OpConstant %10 0 +%42 = OpTypePointer Workgroup %10 +%45 = OpConstant %10 1 +%47 = OpConstant %10 2 +%49 = OpConstant %10 3 +%51 = OpConstant %10 4 +%64 = OpConstant %5 264 +%66 = OpConstant %5 5 +%114 = OpTypeVector %5 4 +%3 = OpFunction %1 None %2 +%4 = OpLabel +%31 = OpVariable %30 Function +OpBranch %122 +%122 = OpLabel +%18 = OpLoad %6 %8 +%23 = OpAccessChain %22 %21 %24 +%25 = OpLoad %5 %23 +%27 = OpLoad %5 %26 +%33 = OpInBoundsAccessChain %32 %31 %24 +%34 = OpInBoundsAccessChain %32 %31 %35 +%36 = OpInBoundsAccessChain %32 %31 %37 +%38 = OpInBoundsAccessChain %32 %31 %39 +OpStore %33 %40 +OpStore %34 %40 +OpStore %36 %40 +OpStore %38 %40 +%41 = OpConvertUToF %10 %27 +%43 = OpAccessChain %42 %13 %27 +OpStore %43 %41 +%44 = OpFAdd %10 %41 %45 +%46 = OpFAdd %10 %41 %47 +%48 = OpFAdd %10 %41 %49 +%50 = OpFAdd %10 %41 %51 +%52 = OpIMul %5 %27 %28 +%53 = OpIAdd %5 %24 %52 +%54 = OpAccessChain %42 %17 %53 +OpStore %54 %44 +%55 = OpIMul %5 %27 %28 +%56 = OpIAdd %5 %35 %55 +%57 = OpAccessChain %42 %17 %56 +OpStore %57 %46 +%58 = OpIMul %5 %27 %28 +%59 = OpIAdd %5 %37 %58 +%60 = OpAccessChain %42 %17 %59 +OpStore %60 %48 +%61 = OpIMul %5 %27 %28 +%62 = OpIAdd %5 %39 %61 +%63 = OpAccessChain %42 %17 %62 +OpStore %63 %50 +OpControlBarrier %37 %37 %64 +%65 = OpBitwiseXor %5 %27 %66 +%67 = OpAccessChain %42 %13 %65 +%68 = OpLoad %10 %67 +%69 = OpBitwiseAnd %5 %27 %39 +%70 = OpAccessChain %32 %31 %69 +OpStore %70 %68 +%71 = OpBitwiseXor %5 %27 %28 +%72 = OpAccessChain %42 %13 %71 +%73 = OpLoad %10 %72 +%74 = OpIAdd %5 %27 %35 +%75 = OpBitwiseAnd %5 %74 %39 +%76 = OpAccessChain %32 %31 %75 +OpStore %76 %73 +%77 = OpBitwiseXor %5 %27 %35 +%78 = OpAccessChain %42 %13 %77 +%79 = OpLoad %10 %78 +%80 = OpBitwiseXor %5 %27 %39 +%81 = OpIMul %5 %80 %28 +%82 = OpIAdd %5 %24 %81 +%83 = OpAccessChain %42 %17 %82 +%84 = OpLoad %10 %83 +%85 = OpIMul %5 %80 %28 +%86 = OpIAdd %5 %35 %85 +%87 = OpAccessChain %42 %17 %86 +%88 = OpLoad %10 %87 +%89 = OpIMul %5 %80 %28 +%90 = OpIAdd %5 %37 %89 +%91 = OpAccessChain %42 %17 %90 +%92 = OpLoad %10 %91 +%93 = OpIMul %5 %80 %28 +%94 = OpIAdd %5 %39 %93 +%95 = OpAccessChain %42 %17 %94 +%96 = OpLoad %10 %95 +%97 = OpFAdd %10 %84 %79 +%98 = OpFAdd %10 %88 %79 +%99 = OpFAdd %10 %92 %79 +%100 = OpFAdd %10 %96 %79 +%101 = OpLoad %10 %33 +%102 = OpLoad %10 %34 +%103 = OpLoad %10 %36 +%104 = OpLoad %10 %38 +%105 = OpFAdd %10 %97 %101 +%106 = OpFAdd %10 %98 %102 +%107 = OpFAdd %10 %99 %103 +%108 = OpFAdd %10 %100 %104 +%109 = OpIMul %5 %25 %28 +%110 = OpBitcast %5 %105 +%111 = OpBitcast %5 %106 +%112 = OpBitcast %5 %107 +%113 = OpBitcast %5 %108 +%115 = OpCompositeConstruct %114 %110 %110 %110 %110 +OpImageWrite %18 %109 %115 +%116 = OpCompositeConstruct %114 %111 %111 %111 %111 +%117 = OpIAdd %5 %109 %35 +OpImageWrite %18 %117 %116 +%118 = OpCompositeConstruct %114 %112 %112 %112 %112 +%119 = OpIAdd %5 %109 %37 +OpImageWrite %18 %119 %118 +%120 = OpCompositeConstruct %114 %113 %113 %113 %113 +%121 = OpIAdd %5 %109 %39 +OpImageWrite %18 %121 %120 +OpReturn +OpFunctionEnd +#endif diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag b/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag index a92d895..bee9305 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag @@ -60,6 +60,7 @@ OpDecorate %16 RelaxedPrecision OpDecorate %16 Flat OpDecorate %16 Location 0 OpDecorate %18 Location 0 +OpDecorate %41 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag index b733765..9e3aac8 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag @@ -16,7 +16,11 @@ layout(location = 0) out int SV_Target; void main() { uint _20 = uint(A); - _13._m0[_20] = floatBitsToUint(uintBitsToFloat(_9._m0[_20 + 1u]) + uintBitsToFloat(_9._m0[_20])); + float _26 = uintBitsToFloat(_9._m0[_20]); + mediump float mp_copy_26 = _26; + float _31 = uintBitsToFloat(_9._m0[_20 + 1u]); + mediump float mp_copy_31 = _31; + _13._m0[_20] = floatBitsToUint(mp_copy_31 + mp_copy_26); SV_Target = int(10u); } @@ -54,6 +58,7 @@ OpDecorate %16 RelaxedPrecision OpDecorate %16 Flat OpDecorate %16 Location 0 OpDecorate %18 Location 0 +OpDecorate %32 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag index 5a5fbb3..d1288f8 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag @@ -93,6 +93,7 @@ OpDecorate %24 RelaxedPrecision OpDecorate %24 Flat OpDecorate %24 Location 0 OpDecorate %26 Location 0 +OpDecorate %46 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag index 53e1ea3..531354a 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag @@ -106,6 +106,7 @@ OpDecorate %34 ArrayStride 4 OpMemberDecorate %35 0 Offset 0 OpDecorate %35 Block OpMemberDecorate %35 0 NonWritable +OpDecorate %51 RelaxedPrecision OpDecorate %52 ArrayStride 4 OpMemberDecorate %53 0 Offset 0 OpDecorate %53 Block diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag index dd70b90..1862c46 100644 --- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag +++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag @@ -30,7 +30,11 @@ float _43; void main() { uint _23 = uint(A); - PhysicalPointerFloatArray(registers._m2).value[_23] = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23 + 1u] + PhysicalPointerFloatNonWriteArray(registers._m1).value[_23]; + float _32 = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23]; + mediump float mp_copy_32 = _32; + float _36 = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23 + 1u]; + mediump float mp_copy_36 = _36; + PhysicalPointerFloatArray(registers._m2).value[_23] = mp_copy_36 + mp_copy_32; SV_Target = int(10u); } @@ -70,6 +74,7 @@ OpDecorate %25 ArrayStride 4 OpMemberDecorate %26 0 Offset 0 OpDecorate %26 Block OpMemberDecorate %26 0 NonWritable +OpDecorate %37 RelaxedPrecision OpDecorate %38 ArrayStride 4 OpMemberDecorate %39 0 Offset 0 OpDecorate %39 Block diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag index 4a3719c..6f67537 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag @@ -51,14 +51,6 @@ void main() imageStore(_54[registers._m4 + 4u], int(_167), ivec4(uvec4(4294967276u))); imageStore(_58[registers._m4 + 5u], int(_167), uvec4(80u)); mediump vec4 _191 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); - mediump float _193 = _191.x; - float hp_copy_193 = _193; - mediump float _194 = _191.y; - float hp_copy_194 = _194; - mediump float _195 = _191.z; - float hp_copy_195 = _195; - mediump float _196 = _191.w; - float hp_copy_196 = _196; uvec4 _199 = uvec4(texelFetch(_18[_145], ivec2(uvec2(1u, 2u)), int(3u))); mediump uvec4 _205 = texelFetch(_22[_139], ivec2(uvec2(4u, 5u)), int(6u)); mediump uint _207 = _205.x; @@ -70,14 +62,14 @@ void main() mediump uint _210 = _205.w; uint hp_copy_210 = _210; mediump vec4 _212 = textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y)); - mediump float _213 = _212.x; - float hp_copy_213 = _213; - mediump float _214 = _212.y; - float hp_copy_214 = _214; - mediump float _215 = _212.z; - float hp_copy_215 = _215; - mediump float _216 = _212.w; - float hp_copy_216 = _216; + mediump float _217 = _212.x + _191.x; + float hp_copy_217 = _217; + mediump float _218 = _212.y + _191.y; + float hp_copy_218 = _218; + mediump float _219 = _212.z + _191.z; + float hp_copy_219 = _219; + mediump float _220 = _212.w + _191.w; + float hp_copy_220 = _220; uvec4 _225 = uvec4(textureGather(isampler2D(_18[_145], _62[registers._m2]), vec2(UV.x, UV.y), int(1u))); mediump uvec4 _237 = textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u)); mediump uint _238 = _237.x; @@ -88,10 +80,18 @@ void main() uint hp_copy_240 = _240; mediump uint _241 = _237.w; uint hp_copy_241 = _241; - mediump vec4 _252 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))); - mediump float _253 = _252.x; + mediump float _253 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))).x; + float hp_copy_253 = _253; mediump vec4 _260 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); mediump float _261 = _260.x; + float _262 = _261 + (hp_copy_253 + hp_copy_217); + mediump float mp_copy_262 = _262; + float _263 = _261 + (hp_copy_253 + hp_copy_218); + mediump float mp_copy_263 = _263; + float _264 = _261 + (hp_copy_253 + hp_copy_219); + mediump float mp_copy_264 = _264; + float _265 = _261 + (hp_copy_253 + hp_copy_220); + mediump float mp_copy_265 = _265; vec2 _266 = vec2(UV.x, UV.y); mediump vec4 _267 = textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _266, 0.5); mediump vec4 _276 = textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0); @@ -100,10 +100,10 @@ void main() mediump vec4 _311 = texelFetch(_26[registers._m1 + 3u], int(_167)); uvec4 _321 = uvec4(texelFetch(_30[registers._m1 + 4u], int(_167))); mediump uvec4 _330 = texelFetch(_34[registers._m1 + 5u], int(_167)); - SV_Target.x = ((((_267.x + (_261 + (_253 + (hp_copy_213 + hp_copy_193)))) + _276.x) + _289.x) + _301.x) + _311.x; - SV_Target.y = ((((_267.y + (_261 + (_253 + (hp_copy_214 + hp_copy_194)))) + _276.y) + _289.y) + _301.y) + _311.y; - SV_Target.z = ((((_267.z + (_261 + (_253 + (hp_copy_215 + hp_copy_195)))) + _276.z) + _289.z) + _301.z) + _311.z; - SV_Target.w = ((((_267.w + (_261 + (_253 + (hp_copy_216 + hp_copy_196)))) + _276.w) + _289.w) + _301.w) + _311.w; + SV_Target.x = ((((_267.x + mp_copy_262) + _276.x) + _289.x) + _301.x) + _311.x; + SV_Target.y = ((((_267.y + mp_copy_263) + _276.y) + _289.y) + _301.y) + _311.y; + SV_Target.z = ((((_267.z + mp_copy_264) + _276.z) + _289.z) + _301.z) + _311.z; + SV_Target.w = ((((_267.w + mp_copy_265) + _276.w) + _289.w) + _301.w) + _311.w; SV_Target_1.x = int((_225.x + _199.x) + _321.x); SV_Target_1.y = int((_225.y + _199.y) + _321.y); SV_Target_1.z = int((_225.z + _199.z) + _321.z); @@ -200,6 +200,71 @@ OpDecorate %71 RelaxedPrecision OpDecorate %71 Location 1 OpDecorate %74 RelaxedPrecision OpDecorate %74 Location 2 +OpDecorate %191 RelaxedPrecision +OpDecorate %193 RelaxedPrecision +OpDecorate %194 RelaxedPrecision +OpDecorate %195 RelaxedPrecision +OpDecorate %196 RelaxedPrecision +OpDecorate %197 RelaxedPrecision +OpDecorate %205 RelaxedPrecision +OpDecorate %212 RelaxedPrecision +OpDecorate %213 RelaxedPrecision +OpDecorate %214 RelaxedPrecision +OpDecorate %215 RelaxedPrecision +OpDecorate %216 RelaxedPrecision +OpDecorate %217 RelaxedPrecision +OpDecorate %218 RelaxedPrecision +OpDecorate %219 RelaxedPrecision +OpDecorate %220 RelaxedPrecision +OpDecorate %224 RelaxedPrecision +OpDecorate %237 RelaxedPrecision +OpDecorate %267 RelaxedPrecision +OpDecorate %268 RelaxedPrecision +OpDecorate %269 RelaxedPrecision +OpDecorate %270 RelaxedPrecision +OpDecorate %271 RelaxedPrecision +OpDecorate %272 RelaxedPrecision +OpDecorate %273 RelaxedPrecision +OpDecorate %274 RelaxedPrecision +OpDecorate %275 RelaxedPrecision +OpDecorate %276 RelaxedPrecision +OpDecorate %278 RelaxedPrecision +OpDecorate %279 RelaxedPrecision +OpDecorate %280 RelaxedPrecision +OpDecorate %281 RelaxedPrecision +OpDecorate %282 RelaxedPrecision +OpDecorate %283 RelaxedPrecision +OpDecorate %284 RelaxedPrecision +OpDecorate %285 RelaxedPrecision +OpDecorate %289 RelaxedPrecision +OpDecorate %293 RelaxedPrecision +OpDecorate %294 RelaxedPrecision +OpDecorate %295 RelaxedPrecision +OpDecorate %296 RelaxedPrecision +OpDecorate %297 RelaxedPrecision +OpDecorate %298 RelaxedPrecision +OpDecorate %299 RelaxedPrecision +OpDecorate %300 RelaxedPrecision +OpDecorate %301 RelaxedPrecision +OpDecorate %303 RelaxedPrecision +OpDecorate %304 RelaxedPrecision +OpDecorate %305 RelaxedPrecision +OpDecorate %306 RelaxedPrecision +OpDecorate %307 RelaxedPrecision +OpDecorate %308 RelaxedPrecision +OpDecorate %309 RelaxedPrecision +OpDecorate %310 RelaxedPrecision +OpDecorate %311 RelaxedPrecision +OpDecorate %312 RelaxedPrecision +OpDecorate %313 RelaxedPrecision +OpDecorate %314 RelaxedPrecision +OpDecorate %315 RelaxedPrecision +OpDecorate %316 RelaxedPrecision +OpDecorate %317 RelaxedPrecision +OpDecorate %318 RelaxedPrecision +OpDecorate %319 RelaxedPrecision +OpDecorate %320 RelaxedPrecision +OpDecorate %330 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.frag b/reference/shaders/resources/typed-resources-16bit.sm60.frag index 9305754..444e249 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.frag @@ -36,14 +36,6 @@ void main() imageStore(_40, int(_81), ivec4(uvec4(4294967276u))); imageStore(_43, int(_81), uvec4(80u)); mediump vec4 _105 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y)); - mediump float _107 = _105.x; - float hp_copy_107 = _107; - mediump float _108 = _105.y; - float hp_copy_108 = _108; - mediump float _109 = _105.z; - float hp_copy_109 = _109; - mediump float _110 = _105.w; - float hp_copy_110 = _110; uvec4 _115 = uvec4(texelFetch(_12, ivec2(uvec2(1u, 2u)), int(3u))); mediump uvec4 _123 = texelFetch(_16, ivec2(uvec2(4u, 5u)), int(6u)); mediump uint _125 = _123.x; @@ -55,14 +47,14 @@ void main() mediump uint _128 = _123.w; uint hp_copy_128 = _128; mediump vec4 _130 = textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y)); - mediump float _131 = _130.x; - float hp_copy_131 = _131; - mediump float _132 = _130.y; - float hp_copy_132 = _132; - mediump float _133 = _130.z; - float hp_copy_133 = _133; - mediump float _134 = _130.w; - float hp_copy_134 = _134; + mediump float _135 = _130.x + _105.x; + float hp_copy_135 = _135; + mediump float _136 = _130.y + _105.y; + float hp_copy_136 = _136; + mediump float _137 = _130.z + _105.z; + float hp_copy_137 = _137; + mediump float _138 = _130.w + _105.w; + float hp_copy_138 = _138; uvec4 _143 = uvec4(textureGather(isampler2D(_12, _46), vec2(UV.x, UV.y), int(1u))); mediump uvec4 _155 = textureGather(usampler2D(_16, _46), vec2(UV.x, UV.y), int(2u)); mediump uint _156 = _155.x; @@ -73,10 +65,18 @@ void main() uint hp_copy_158 = _158; mediump uint _159 = _155.w; uint hp_copy_159 = _159; - mediump vec4 _170 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))); - mediump float _171 = _170.x; + mediump float _171 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))).x; + float hp_copy_171 = _171; mediump vec4 _178 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0)); mediump float _179 = _178.x; + float _180 = _179 + (hp_copy_171 + hp_copy_135); + mediump float mp_copy_180 = _180; + float _181 = _179 + (hp_copy_171 + hp_copy_136); + mediump float mp_copy_181 = _181; + float _182 = _179 + (hp_copy_171 + hp_copy_137); + mediump float mp_copy_182 = _182; + float _183 = _179 + (hp_copy_171 + hp_copy_138); + mediump float mp_copy_183 = _183; vec2 _184 = vec2(UV.x, UV.y); mediump vec4 _185 = textureGather(sampler2DShadow(_8, _47), _184, 0.5); mediump vec4 _194 = textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0); @@ -85,10 +85,10 @@ void main() mediump vec4 _229 = texelFetch(_19, int(_81)); uvec4 _239 = uvec4(texelFetch(_22, int(_81))); mediump uvec4 _248 = texelFetch(_25, int(_81)); - SV_Target.x = ((((_185.x + (_179 + (_171 + (hp_copy_131 + hp_copy_107)))) + _194.x) + _207.x) + _219.x) + _229.x; - SV_Target.y = ((((_185.y + (_179 + (_171 + (hp_copy_132 + hp_copy_108)))) + _194.y) + _207.y) + _219.y) + _229.y; - SV_Target.z = ((((_185.z + (_179 + (_171 + (hp_copy_133 + hp_copy_109)))) + _194.z) + _207.z) + _219.z) + _229.z; - SV_Target.w = ((((_185.w + (_179 + (_171 + (hp_copy_134 + hp_copy_110)))) + _194.w) + _207.w) + _219.w) + _229.w; + SV_Target.x = ((((_185.x + mp_copy_180) + _194.x) + _207.x) + _219.x) + _229.x; + SV_Target.y = ((((_185.y + mp_copy_181) + _194.y) + _207.y) + _219.y) + _229.y; + SV_Target.z = ((((_185.z + mp_copy_182) + _194.z) + _207.z) + _219.z) + _229.z; + SV_Target.w = ((((_185.w + mp_copy_183) + _194.w) + _207.w) + _219.w) + _229.w; SV_Target_1.x = int((_143.x + _115.x) + _239.x); SV_Target_1.y = int((_143.y + _115.y) + _239.y); SV_Target_1.z = int((_143.z + _115.z) + _239.z); @@ -172,6 +172,71 @@ OpDecorate %56 RelaxedPrecision OpDecorate %56 Location 1 OpDecorate %59 RelaxedPrecision OpDecorate %59 Location 2 +OpDecorate %105 RelaxedPrecision +OpDecorate %107 RelaxedPrecision +OpDecorate %108 RelaxedPrecision +OpDecorate %109 RelaxedPrecision +OpDecorate %110 RelaxedPrecision +OpDecorate %113 RelaxedPrecision +OpDecorate %123 RelaxedPrecision +OpDecorate %130 RelaxedPrecision +OpDecorate %131 RelaxedPrecision +OpDecorate %132 RelaxedPrecision +OpDecorate %133 RelaxedPrecision +OpDecorate %134 RelaxedPrecision +OpDecorate %135 RelaxedPrecision +OpDecorate %136 RelaxedPrecision +OpDecorate %137 RelaxedPrecision +OpDecorate %138 RelaxedPrecision +OpDecorate %142 RelaxedPrecision +OpDecorate %155 RelaxedPrecision +OpDecorate %185 RelaxedPrecision +OpDecorate %186 RelaxedPrecision +OpDecorate %187 RelaxedPrecision +OpDecorate %188 RelaxedPrecision +OpDecorate %189 RelaxedPrecision +OpDecorate %190 RelaxedPrecision +OpDecorate %191 RelaxedPrecision +OpDecorate %192 RelaxedPrecision +OpDecorate %193 RelaxedPrecision +OpDecorate %194 RelaxedPrecision +OpDecorate %196 RelaxedPrecision +OpDecorate %197 RelaxedPrecision +OpDecorate %198 RelaxedPrecision +OpDecorate %199 RelaxedPrecision +OpDecorate %200 RelaxedPrecision +OpDecorate %201 RelaxedPrecision +OpDecorate %202 RelaxedPrecision +OpDecorate %203 RelaxedPrecision +OpDecorate %207 RelaxedPrecision +OpDecorate %211 RelaxedPrecision +OpDecorate %212 RelaxedPrecision +OpDecorate %213 RelaxedPrecision +OpDecorate %214 RelaxedPrecision +OpDecorate %215 RelaxedPrecision +OpDecorate %216 RelaxedPrecision +OpDecorate %217 RelaxedPrecision +OpDecorate %218 RelaxedPrecision +OpDecorate %219 RelaxedPrecision +OpDecorate %221 RelaxedPrecision +OpDecorate %222 RelaxedPrecision +OpDecorate %223 RelaxedPrecision +OpDecorate %224 RelaxedPrecision +OpDecorate %225 RelaxedPrecision +OpDecorate %226 RelaxedPrecision +OpDecorate %227 RelaxedPrecision +OpDecorate %228 RelaxedPrecision +OpDecorate %229 RelaxedPrecision +OpDecorate %230 RelaxedPrecision +OpDecorate %231 RelaxedPrecision +OpDecorate %232 RelaxedPrecision +OpDecorate %233 RelaxedPrecision +OpDecorate %234 RelaxedPrecision +OpDecorate %235 RelaxedPrecision +OpDecorate %236 RelaxedPrecision +OpDecorate %237 RelaxedPrecision +OpDecorate %238 RelaxedPrecision +OpDecorate %248 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag index e6be43d..fd454c9 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag @@ -181,6 +181,81 @@ OpDecorate %71 RelaxedPrecision OpDecorate %71 Location 1 OpDecorate %74 RelaxedPrecision OpDecorate %74 Location 2 +OpDecorate %168 RelaxedPrecision +OpDecorate %169 RelaxedPrecision +OpDecorate %201 RelaxedPrecision +OpDecorate %204 RelaxedPrecision +OpDecorate %205 RelaxedPrecision +OpDecorate %206 RelaxedPrecision +OpDecorate %207 RelaxedPrecision +OpDecorate %208 RelaxedPrecision +OpDecorate %216 RelaxedPrecision +OpDecorate %224 RelaxedPrecision +OpDecorate %226 RelaxedPrecision +OpDecorate %227 RelaxedPrecision +OpDecorate %228 RelaxedPrecision +OpDecorate %229 RelaxedPrecision +OpDecorate %230 RelaxedPrecision +OpDecorate %231 RelaxedPrecision +OpDecorate %232 RelaxedPrecision +OpDecorate %233 RelaxedPrecision +OpDecorate %237 RelaxedPrecision +OpDecorate %250 RelaxedPrecision +OpDecorate %276 RelaxedPrecision +OpDecorate %277 RelaxedPrecision +OpDecorate %278 RelaxedPrecision +OpDecorate %279 RelaxedPrecision +OpDecorate %292 RelaxedPrecision +OpDecorate %293 RelaxedPrecision +OpDecorate %294 RelaxedPrecision +OpDecorate %295 RelaxedPrecision +OpDecorate %297 RelaxedPrecision +OpDecorate %299 RelaxedPrecision +OpDecorate %300 RelaxedPrecision +OpDecorate %301 RelaxedPrecision +OpDecorate %302 RelaxedPrecision +OpDecorate %303 RelaxedPrecision +OpDecorate %304 RelaxedPrecision +OpDecorate %305 RelaxedPrecision +OpDecorate %306 RelaxedPrecision +OpDecorate %307 RelaxedPrecision +OpDecorate %310 RelaxedPrecision +OpDecorate %311 RelaxedPrecision +OpDecorate %312 RelaxedPrecision +OpDecorate %313 RelaxedPrecision +OpDecorate %314 RelaxedPrecision +OpDecorate %315 RelaxedPrecision +OpDecorate %316 RelaxedPrecision +OpDecorate %317 RelaxedPrecision +OpDecorate %321 RelaxedPrecision +OpDecorate %326 RelaxedPrecision +OpDecorate %327 RelaxedPrecision +OpDecorate %328 RelaxedPrecision +OpDecorate %329 RelaxedPrecision +OpDecorate %330 RelaxedPrecision +OpDecorate %331 RelaxedPrecision +OpDecorate %332 RelaxedPrecision +OpDecorate %333 RelaxedPrecision +OpDecorate %334 RelaxedPrecision +OpDecorate %337 RelaxedPrecision +OpDecorate %338 RelaxedPrecision +OpDecorate %339 RelaxedPrecision +OpDecorate %340 RelaxedPrecision +OpDecorate %341 RelaxedPrecision +OpDecorate %342 RelaxedPrecision +OpDecorate %343 RelaxedPrecision +OpDecorate %344 RelaxedPrecision +OpDecorate %345 RelaxedPrecision +OpDecorate %347 RelaxedPrecision +OpDecorate %348 RelaxedPrecision +OpDecorate %349 RelaxedPrecision +OpDecorate %350 RelaxedPrecision +OpDecorate %351 RelaxedPrecision +OpDecorate %352 RelaxedPrecision +OpDecorate %353 RelaxedPrecision +OpDecorate %354 RelaxedPrecision +OpDecorate %355 RelaxedPrecision +OpDecorate %365 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeInt 32 0 diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag index 649a4d3..9348cac 100644 --- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag +++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag @@ -153,6 +153,81 @@ OpDecorate %56 RelaxedPrecision OpDecorate %56 Location 1 OpDecorate %59 RelaxedPrecision OpDecorate %59 Location 2 +OpDecorate %82 RelaxedPrecision +OpDecorate %83 RelaxedPrecision +OpDecorate %115 RelaxedPrecision +OpDecorate %118 RelaxedPrecision +OpDecorate %119 RelaxedPrecision +OpDecorate %120 RelaxedPrecision +OpDecorate %121 RelaxedPrecision +OpDecorate %124 RelaxedPrecision +OpDecorate %134 RelaxedPrecision +OpDecorate %142 RelaxedPrecision +OpDecorate %144 RelaxedPrecision +OpDecorate %145 RelaxedPrecision +OpDecorate %146 RelaxedPrecision +OpDecorate %147 RelaxedPrecision +OpDecorate %148 RelaxedPrecision +OpDecorate %149 RelaxedPrecision +OpDecorate %150 RelaxedPrecision +OpDecorate %151 RelaxedPrecision +OpDecorate %155 RelaxedPrecision +OpDecorate %168 RelaxedPrecision +OpDecorate %194 RelaxedPrecision +OpDecorate %195 RelaxedPrecision +OpDecorate %196 RelaxedPrecision +OpDecorate %197 RelaxedPrecision +OpDecorate %210 RelaxedPrecision +OpDecorate %211 RelaxedPrecision +OpDecorate %212 RelaxedPrecision +OpDecorate %213 RelaxedPrecision +OpDecorate %215 RelaxedPrecision +OpDecorate %217 RelaxedPrecision +OpDecorate %218 RelaxedPrecision +OpDecorate %219 RelaxedPrecision +OpDecorate %220 RelaxedPrecision +OpDecorate %221 RelaxedPrecision +OpDecorate %222 RelaxedPrecision +OpDecorate %223 RelaxedPrecision +OpDecorate %224 RelaxedPrecision +OpDecorate %225 RelaxedPrecision +OpDecorate %228 RelaxedPrecision +OpDecorate %229 RelaxedPrecision +OpDecorate %230 RelaxedPrecision +OpDecorate %231 RelaxedPrecision +OpDecorate %232 RelaxedPrecision +OpDecorate %233 RelaxedPrecision +OpDecorate %234 RelaxedPrecision +OpDecorate %235 RelaxedPrecision +OpDecorate %239 RelaxedPrecision +OpDecorate %244 RelaxedPrecision +OpDecorate %245 RelaxedPrecision +OpDecorate %246 RelaxedPrecision +OpDecorate %247 RelaxedPrecision +OpDecorate %248 RelaxedPrecision +OpDecorate %249 RelaxedPrecision +OpDecorate %250 RelaxedPrecision +OpDecorate %251 RelaxedPrecision +OpDecorate %252 RelaxedPrecision +OpDecorate %255 RelaxedPrecision +OpDecorate %256 RelaxedPrecision +OpDecorate %257 RelaxedPrecision +OpDecorate %258 RelaxedPrecision +OpDecorate %259 RelaxedPrecision +OpDecorate %260 RelaxedPrecision +OpDecorate %261 RelaxedPrecision +OpDecorate %262 RelaxedPrecision +OpDecorate %263 RelaxedPrecision +OpDecorate %265 RelaxedPrecision +OpDecorate %266 RelaxedPrecision +OpDecorate %267 RelaxedPrecision +OpDecorate %268 RelaxedPrecision +OpDecorate %269 RelaxedPrecision +OpDecorate %270 RelaxedPrecision +OpDecorate %271 RelaxedPrecision +OpDecorate %272 RelaxedPrecision +OpDecorate %273 RelaxedPrecision +OpDecorate %283 RelaxedPrecision %1 = OpTypeVoid %2 = OpTypeFunction %1 %5 = OpTypeFloat 32 diff --git a/shaders/llvm-builtin/min16-phi.sm60.comp b/shaders/llvm-builtin/min16-phi.sm60.comp new file mode 100644 index 0000000..d6f196c --- /dev/null +++ b/shaders/llvm-builtin/min16-phi.sm60.comp @@ -0,0 +1,18 @@ +RWStructuredBuffer<min16float4> B; +Texture2D<min16float4> T0; +Texture2D<min16float4> T1; +Texture2D<min16float4> T2; + +[numthreads(64, 1, 1)] +void main(uint3 thr : SV_DispatchThreadID) +{ + min16float4 v; + if (thr.x < 20) + v = T0.Load(int3(thr)); + else if (thr.y < 40) + v = T1.Load(int3(thr)); + else + v = T2.Load(int3(thr)); + + B[thr.x] = v; +} diff --git a/shaders/resources/min16-alloca-groupshared.sm60.comp b/shaders/resources/min16-alloca-groupshared.sm60.comp new file mode 100644 index 0000000..96c8852 --- /dev/null +++ b/shaders/resources/min16-alloca-groupshared.sm60.comp @@ -0,0 +1,17 @@ +groupshared min16float bar[64]; +groupshared min16float4 bar4[64]; + +RWStructuredBuffer<min16float4> O; +static min16float4 Vec; + +[numthreads(64, 1, 1)] +void main(uint thr : SV_DispatchThreadID, uint l : SV_GroupIndex) +{ + Vec = min16float4(0, 0, 0, 0); + bar[l] = min16float(l); + bar4[l] = min16float(l) + min16float4(1, 2, 3, 4); + GroupMemoryBarrierWithGroupSync(); + Vec[l & 3] = bar[l ^ 5]; + Vec[(l + 1) & 3] = bar[l ^ 4]; + O[thr] = bar[l ^ 1] + bar4[l ^ 3] + Vec; +} diff --git a/spirv_module.cpp b/spirv_module.cpp index d894024..762c466 100644 --- a/spirv_module.cpp +++ b/spirv_module.cpp @@ -822,6 +822,9 @@ void SPIRVModule::Impl::emit_basic_block(CFGNode *node) phi_op->addIdOperand(fake_loop_block->getId()); } + if (phi.relaxed) + builder.addDecoration(phi.id, spv::DecorationRelaxedPrecision); + bb->addInstruction(std::move(phi_op)); } |