Extend usage of relaxed precision.

Attempt to extend use of relaxed precision to variable storage and arithmetic as well.
author: Hans-Kristian Arntzen <post@arntzen-software.no> 2021-12-02 17:31:59 +0300
committer: Hans-Kristian Arntzen <post@arntzen-software.no> 2022-05-31 12:42:30 +0300
commit: 677148ed111a8a3ee9ce3b002f46032361a48069 (patch)
tree: 4820cb837a0e5264043b546b88c046c704ceaa31
parent: d97bb340d8f79317fb7b8049717ff6ffd0dace0f (diff)
28 files changed, 943 insertions, 63 deletions
diff --git a/cfg_structurizer.cpp b/cfg_structurizer.cpp
index aa9762e..e987080 100644
--- a/cfg_structurizer.cpp
+++ b/cfg_structurizer.cpp
@@ -1273,6 +1273,7 @@ void CFGStructurizer::insert_phi(PHINode &node)
 		PHI frontier_phi;
 		frontier_phi.id = module.allocate_id();
 		frontier_phi.type_id = phi.type_id;
+		frontier_phi.relaxed = phi.relaxed;
 		module.get_builder().addName(frontier_phi.id, (String("frontier_phi_") + frontier->name).c_str());
 
 		assert(!frontier->pred_back_edge);
@@ -1359,6 +1360,7 @@ void CFGStructurizer::insert_phi(PHINode &node)
 			// If we came from such a block,
 			// we should replace the incoming value of dominating_incoming rather than adding a new incoming value.
 			PHI merge_phi = {};
+			merge_phi.relaxed = phi.relaxed;
 
 			// Here we need to figure out if we have a cross branch which functions as a ladder.
 			// If we have such a special edge, the PHI value we find here will override any other value on this path.
diff --git a/dxil_converter.cpp b/dxil_converter.cpp
index 1ce8f76..bca7bae 100644
--- a/dxil_converter.cpp
+++ b/dxil_converter.cpp
@@ -4116,6 +4116,8 @@ bool Converter::Impl::emit_global_variables()
 		spv::Id var_id = create_variable_with_initializer(
 		    address_space == DXIL::AddressSpace::GroupShared ? spv::StorageClassWorkgroup : spv::StorageClassPrivate,
 		    pointee_type_id, initializer_id);
+
+		decorate_relaxed_precision(global.getType()->getPointerElementType(), var_id, false);
 		rewrite_value(&global, var_id);
 	}
 
@@ -4735,6 +4737,7 @@ bool Converter::Impl::emit_phi_instruction(CFGNode *block, const llvm::PHINode &
 		PHI phi;
 		phi.id = get_id_for_value(&instruction);
 		phi.type_id = get_type_id(instruction.getType());
+		phi.relaxed = type_can_relax_precision(instruction.getType(), false);
 
 		for (unsigned i = 0; i < count; i++)
 		{
@@ -5738,6 +5741,29 @@ spv::Id Converter::Impl::get_effective_input_output_type_id(DXIL::ComponentType
 	return get_type_id(get_effective_input_output_type(type), 1, 1);
 }
 
+bool Converter::Impl::type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const
+{
+	if (type->getTypeID() == llvm::Type::TypeID::ArrayTyID) // Arrays are judged by their element type (one level only).
+		type = llvm::cast<llvm::ArrayType>(type)->getArrayElementType();
+	if (type->getTypeID() == llvm::Type::TypeID::VectorTyID) // Vectors are judged by their element type.
+		type = llvm::cast<llvm::VectorType>(type)->getElementType();
+	// Relaxable only when native_16bit_operations is off: half always, i16 only if the caller knows its sign.
+	return !execution_mode_meta.native_16bit_operations &&
+	       (type->getTypeID() == llvm::Type::TypeID::HalfTyID ||
+	        (type->getTypeID() == llvm::Type::TypeID::IntegerTyID && type->getIntegerBitWidth() == 16 &&
+	         known_integer_sign));
+}
+
+void Converter::Impl::decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign)
+{
+	// Decorates `id` with RelaxedPrecision when type_can_relax_precision() accepts `type`.
+	// Integers are normally ignored since they are untyped (sign-less) in LLVM for the most part, and
+	// RelaxedPrecision may sign-extend based on the OpTypeInt's signedness. Texture loads and similar load in the
+	// appropriate sign, so they are safe. DXIL is awkward here: min16int and min16uint lower to the same i16 type.
+	if (type_can_relax_precision(type, known_integer_sign))
+		builder().addDecoration(id, spv::DecorationRelaxedPrecision);
+}
+
 void Converter::Impl::set_option(const OptionBase &cap)
 {
 	switch (cap.type)
diff --git a/ir.hpp b/ir.hpp
index d0e80c9..8713c0f 100644
--- a/ir.hpp
+++ b/ir.hpp
@@ -60,6 +60,7 @@ struct PHI
 {
 	uint32_t id = 0;
 	uint32_t type_id = 0;
+	bool relaxed = false;
 	Vector<IncomingValue> incoming;
 };
 
diff --git a/opcodes/converter_impl.hpp b/opcodes/converter_impl.hpp
index 8f9536d..2b3c5ad 100644
--- a/opcodes/converter_impl.hpp
+++ b/opcodes/converter_impl.hpp
@@ -606,5 +606,8 @@ struct Converter::Impl
 	UnorderedSet<const llvm::CallInst *> resource_handles_needing_sink;
 	UnorderedSet<const llvm::CallInst *> resource_handle_is_conservative;
 	UnorderedMap<const llvm::BasicBlock *, Vector<const llvm::Instruction *>> bb_to_sinks;
+
+	bool type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const;
+	void decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign);
 };
 } // namespace dxil_spv
diff --git a/opcodes/dxil/dxil_arithmetic.cpp b/opcodes/dxil/dxil_arithmetic.cpp
index bdc2ebe..ec091b9 100644
--- a/opcodes/dxil/dxil_arithmetic.cpp
+++ b/opcodes/dxil/dxil_arithmetic.cpp
@@ -47,6 +47,7 @@ bool emit_imad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
 bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruction)
 {
 	auto &builder = impl.builder();
+	spv::Id result_id;
 
 	if (instruction->getMetadata("dx.precise") != nullptr)
 	{
@@ -61,11 +62,15 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
 		impl.add(mul_op);
 		builder.addDecoration(mul_op->id, spv::DecorationNoContraction);
 
+		impl.decorate_relaxed_precision(instruction->getType(), mul_op->id, false);
+
 		Operation *add_op = impl.allocate(spv::OpFAdd, instruction);
 		add_op->add_id(mul_op->id);
 		add_op->add_id(impl.get_id_for_value(instruction->getOperand(3)));
 		impl.add(add_op);
 		builder.addDecoration(add_op->id, spv::DecorationNoContraction);
+
+		result_id = add_op->id;
 	}
 	else
 	{
@@ -78,8 +83,11 @@ bool emit_fmad_instruction(Converter::Impl &impl, const llvm::CallInst *instruct
 		for (unsigned i = 1; i < 4; i++)
 			op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
 		impl.add(op);
+
+		result_id = op->id;
 	}
 
+	impl.decorate_relaxed_precision(instruction->getType(), result_id, false);
 	return true;
 }
 
@@ -152,6 +160,7 @@ bool emit_dxil_std450_binary_instruction(GLSLstd450 opcode, Converter::Impl &imp
 		op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
 
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -169,6 +178,7 @@ bool emit_dxil_std450_trinary_instruction(GLSLstd450 opcode, Converter::Impl &im
 		op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
 
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -184,6 +194,7 @@ bool emit_dxil_std450_unary_instruction(GLSLstd450 opcode, Converter::Impl &impl
 	op->add_id(impl.get_id_for_value(instruction->getOperand(1)));
 
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -192,6 +203,7 @@ bool emit_dxil_unary_instruction(spv::Op opcode, Converter::Impl &impl, const ll
 	Operation *op = impl.allocate(opcode, instruction);
 	op->add_id(impl.get_id_for_value(instruction->getOperand(1)));
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -239,6 +251,7 @@ bool emit_saturate_instruction(Converter::Impl &impl, const llvm::CallInst *inst
 	              constant_0, constant_1 });
 
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -258,6 +271,7 @@ bool emit_dot_instruction(unsigned dimensions, Converter::Impl &impl, const llvm
 
 	op->add_ids({ vec0, vec1 });
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -365,11 +379,19 @@ bool emit_legacy_f16_to_f32_instruction(Converter::Impl &impl, const llvm::CallI
 	unpack_op->add_id(impl.get_id_for_value(instruction->getOperand(1)));
 	impl.add(unpack_op);
 
+	// By construction, these are relaxed precision, but decorating them causes lots of unrelated shader changes
+	// and doesn't make too much sense to add, so the decoration is left disabled.
+	//builder.addDecoration(unpack_op->id, spv::DecorationRelaxedPrecision);
+
 	Operation *op = impl.allocate(spv::OpCompositeExtract, instruction);
 	op->add_id(unpack_op->id);
 	op->add_literal(0);
 	impl.add(op);
 
+	// By construction, these are relaxed precision, but decorating them causes lots of unrelated shader changes
+	// and doesn't make too much sense to add, so the decoration is left disabled.
+	//builder.addDecoration(op->id, spv::DecorationRelaxedPrecision);
+
 	return true;
 }
 
@@ -406,6 +428,7 @@ bool emit_legacy_f32_to_f16_instruction(Converter::Impl &impl, const llvm::CallI
 	spv::Id inputs[2] = { input_id, builder.makeFloatConstant(0.0f) };
 	op->add_id(impl.build_vector(builder.makeFloatType(32), inputs, 2));
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -539,6 +562,8 @@ bool emit_dot2_add_half_instruction(Converter::Impl &impl, const llvm::CallInst
 	if (precise)
 		builder.addDecoration(acc_op->id, spv::DecorationNoContraction);
 
+	// This opcode requires native FP16, so RelaxedPrecision is useless.
+
 	return true;
 }
 
diff --git a/opcodes/dxil/dxil_buffer.cpp b/opcodes/dxil/dxil_buffer.cpp
index 442b77d..10feda7 100644
--- a/opcodes/dxil/dxil_buffer.cpp
+++ b/opcodes/dxil/dxil_buffer.cpp
@@ -804,6 +804,9 @@ bool emit_buffer_load_instruction(Converter::Impl &impl, const llvm::CallInst *i
 
 		Operation *op = impl.allocate(opcode, instruction, sample_type);
 
+		if (!sparse)
+			impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true);
+
 		op->add_ids({ image_id, access.index_id });
 		impl.add(op);
 
diff --git a/opcodes/dxil/dxil_sampling.cpp b/opcodes/dxil/dxil_sampling.cpp
index 164009c..3e714d6 100644
--- a/opcodes/dxil/dxil_sampling.cpp
+++ b/opcodes/dxil/dxil_sampling.cpp
@@ -262,6 +262,9 @@ bool emit_sample_instruction(DXIL::Op opcode, Converter::Impl &impl, const llvm:
 	// Comparison sampling only returns a scalar, so we'll need to splat out result.
 	Operation *op = impl.allocate(spv_op, instruction, sample_type);
 
+	if (!sparse)
+		impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true);
+
 	op->add_id(combined_image_sampler_id);
 	op->add_id(impl.build_vector(builder.makeFloatType(32), coord, num_coords_full));
 
@@ -366,6 +369,9 @@ bool emit_sample_grad_instruction(Converter::Impl &impl, const llvm::CallInst *i
 		impl.allocate(sparse ? spv::OpImageSparseSampleExplicitLod : spv::OpImageSampleExplicitLod,
 		              instruction, sample_type);
 
+	if (!sparse)
+		impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true);
+
 	op->add_ids({
 	    combined_image_sampler_id,
 	    impl.build_vector(builder.makeFloatType(32), coord, num_coords_full),
@@ -457,6 +463,8 @@ bool emit_texture_load_instruction(Converter::Impl &impl, const llvm::CallInst *
 		opcode = sparse ? spv::OpImageSparseFetch : spv::OpImageFetch;
 
 	Operation *op = impl.allocate(opcode, instruction, sample_type);
+	if (!sparse)
+		impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true);
 
 	op->add_ids({ image_id, impl.build_vector(builder.makeUintType(32), coord, num_coords_full) });
 	op->add_literal(image_ops);
@@ -733,6 +741,8 @@ bool emit_texture_gather_instruction(bool compare, Converter::Impl &impl, const
 		opcode = sparse ? spv::OpImageSparseGather : spv::OpImageGather;
 
 	Operation *op = impl.allocate(opcode, instruction, sample_type);
+	if (!sparse)
+		impl.decorate_relaxed_precision(instruction->getType()->getStructElementType(0), op->id, true);
 
 	op->add_ids({ combined_image_sampler_id, coord_id, aux_id });
 
diff --git a/opcodes/opcodes_llvm_builtins.cpp b/opcodes/opcodes_llvm_builtins.cpp
index 95cc0d8..4a51d1f 100644
--- a/opcodes/opcodes_llvm_builtins.cpp
+++ b/opcodes/opcodes_llvm_builtins.cpp
@@ -140,6 +140,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator *
 	bool signed_input = false;
 	bool is_width_sensitive = false;
 	bool is_precision_sensitive = false;
+	bool can_relax_precision = false;
 	spv::Op opcode;
 
 	switch (instruction->getOpcode())
@@ -147,16 +148,19 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator *
 	case llvm::BinaryOperator::BinaryOps::FAdd:
 		opcode = spv::OpFAdd;
 		is_precision_sensitive = true;
+		can_relax_precision = true;
 		break;
 
 	case llvm::BinaryOperator::BinaryOps::FSub:
 		opcode = spv::OpFSub;
 		is_precision_sensitive = true;
+		can_relax_precision = true;
 		break;
 
 	case llvm::BinaryOperator::BinaryOps::FMul:
 		opcode = spv::OpFMul;
 		is_precision_sensitive = true;
+		can_relax_precision = true;
 		if (peephole_trivial_arithmetic_identity(impl, instruction, llvm::BinaryOperator::BinaryOps::FDiv, true))
 			return true;
 		break;
@@ -164,6 +168,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator *
 	case llvm::BinaryOperator::BinaryOps::FDiv:
 		opcode = spv::OpFDiv;
 		is_precision_sensitive = true;
+		can_relax_precision = true;
 		if (peephole_trivial_arithmetic_identity(impl, instruction, llvm::BinaryOperator::BinaryOps::FMul, false))
 			return true;
 		break;
@@ -215,6 +220,7 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator *
 	case llvm::BinaryOperator::BinaryOps::FRem:
 		opcode = spv::OpFRem;
 		is_precision_sensitive = true;
+		can_relax_precision = true;
 		break;
 
 	case llvm::BinaryOperator::BinaryOps::URem:
@@ -291,6 +297,10 @@ bool emit_binary_instruction(Converter::Impl &impl, const llvm::BinaryOperator *
 	impl.add(op);
 	if (is_precision_sensitive && !instruction->isFast())
 		impl.builder().addDecoration(op->id, spv::DecorationNoContraction);
+
+	// Only bother relaxing FP, since integers are murky w.r.t. signedness in DXIL.
+	if (can_relax_precision)
+		impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -311,6 +321,7 @@ bool emit_unary_instruction(Converter::Impl &impl, const llvm::UnaryOperator *in
 
 	Operation *op = impl.allocate(opcode, instruction);
 	op->add_id(impl.get_id_for_value(instruction->getOperand(0)));
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 
 	impl.add(op);
 	return true;
@@ -421,6 +432,7 @@ static spv::Id emit_boolean_convert_instruction(Converter::Impl &impl, const Ins
 	op->add_id(impl.get_id_for_value(instruction->getOperand(0)));
 	op->add_ids({ const_1, const_0 });
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return op->id;
 }
 
@@ -509,8 +521,9 @@ static bool value_cast_is_noop(Converter::Impl &impl, const InstructionType *ins
 template <typename InstructionType>
 static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const InstructionType *instruction)
 {
-	spv::Op opcode;
+	bool can_relax_precision = false;
 	bool signed_input = false;
+	spv::Op opcode;
 
 	if (value_cast_is_noop(impl, instruction))
 	{
@@ -553,6 +566,8 @@ static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const Instructi
 	case llvm::Instruction::CastOps::FPTrunc:
 	case llvm::Instruction::CastOps::FPExt:
 		opcode = spv::OpFConvert;
+		// Relaxing precision on integers in DXIL is very sketchy, so don't bother.
+		can_relax_precision = true;
 		break;
 
 	case llvm::Instruction::CastOps::FPToUI:
@@ -623,6 +638,8 @@ static spv::Id emit_cast_instruction_impl(Converter::Impl &impl, const Instructi
 		Operation *op = impl.allocate(opcode, instruction);
 		op->add_id(build_naturally_extended_value(impl, instruction->getOperand(0), signed_input));
 		impl.add(op);
+		if (can_relax_precision)
+			impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 		return op->id;
 	}
 }
@@ -1053,6 +1070,7 @@ bool emit_extract_value_instruction(Converter::Impl &impl, const llvm::ExtractVa
 			op->add_literal(instruction->getIndices()[i]);
 
 		impl.add(op);
+		impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	}
 
 	return true;
@@ -1091,6 +1109,7 @@ bool emit_alloca_instruction(Converter::Impl &impl, const llvm::AllocaInst *inst
 	spv::Id var_id = impl.create_variable(storage, pointee_type_id);
 	impl.rewrite_value(instruction, var_id);
 	impl.handle_to_storage_class[instruction] = storage;
+	impl.decorate_relaxed_precision(element_type, var_id, false);
 	return true;
 }
 
@@ -1102,6 +1121,7 @@ bool emit_select_instruction(Converter::Impl &impl, const llvm::SelectInst *inst
 		op->add_id(impl.get_id_for_value(instruction->getOperand(i)));
 
 	impl.add(op);
+	impl.decorate_relaxed_precision(instruction->getType(), op->id, false);
 	return true;
 }
 
@@ -1224,12 +1244,14 @@ bool emit_shufflevector_instruction(Converter::Impl &impl, const llvm::ShuffleVe
 
 bool emit_extractelement_instruction(Converter::Impl &impl, const llvm::ExtractElementInst *inst)
 {
+	spv::Id id;
 	if (auto *constant_int = llvm::dyn_cast<llvm::ConstantInt>(inst->getIndexOperand()))
 	{
 		Operation *op = impl.allocate(spv::OpCompositeExtract, inst);
 		op->add_id(impl.get_id_for_value(inst->getVectorOperand()));
 		op->add_literal(uint32_t(constant_int->getUniqueInteger().getZExtValue()));
 		impl.add(op);
+		id = op->id;
 	}
 	else
 	{
@@ -1237,7 +1259,9 @@ bool emit_extractelement_instruction(Converter::Impl &impl, const llvm::ExtractE
 		op->add_id(impl.get_id_for_value(inst->getVectorOperand()));
 		op->add_id(impl.get_id_for_value(inst->getIndexOperand()));
 		impl.add(op);
+		id = op->id;
 	}
+	impl.decorate_relaxed_precision(inst->getType(), id, false);
 	return true;
 }
 
diff --git a/reference/shaders/fp16/saturate.sm60.frag b/reference/shaders/fp16/saturate.sm60.frag
index 5ed9216..d954d13 100644
--- a/reference/shaders/fp16/saturate.sm60.frag
+++ b/reference/shaders/fp16/saturate.sm60.frag
@@ -5,22 +5,14 @@ layout(location = 0) out mediump vec4 SV_Target;
 
 void main()
 {
-    mediump float _15 = V.x;
-    float hp_copy_15 = _15;
-    mediump float _18 = V.y;
-    float hp_copy_18 = _18;
-    mediump float _21 = V.z;
-    float hp_copy_21 = _21;
-    mediump float _24 = V.w;
-    float hp_copy_24 = _24;
-    float _40 = isnan(0.0) ? hp_copy_15 : (isnan(hp_copy_15) ? 0.0 : max(hp_copy_15, 0.0));
-    float _52 = isnan(0.0) ? hp_copy_18 : (isnan(hp_copy_18) ? 0.0 : max(hp_copy_18, 0.0));
-    float _64 = isnan(0.0) ? hp_copy_21 : (isnan(hp_copy_21) ? 0.0 : max(hp_copy_21, 0.0));
-    float _76 = isnan(0.0) ? hp_copy_24 : (isnan(hp_copy_24) ? 0.0 : max(hp_copy_24, 0.0));
-    SV_Target.x = isnan(1.0) ? _40 : (isnan(_40) ? 1.0 : min(_40, 1.0));
-    SV_Target.y = isnan(1.0) ? _52 : (isnan(_52) ? 1.0 : min(_52, 1.0));
-    SV_Target.z = isnan(1.0) ? _64 : (isnan(_64) ? 1.0 : min(_64, 1.0));
-    SV_Target.w = isnan(1.0) ? _76 : (isnan(_76) ? 1.0 : min(_76, 1.0));
+    mediump float _39 = isnan(0.0) ? V.x : (isnan(V.x) ? 0.0 : max(V.x, 0.0));
+    mediump float _50 = isnan(0.0) ? V.y : (isnan(V.y) ? 0.0 : max(V.y, 0.0));
+    mediump float _61 = isnan(0.0) ? V.z : (isnan(V.z) ? 0.0 : max(V.z, 0.0));
+    mediump float _72 = isnan(0.0) ? V.w : (isnan(V.w) ? 0.0 : max(V.w, 0.0));
+    SV_Target.x = isnan(1.0) ? _39 : (isnan(_39) ? 1.0 : min(_39, 1.0));
+    SV_Target.y = isnan(1.0) ? _50 : (isnan(_50) ? 1.0 : min(_50, 1.0));
+    SV_Target.z = isnan(1.0) ? _61 : (isnan(_61) ? 1.0 : min(_61, 1.0));
+    SV_Target.w = isnan(1.0) ? _72 : (isnan(_72) ? 1.0 : min(_72, 1.0));
 }
 
 
@@ -43,6 +35,10 @@ OpDecorate %8 RelaxedPrecision
 OpDecorate %8 Location 0
 OpDecorate %10 RelaxedPrecision
 OpDecorate %10 Location 0
+OpDecorate %28 RelaxedPrecision
+OpDecorate %29 RelaxedPrecision
+OpDecorate %30 RelaxedPrecision
+OpDecorate %31 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeFloat 32
diff --git a/reference/shaders/fp16/saturate.sm60.native-fp16.frag b/reference/shaders/fp16/saturate.sm60.native-fp16.frag
index 9c91a9f..2455267 100644
--- a/reference/shaders/fp16/saturate.sm60.native-fp16.frag
+++ b/reference/shaders/fp16/saturate.sm60.native-fp16.frag
@@ -48,6 +48,10 @@ OpDecorate %8 RelaxedPrecision
 OpDecorate %8 Location 0
 OpDecorate %10 RelaxedPrecision
 OpDecorate %10 Location 0
+OpDecorate %33 RelaxedPrecision
+OpDecorate %34 RelaxedPrecision
+OpDecorate %35 RelaxedPrecision
+OpDecorate %36 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeFloat 32
diff --git a/reference/shaders/llvm-builtin/min16-phi.sm60.comp b/reference/shaders/llvm-builtin/min16-phi.sm60.comp
new file mode 100644
index 0000000..60d79f5
--- /dev/null
+++ b/reference/shaders/llvm-builtin/min16-phi.sm60.comp
@@ -0,0 +1,213 @@
+#version 460
+#extension GL_EXT_samplerless_texture_functions : require
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform mediump texture2D _8;
+layout(set = 0, binding = 1) uniform mediump texture2D _9;
+layout(set = 0, binding = 2) uniform mediump texture2D _10;
+layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _14;
+
+void main()
+{
+    mediump float _45;
+    mediump float _48;
+    mediump float _51;
+    mediump float _54;
+    if (gl_GlobalInvocationID.x < 20u)
+    {
+        mediump vec4 _36 = texelFetch(_8, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z));
+        _45 = _36.x;
+        _48 = _36.y;
+        _51 = _36.z;
+        _54 = _36.w;
+    }
+    else
+    {
+        mediump float frontier_phi_3_2_ladder;
+        mediump float frontier_phi_3_2_ladder_1;
+        mediump float frontier_phi_3_2_ladder_2;
+        mediump float frontier_phi_3_2_ladder_3;
+        if (gl_GlobalInvocationID.y < 40u)
+        {
+            mediump vec4 _72 = texelFetch(_9, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z));
+            frontier_phi_3_2_ladder = _72.x;
+            frontier_phi_3_2_ladder_1 = _72.y;
+            frontier_phi_3_2_ladder_2 = _72.z;
+            frontier_phi_3_2_ladder_3 = _72.w;
+        }
+        else
+        {
+            mediump vec4 _74 = texelFetch(_10, ivec2(uvec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)), int(gl_GlobalInvocationID.z));
+            frontier_phi_3_2_ladder = _74.x;
+            frontier_phi_3_2_ladder_1 = _74.y;
+            frontier_phi_3_2_ladder_2 = _74.z;
+            frontier_phi_3_2_ladder_3 = _74.w;
+        }
+        _45 = frontier_phi_3_2_ladder;
+        _48 = frontier_phi_3_2_ladder_1;
+        _51 = frontier_phi_3_2_ladder_2;
+        _54 = frontier_phi_3_2_ladder_3;
+    }
+    uint _57 = gl_GlobalInvocationID.x * 4u;
+    imageStore(_14, int(_57), uvec4(floatBitsToUint(_45)));
+    imageStore(_14, int(_57 + 1u), uvec4(floatBitsToUint(_48)));
+    imageStore(_14, int(_57 + 2u), uvec4(floatBitsToUint(_51)));
+    imageStore(_14, int(_57 + 3u), uvec4(floatBitsToUint(_54)));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 88
+; Schema: 0
+OpCapability Shader
+OpCapability ImageBuffer
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %3 "main" %21
+OpExecutionMode %3 LocalSize 64 1 1
+OpName %3 "main"
+OpName %76 "frontier_phi_3.2.ladder"
+OpName %77 "frontier_phi_3.2.ladder"
+OpName %78 "frontier_phi_3.2.ladder"
+OpName %79 "frontier_phi_3.2.ladder"
+OpDecorate %8 RelaxedPrecision
+OpDecorate %8 DescriptorSet 0
+OpDecorate %8 Binding 0
+OpDecorate %9 RelaxedPrecision
+OpDecorate %9 DescriptorSet 0
+OpDecorate %9 Binding 1
+OpDecorate %10 RelaxedPrecision
+OpDecorate %10 DescriptorSet 0
+OpDecorate %10 Binding 2
+OpDecorate %14 DescriptorSet 0
+OpDecorate %14 Binding 0
+OpDecorate %14 NonReadable
+OpDecorate %21 BuiltIn GlobalInvocationId
+OpDecorate %36 RelaxedPrecision
+OpDecorate %39 RelaxedPrecision
+OpDecorate %40 RelaxedPrecision
+OpDecorate %41 RelaxedPrecision
+OpDecorate %42 RelaxedPrecision
+OpDecorate %72 RelaxedPrecision
+OpDecorate %46 RelaxedPrecision
+OpDecorate %49 RelaxedPrecision
+OpDecorate %52 RelaxedPrecision
+OpDecorate %55 RelaxedPrecision
+OpDecorate %74 RelaxedPrecision
+OpDecorate %47 RelaxedPrecision
+OpDecorate %50 RelaxedPrecision
+OpDecorate %53 RelaxedPrecision
+OpDecorate %56 RelaxedPrecision
+OpDecorate %76 RelaxedPrecision
+OpDecorate %77 RelaxedPrecision
+OpDecorate %78 RelaxedPrecision
+OpDecorate %79 RelaxedPrecision
+OpDecorate %45 RelaxedPrecision
+OpDecorate %48 RelaxedPrecision
+OpDecorate %51 RelaxedPrecision
+OpDecorate %54 RelaxedPrecision
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeFloat 32
+%6 = OpTypeImage %5 2D 0 0 0 1 Unknown
+%7 = OpTypePointer UniformConstant %6
+%8 = OpVariable %7 UniformConstant
+%9 = OpVariable %7 UniformConstant
+%10 = OpVariable %7 UniformConstant
+%11 = OpTypeInt 32 0
+%12 = OpTypeImage %11 Buffer 0 0 0 2 R32ui
+%13 = OpTypePointer UniformConstant %12
+%14 = OpVariable %13 UniformConstant
+%19 = OpTypeVector %11 3
+%20 = OpTypePointer Input %19
+%21 = OpVariable %20 Input
+%22 = OpTypePointer Input %11
+%24 = OpConstant %11 0
+%27 = OpConstant %11 1
+%30 = OpConstant %11 2
+%32 = OpTypeBool
+%34 = OpConstant %11 20
+%35 = OpTypeVector %5 4
+%37 = OpTypeVector %11 2
+%44 = OpConstant %11 40
+%58 = OpConstant %11 4
+%63 = OpTypeVector %11 4
+%71 = OpConstant %11 3
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+OpBranch %80
+%80 = OpLabel
+%15 = OpLoad %12 %14
+%16 = OpLoad %6 %10
+%17 = OpLoad %6 %9
+%18 = OpLoad %6 %8
+%23 = OpAccessChain %22 %21 %24
+%25 = OpLoad %11 %23
+%26 = OpAccessChain %22 %21 %27
+%28 = OpLoad %11 %26
+%29 = OpAccessChain %22 %21 %30
+%31 = OpLoad %11 %29
+%33 = OpULessThan %32 %25 %34
+OpSelectionMerge %86 None
+OpBranchConditional %33 %85 %81
+%85 = OpLabel
+%38 = OpCompositeConstruct %37 %25 %28
+%36 = OpImageFetch %35 %18 %38 Lod %31
+%39 = OpCompositeExtract %5 %36 0
+%40 = OpCompositeExtract %5 %36 1
+%41 = OpCompositeExtract %5 %36 2
+%42 = OpCompositeExtract %5 %36 3
+OpBranch %86
+%81 = OpLabel
+%43 = OpULessThan %32 %28 %44
+OpSelectionMerge %84 None
+OpBranchConditional %43 %83 %82
+%83 = OpLabel
+%73 = OpCompositeConstruct %37 %25 %28
+%72 = OpImageFetch %35 %17 %73 Lod %31
+%46 = OpCompositeExtract %5 %72 0
+%49 = OpCompositeExtract %5 %72 1
+%52 = OpCompositeExtract %5 %72 2
+%55 = OpCompositeExtract %5 %72 3
+OpBranch %84
+%82 = OpLabel
+%75 = OpCompositeConstruct %37 %25 %28
+%74 = OpImageFetch %35 %16 %75 Lod %31
+%47 = OpCompositeExtract %5 %74 0
+%50 = OpCompositeExtract %5 %74 1
+%53 = OpCompositeExtract %5 %74 2
+%56 = OpCompositeExtract %5 %74 3
+OpBranch %84
+%84 = OpLabel
+%76 = OpPhi %5 %46 %83 %47 %82
+%77 = OpPhi %5 %49 %83 %50 %82
+%78 = OpPhi %5 %52 %83 %53 %82
+%79 = OpPhi %5 %55 %83 %56 %82
+OpBranch %86
+%86 = OpLabel
+%45 = OpPhi %5 %39 %85 %76 %84
+%48 = OpPhi %5 %40 %85 %77 %84
+%51 = OpPhi %5 %41 %85 %78 %84
+%54 = OpPhi %5 %42 %85 %79 %84
+%57 = OpIMul %11 %25 %58
+%59 = OpBitcast %11 %45
+%60 = OpBitcast %11 %48
+%61 = OpBitcast %11 %51
+%62 = OpBitcast %11 %54
+%64 = OpCompositeConstruct %63 %59 %59 %59 %59
+OpImageWrite %15 %57 %64
+%65 = OpCompositeConstruct %63 %60 %60 %60 %60
+%66 = OpIAdd %11 %57 %27
+OpImageWrite %15 %66 %65
+%67 = OpCompositeConstruct %63 %61 %61 %61 %61
+%68 = OpIAdd %11 %57 %30
+OpImageWrite %15 %68 %67
+%69 = OpCompositeConstruct %63 %62 %62 %62 %62
+%70 = OpIAdd %11 %57 %71
+OpImageWrite %15 %70 %69
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
index 8b2179b..e8ae469 100644
--- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
@@ -71,6 +71,14 @@ OpDecorate %19 ArrayStride 16
 OpMemberDecorate %20 0 Offset 0
 OpDecorate %20 Block
 OpMemberDecorate %20 0 NonWritable
+OpDecorate %34 RelaxedPrecision
+OpDecorate %35 RelaxedPrecision
+OpDecorate %36 RelaxedPrecision
+OpDecorate %37 RelaxedPrecision
+OpDecorate %46 RelaxedPrecision
+OpDecorate %47 RelaxedPrecision
+OpDecorate %48 RelaxedPrecision
+OpDecorate %49 RelaxedPrecision
 OpDecorate %57 ArrayStride 16
 OpMemberDecorate %58 0 Offset 0
 OpDecorate %58 Block
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
index 6ff9f96..cad0869 100644
--- a/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
@@ -80,6 +80,14 @@ OpDecorate %19 ArrayStride 16
 OpMemberDecorate %20 0 Offset 0
 OpDecorate %20 Block
 OpMemberDecorate %20 0 NonWritable
+OpDecorate %37 RelaxedPrecision
+OpDecorate %38 RelaxedPrecision
+OpDecorate %39 RelaxedPrecision
+OpDecorate %40 RelaxedPrecision
+OpDecorate %54 RelaxedPrecision
+OpDecorate %55 RelaxedPrecision
+OpDecorate %56 RelaxedPrecision
+OpDecorate %57 RelaxedPrecision
 OpDecorate %69 ArrayStride 16
 OpMemberDecorate %70 0 Offset 0
 OpDecorate %70 Block
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
index 0e8c333..99d1a3b 100644
--- a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.frag
@@ -52,6 +52,14 @@ OpDecorate %12 Binding 0
 OpDecorate %18 DescriptorSet 0
 OpDecorate %18 Binding 0
 OpDecorate %20 Location 0
+OpDecorate %32 RelaxedPrecision
+OpDecorate %33 RelaxedPrecision
+OpDecorate %34 RelaxedPrecision
+OpDecorate %35 RelaxedPrecision
+OpDecorate %43 RelaxedPrecision
+OpDecorate %44 RelaxedPrecision
+OpDecorate %45 RelaxedPrecision
+OpDecorate %46 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
index 1a732b3..996fc55 100644
--- a/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
+++ b/reference/shaders/resources/cbv-legacy-fp16-fp64.sm60.native-fp16.frag
@@ -63,6 +63,14 @@ OpDecorate %12 Binding 0
 OpDecorate %18 DescriptorSet 0
 OpDecorate %18 Binding 0
 OpDecorate %20 Location 0
+OpDecorate %35 RelaxedPrecision
+OpDecorate %36 RelaxedPrecision
+OpDecorate %37 RelaxedPrecision
+OpDecorate %38 RelaxedPrecision
+OpDecorate %51 RelaxedPrecision
+OpDecorate %52 RelaxedPrecision
+OpDecorate %53 RelaxedPrecision
+OpDecorate %54 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/min16-alloca-groupshared.sm60.comp b/reference/shaders/resources/min16-alloca-groupshared.sm60.comp
new file mode 100644
index 0000000..98c1f90
--- /dev/null
+++ b/reference/shaders/resources/min16-alloca-groupshared.sm60.comp
@@ -0,0 +1,210 @@
+#version 460
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _8;
+
+shared mediump float _13[64];
+shared mediump float _17[256];
+
+void main()
+{
+    mediump float _31[4];
+    _31[0u] = 0.0;
+    _31[1u] = 0.0;
+    _31[2u] = 0.0;
+    _31[3u] = 0.0;
+    float _41 = float(gl_LocalInvocationIndex);
+    mediump float mp_copy_41 = _41;
+    _13[gl_LocalInvocationIndex] = _41;
+    _17[0u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 1.0;
+    _17[1u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 2.0;
+    _17[2u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 3.0;
+    _17[3u + (gl_LocalInvocationIndex * 4u)] = mp_copy_41 + 4.0;
+    barrier();
+    _31[gl_LocalInvocationIndex & 3u] = _13[gl_LocalInvocationIndex ^ 5u];
+    _31[(gl_LocalInvocationIndex + 1u) & 3u] = _13[gl_LocalInvocationIndex ^ 4u];
+    uint _77 = gl_LocalInvocationIndex ^ 1u;
+    uint _80 = gl_LocalInvocationIndex ^ 3u;
+    uint _109 = gl_GlobalInvocationID.x * 4u;
+    imageStore(_8, int(_109), uvec4(floatBitsToUint((_17[0u + (_80 * 4u)] + _13[_77]) + _31[0u])));
+    imageStore(_8, int(_109 + 1u), uvec4(floatBitsToUint((_17[1u + (_80 * 4u)] + _13[_77]) + _31[1u])));
+    imageStore(_8, int(_109 + 2u), uvec4(floatBitsToUint((_17[2u + (_80 * 4u)] + _13[_77]) + _31[2u])));
+    imageStore(_8, int(_109 + 3u), uvec4(floatBitsToUint((_17[3u + (_80 * 4u)] + _13[_77]) + _31[3u])));
+}
+
+
+#if 0
+// SPIR-V disassembly
+; SPIR-V
+; Version: 1.3
+; Generator: Unknown(30017); 21022
+; Bound: 124
+; Schema: 0
+OpCapability Shader
+OpCapability ImageBuffer
+OpMemoryModel Logical GLSL450
+OpEntryPoint GLCompute %3 "main" %21 %26
+OpExecutionMode %3 LocalSize 64 1 1
+OpName %3 "main"
+OpDecorate %8 DescriptorSet 0
+OpDecorate %8 Binding 0
+OpDecorate %8 NonReadable
+OpDecorate %13 RelaxedPrecision
+OpDecorate %17 RelaxedPrecision
+OpDecorate %21 BuiltIn GlobalInvocationId
+OpDecorate %26 BuiltIn LocalInvocationIndex
+OpDecorate %31 RelaxedPrecision
+OpDecorate %44 RelaxedPrecision
+OpDecorate %46 RelaxedPrecision
+OpDecorate %48 RelaxedPrecision
+OpDecorate %50 RelaxedPrecision
+OpDecorate %97 RelaxedPrecision
+OpDecorate %98 RelaxedPrecision
+OpDecorate %99 RelaxedPrecision
+OpDecorate %100 RelaxedPrecision
+OpDecorate %105 RelaxedPrecision
+OpDecorate %106 RelaxedPrecision
+OpDecorate %107 RelaxedPrecision
+OpDecorate %108 RelaxedPrecision
+%1 = OpTypeVoid
+%2 = OpTypeFunction %1
+%5 = OpTypeInt 32 0
+%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
+%7 = OpTypePointer UniformConstant %6
+%8 = OpVariable %7 UniformConstant
+%9 = OpConstant %5 64
+%10 = OpTypeFloat 32
+%11 = OpTypeArray %10 %9
+%12 = OpTypePointer Workgroup %11
+%13 = OpVariable %12 Workgroup
+%14 = OpConstant %5 256
+%15 = OpTypeArray %10 %14
+%16 = OpTypePointer Workgroup %15
+%17 = OpVariable %16 Workgroup
+%19 = OpTypeVector %5 3
+%20 = OpTypePointer Input %19
+%21 = OpVariable %20 Input
+%22 = OpTypePointer Input %5
+%24 = OpConstant %5 0
+%26 = OpVariable %22 Input
+%28 = OpConstant %5 4
+%29 = OpTypeArray %10 %28
+%30 = OpTypePointer Function %29
+%32 = OpTypePointer Function %10
+%35 = OpConstant %5 1
+%37 = OpConstant %5 2
+%39 = OpConstant %5 3
+%40 = OpConstant %10 0
+%42 = OpTypePointer Workgroup %10
+%45 = OpConstant %10 1
+%47 = OpConstant %10 2
+%49 = OpConstant %10 3
+%51 = OpConstant %10 4
+%64 = OpConstant %5 264
+%66 = OpConstant %5 5
+%114 = OpTypeVector %5 4
+%3 = OpFunction %1 None %2
+%4 = OpLabel
+%31 = OpVariable %30 Function
+OpBranch %122
+%122 = OpLabel
+%18 = OpLoad %6 %8
+%23 = OpAccessChain %22 %21 %24
+%25 = OpLoad %5 %23
+%27 = OpLoad %5 %26
+%33 = OpInBoundsAccessChain %32 %31 %24
+%34 = OpInBoundsAccessChain %32 %31 %35
+%36 = OpInBoundsAccessChain %32 %31 %37
+%38 = OpInBoundsAccessChain %32 %31 %39
+OpStore %33 %40
+OpStore %34 %40
+OpStore %36 %40
+OpStore %38 %40
+%41 = OpConvertUToF %10 %27
+%43 = OpAccessChain %42 %13 %27
+OpStore %43 %41
+%44 = OpFAdd %10 %41 %45
+%46 = OpFAdd %10 %41 %47
+%48 = OpFAdd %10 %41 %49
+%50 = OpFAdd %10 %41 %51
+%52 = OpIMul %5 %27 %28
+%53 = OpIAdd %5 %24 %52
+%54 = OpAccessChain %42 %17 %53
+OpStore %54 %44
+%55 = OpIMul %5 %27 %28
+%56 = OpIAdd %5 %35 %55
+%57 = OpAccessChain %42 %17 %56
+OpStore %57 %46
+%58 = OpIMul %5 %27 %28
+%59 = OpIAdd %5 %37 %58
+%60 = OpAccessChain %42 %17 %59
+OpStore %60 %48
+%61 = OpIMul %5 %27 %28
+%62 = OpIAdd %5 %39 %61
+%63 = OpAccessChain %42 %17 %62
+OpStore %63 %50
+OpControlBarrier %37 %37 %64
+%65 = OpBitwiseXor %5 %27 %66
+%67 = OpAccessChain %42 %13 %65
+%68 = OpLoad %10 %67
+%69 = OpBitwiseAnd %5 %27 %39
+%70 = OpAccessChain %32 %31 %69
+OpStore %70 %68
+%71 = OpBitwiseXor %5 %27 %28
+%72 = OpAccessChain %42 %13 %71
+%73 = OpLoad %10 %72
+%74 = OpIAdd %5 %27 %35
+%75 = OpBitwiseAnd %5 %74 %39
+%76 = OpAccessChain %32 %31 %75
+OpStore %76 %73
+%77 = OpBitwiseXor %5 %27 %35
+%78 = OpAccessChain %42 %13 %77
+%79 = OpLoad %10 %78
+%80 = OpBitwiseXor %5 %27 %39
+%81 = OpIMul %5 %80 %28
+%82 = OpIAdd %5 %24 %81
+%83 = OpAccessChain %42 %17 %82
+%84 = OpLoad %10 %83
+%85 = OpIMul %5 %80 %28
+%86 = OpIAdd %5 %35 %85
+%87 = OpAccessChain %42 %17 %86
+%88 = OpLoad %10 %87
+%89 = OpIMul %5 %80 %28
+%90 = OpIAdd %5 %37 %89
+%91 = OpAccessChain %42 %17 %90
+%92 = OpLoad %10 %91
+%93 = OpIMul %5 %80 %28
+%94 = OpIAdd %5 %39 %93
+%95 = OpAccessChain %42 %17 %94
+%96 = OpLoad %10 %95
+%97 = OpFAdd %10 %84 %79
+%98 = OpFAdd %10 %88 %79
+%99 = OpFAdd %10 %92 %79
+%100 = OpFAdd %10 %96 %79
+%101 = OpLoad %10 %33
+%102 = OpLoad %10 %34
+%103 = OpLoad %10 %36
+%104 = OpLoad %10 %38
+%105 = OpFAdd %10 %97 %101
+%106 = OpFAdd %10 %98 %102
+%107 = OpFAdd %10 %99 %103
+%108 = OpFAdd %10 %100 %104
+%109 = OpIMul %5 %25 %28
+%110 = OpBitcast %5 %105
+%111 = OpBitcast %5 %106
+%112 = OpBitcast %5 %107
+%113 = OpBitcast %5 %108
+%115 = OpCompositeConstruct %114 %110 %110 %110 %110
+OpImageWrite %18 %109 %115
+%116 = OpCompositeConstruct %114 %111 %111 %111 %111
+%117 = OpIAdd %5 %109 %35
+OpImageWrite %18 %117 %116
+%118 = OpCompositeConstruct %114 %112 %112 %112 %112
+%119 = OpIAdd %5 %109 %37
+OpImageWrite %18 %119 %118
+%120 = OpCompositeConstruct %114 %113 %113 %113 %113
+%121 = OpIAdd %5 %109 %39
+OpImageWrite %18 %121 %120
+OpReturn
+OpFunctionEnd
+#endif
diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag b/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag
index a92d895..bee9305 100644
--- a/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag
+++ b/reference/shaders/resources/ssbo-minprecision.sm60.native-fp16.frag
@@ -60,6 +60,7 @@ OpDecorate %16 RelaxedPrecision
 OpDecorate %16 Flat
 OpDecorate %16 Location 0
 OpDecorate %18 Location 0
+OpDecorate %41 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag
index b733765..9e3aac8 100644
--- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag
+++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.frag
@@ -16,7 +16,11 @@ layout(location = 0) out int SV_Target;
 void main()
 {
     uint _20 = uint(A);
-    _13._m0[_20] = floatBitsToUint(uintBitsToFloat(_9._m0[_20 + 1u]) + uintBitsToFloat(_9._m0[_20]));
+    float _26 = uintBitsToFloat(_9._m0[_20]);
+    mediump float mp_copy_26 = _26;
+    float _31 = uintBitsToFloat(_9._m0[_20 + 1u]);
+    mediump float mp_copy_31 = _31;
+    _13._m0[_20] = floatBitsToUint(mp_copy_31 + mp_copy_26);
     SV_Target = int(10u);
 }
 
@@ -54,6 +58,7 @@ OpDecorate %16 RelaxedPrecision
 OpDecorate %16 Flat
 OpDecorate %16 Location 0
 OpDecorate %18 Location 0
+OpDecorate %32 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag
index 5a5fbb3..d1288f8 100644
--- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag
+++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.frag
@@ -93,6 +93,7 @@ OpDecorate %24 RelaxedPrecision
 OpDecorate %24 Flat
 OpDecorate %24 Location 0
 OpDecorate %26 Location 0
+OpDecorate %46 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag
index 53e1ea3..531354a 100644
--- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag
+++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag
@@ -106,6 +106,7 @@ OpDecorate %34 ArrayStride 4
 OpMemberDecorate %35 0 Offset 0
 OpDecorate %35 Block
 OpMemberDecorate %35 0 NonWritable
+OpDecorate %51 RelaxedPrecision
 OpDecorate %52 ArrayStride 4
 OpMemberDecorate %53 0 Offset 0
 OpDecorate %53 Block
diff --git a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag
index dd70b90..1862c46 100644
--- a/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag
+++ b/reference/shaders/resources/ssbo-minprecision.sm60.ssbo.root-descriptor.frag
@@ -30,7 +30,11 @@ float _43;
 void main()
 {
     uint _23 = uint(A);
-    PhysicalPointerFloatArray(registers._m2).value[_23] = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23 + 1u] + PhysicalPointerFloatNonWriteArray(registers._m1).value[_23];
+    float _32 = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23];
+    mediump float mp_copy_32 = _32;
+    float _36 = PhysicalPointerFloatNonWriteArray(registers._m1).value[_23 + 1u];
+    mediump float mp_copy_36 = _36;
+    PhysicalPointerFloatArray(registers._m2).value[_23] = mp_copy_36 + mp_copy_32;
     SV_Target = int(10u);
 }
 
@@ -70,6 +74,7 @@ OpDecorate %25 ArrayStride 4
 OpMemberDecorate %26 0 Offset 0
 OpDecorate %26 Block
 OpMemberDecorate %26 0 NonWritable
+OpDecorate %37 RelaxedPrecision
 OpDecorate %38 ArrayStride 4
 OpMemberDecorate %39 0 Offset 0
 OpDecorate %39 Block
diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag
index 4a3719c..6f67537 100644
--- a/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag
+++ b/reference/shaders/resources/typed-resources-16bit.sm60.bindless.frag
@@ -51,14 +51,6 @@ void main()
     imageStore(_54[registers._m4 + 4u], int(_167), ivec4(uvec4(4294967276u)));
     imageStore(_58[registers._m4 + 5u], int(_167), uvec4(80u));
     mediump vec4 _191 = texture(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y));
-    mediump float _193 = _191.x;
-    float hp_copy_193 = _193;
-    mediump float _194 = _191.y;
-    float hp_copy_194 = _194;
-    mediump float _195 = _191.z;
-    float hp_copy_195 = _195;
-    mediump float _196 = _191.w;
-    float hp_copy_196 = _196;
     uvec4 _199 = uvec4(texelFetch(_18[_145], ivec2(uvec2(1u, 2u)), int(3u)));
     mediump uvec4 _205 = texelFetch(_22[_139], ivec2(uvec2(4u, 5u)), int(6u));
     mediump uint _207 = _205.x;
@@ -70,14 +62,14 @@ void main()
     mediump uint _210 = _205.w;
     uint hp_copy_210 = _210;
     mediump vec4 _212 = textureGather(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y));
-    mediump float _213 = _212.x;
-    float hp_copy_213 = _213;
-    mediump float _214 = _212.y;
-    float hp_copy_214 = _214;
-    mediump float _215 = _212.z;
-    float hp_copy_215 = _215;
-    mediump float _216 = _212.w;
-    float hp_copy_216 = _216;
+    mediump float _217 = _212.x + _191.x;
+    float hp_copy_217 = _217;
+    mediump float _218 = _212.y + _191.y;
+    float hp_copy_218 = _218;
+    mediump float _219 = _212.z + _191.z;
+    float hp_copy_219 = _219;
+    mediump float _220 = _212.w + _191.w;
+    float hp_copy_220 = _220;
     uvec4 _225 = uvec4(textureGather(isampler2D(_18[_145], _62[registers._m2]), vec2(UV.x, UV.y), int(1u)));
     mediump uvec4 _237 = textureGather(usampler2D(_22[_139], _62[registers._m2]), vec2(UV.x, UV.y), int(2u));
     mediump uint _238 = _237.x;
@@ -88,10 +80,18 @@ void main()
     uint hp_copy_240 = _240;
     mediump uint _241 = _237.w;
     uint hp_copy_241 = _241;
-    mediump vec4 _252 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5)));
-    mediump float _253 = _252.x;
+    mediump float _253 = vec4(texture(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5))).x;
+    float hp_copy_253 = _253;
     mediump vec4 _260 = vec4(textureLod(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), vec3(vec2(UV.x, UV.y), 0.5), 0.0));
     mediump float _261 = _260.x;
+    float _262 = _261 + (hp_copy_253 + hp_copy_217);
+    mediump float mp_copy_262 = _262;
+    float _263 = _261 + (hp_copy_253 + hp_copy_218);
+    mediump float mp_copy_263 = _263;
+    float _264 = _261 + (hp_copy_253 + hp_copy_219);
+    mediump float mp_copy_264 = _264;
+    float _265 = _261 + (hp_copy_253 + hp_copy_220);
+    mediump float mp_copy_265 = _265;
     vec2 _266 = vec2(UV.x, UV.y);
     mediump vec4 _267 = textureGather(sampler2DShadow(_13[registers._m0], _62[registers._m2 + 1u]), _266, 0.5);
     mediump vec4 _276 = textureLod(sampler2D(_13[registers._m0], _62[registers._m2]), vec2(UV.x, UV.y), 0.0);
@@ -100,10 +100,10 @@ void main()
     mediump vec4 _311 = texelFetch(_26[registers._m1 + 3u], int(_167));
     uvec4 _321 = uvec4(texelFetch(_30[registers._m1 + 4u], int(_167)));
     mediump uvec4 _330 = texelFetch(_34[registers._m1 + 5u], int(_167));
-    SV_Target.x = ((((_267.x + (_261 + (_253 + (hp_copy_213 + hp_copy_193)))) + _276.x) + _289.x) + _301.x) + _311.x;
-    SV_Target.y = ((((_267.y + (_261 + (_253 + (hp_copy_214 + hp_copy_194)))) + _276.y) + _289.y) + _301.y) + _311.y;
-    SV_Target.z = ((((_267.z + (_261 + (_253 + (hp_copy_215 + hp_copy_195)))) + _276.z) + _289.z) + _301.z) + _311.z;
-    SV_Target.w = ((((_267.w + (_261 + (_253 + (hp_copy_216 + hp_copy_196)))) + _276.w) + _289.w) + _301.w) + _311.w;
+    SV_Target.x = ((((_267.x + mp_copy_262) + _276.x) + _289.x) + _301.x) + _311.x;
+    SV_Target.y = ((((_267.y + mp_copy_263) + _276.y) + _289.y) + _301.y) + _311.y;
+    SV_Target.z = ((((_267.z + mp_copy_264) + _276.z) + _289.z) + _301.z) + _311.z;
+    SV_Target.w = ((((_267.w + mp_copy_265) + _276.w) + _289.w) + _301.w) + _311.w;
     SV_Target_1.x = int((_225.x + _199.x) + _321.x);
     SV_Target_1.y = int((_225.y + _199.y) + _321.y);
     SV_Target_1.z = int((_225.z + _199.z) + _321.z);
@@ -200,6 +200,71 @@ OpDecorate %71 RelaxedPrecision
 OpDecorate %71 Location 1
 OpDecorate %74 RelaxedPrecision
 OpDecorate %74 Location 2
+OpDecorate %191 RelaxedPrecision
+OpDecorate %193 RelaxedPrecision
+OpDecorate %194 RelaxedPrecision
+OpDecorate %195 RelaxedPrecision
+OpDecorate %196 RelaxedPrecision
+OpDecorate %197 RelaxedPrecision
+OpDecorate %205 RelaxedPrecision
+OpDecorate %212 RelaxedPrecision
+OpDecorate %213 RelaxedPrecision
+OpDecorate %214 RelaxedPrecision
+OpDecorate %215 RelaxedPrecision
+OpDecorate %216 RelaxedPrecision
+OpDecorate %217 RelaxedPrecision
+OpDecorate %218 RelaxedPrecision
+OpDecorate %219 RelaxedPrecision
+OpDecorate %220 RelaxedPrecision
+OpDecorate %224 RelaxedPrecision
+OpDecorate %237 RelaxedPrecision
+OpDecorate %267 RelaxedPrecision
+OpDecorate %268 RelaxedPrecision
+OpDecorate %269 RelaxedPrecision
+OpDecorate %270 RelaxedPrecision
+OpDecorate %271 RelaxedPrecision
+OpDecorate %272 RelaxedPrecision
+OpDecorate %273 RelaxedPrecision
+OpDecorate %274 RelaxedPrecision
+OpDecorate %275 RelaxedPrecision
+OpDecorate %276 RelaxedPrecision
+OpDecorate %278 RelaxedPrecision
+OpDecorate %279 RelaxedPrecision
+OpDecorate %280 RelaxedPrecision
+OpDecorate %281 RelaxedPrecision
+OpDecorate %282 RelaxedPrecision
+OpDecorate %283 RelaxedPrecision
+OpDecorate %284 RelaxedPrecision
+OpDecorate %285 RelaxedPrecision
+OpDecorate %289 RelaxedPrecision
+OpDecorate %293 RelaxedPrecision
+OpDecorate %294 RelaxedPrecision
+OpDecorate %295 RelaxedPrecision
+OpDecorate %296 RelaxedPrecision
+OpDecorate %297 RelaxedPrecision
+OpDecorate %298 RelaxedPrecision
+OpDecorate %299 RelaxedPrecision
+OpDecorate %300 RelaxedPrecision
+OpDecorate %301 RelaxedPrecision
+OpDecorate %303 RelaxedPrecision
+OpDecorate %304 RelaxedPrecision
+OpDecorate %305 RelaxedPrecision
+OpDecorate %306 RelaxedPrecision
+OpDecorate %307 RelaxedPrecision
+OpDecorate %308 RelaxedPrecision
+OpDecorate %309 RelaxedPrecision
+OpDecorate %310 RelaxedPrecision
+OpDecorate %311 RelaxedPrecision
+OpDecorate %312 RelaxedPrecision
+OpDecorate %313 RelaxedPrecision
+OpDecorate %314 RelaxedPrecision
+OpDecorate %315 RelaxedPrecision
+OpDecorate %316 RelaxedPrecision
+OpDecorate %317 RelaxedPrecision
+OpDecorate %318 RelaxedPrecision
+OpDecorate %319 RelaxedPrecision
+OpDecorate %320 RelaxedPrecision
+OpDecorate %330 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.frag b/reference/shaders/resources/typed-resources-16bit.sm60.frag
index 9305754..444e249 100644
--- a/reference/shaders/resources/typed-resources-16bit.sm60.frag
+++ b/reference/shaders/resources/typed-resources-16bit.sm60.frag
@@ -36,14 +36,6 @@ void main()
     imageStore(_40, int(_81), ivec4(uvec4(4294967276u)));
     imageStore(_43, int(_81), uvec4(80u));
     mediump vec4 _105 = texture(sampler2D(_8, _46), vec2(UV.x, UV.y));
-    mediump float _107 = _105.x;
-    float hp_copy_107 = _107;
-    mediump float _108 = _105.y;
-    float hp_copy_108 = _108;
-    mediump float _109 = _105.z;
-    float hp_copy_109 = _109;
-    mediump float _110 = _105.w;
-    float hp_copy_110 = _110;
     uvec4 _115 = uvec4(texelFetch(_12, ivec2(uvec2(1u, 2u)), int(3u)));
     mediump uvec4 _123 = texelFetch(_16, ivec2(uvec2(4u, 5u)), int(6u));
     mediump uint _125 = _123.x;
@@ -55,14 +47,14 @@ void main()
     mediump uint _128 = _123.w;
     uint hp_copy_128 = _128;
     mediump vec4 _130 = textureGather(sampler2D(_8, _46), vec2(UV.x, UV.y));
-    mediump float _131 = _130.x;
-    float hp_copy_131 = _131;
-    mediump float _132 = _130.y;
-    float hp_copy_132 = _132;
-    mediump float _133 = _130.z;
-    float hp_copy_133 = _133;
-    mediump float _134 = _130.w;
-    float hp_copy_134 = _134;
+    mediump float _135 = _130.x + _105.x;
+    float hp_copy_135 = _135;
+    mediump float _136 = _130.y + _105.y;
+    float hp_copy_136 = _136;
+    mediump float _137 = _130.z + _105.z;
+    float hp_copy_137 = _137;
+    mediump float _138 = _130.w + _105.w;
+    float hp_copy_138 = _138;
     uvec4 _143 = uvec4(textureGather(isampler2D(_12, _46), vec2(UV.x, UV.y), int(1u)));
     mediump uvec4 _155 = textureGather(usampler2D(_16, _46), vec2(UV.x, UV.y), int(2u));
     mediump uint _156 = _155.x;
@@ -73,10 +65,18 @@ void main()
     uint hp_copy_158 = _158;
     mediump uint _159 = _155.w;
     uint hp_copy_159 = _159;
-    mediump vec4 _170 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5)));
-    mediump float _171 = _170.x;
+    mediump float _171 = vec4(texture(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5))).x;
+    float hp_copy_171 = _171;
     mediump vec4 _178 = vec4(textureLod(sampler2DShadow(_8, _47), vec3(vec2(UV.x, UV.y), 0.5), 0.0));
     mediump float _179 = _178.x;
+    float _180 = _179 + (hp_copy_171 + hp_copy_135);
+    mediump float mp_copy_180 = _180;
+    float _181 = _179 + (hp_copy_171 + hp_copy_136);
+    mediump float mp_copy_181 = _181;
+    float _182 = _179 + (hp_copy_171 + hp_copy_137);
+    mediump float mp_copy_182 = _182;
+    float _183 = _179 + (hp_copy_171 + hp_copy_138);
+    mediump float mp_copy_183 = _183;
     vec2 _184 = vec2(UV.x, UV.y);
     mediump vec4 _185 = textureGather(sampler2DShadow(_8, _47), _184, 0.5);
     mediump vec4 _194 = textureLod(sampler2D(_8, _46), vec2(UV.x, UV.y), 0.0);
@@ -85,10 +85,10 @@ void main()
     mediump vec4 _229 = texelFetch(_19, int(_81));
     uvec4 _239 = uvec4(texelFetch(_22, int(_81)));
     mediump uvec4 _248 = texelFetch(_25, int(_81));
-    SV_Target.x = ((((_185.x + (_179 + (_171 + (hp_copy_131 + hp_copy_107)))) + _194.x) + _207.x) + _219.x) + _229.x;
-    SV_Target.y = ((((_185.y + (_179 + (_171 + (hp_copy_132 + hp_copy_108)))) + _194.y) + _207.y) + _219.y) + _229.y;
-    SV_Target.z = ((((_185.z + (_179 + (_171 + (hp_copy_133 + hp_copy_109)))) + _194.z) + _207.z) + _219.z) + _229.z;
-    SV_Target.w = ((((_185.w + (_179 + (_171 + (hp_copy_134 + hp_copy_110)))) + _194.w) + _207.w) + _219.w) + _229.w;
+    SV_Target.x = ((((_185.x + mp_copy_180) + _194.x) + _207.x) + _219.x) + _229.x;
+    SV_Target.y = ((((_185.y + mp_copy_181) + _194.y) + _207.y) + _219.y) + _229.y;
+    SV_Target.z = ((((_185.z + mp_copy_182) + _194.z) + _207.z) + _219.z) + _229.z;
+    SV_Target.w = ((((_185.w + mp_copy_183) + _194.w) + _207.w) + _219.w) + _229.w;
     SV_Target_1.x = int((_143.x + _115.x) + _239.x);
     SV_Target_1.y = int((_143.y + _115.y) + _239.y);
     SV_Target_1.z = int((_143.z + _115.z) + _239.z);
@@ -172,6 +172,71 @@ OpDecorate %56 RelaxedPrecision
 OpDecorate %56 Location 1
 OpDecorate %59 RelaxedPrecision
 OpDecorate %59 Location 2
+OpDecorate %105 RelaxedPrecision
+OpDecorate %107 RelaxedPrecision
+OpDecorate %108 RelaxedPrecision
+OpDecorate %109 RelaxedPrecision
+OpDecorate %110 RelaxedPrecision
+OpDecorate %113 RelaxedPrecision
+OpDecorate %123 RelaxedPrecision
+OpDecorate %130 RelaxedPrecision
+OpDecorate %131 RelaxedPrecision
+OpDecorate %132 RelaxedPrecision
+OpDecorate %133 RelaxedPrecision
+OpDecorate %134 RelaxedPrecision
+OpDecorate %135 RelaxedPrecision
+OpDecorate %136 RelaxedPrecision
+OpDecorate %137 RelaxedPrecision
+OpDecorate %138 RelaxedPrecision
+OpDecorate %142 RelaxedPrecision
+OpDecorate %155 RelaxedPrecision
+OpDecorate %185 RelaxedPrecision
+OpDecorate %186 RelaxedPrecision
+OpDecorate %187 RelaxedPrecision
+OpDecorate %188 RelaxedPrecision
+OpDecorate %189 RelaxedPrecision
+OpDecorate %190 RelaxedPrecision
+OpDecorate %191 RelaxedPrecision
+OpDecorate %192 RelaxedPrecision
+OpDecorate %193 RelaxedPrecision
+OpDecorate %194 RelaxedPrecision
+OpDecorate %196 RelaxedPrecision
+OpDecorate %197 RelaxedPrecision
+OpDecorate %198 RelaxedPrecision
+OpDecorate %199 RelaxedPrecision
+OpDecorate %200 RelaxedPrecision
+OpDecorate %201 RelaxedPrecision
+OpDecorate %202 RelaxedPrecision
+OpDecorate %203 RelaxedPrecision
+OpDecorate %207 RelaxedPrecision
+OpDecorate %211 RelaxedPrecision
+OpDecorate %212 RelaxedPrecision
+OpDecorate %213 RelaxedPrecision
+OpDecorate %214 RelaxedPrecision
+OpDecorate %215 RelaxedPrecision
+OpDecorate %216 RelaxedPrecision
+OpDecorate %217 RelaxedPrecision
+OpDecorate %218 RelaxedPrecision
+OpDecorate %219 RelaxedPrecision
+OpDecorate %221 RelaxedPrecision
+OpDecorate %222 RelaxedPrecision
+OpDecorate %223 RelaxedPrecision
+OpDecorate %224 RelaxedPrecision
+OpDecorate %225 RelaxedPrecision
+OpDecorate %226 RelaxedPrecision
+OpDecorate %227 RelaxedPrecision
+OpDecorate %228 RelaxedPrecision
+OpDecorate %229 RelaxedPrecision
+OpDecorate %230 RelaxedPrecision
+OpDecorate %231 RelaxedPrecision
+OpDecorate %232 RelaxedPrecision
+OpDecorate %233 RelaxedPrecision
+OpDecorate %234 RelaxedPrecision
+OpDecorate %235 RelaxedPrecision
+OpDecorate %236 RelaxedPrecision
+OpDecorate %237 RelaxedPrecision
+OpDecorate %238 RelaxedPrecision
+OpDecorate %248 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeFloat 32
diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag
index e6be43d..fd454c9 100644
--- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag
+++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.bindless.frag
@@ -181,6 +181,81 @@ OpDecorate %71 RelaxedPrecision
 OpDecorate %71 Location 1
 OpDecorate %74 RelaxedPrecision
 OpDecorate %74 Location 2
+OpDecorate %168 RelaxedPrecision
+OpDecorate %169 RelaxedPrecision
+OpDecorate %201 RelaxedPrecision
+OpDecorate %204 RelaxedPrecision
+OpDecorate %205 RelaxedPrecision
+OpDecorate %206 RelaxedPrecision
+OpDecorate %207 RelaxedPrecision
+OpDecorate %208 RelaxedPrecision
+OpDecorate %216 RelaxedPrecision
+OpDecorate %224 RelaxedPrecision
+OpDecorate %226 RelaxedPrecision
+OpDecorate %227 RelaxedPrecision
+OpDecorate %228 RelaxedPrecision
+OpDecorate %229 RelaxedPrecision
+OpDecorate %230 RelaxedPrecision
+OpDecorate %231 RelaxedPrecision
+OpDecorate %232 RelaxedPrecision
+OpDecorate %233 RelaxedPrecision
+OpDecorate %237 RelaxedPrecision
+OpDecorate %250 RelaxedPrecision
+OpDecorate %276 RelaxedPrecision
+OpDecorate %277 RelaxedPrecision
+OpDecorate %278 RelaxedPrecision
+OpDecorate %279 RelaxedPrecision
+OpDecorate %292 RelaxedPrecision
+OpDecorate %293 RelaxedPrecision
+OpDecorate %294 RelaxedPrecision
+OpDecorate %295 RelaxedPrecision
+OpDecorate %297 RelaxedPrecision
+OpDecorate %299 RelaxedPrecision
+OpDecorate %300 RelaxedPrecision
+OpDecorate %301 RelaxedPrecision
+OpDecorate %302 RelaxedPrecision
+OpDecorate %303 RelaxedPrecision
+OpDecorate %304 RelaxedPrecision
+OpDecorate %305 RelaxedPrecision
+OpDecorate %306 RelaxedPrecision
+OpDecorate %307 RelaxedPrecision
+OpDecorate %310 RelaxedPrecision
+OpDecorate %311 RelaxedPrecision
+OpDecorate %312 RelaxedPrecision
+OpDecorate %313 RelaxedPrecision
+OpDecorate %314 RelaxedPrecision
+OpDecorate %315 RelaxedPrecision
+OpDecorate %316 RelaxedPrecision
+OpDecorate %317 RelaxedPrecision
+OpDecorate %321 RelaxedPrecision
+OpDecorate %326 RelaxedPrecision
+OpDecorate %327 RelaxedPrecision
+OpDecorate %328 RelaxedPrecision
+OpDecorate %329 RelaxedPrecision
+OpDecorate %330 RelaxedPrecision
+OpDecorate %331 RelaxedPrecision
+OpDecorate %332 RelaxedPrecision
+OpDecorate %333 RelaxedPrecision
+OpDecorate %334 RelaxedPrecision
+OpDecorate %337 RelaxedPrecision
+OpDecorate %338 RelaxedPrecision
+OpDecorate %339 RelaxedPrecision
+OpDecorate %340 RelaxedPrecision
+OpDecorate %341 RelaxedPrecision
+OpDecorate %342 RelaxedPrecision
+OpDecorate %343 RelaxedPrecision
+OpDecorate %344 RelaxedPrecision
+OpDecorate %345 RelaxedPrecision
+OpDecorate %347 RelaxedPrecision
+OpDecorate %348 RelaxedPrecision
+OpDecorate %349 RelaxedPrecision
+OpDecorate %350 RelaxedPrecision
+OpDecorate %351 RelaxedPrecision
+OpDecorate %352 RelaxedPrecision
+OpDecorate %353 RelaxedPrecision
+OpDecorate %354 RelaxedPrecision
+OpDecorate %355 RelaxedPrecision
+OpDecorate %365 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeInt 32 0
diff --git a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag
index 649a4d3..9348cac 100644
--- a/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag
+++ b/reference/shaders/resources/typed-resources-16bit.sm60.native-fp16.frag
@@ -153,6 +153,81 @@ OpDecorate %56 RelaxedPrecision
 OpDecorate %56 Location 1
 OpDecorate %59 RelaxedPrecision
 OpDecorate %59 Location 2
+OpDecorate %82 RelaxedPrecision
+OpDecorate %83 RelaxedPrecision
+OpDecorate %115 RelaxedPrecision
+OpDecorate %118 RelaxedPrecision
+OpDecorate %119 RelaxedPrecision
+OpDecorate %120 RelaxedPrecision
+OpDecorate %121 RelaxedPrecision
+OpDecorate %124 RelaxedPrecision
+OpDecorate %134 RelaxedPrecision
+OpDecorate %142 RelaxedPrecision
+OpDecorate %144 RelaxedPrecision
+OpDecorate %145 RelaxedPrecision
+OpDecorate %146 RelaxedPrecision
+OpDecorate %147 RelaxedPrecision
+OpDecorate %148 RelaxedPrecision
+OpDecorate %149 RelaxedPrecision
+OpDecorate %150 RelaxedPrecision
+OpDecorate %151 RelaxedPrecision
+OpDecorate %155 RelaxedPrecision
+OpDecorate %168 RelaxedPrecision
+OpDecorate %194 RelaxedPrecision
+OpDecorate %195 RelaxedPrecision
+OpDecorate %196 RelaxedPrecision
+OpDecorate %197 RelaxedPrecision
+OpDecorate %210 RelaxedPrecision
+OpDecorate %211 RelaxedPrecision
+OpDecorate %212 RelaxedPrecision
+OpDecorate %213 RelaxedPrecision
+OpDecorate %215 RelaxedPrecision
+OpDecorate %217 RelaxedPrecision
+OpDecorate %218 RelaxedPrecision
+OpDecorate %219 RelaxedPrecision
+OpDecorate %220 RelaxedPrecision
+OpDecorate %221 RelaxedPrecision
+OpDecorate %222 RelaxedPrecision
+OpDecorate %223 RelaxedPrecision
+OpDecorate %224 RelaxedPrecision
+OpDecorate %225 RelaxedPrecision
+OpDecorate %228 RelaxedPrecision
+OpDecorate %229 RelaxedPrecision
+OpDecorate %230 RelaxedPrecision
+OpDecorate %231 RelaxedPrecision
+OpDecorate %232 RelaxedPrecision
+OpDecorate %233 RelaxedPrecision
+OpDecorate %234 RelaxedPrecision
+OpDecorate %235 RelaxedPrecision
+OpDecorate %239 RelaxedPrecision
+OpDecorate %244 RelaxedPrecision
+OpDecorate %245 RelaxedPrecision
+OpDecorate %246 RelaxedPrecision
+OpDecorate %247 RelaxedPrecision
+OpDecorate %248 RelaxedPrecision
+OpDecorate %249 RelaxedPrecision
+OpDecorate %250 RelaxedPrecision
+OpDecorate %251 RelaxedPrecision
+OpDecorate %252 RelaxedPrecision
+OpDecorate %255 RelaxedPrecision
+OpDecorate %256 RelaxedPrecision
+OpDecorate %257 RelaxedPrecision
+OpDecorate %258 RelaxedPrecision
+OpDecorate %259 RelaxedPrecision
+OpDecorate %260 RelaxedPrecision
+OpDecorate %261 RelaxedPrecision
+OpDecorate %262 RelaxedPrecision
+OpDecorate %263 RelaxedPrecision
+OpDecorate %265 RelaxedPrecision
+OpDecorate %266 RelaxedPrecision
+OpDecorate %267 RelaxedPrecision
+OpDecorate %268 RelaxedPrecision
+OpDecorate %269 RelaxedPrecision
+OpDecorate %270 RelaxedPrecision
+OpDecorate %271 RelaxedPrecision
+OpDecorate %272 RelaxedPrecision
+OpDecorate %273 RelaxedPrecision
+OpDecorate %283 RelaxedPrecision
 %1 = OpTypeVoid
 %2 = OpTypeFunction %1
 %5 = OpTypeFloat 32
diff --git a/shaders/llvm-builtin/min16-phi.sm60.comp b/shaders/llvm-builtin/min16-phi.sm60.comp
new file mode 100644
index 0000000..d6f196c
--- /dev/null
+++ b/shaders/llvm-builtin/min16-phi.sm60.comp
@@ -0,0 +1,18 @@
+RWStructuredBuffer<min16float4> B;
+Texture2D<min16float4> T0;
+Texture2D<min16float4> T1;
+Texture2D<min16float4> T2;
+
+[numthreads(64, 1, 1)]
+void main(uint3 thr : SV_DispatchThreadID)
+{
+	min16float4 v;
+	if (thr.x < 20)
+		v = T0.Load(int3(thr));
+	else if (thr.y < 40)
+		v = T1.Load(int3(thr));
+	else
+		v = T2.Load(int3(thr));
+
+	B[thr.x] = v;
+}
diff --git a/shaders/resources/min16-alloca-groupshared.sm60.comp b/shaders/resources/min16-alloca-groupshared.sm60.comp
new file mode 100644
index 0000000..96c8852
--- /dev/null
+++ b/shaders/resources/min16-alloca-groupshared.sm60.comp
@@ -0,0 +1,17 @@
+groupshared min16float bar[64];
+groupshared min16float4 bar4[64];
+
+RWStructuredBuffer<min16float4> O;
+static min16float4 Vec;
+
+[numthreads(64, 1, 1)]
+void main(uint thr : SV_DispatchThreadID, uint l : SV_GroupIndex)
+{
+	Vec = min16float4(0, 0, 0, 0);
+	bar[l] = min16float(l);
+	bar4[l] = min16float(l) + min16float4(1, 2, 3, 4);
+	GroupMemoryBarrierWithGroupSync();
+	Vec[l & 3] = bar[l ^ 5];
+	Vec[(l + 1) & 3] = bar[l ^ 4];
+	O[thr] = bar[l ^ 1] + bar4[l ^ 3] + Vec;
+}
diff --git a/spirv_module.cpp b/spirv_module.cpp
index d894024..762c466 100644
--- a/spirv_module.cpp
+++ b/spirv_module.cpp
@@ -822,6 +822,9 @@ void SPIRVModule::Impl::emit_basic_block(CFGNode *node)
 			phi_op->addIdOperand(fake_loop_block->getId());
 		}
 
+		if (phi.relaxed)
+			builder.addDecoration(phi.id, spv::DecorationRelaxedPrecision);
+
 		bb->addInstruction(std::move(phi_op));
 	}
author	Hans-Kristian Arntzen <post@arntzen-software.no>	2021-12-02 17:31:59 +0300
committer	Hans-Kristian Arntzen <post@arntzen-software.no>	2022-05-31 12:42:30 +0300
commit	677148ed111a8a3ee9ce3b002f46032361a48069 (patch)
tree	4820cb837a0e5264043b546b88c046c704ceaa31
parent	d97bb340d8f79317fb7b8049717ff6ffd0dace0f (diff)