Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/KhronosGroup/SPIRV-Cross.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans-Kristian Arntzen <post@arntzen-software.no>2022-03-03 13:04:45 +0300
committerHans-Kristian Arntzen <post@arntzen-software.no>2022-03-03 16:50:56 +0300
commit31be74a853b92d98893e2c35d2319cb227afc9b0 (patch)
treec776cf480601d240a6d655a83f67cb98cc5c2170
parentb91ecf607784af2afc336d09ada9d7ad83165e3a (diff)
Add relax_nan_checks options.
Makes codegen from typical D3D emulation SPIR-V more readable. Also makes cross compilation with NotEqual more sensible. It's very rare to actually need the strict NaN-checks in practice. Also, glslang now emits UnordNotEqual by default it seems, so give up trying to assume OrdNotEqual. Harmonize for UnordNotEqual as the sane default.
-rw-r--r--CMakeLists.txt2
-rw-r--r--main.cpp5
-rw-r--r--reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp30
-rw-r--r--reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag22
-rw-r--r--reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp3
-rw-r--r--reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp2
-rw-r--r--reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp42
-rw-r--r--reference/opt/shaders/comp/cfg.comp2
-rw-r--r--reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag1
-rw-r--r--reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag52
-rw-r--r--reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert (renamed from reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert)0
-rw-r--r--reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp27
-rw-r--r--reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag4
-rw-r--r--reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag69
-rw-r--r--reference/shaders-msl/asm/frag/unord-relational-op.asm.frag9
-rw-r--r--reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag59
-rw-r--r--reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp2
-rw-r--r--reference/shaders-msl/comp/threadgroup-boolean-workaround.comp2
-rw-r--r--reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag5
-rw-r--r--reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag34
-rw-r--r--reference/shaders-no-opt/frag/fp16.invalid.desktop.frag4
-rw-r--r--reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp39
-rw-r--r--reference/shaders/comp/cfg.comp10
-rw-r--r--shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag2
-rw-r--r--shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag179
-rw-r--r--shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert (renamed from shaders-hlsl-no-opt/vert/empty-shader.sm30.vert)0
-rw-r--r--shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp203
-rw-r--r--shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag293
-rw-r--r--shaders-msl/asm/frag/unord-relational-op.asm.frag2
-rw-r--r--shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag207
-rw-r--r--shaders-no-opt/asm/frag/unordered-compare.asm.frag2
-rw-r--r--shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag179
-rw-r--r--shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp203
-rw-r--r--spirv_cross_c.cpp3
-rw-r--r--spirv_cross_c.h4
-rw-r--r--spirv_glsl.cpp74
-rw-r--r--spirv_glsl.hpp12
-rw-r--r--spirv_hlsl.cpp4
-rw-r--r--spirv_msl.cpp12
-rwxr-xr-xtest_shaders.py6
40 files changed, 1777 insertions, 33 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6860c5bc..d8ea49a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -332,7 +332,7 @@ if (SPIRV_CROSS_STATIC)
endif()
set(spirv-cross-abi-major 0)
-set(spirv-cross-abi-minor 48)
+set(spirv-cross-abi-minor 49)
set(spirv-cross-abi-patch 0)
if (SPIRV_CROSS_SHARED)
diff --git a/main.cpp b/main.cpp
index fe197376..9124ecaf 100644
--- a/main.cpp
+++ b/main.cpp
@@ -669,6 +669,7 @@ struct CLIArguments
bool emit_line_directives = false;
bool enable_storage_image_qualifier_deduction = true;
bool force_zero_initialized_variables = false;
+ bool relax_nan_checks = false;
uint32_t force_recompile_max_debug_iterations = 3;
SmallVector<uint32_t> msl_discrete_descriptor_sets;
SmallVector<uint32_t> msl_device_argument_buffers;
@@ -919,6 +920,7 @@ static void print_help_common()
"\t[--mask-stage-output-builtin <Position|PointSize|ClipDistance|CullDistance>]:\n"
"\t\tIf a stage output variable with matching builtin is active, "
"optimize away the variable if it can affect cross-stage linking correctness.\n"
+ "\t[--relax-nan-checks]:\n\t\tRelax NaN checks for N{Clamp,Min,Max} and ordered vs. unordered compare instructions.\n"
);
// clang-format on
}
@@ -1292,6 +1294,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
opts.emit_line_directives = args.emit_line_directives;
opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction;
opts.force_zero_initialized_variables = args.force_zero_initialized_variables;
+ opts.relax_nan_checks = args.relax_nan_checks;
opts.force_recompile_max_debug_iterations = args.force_recompile_max_debug_iterations;
compiler->set_common_options(opts);
@@ -1689,6 +1692,8 @@ static int main_inner(int argc, char *argv[])
args.force_recompile_max_debug_iterations = parser.next_uint();
});
+ cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; });
+
cbs.default_handler = [&args](const char *value) { args.input = value; };
cbs.add("-", [&args](CLIParser &) { args.input = "-"; });
cbs.error_handler = [] { print_help(); };
diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..9f51eff1
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,30 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+ _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ for (int _139 = 0; _139 < 2; )
+ {
+ _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60)))));
+ _139++;
+ continue;
+ }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+ comp_main();
+}
diff --git a/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..aee290f5
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float a_tmp [[function_constant(1)]];
+constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0;
+constant float b_tmp [[function_constant(2)]];
+constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0;
+
+struct main0_out
+{
+ float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+ main0_out out = {};
+ out.FragColor = float4(a + b);
+ return out;
+}
+
diff --git a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
index 47c83afe..cf26178e 100644
--- a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
+++ b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp
@@ -12,8 +12,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
kernel void main0(device model_t& model [[buffer(0)]])
{
- float _38 = (transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x;
- if ((isunordered(_38, 0.0) || _38 != 0.0))
+ if ((transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x != 0.0)
{
model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0)));
}
diff --git a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
index 8b80929a..c1eccf27 100644
--- a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
+++ b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp
@@ -13,7 +13,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup short4 foo[4];
- foo[gl_LocalInvocationIndex] = short4((isunordered(_23.values[gl_GlobalInvocationID.x], float4(10.0)) || _23.values[gl_GlobalInvocationID.x] != float4(10.0)));
+ foo[gl_LocalInvocationIndex] = short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0));
threadgroup_barrier(mem_flags::mem_threadgroup);
_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));
}
diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..32d8e025
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,42 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+ float a1;
+ vec2 a2;
+ vec3 a3;
+ vec4 a4;
+ float b1;
+ vec2 b2;
+ vec3 b3;
+ vec4 b4;
+ float c1;
+ vec2 c2;
+ vec3 c3;
+ vec4 c4;
+} _4;
+
+void main()
+{
+ _4.a1 = min(_4.b1, _4.c1);
+ _4.a2 = min(_4.b2, _4.c2);
+ _4.a3 = min(_4.b3, _4.c3);
+ _4.a4 = min(_4.b4, _4.c4);
+ _4.a1 = max(_4.b1, _4.c1);
+ _4.a2 = max(_4.b2, _4.c2);
+ _4.a3 = max(_4.b3, _4.c3);
+ _4.a4 = max(_4.b4, _4.c4);
+ _4.a1 = clamp(_4.a1, _4.b1, _4.c1);
+ _4.a2 = clamp(_4.a2, _4.b2, _4.c2);
+ _4.a3 = clamp(_4.a3, _4.b3, _4.c3);
+ _4.a4 = clamp(_4.a4, _4.b4, _4.c4);
+ for (int _139 = 0; _139 < 2; )
+ {
+ _4.a2 = min(_4.b2, _4.c2);
+ _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y);
+ _139++;
+ continue;
+ }
+}
+
diff --git a/reference/opt/shaders/comp/cfg.comp b/reference/opt/shaders/comp/cfg.comp
index 97cdbc76..af207378 100644
--- a/reference/opt/shaders/comp/cfg.comp
+++ b/reference/opt/shaders/comp/cfg.comp
@@ -10,7 +10,7 @@ float _188;
void main()
{
- if (!(_11.data == 0.0))
+ if (_11.data != 0.0)
{
_11.data = 10.0;
}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
index f18e8e77..021333cc 100644
--- a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -21,6 +21,7 @@ float4 test_vector()
bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w));
bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w);
bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+ neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq);
}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..0172c20b
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,52 @@
+static float4 A;
+static float4 B;
+static float4 FragColor;
+
+struct SPIRV_Cross_Input
+{
+ float4 A : TEXCOORD0;
+ float4 B : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+ float4 FragColor : SV_Target0;
+};
+
+float4 test_vector()
+{
+ bool4 le = bool4(A.x < B.x, A.y < B.y, A.z < B.z, A.w < B.w);
+ bool4 leq = bool4(A.x <= B.x, A.y <= B.y, A.z <= B.z, A.w <= B.w);
+ bool4 ge = bool4(A.x > B.x, A.y > B.y, A.z > B.z, A.w > B.w);
+ bool4 geq = bool4(A.x >= B.x, A.y >= B.y, A.z >= B.z, A.w >= B.w);
+ bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w);
+ bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+ neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w);
+ return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq);
+}
+
+float test_scalar()
+{
+ bool le = A.x < B.x;
+ bool leq = A.x <= B.x;
+ bool ge = A.x > B.x;
+ bool geq = A.x >= B.x;
+ bool eq = A.x == B.x;
+ bool neq = A.x != B.x;
+ return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void frag_main()
+{
+ FragColor = test_vector() + test_scalar().xxxx;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+ A = stage_input.A;
+ B = stage_input.B;
+ frag_main();
+ SPIRV_Cross_Output stage_output;
+ stage_output.FragColor = FragColor;
+ return stage_output;
+}
diff --git a/reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
index 103ff46a..103ff46a 100644
--- a/reference/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert
+++ b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..88f53a4c
--- /dev/null
+++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,27 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+ _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96)))));
+ _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112)))));
+ _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128)))));
+ for (int i = 0; i < 2; i++, _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60))))))
+ {
+ _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)))));
+ }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+ comp_main();
+}
diff --git a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
index 9eb0f595..16182ae2 100644
--- a/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
+++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag
@@ -77,7 +77,7 @@ void test_conversions()
half one = test_result();
int a = int(one);
uint b = uint(one);
- bool c = (isunordered(one, half(0.0)) || one != half(0.0));
+ bool c = one != half(0.0);
float d = float(one);
half a2 = half(a);
half b2 = half(b);
@@ -152,7 +152,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1)
btmp = v4 > v4;
btmp = v4 >= v4;
btmp = v4 == v4;
- btmp = (isunordered(v4, v4) || v4 != v4);
+ btmp = v4 != v4;
res = dfdx(v4);
res = dfdy(v4);
res = dfdx(v4);
diff --git a/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
new file mode 100644
index 00000000..7835e013
--- /dev/null
+++ b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
@@ -0,0 +1,69 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_in
+{
+ float v1 [[user(locn0)]];
+ float2 v2 [[user(locn1)]];
+ float3 v3 [[user(locn2)]];
+ float4 v4 [[user(locn3)]];
+ half h1 [[user(locn4)]];
+ half2 h2 [[user(locn5)]];
+ half3 h3 [[user(locn6)]];
+ half4 h4 [[user(locn7)]];
+};
+
+fragment void main0(main0_in in [[stage_in]])
+{
+ float res = fast::min(in.v1, in.v1);
+ res = fast::max(in.v1, in.v1);
+ res = fast::clamp(in.v1, in.v1, in.v1);
+ res = fast::min(in.v1, in.v1);
+ res = fast::max(in.v1, in.v1);
+ res = fast::clamp(in.v1, in.v1, in.v1);
+ float2 res2 = fast::min(in.v2, in.v2);
+ res2 = fast::max(in.v2, in.v2);
+ res2 = fast::clamp(in.v2, in.v2, in.v2);
+ res2 = fast::min(in.v2, in.v2);
+ res2 = fast::max(in.v2, in.v2);
+ res2 = fast::clamp(in.v2, in.v2, in.v2);
+ float3 res3 = fast::min(in.v3, in.v3);
+ res3 = fast::max(in.v3, in.v3);
+ res3 = fast::clamp(in.v3, in.v3, in.v3);
+ res3 = fast::min(in.v3, in.v3);
+ res3 = fast::max(in.v3, in.v3);
+ res3 = fast::clamp(in.v3, in.v3, in.v3);
+ float4 res4 = fast::min(in.v4, in.v4);
+ res4 = fast::max(in.v4, in.v4);
+ res4 = fast::clamp(in.v4, in.v4, in.v4);
+ res4 = fast::min(in.v4, in.v4);
+ res4 = fast::max(in.v4, in.v4);
+ res4 = fast::clamp(in.v4, in.v4, in.v4);
+ half hres = min(in.h1, in.h1);
+ hres = max(in.h1, in.h1);
+ hres = clamp(in.h1, in.h1, in.h1);
+ hres = min(in.h1, in.h1);
+ hres = max(in.h1, in.h1);
+ hres = clamp(in.h1, in.h1, in.h1);
+ half2 hres2 = min(in.h2, in.h2);
+ hres2 = max(in.h2, in.h2);
+ hres2 = clamp(in.h2, in.h2, in.h2);
+ hres2 = min(in.h2, in.h2);
+ hres2 = max(in.h2, in.h2);
+ hres2 = clamp(in.h2, in.h2, in.h2);
+ half3 hres3 = min(in.h3, in.h3);
+ hres3 = max(in.h3, in.h3);
+ hres3 = clamp(in.h3, in.h3, in.h3);
+ hres3 = min(in.h3, in.h3);
+ hres3 = max(in.h3, in.h3);
+ hres3 = clamp(in.h3, in.h3, in.h3);
+ half4 hres4 = min(in.h4, in.h4);
+ hres4 = max(in.h4, in.h4);
+ hres4 = clamp(in.h4, in.h4, in.h4);
+ hres4 = min(in.h4, in.h4);
+ hres4 = max(in.h4, in.h4);
+ hres4 = clamp(in.h4, in.h4, in.h4);
+}
+
diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
index 8df57c55..624408c4 100644
--- a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
+++ b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag
@@ -29,25 +29,26 @@ fragment main0_out main0(main0_in in [[stage_in]])
float t0 = a;
float t1 = b;
bool c1 = (isunordered(a, b) || a == b);
- bool c2 = (isunordered(a, b) || a != b);
+ c1 = a != b;
+ bool c2 = a != b;
bool c3 = (isunordered(a, b) || a < b);
bool c4 = (isunordered(a, b) || a > b);
bool c5 = (isunordered(a, b) || a <= b);
bool c6 = (isunordered(a, b) || a >= b);
bool2 c7 = (isunordered(in.c, in.d) || in.c == in.d);
- bool2 c8 = (isunordered(in.c, in.d) || in.c != in.d);
+ bool2 c8 = in.c != in.d;
bool2 c9 = (isunordered(in.c, in.d) || in.c < in.d);
bool2 c10 = (isunordered(in.c, in.d) || in.c > in.d);
bool2 c11 = (isunordered(in.c, in.d) || in.c <= in.d);
bool2 c12 = (isunordered(in.c, in.d) || in.c >= in.d);
bool3 c13 = (isunordered(in.e, in.f) || in.e == in.f);
- bool3 c14 = (isunordered(in.e, in.f) || in.e != in.f);
+ bool3 c14 = in.e != in.f;
bool3 c15 = (isunordered(in.e, in.f) || in.e < in.f);
bool3 c16 = (isunordered(in.e, in.f) || in.e > in.f);
bool3 c17 = (isunordered(in.e, in.f) || in.e <= in.f);
bool3 c18 = (isunordered(in.e, in.f) || in.e >= in.f);
bool4 c19 = (isunordered(in.g, in.h) || in.g == in.h);
- bool4 c20 = (isunordered(in.g, in.h) || in.g != in.h);
+ bool4 c20 = in.g != in.h;
bool4 c21 = (isunordered(in.g, in.h) || in.g < in.h);
bool4 c22 = (isunordered(in.g, in.h) || in.g > in.h);
bool4 c23 = (isunordered(in.g, in.h) || in.g <= in.h);
diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..48482806
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,59 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float a_tmp [[function_constant(1)]];
+constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0;
+constant float b_tmp [[function_constant(2)]];
+constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0;
+
+struct main0_out
+{
+ float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+ float2 c [[user(locn2)]];
+ float2 d [[user(locn3)]];
+ float3 e [[user(locn4)]];
+ float3 f [[user(locn5)]];
+ float4 g [[user(locn6)]];
+ float4 h [[user(locn7)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+ main0_out out = {};
+ float t0 = a;
+ float t1 = b;
+ bool c1 = a == b;
+ c1 = a != b;
+ bool c2 = a != b;
+ bool c3 = a < b;
+ bool c4 = a > b;
+ bool c5 = a <= b;
+ bool c6 = a >= b;
+ bool2 c7 = in.c == in.d;
+ bool2 c8 = in.c != in.d;
+ bool2 c9 = in.c < in.d;
+ bool2 c10 = in.c > in.d;
+ bool2 c11 = in.c <= in.d;
+ bool2 c12 = in.c >= in.d;
+ bool3 c13 = in.e == in.f;
+ bool3 c14 = in.e != in.f;
+ bool3 c15 = in.e < in.f;
+ bool3 c16 = in.e > in.f;
+ bool3 c17 = in.e <= in.f;
+ bool3 c18 = in.e >= in.f;
+ bool4 c19 = in.g == in.h;
+ bool4 c20 = in.g != in.h;
+ bool4 c21 = in.g < in.h;
+ bool4 c22 = in.g > in.h;
+ bool4 c23 = in.g <= in.h;
+ bool4 c24 = in.g >= in.h;
+ out.FragColor = float4(t0 + t1);
+ return out;
+}
+
diff --git a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
index 54fb89bc..3de0ef44 100644
--- a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
+++ b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp
@@ -14,7 +14,7 @@ kernel void main0(device model_t& model [[buffer(0)]])
{
float3x3 mtx_cm = transpose(model.mtx_rm);
float3x3 mtx1 = mtx_cm * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0));
- if ((isunordered(mtx1[0].x, 0.0) || mtx1[0].x != 0.0))
+ if (mtx1[0].x != 0.0)
{
model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0)));
}
diff --git a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
index d01b1351..754f7357 100644
--- a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
+++ b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp
@@ -15,7 +15,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u);
static inline __attribute__((always_inline))
void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& v_23, thread uint3& gl_GlobalInvocationID)
{
- foo[gl_LocalInvocationIndex] = short4((isunordered(v_23.values[gl_GlobalInvocationID.x], float4(10.0)) || v_23.values[gl_GlobalInvocationID.x] != float4(10.0)));
+ foo[gl_LocalInvocationIndex] = short4(v_23.values[gl_GlobalInvocationID.x] != float4(10.0));
threadgroup_barrier(mem_flags::mem_threadgroup);
v_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u]));
}
diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
index 21aadfb1..61122bbd 100644
--- a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -11,7 +11,8 @@ vec4 test_vector()
bvec4 ge = not(lessThanEqual(A, B));
bvec4 geq = not(lessThan(A, B));
bvec4 eq = not(notEqual(A, B));
- bvec4 neq = not(equal(A, B));
+ bvec4 neq = notEqual(A, B);
+ neq = notEqual(A, B);
return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq);
}
@@ -22,7 +23,7 @@ float test_scalar()
bool ge = !(A.x <= B.x);
bool geq = !(A.x < B.x);
bool eq = !(A.x != B.x);
- bool neq = !(A.x == B.x);
+ bool neq = A.x != B.x;
return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
}
diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..24db7c9f
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,34 @@
+#version 450
+
+layout(location = 0) in vec4 A;
+layout(location = 1) in vec4 B;
+layout(location = 0) out vec4 FragColor;
+
+vec4 test_vector()
+{
+ bvec4 le = lessThan(A, B);
+ bvec4 leq = lessThanEqual(A, B);
+ bvec4 ge = greaterThan(A, B);
+ bvec4 geq = greaterThanEqual(A, B);
+ bvec4 eq = equal(A, B);
+ bvec4 neq = notEqual(A, B);
+ neq = notEqual(A, B);
+ return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq);
+}
+
+float test_scalar()
+{
+ bool le = A.x < B.x;
+ bool leq = A.x <= B.x;
+ bool ge = A.x > B.x;
+ bool geq = A.x >= B.x;
+ bool eq = A.x == B.x;
+ bool neq = A.x != B.x;
+ return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq);
+}
+
+void main()
+{
+ FragColor = test_vector() + vec4(test_scalar());
+}
+
diff --git a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
index 55f5235e..faf79b2b 100644
--- a/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
+++ b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag
@@ -50,7 +50,7 @@ void test_conversions()
float16_t one = test_result();
int a = int(one);
uint b = uint(one);
- bool c = !(one == float16_t(0.0));
+ bool c = one != float16_t(0.0);
float d = float(one);
double e = double(one);
float16_t a2 = float16_t(a);
@@ -126,7 +126,7 @@ void test_builtins()
btmp = greaterThan(v4, v4);
btmp = greaterThanEqual(v4, v4);
btmp = equal(v4, v4);
- btmp = not(equal(v4, v4));
+ btmp = notEqual(v4, v4);
res = dFdx(v4);
res = dFdy(v4);
res = dFdxFine(v4);
diff --git a/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..449a87d0
--- /dev/null
+++ b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,39 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+ float a1;
+ vec2 a2;
+ vec3 a3;
+ vec4 a4;
+ float b1;
+ vec2 b2;
+ vec3 b3;
+ vec4 b4;
+ float c1;
+ vec2 c2;
+ vec3 c3;
+ vec4 c4;
+} _4;
+
+void main()
+{
+ _4.a1 = min(_4.b1, _4.c1);
+ _4.a2 = min(_4.b2, _4.c2);
+ _4.a3 = min(_4.b3, _4.c3);
+ _4.a4 = min(_4.b4, _4.c4);
+ _4.a1 = max(_4.b1, _4.c1);
+ _4.a2 = max(_4.b2, _4.c2);
+ _4.a3 = max(_4.b3, _4.c3);
+ _4.a4 = max(_4.b4, _4.c4);
+ _4.a1 = clamp(_4.a1, _4.b1, _4.c1);
+ _4.a2 = clamp(_4.a2, _4.b2, _4.c2);
+ _4.a3 = clamp(_4.a3, _4.b3, _4.c3);
+ _4.a4 = clamp(_4.a4, _4.b4, _4.c4);
+ for (int i = 0; i < 2; i++, _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y))
+ {
+ _4.a2 = min(_4.b2, _4.c2);
+ }
+}
+
diff --git a/reference/shaders/comp/cfg.comp b/reference/shaders/comp/cfg.comp
index a91c8732..77ad312c 100644
--- a/reference/shaders/comp/cfg.comp
+++ b/reference/shaders/comp/cfg.comp
@@ -8,7 +8,7 @@ layout(binding = 0, std430) buffer SSBO
void test()
{
- if (!(_11.data == 0.0))
+ if (_11.data != 0.0)
{
float tmp = 10.0;
_11.data = tmp;
@@ -18,12 +18,12 @@ void test()
float tmp_1 = 15.0;
_11.data = tmp_1;
}
- if (!(_11.data == 0.0))
+ if (_11.data != 0.0)
{
float e;
- if (!(_11.data == 5.0))
+ if (_11.data != 5.0)
{
- if (!(_11.data == 6.0))
+ if (_11.data != 6.0)
{
e = 10.0;
}
@@ -70,7 +70,7 @@ void test()
float m;
do
{
- } while (!(m == 20.0));
+ } while (m != 20.0);
_11.data = m;
}
diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
index 4ad8fc5a..2e5e0309 100644
--- a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag
@@ -89,6 +89,8 @@
%38 = OpLoad %v4float %B
%39 = OpFUnordEqual %v4bool %37 %38
OpStore %eq %39
+ %ordered = OpFOrdNotEqual %v4bool %37 %38
+ OpStore %neq %ordered
%41 = OpLoad %v4float %A
%42 = OpLoad %v4float %B
%43 = OpFUnordNotEqual %v4bool %41 %42
diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..2e5e0309
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,179 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %A %B %FragColor
+ OpExecutionMode %main OriginUpperLeft
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %test_vector_ "test_vector("
+ OpName %test_scalar_ "test_scalar("
+ OpName %le "le"
+ OpName %A "A"
+ OpName %B "B"
+ OpName %leq "leq"
+ OpName %ge "ge"
+ OpName %geq "geq"
+ OpName %eq "eq"
+ OpName %neq "neq"
+ OpName %le_0 "le"
+ OpName %leq_0 "leq"
+ OpName %ge_0 "ge"
+ OpName %geq_0 "geq"
+ OpName %eq_0 "eq"
+ OpName %neq_0 "neq"
+ OpName %FragColor "FragColor"
+ OpDecorate %A Location 0
+ OpDecorate %B Location 1
+ OpDecorate %FragColor Location 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %8 = OpTypeFunction %v4float
+ %11 = OpTypeFunction %float
+ %bool = OpTypeBool
+ %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+ %A = OpVariable %_ptr_Input_v4float Input
+ %B = OpVariable %_ptr_Input_v4float Input
+ %float_0 = OpConstant %float 0
+ %float_1 = OpConstant %float 1
+ %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+ %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+ %FragColor = OpVariable %_ptr_Output_v4float Output
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %128 = OpFunctionCall %v4float %test_vector_
+ %129 = OpFunctionCall %float %test_scalar_
+ %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+ %131 = OpFAdd %v4float %128 %130
+ OpStore %FragColor %131
+ OpReturn
+ OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+ %10 = OpLabel
+ %le = OpVariable %_ptr_Function_v4bool Function
+ %leq = OpVariable %_ptr_Function_v4bool Function
+ %ge = OpVariable %_ptr_Function_v4bool Function
+ %geq = OpVariable %_ptr_Function_v4bool Function
+ %eq = OpVariable %_ptr_Function_v4bool Function
+ %neq = OpVariable %_ptr_Function_v4bool Function
+ %20 = OpLoad %v4float %A
+ %22 = OpLoad %v4float %B
+ %23 = OpFUnordLessThan %v4bool %20 %22
+ OpStore %le %23
+ %25 = OpLoad %v4float %A
+ %26 = OpLoad %v4float %B
+ %27 = OpFUnordLessThanEqual %v4bool %25 %26
+ OpStore %leq %27
+ %29 = OpLoad %v4float %A
+ %30 = OpLoad %v4float %B
+ %31 = OpFUnordGreaterThan %v4bool %29 %30
+ OpStore %ge %31
+ %33 = OpLoad %v4float %A
+ %34 = OpLoad %v4float %B
+ %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+ OpStore %geq %35
+ %37 = OpLoad %v4float %A
+ %38 = OpLoad %v4float %B
+ %39 = OpFUnordEqual %v4bool %37 %38
+ OpStore %eq %39
+ %ordered = OpFOrdNotEqual %v4bool %37 %38
+ OpStore %neq %ordered
+ %41 = OpLoad %v4float %A
+ %42 = OpLoad %v4float %B
+ %43 = OpFUnordNotEqual %v4bool %41 %42
+ OpStore %neq %43
+ %44 = OpLoad %v4bool %le
+ %49 = OpSelect %v4float %44 %48 %47
+ %50 = OpLoad %v4bool %leq
+ %51 = OpSelect %v4float %50 %48 %47
+ %52 = OpFAdd %v4float %49 %51
+ %53 = OpLoad %v4bool %ge
+ %54 = OpSelect %v4float %53 %48 %47
+ %55 = OpFAdd %v4float %52 %54
+ %56 = OpLoad %v4bool %geq
+ %57 = OpSelect %v4float %56 %48 %47
+ %58 = OpFAdd %v4float %55 %57
+ %59 = OpLoad %v4bool %eq
+ %60 = OpSelect %v4float %59 %48 %47
+ %61 = OpFAdd %v4float %58 %60
+ %62 = OpLoad %v4bool %neq
+ %63 = OpSelect %v4float %62 %48 %47
+ %64 = OpFAdd %v4float %61 %63
+ OpReturnValue %64
+ OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+ %13 = OpLabel
+ %le_0 = OpVariable %_ptr_Function_bool Function
+ %leq_0 = OpVariable %_ptr_Function_bool Function
+ %ge_0 = OpVariable %_ptr_Function_bool Function
+ %geq_0 = OpVariable %_ptr_Function_bool Function
+ %eq_0 = OpVariable %_ptr_Function_bool Function
+ %neq_0 = OpVariable %_ptr_Function_bool Function
+ %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %73 = OpLoad %float %72
+ %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %75 = OpLoad %float %74
+ %76 = OpFUnordLessThan %bool %73 %75
+ OpStore %le_0 %76
+ %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %79 = OpLoad %float %78
+ %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %81 = OpLoad %float %80
+ %82 = OpFUnordLessThanEqual %bool %79 %81
+ OpStore %leq_0 %82
+ %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %87 = OpLoad %float %86
+ %88 = OpFUnordGreaterThan %bool %85 %87
+ OpStore %ge_0 %88
+ %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %91 = OpLoad %float %90
+ %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %93 = OpLoad %float %92
+ %94 = OpFUnordGreaterThanEqual %bool %91 %93
+ OpStore %geq_0 %94
+ %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %97 = OpLoad %float %96
+ %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %99 = OpLoad %float %98
+ %100 = OpFUnordEqual %bool %97 %99
+ OpStore %eq_0 %100
+ %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %103 = OpLoad %float %102
+ %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %105 = OpLoad %float %104
+ %106 = OpFUnordNotEqual %bool %103 %105
+ OpStore %neq_0 %106
+ %107 = OpLoad %bool %le_0
+ %108 = OpSelect %float %107 %float_1 %float_0
+ %109 = OpLoad %bool %leq_0
+ %110 = OpSelect %float %109 %float_1 %float_0
+ %111 = OpFAdd %float %108 %110
+ %112 = OpLoad %bool %ge_0
+ %113 = OpSelect %float %112 %float_1 %float_0
+ %114 = OpFAdd %float %111 %113
+ %115 = OpLoad %bool %geq_0
+ %116 = OpSelect %float %115 %float_1 %float_0
+ %117 = OpFAdd %float %114 %116
+ %118 = OpLoad %bool %eq_0
+ %119 = OpSelect %float %118 %float_1 %float_0
+ %120 = OpFAdd %float %117 %119
+ %121 = OpLoad %bool %neq_0
+ %122 = OpSelect %float %121 %float_1 %float_0
+ %123 = OpFAdd %float %120 %122
+ OpReturnValue %123
+ OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
index 0b8dbb5a..0b8dbb5a 100644
--- a/shaders-hlsl-no-opt/vert/empty-shader.sm30.vert
+++ b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert
diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main"
+ OpExecutionMode %main LocalSize 1 1 1
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %SSBO "SSBO"
+ OpMemberName %SSBO 0 "a1"
+ OpMemberName %SSBO 1 "a2"
+ OpMemberName %SSBO 2 "a3"
+ OpMemberName %SSBO 3 "a4"
+ OpMemberName %SSBO 4 "b1"
+ OpMemberName %SSBO 5 "b2"
+ OpMemberName %SSBO 6 "b3"
+ OpMemberName %SSBO 7 "b4"
+ OpMemberName %SSBO 8 "c1"
+ OpMemberName %SSBO 9 "c2"
+ OpMemberName %SSBO 10 "c3"
+ OpMemberName %SSBO 11 "c4"
+ OpName %_ ""
+ OpName %i "i"
+ OpMemberDecorate %SSBO 0 Offset 0
+ OpMemberDecorate %SSBO 1 Offset 8
+ OpMemberDecorate %SSBO 2 Offset 16
+ OpMemberDecorate %SSBO 3 Offset 32
+ OpMemberDecorate %SSBO 4 Offset 48
+ OpMemberDecorate %SSBO 5 Offset 56
+ OpMemberDecorate %SSBO 6 Offset 64
+ OpMemberDecorate %SSBO 7 Offset 80
+ OpMemberDecorate %SSBO 8 Offset 96
+ OpMemberDecorate %SSBO 9 Offset 104
+ OpMemberDecorate %SSBO 10 Offset 112
+ OpMemberDecorate %SSBO 11 Offset 128
+ OpDecorate %SSBO BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 0
+ %void = OpTypeVoid
+ %7 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v2float = OpTypeVector %float 2
+ %v3float = OpTypeVector %float 3
+ %v4float = OpTypeVector %float 4
+ %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+ %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %int_8 = OpConstant %int 8
+ %int_1 = OpConstant %int 1
+ %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+ %int_9 = OpConstant %int 9
+ %int_2 = OpConstant %int 2
+ %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+ %int_10 = OpConstant %int 10
+ %int_3 = OpConstant %int 3
+ %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+ %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+ %bool = OpTypeBool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %main = OpFunction %void None %7
+ %35 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %37 = OpLoad %float %36
+ %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %39 = OpLoad %float %38
+ %40 = OpExtInst %float %1 NMin %37 %39
+ %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %41 %40
+ %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %43 = OpLoad %v2float %42
+ %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %45 = OpLoad %v2float %44
+ %46 = OpExtInst %v2float %1 NMin %43 %45
+ %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %47 %46
+ %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %49 = OpLoad %v3float %48
+ %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %51 = OpLoad %v3float %50
+ %52 = OpExtInst %v3float %1 NMin %49 %51
+ %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %53 %52
+ %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %55 = OpLoad %v4float %54
+ %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %57 = OpLoad %v4float %56
+ %58 = OpExtInst %v4float %1 NMin %55 %57
+ %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %59 %58
+ %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %61 = OpLoad %float %60
+ %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %63 = OpLoad %float %62
+ %64 = OpExtInst %float %1 NMax %61 %63
+ %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %65 %64
+ %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %67 = OpLoad %v2float %66
+ %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %69 = OpLoad %v2float %68
+ %70 = OpExtInst %v2float %1 NMax %67 %69
+ %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %71 %70
+ %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %73 = OpLoad %v3float %72
+ %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %75 = OpLoad %v3float %74
+ %76 = OpExtInst %v3float %1 NMax %73 %75
+ %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %77 %76
+ %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %79 = OpLoad %v4float %78
+ %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %81 = OpLoad %v4float %80
+ %82 = OpExtInst %v4float %1 NMax %79 %81
+ %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %83 %82
+ %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %87 = OpLoad %float %86
+ %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %89 = OpLoad %float %88
+ %90 = OpExtInst %float %1 NClamp %85 %87 %89
+ %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %91 %90
+ %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ %93 = OpLoad %v2float %92
+ %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %95 = OpLoad %v2float %94
+ %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %97 = OpLoad %v2float %96
+ %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+ %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %99 %98
+ %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ %101 = OpLoad %v3float %100
+ %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %103 = OpLoad %v3float %102
+ %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %105 = OpLoad %v3float %104
+ %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+ %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %107 %106
+ %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ %109 = OpLoad %v4float %108
+ %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %111 = OpLoad %v4float %110
+ %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %113 = OpLoad %v4float %112
+ %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+ %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %115 %114
+ OpStore %i %int_0
+ OpBranch %116
+ %116 = OpLabel
+ OpLoopMerge %117 %118 None
+ OpBranch %119
+ %119 = OpLabel
+ %120 = OpLoad %int %i
+ %121 = OpSLessThan %bool %120 %int_2
+ OpBranchConditional %121 %122 %117
+ %122 = OpLabel
+ %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %124 = OpLoad %v2float %123
+ %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %126 = OpLoad %v2float %125
+ %127 = OpExtInst %v2float %1 NMin %124 %126
+ %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %128 %127
+ OpBranch %118
+ %118 = OpLabel
+ %129 = OpLoad %int %i
+ %130 = OpIAdd %int %129 %int_1
+ OpStore %i %130
+ %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %132 = OpLoad %float %131
+ %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+ %134 = OpLoad %float %133
+ %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+ %136 = OpLoad %float %135
+ %137 = OpExtInst %float %1 NClamp %132 %134 %136
+ %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %138 %137
+ OpBranch %116
+ %117 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
new file mode 100644
index 00000000..ad566615
--- /dev/null
+++ b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag
@@ -0,0 +1,293 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 205
+; Schema: 0
+ OpCapability Shader
+ OpCapability Float16
+ OpExtension "SPV_AMD_gpu_shader_half_float"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4
+ OpExecutionMode %main OriginUpperLeft
+ OpSource GLSL 450
+ OpSourceExtension "GL_AMD_gpu_shader_half_float"
+ OpName %main "main"
+ OpName %res "res"
+ OpName %res2 "res2"
+ OpName %res3 "res3"
+ OpName %res4 "res4"
+ OpName %hres "hres"
+ OpName %hres2 "hres2"
+ OpName %hres3 "hres3"
+ OpName %hres4 "hres4"
+ OpName %v1 "v1"
+ OpName %v2 "v2"
+ OpName %v3 "v3"
+ OpName %v4 "v4"
+ OpName %h1 "h1"
+ OpName %h2 "h2"
+ OpName %h3 "h3"
+ OpName %h4 "h4"
+ OpDecorate %v1 Location 0
+ OpDecorate %v2 Location 1
+ OpDecorate %v3 Location 2
+ OpDecorate %v4 Location 3
+ OpDecorate %h1 Location 4
+ OpDecorate %h2 Location 5
+ OpDecorate %h3 Location 6
+ OpDecorate %h4 Location 7
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v2float = OpTypeVector %float 2
+ %v3float = OpTypeVector %float 3
+ %v4float = OpTypeVector %float 4
+ %half = OpTypeFloat 16
+ %v2half = OpTypeVector %half 2
+ %v3half = OpTypeVector %half 3
+ %v4half = OpTypeVector %half 4
+%_ptr_Function_float = OpTypePointer Function %float
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%_ptr_Function_half = OpTypePointer Function %half
+%_ptr_Input_half = OpTypePointer Input %half
+%_ptr_Function_v2half = OpTypePointer Function %v2half
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+%_ptr_Function_v3half = OpTypePointer Function %v3half
+%_ptr_Input_v3half = OpTypePointer Input %v3half
+%_ptr_Function_v4half = OpTypePointer Function %v4half
+%_ptr_Input_v4half = OpTypePointer Input %v4half
+ %v1 = OpVariable %_ptr_Input_float Input
+ %v2 = OpVariable %_ptr_Input_v2float Input
+ %v3 = OpVariable %_ptr_Input_v3float Input
+ %v4 = OpVariable %_ptr_Input_v4float Input
+ %h1 = OpVariable %_ptr_Input_half Input
+ %h2 = OpVariable %_ptr_Input_v2half Input
+ %h3 = OpVariable %_ptr_Input_v3half Input
+ %h4 = OpVariable %_ptr_Input_v4half Input
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %res = OpVariable %_ptr_Function_float Function
+ %46 = OpLoad %float %v1
+ %47 = OpLoad %float %v1
+ %48 = OpExtInst %float %1 FMin %46 %47
+ OpStore %res %48
+ %49 = OpLoad %float %v1
+ %50 = OpLoad %float %v1
+ %51 = OpExtInst %float %1 FMax %49 %50
+ OpStore %res %51
+ %52 = OpLoad %float %v1
+ %53 = OpLoad %float %v1
+ %54 = OpLoad %float %v1
+ %55 = OpExtInst %float %1 FClamp %52 %53 %54
+ OpStore %res %55
+ %56 = OpLoad %float %v1
+ %57 = OpLoad %float %v1
+ %58 = OpExtInst %float %1 NMin %56 %57
+ OpStore %res %58
+ %59 = OpLoad %float %v1
+ %60 = OpLoad %float %v1
+ %61 = OpExtInst %float %1 NMax %59 %60
+ OpStore %res %61
+ %62 = OpLoad %float %v1
+ %63 = OpLoad %float %v1
+ %64 = OpLoad %float %v1
+ %65 = OpExtInst %float %1 NClamp %62 %63 %64
+ OpStore %res %65
+ %res2 = OpVariable %_ptr_Function_v2float Function
+ %66 = OpLoad %v2float %v2
+ %67 = OpLoad %v2float %v2
+ %68 = OpExtInst %v2float %1 FMin %66 %67
+ OpStore %res2 %68
+ %69 = OpLoad %v2float %v2
+ %70 = OpLoad %v2float %v2
+ %71 = OpExtInst %v2float %1 FMax %69 %70
+ OpStore %res2 %71
+ %72 = OpLoad %v2float %v2
+ %73 = OpLoad %v2float %v2
+ %74 = OpLoad %v2float %v2
+ %75 = OpExtInst %v2float %1 FClamp %72 %73 %74
+ OpStore %res2 %75
+ %76 = OpLoad %v2float %v2
+ %77 = OpLoad %v2float %v2
+ %78 = OpExtInst %v2float %1 NMin %76 %77
+ OpStore %res2 %78
+ %79 = OpLoad %v2float %v2
+ %80 = OpLoad %v2float %v2
+ %81 = OpExtInst %v2float %1 NMax %79 %80
+ OpStore %res2 %81
+ %82 = OpLoad %v2float %v2
+ %83 = OpLoad %v2float %v2
+ %84 = OpLoad %v2float %v2
+ %85 = OpExtInst %v2float %1 NClamp %82 %83 %84
+ OpStore %res2 %85
+ %res3 = OpVariable %_ptr_Function_v3float Function
+ %86 = OpLoad %v3float %v3
+ %87 = OpLoad %v3float %v3
+ %88 = OpExtInst %v3float %1 FMin %86 %87
+ OpStore %res3 %88
+ %89 = OpLoad %v3float %v3
+ %90 = OpLoad %v3float %v3
+ %91 = OpExtInst %v3float %1 FMax %89 %90
+ OpStore %res3 %91
+ %92 = OpLoad %v3float %v3
+ %93 = OpLoad %v3float %v3
+ %94 = OpLoad %v3float %v3
+ %95 = OpExtInst %v3float %1 FClamp %92 %93 %94
+ OpStore %res3 %95
+ %96 = OpLoad %v3float %v3
+ %97 = OpLoad %v3float %v3
+ %98 = OpExtInst %v3float %1 NMin %96 %97
+ OpStore %res3 %98
+ %99 = OpLoad %v3float %v3
+ %100 = OpLoad %v3float %v3
+ %101 = OpExtInst %v3float %1 NMax %99 %100
+ OpStore %res3 %101
+ %102 = OpLoad %v3float %v3
+ %103 = OpLoad %v3float %v3
+ %104 = OpLoad %v3float %v3
+ %105 = OpExtInst %v3float %1 NClamp %102 %103 %104
+ OpStore %res3 %105
+ %res4 = OpVariable %_ptr_Function_v4float Function
+ %106 = OpLoad %v4float %v4
+ %107 = OpLoad %v4float %v4
+ %108 = OpExtInst %v4float %1 FMin %106 %107
+ OpStore %res4 %108
+ %109 = OpLoad %v4float %v4
+ %110 = OpLoad %v4float %v4
+ %111 = OpExtInst %v4float %1 FMax %109 %110
+ OpStore %res4 %111
+ %112 = OpLoad %v4float %v4
+ %113 = OpLoad %v4float %v4
+ %114 = OpLoad %v4float %v4
+ %115 = OpExtInst %v4float %1 FClamp %112 %113 %114
+ OpStore %res4 %115
+ %116 = OpLoad %v4float %v4
+ %117 = OpLoad %v4float %v4
+ %118 = OpExtInst %v4float %1 NMin %116 %117
+ OpStore %res4 %118
+ %119 = OpLoad %v4float %v4
+ %120 = OpLoad %v4float %v4
+ %121 = OpExtInst %v4float %1 NMax %119 %120
+ OpStore %res4 %121
+ %122 = OpLoad %v4float %v4
+ %123 = OpLoad %v4float %v4
+ %124 = OpLoad %v4float %v4
+ %125 = OpExtInst %v4float %1 NClamp %122 %123 %124
+ OpStore %res4 %125
+ %hres = OpVariable %_ptr_Function_half Function
+ %126 = OpLoad %half %h1
+ %127 = OpLoad %half %h1
+ %128 = OpExtInst %half %1 FMin %126 %127
+ OpStore %hres %128
+ %129 = OpLoad %half %h1
+ %130 = OpLoad %half %h1
+ %131 = OpExtInst %half %1 FMax %129 %130
+ OpStore %hres %131
+ %132 = OpLoad %half %h1
+ %133 = OpLoad %half %h1
+ %134 = OpLoad %half %h1
+ %135 = OpExtInst %half %1 FClamp %132 %133 %134
+ OpStore %hres %135
+ %136 = OpLoad %half %h1
+ %137 = OpLoad %half %h1
+ %138 = OpExtInst %half %1 NMin %136 %137
+ OpStore %hres %138
+ %139 = OpLoad %half %h1
+ %140 = OpLoad %half %h1
+ %141 = OpExtInst %half %1 NMax %139 %140
+ OpStore %hres %141
+ %142 = OpLoad %half %h1
+ %143 = OpLoad %half %h1
+ %144 = OpLoad %half %h1
+ %145 = OpExtInst %half %1 NClamp %142 %143 %144
+ OpStore %hres %145
+ %hres2 = OpVariable %_ptr_Function_v2half Function
+ %146 = OpLoad %v2half %h2
+ %147 = OpLoad %v2half %h2
+ %148 = OpExtInst %v2half %1 FMin %146 %147
+ OpStore %hres2 %148
+ %149 = OpLoad %v2half %h2
+ %150 = OpLoad %v2half %h2
+ %151 = OpExtInst %v2half %1 FMax %149 %150
+ OpStore %hres2 %151
+ %152 = OpLoad %v2half %h2
+ %153 = OpLoad %v2half %h2
+ %154 = OpLoad %v2half %h2
+ %155 = OpExtInst %v2half %1 FClamp %152 %153 %154
+ OpStore %hres2 %155
+ %156 = OpLoad %v2half %h2
+ %157 = OpLoad %v2half %h2
+ %158 = OpExtInst %v2half %1 NMin %156 %157
+ OpStore %hres2 %158
+ %159 = OpLoad %v2half %h2
+ %160 = OpLoad %v2half %h2
+ %161 = OpExtInst %v2half %1 NMax %159 %160
+ OpStore %hres2 %161
+ %162 = OpLoad %v2half %h2
+ %163 = OpLoad %v2half %h2
+ %164 = OpLoad %v2half %h2
+ %165 = OpExtInst %v2half %1 NClamp %162 %163 %164
+ OpStore %hres2 %165
+ %hres3 = OpVariable %_ptr_Function_v3half Function
+ %166 = OpLoad %v3half %h3
+ %167 = OpLoad %v3half %h3
+ %168 = OpExtInst %v3half %1 FMin %166 %167
+ OpStore %hres3 %168
+ %169 = OpLoad %v3half %h3
+ %170 = OpLoad %v3half %h3
+ %171 = OpExtInst %v3half %1 FMax %169 %170
+ OpStore %hres3 %171
+ %172 = OpLoad %v3half %h3
+ %173 = OpLoad %v3half %h3
+ %174 = OpLoad %v3half %h3
+ %175 = OpExtInst %v3half %1 FClamp %172 %173 %174
+ OpStore %hres3 %175
+ %176 = OpLoad %v3half %h3
+ %177 = OpLoad %v3half %h3
+ %178 = OpExtInst %v3half %1 NMin %176 %177
+ OpStore %hres3 %178
+ %179 = OpLoad %v3half %h3
+ %180 = OpLoad %v3half %h3
+ %181 = OpExtInst %v3half %1 NMax %179 %180
+ OpStore %hres3 %181
+ %182 = OpLoad %v3half %h3
+ %183 = OpLoad %v3half %h3
+ %184 = OpLoad %v3half %h3
+ %185 = OpExtInst %v3half %1 NClamp %182 %183 %184
+ OpStore %hres3 %185
+ %hres4 = OpVariable %_ptr_Function_v4half Function
+ %186 = OpLoad %v4half %h4
+ %187 = OpLoad %v4half %h4
+ %188 = OpExtInst %v4half %1 FMin %186 %187
+ OpStore %hres4 %188
+ %189 = OpLoad %v4half %h4
+ %190 = OpLoad %v4half %h4
+ %191 = OpExtInst %v4half %1 FMax %189 %190
+ OpStore %hres4 %191
+ %192 = OpLoad %v4half %h4
+ %193 = OpLoad %v4half %h4
+ %194 = OpLoad %v4half %h4
+ %195 = OpExtInst %v4half %1 FClamp %192 %193 %194
+ OpStore %hres4 %195
+ %196 = OpLoad %v4half %h4
+ %197 = OpLoad %v4half %h4
+ %198 = OpExtInst %v4half %1 NMin %196 %197
+ OpStore %hres4 %198
+ %199 = OpLoad %v4half %h4
+ %200 = OpLoad %v4half %h4
+ %201 = OpExtInst %v4half %1 NMax %199 %200
+ OpStore %hres4 %201
+ %202 = OpLoad %v4half %h4
+ %203 = OpLoad %v4half %h4
+ %204 = OpLoad %v4half %h4
+ %205 = OpExtInst %v4half %1 NClamp %202 %203 %204
+ OpStore %hres4 %205
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-msl/asm/frag/unord-relational-op.asm.frag b/shaders-msl/asm/frag/unord-relational-op.asm.frag
index 3e4cd6c2..824c0512 100644
--- a/shaders-msl/asm/frag/unord-relational-op.asm.frag
+++ b/shaders-msl/asm/frag/unord-relational-op.asm.frag
@@ -114,6 +114,8 @@
OpStore %t1 %b
%15 = OpFUnordEqual %bool %a %b
OpStore %c1 %15
+ %ordered = OpFOrdNotEqual %bool %a %b
+ OpStore %c1 %ordered
%17 = OpFUnordNotEqual %bool %a %b
OpStore %c2 %17
%19 = OpFUnordLessThan %bool %a %b
diff --git a/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
new file mode 100644
index 00000000..824c0512
--- /dev/null
+++ b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag
@@ -0,0 +1,207 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 122
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %c %d %e %f %g %h %FragColor
+ OpExecutionMode %main OriginUpperLeft
+ OpSource GLSL 460
+ OpName %main "main"
+ OpName %t0 "t0"
+ OpName %a "a"
+ OpName %t1 "t1"
+ OpName %b "b"
+ OpName %c1 "c1"
+ OpName %c2 "c2"
+ OpName %c3 "c3"
+ OpName %c4 "c4"
+ OpName %c5 "c5"
+ OpName %c6 "c6"
+ OpName %c7 "c7"
+ OpName %c "c"
+ OpName %d "d"
+ OpName %c8 "c8"
+ OpName %c9 "c9"
+ OpName %c10 "c10"
+ OpName %c11 "c11"
+ OpName %c12 "c12"
+ OpName %c13 "c13"
+ OpName %e "e"
+ OpName %f "f"
+ OpName %c14 "c14"
+ OpName %c15 "c15"
+ OpName %c16 "c16"
+ OpName %c17 "c17"
+ OpName %c18 "c18"
+ OpName %c19 "c19"
+ OpName %g "g"
+ OpName %h "h"
+ OpName %c20 "c20"
+ OpName %c21 "c21"
+ OpName %c22 "c22"
+ OpName %c23 "c23"
+ OpName %c24 "c24"
+ OpName %FragColor "FragColor"
+ OpDecorate %a SpecId 1
+ OpDecorate %b SpecId 2
+ OpDecorate %c Location 2
+ OpDecorate %d Location 3
+ OpDecorate %e Location 4
+ OpDecorate %f Location 5
+ OpDecorate %g Location 6
+ OpDecorate %h Location 7
+ OpDecorate %FragColor Location 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+%_ptr_Function_float = OpTypePointer Function %float
+ %a = OpSpecConstant %float 1
+ %b = OpSpecConstant %float 2
+ %bool = OpTypeBool
+%_ptr_Function_bool = OpTypePointer Function %bool
+ %v2bool = OpTypeVector %bool 2
+%_ptr_Function_v2bool = OpTypePointer Function %v2bool
+ %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+ %c = OpVariable %_ptr_Input_v2float Input
+ %d = OpVariable %_ptr_Input_v2float Input
+ %v3bool = OpTypeVector %bool 3
+%_ptr_Function_v3bool = OpTypePointer Function %v3bool
+ %v3float = OpTypeVector %float 3
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+ %e = OpVariable %_ptr_Input_v3float Input
+ %f = OpVariable %_ptr_Input_v3float Input
+ %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+ %g = OpVariable %_ptr_Input_v4float Input
+ %h = OpVariable %_ptr_Input_v4float Input
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+ %FragColor = OpVariable %_ptr_Output_v4float Output
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %t0 = OpVariable %_ptr_Function_float Function
+ %t1 = OpVariable %_ptr_Function_float Function
+ %c1 = OpVariable %_ptr_Function_bool Function
+ %c2 = OpVariable %_ptr_Function_bool Function
+ %c3 = OpVariable %_ptr_Function_bool Function
+ %c4 = OpVariable %_ptr_Function_bool Function
+ %c5 = OpVariable %_ptr_Function_bool Function
+ %c6 = OpVariable %_ptr_Function_bool Function
+ %c7 = OpVariable %_ptr_Function_v2bool Function
+ %c8 = OpVariable %_ptr_Function_v2bool Function
+ %c9 = OpVariable %_ptr_Function_v2bool Function
+ %c10 = OpVariable %_ptr_Function_v2bool Function
+ %c11 = OpVariable %_ptr_Function_v2bool Function
+ %c12 = OpVariable %_ptr_Function_v2bool Function
+ %c13 = OpVariable %_ptr_Function_v3bool Function
+ %c14 = OpVariable %_ptr_Function_v3bool Function
+ %c15 = OpVariable %_ptr_Function_v3bool Function
+ %c16 = OpVariable %_ptr_Function_v3bool Function
+ %c17 = OpVariable %_ptr_Function_v3bool Function
+ %c18 = OpVariable %_ptr_Function_v3bool Function
+ %c19 = OpVariable %_ptr_Function_v4bool Function
+ %c20 = OpVariable %_ptr_Function_v4bool Function
+ %c21 = OpVariable %_ptr_Function_v4bool Function
+ %c22 = OpVariable %_ptr_Function_v4bool Function
+ %c23 = OpVariable %_ptr_Function_v4bool Function
+ %c24 = OpVariable %_ptr_Function_v4bool Function
+ OpStore %t0 %a
+ OpStore %t1 %b
+ %15 = OpFUnordEqual %bool %a %b
+ OpStore %c1 %15
+ %ordered = OpFOrdNotEqual %bool %a %b
+ OpStore %c1 %ordered
+ %17 = OpFUnordNotEqual %bool %a %b
+ OpStore %c2 %17
+ %19 = OpFUnordLessThan %bool %a %b
+ OpStore %c3 %19
+ %21 = OpFUnordGreaterThan %bool %a %b
+ OpStore %c4 %21
+ %23 = OpFUnordLessThanEqual %bool %a %b
+ OpStore %c5 %23
+ %25 = OpFUnordGreaterThanEqual %bool %a %b
+ OpStore %c6 %25
+ %32 = OpLoad %v2float %c
+ %34 = OpLoad %v2float %d
+ %35 = OpFUnordEqual %v2bool %32 %34
+ OpStore %c7 %35
+ %37 = OpLoad %v2float %c
+ %38 = OpLoad %v2float %d
+ %39 = OpFUnordNotEqual %v2bool %37 %38
+ OpStore %c8 %39
+ %41 = OpLoad %v2float %c
+ %42 = OpLoad %v2float %d
+ %43 = OpFUnordLessThan %v2bool %41 %42
+ OpStore %c9 %43
+ %45 = OpLoad %v2float %c
+ %46 = OpLoad %v2float %d
+ %47 = OpFUnordGreaterThan %v2bool %45 %46
+ OpStore %c10 %47
+ %49 = OpLoad %v2float %c
+ %50 = OpLoad %v2float %d
+ %51 = OpFUnordLessThanEqual %v2bool %49 %50
+ OpStore %c11 %51
+ %53 = OpLoad %v2float %c
+ %54 = OpLoad %v2float %d
+ %55 = OpFUnordGreaterThanEqual %v2bool %53 %54
+ OpStore %c12 %55
+ %62 = OpLoad %v3float %e
+ %64 = OpLoad %v3float %f
+ %65 = OpFUnordEqual %v3bool %62 %64
+ OpStore %c13 %65
+ %67 = OpLoad %v3float %e
+ %68 = OpLoad %v3float %f
+ %69 = OpFUnordNotEqual %v3bool %67 %68
+ OpStore %c14 %69
+ %71 = OpLoad %v3float %e
+ %72 = OpLoad %v3float %f
+ %73 = OpFUnordLessThan %v3bool %71 %72
+ OpStore %c15 %73
+ %75 = OpLoad %v3float %e
+ %76 = OpLoad %v3float %f
+ %77 = OpFUnordGreaterThan %v3bool %75 %76
+ OpStore %c16 %77
+ %79 = OpLoad %v3float %e
+ %80 = OpLoad %v3float %f
+ %81 = OpFUnordLessThanEqual %v3bool %79 %80
+ OpStore %c17 %81
+ %83 = OpLoad %v3float %e
+ %84 = OpLoad %v3float %f
+ %85 = OpFUnordGreaterThanEqual %v3bool %83 %84
+ OpStore %c18 %85
+ %92 = OpLoad %v4float %g
+ %94 = OpLoad %v4float %h
+ %95 = OpFUnordEqual %v4bool %92 %94
+ OpStore %c19 %95
+ %97 = OpLoad %v4float %g
+ %98 = OpLoad %v4float %h
+ %99 = OpFUnordNotEqual %v4bool %97 %98
+ OpStore %c20 %99
+ %101 = OpLoad %v4float %g
+ %102 = OpLoad %v4float %h
+ %103 = OpFUnordLessThan %v4bool %101 %102
+ OpStore %c21 %103
+ %105 = OpLoad %v4float %g
+ %106 = OpLoad %v4float %h
+ %107 = OpFUnordGreaterThan %v4bool %105 %106
+ OpStore %c22 %107
+ %109 = OpLoad %v4float %g
+ %110 = OpLoad %v4float %h
+ %111 = OpFUnordLessThanEqual %v4bool %109 %110
+ OpStore %c23 %111
+ %113 = OpLoad %v4float %g
+ %114 = OpLoad %v4float %h
+ %115 = OpFUnordGreaterThanEqual %v4bool %113 %114
+ OpStore %c24 %115
+ %118 = OpLoad %float %t0
+ %119 = OpLoad %float %t1
+ %120 = OpFAdd %float %118 %119
+ %121 = OpCompositeConstruct %v4float %120 %120 %120 %120
+ OpStore %FragColor %121
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
index 4ad8fc5a..15286e08 100644
--- a/shaders-no-opt/asm/frag/unordered-compare.asm.frag
+++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag
@@ -93,6 +93,8 @@
%42 = OpLoad %v4float %B
%43 = OpFUnordNotEqual %v4bool %41 %42
OpStore %neq %43
+ %ordered = OpFOrdNotEqual %v4bool %41 %42
+ OpStore %neq %ordered
%44 = OpLoad %v4bool %le
%49 = OpSelect %v4float %44 %48 %47
%50 = OpLoad %v4bool %leq
diff --git a/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
new file mode 100644
index 00000000..15286e08
--- /dev/null
+++ b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag
@@ -0,0 +1,179 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 132
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %A %B %FragColor
+ OpExecutionMode %main OriginUpperLeft
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %test_vector_ "test_vector("
+ OpName %test_scalar_ "test_scalar("
+ OpName %le "le"
+ OpName %A "A"
+ OpName %B "B"
+ OpName %leq "leq"
+ OpName %ge "ge"
+ OpName %geq "geq"
+ OpName %eq "eq"
+ OpName %neq "neq"
+ OpName %le_0 "le"
+ OpName %leq_0 "leq"
+ OpName %ge_0 "ge"
+ OpName %geq_0 "geq"
+ OpName %eq_0 "eq"
+ OpName %neq_0 "neq"
+ OpName %FragColor "FragColor"
+ OpDecorate %A Location 0
+ OpDecorate %B Location 1
+ OpDecorate %FragColor Location 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %8 = OpTypeFunction %v4float
+ %11 = OpTypeFunction %float
+ %bool = OpTypeBool
+ %v4bool = OpTypeVector %bool 4
+%_ptr_Function_v4bool = OpTypePointer Function %v4bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+ %A = OpVariable %_ptr_Input_v4float Input
+ %B = OpVariable %_ptr_Input_v4float Input
+ %float_0 = OpConstant %float 0
+ %float_1 = OpConstant %float 1
+ %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
+ %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
+%_ptr_Function_bool = OpTypePointer Function %bool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+ %FragColor = OpVariable %_ptr_Output_v4float Output
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %128 = OpFunctionCall %v4float %test_vector_
+ %129 = OpFunctionCall %float %test_scalar_
+ %130 = OpCompositeConstruct %v4float %129 %129 %129 %129
+ %131 = OpFAdd %v4float %128 %130
+ OpStore %FragColor %131
+ OpReturn
+ OpFunctionEnd
+%test_vector_ = OpFunction %v4float None %8
+ %10 = OpLabel
+ %le = OpVariable %_ptr_Function_v4bool Function
+ %leq = OpVariable %_ptr_Function_v4bool Function
+ %ge = OpVariable %_ptr_Function_v4bool Function
+ %geq = OpVariable %_ptr_Function_v4bool Function
+ %eq = OpVariable %_ptr_Function_v4bool Function
+ %neq = OpVariable %_ptr_Function_v4bool Function
+ %20 = OpLoad %v4float %A
+ %22 = OpLoad %v4float %B
+ %23 = OpFUnordLessThan %v4bool %20 %22
+ OpStore %le %23
+ %25 = OpLoad %v4float %A
+ %26 = OpLoad %v4float %B
+ %27 = OpFUnordLessThanEqual %v4bool %25 %26
+ OpStore %leq %27
+ %29 = OpLoad %v4float %A
+ %30 = OpLoad %v4float %B
+ %31 = OpFUnordGreaterThan %v4bool %29 %30
+ OpStore %ge %31
+ %33 = OpLoad %v4float %A
+ %34 = OpLoad %v4float %B
+ %35 = OpFUnordGreaterThanEqual %v4bool %33 %34
+ OpStore %geq %35
+ %37 = OpLoad %v4float %A
+ %38 = OpLoad %v4float %B
+ %39 = OpFUnordEqual %v4bool %37 %38
+ OpStore %eq %39
+ %41 = OpLoad %v4float %A
+ %42 = OpLoad %v4float %B
+ %43 = OpFUnordNotEqual %v4bool %41 %42
+ OpStore %neq %43
+ %ordered = OpFOrdNotEqual %v4bool %41 %42
+ OpStore %neq %ordered
+ %44 = OpLoad %v4bool %le
+ %49 = OpSelect %v4float %44 %48 %47
+ %50 = OpLoad %v4bool %leq
+ %51 = OpSelect %v4float %50 %48 %47
+ %52 = OpFAdd %v4float %49 %51
+ %53 = OpLoad %v4bool %ge
+ %54 = OpSelect %v4float %53 %48 %47
+ %55 = OpFAdd %v4float %52 %54
+ %56 = OpLoad %v4bool %geq
+ %57 = OpSelect %v4float %56 %48 %47
+ %58 = OpFAdd %v4float %55 %57
+ %59 = OpLoad %v4bool %eq
+ %60 = OpSelect %v4float %59 %48 %47
+ %61 = OpFAdd %v4float %58 %60
+ %62 = OpLoad %v4bool %neq
+ %63 = OpSelect %v4float %62 %48 %47
+ %64 = OpFAdd %v4float %61 %63
+ OpReturnValue %64
+ OpFunctionEnd
+%test_scalar_ = OpFunction %float None %11
+ %13 = OpLabel
+ %le_0 = OpVariable %_ptr_Function_bool Function
+ %leq_0 = OpVariable %_ptr_Function_bool Function
+ %ge_0 = OpVariable %_ptr_Function_bool Function
+ %geq_0 = OpVariable %_ptr_Function_bool Function
+ %eq_0 = OpVariable %_ptr_Function_bool Function
+ %neq_0 = OpVariable %_ptr_Function_bool Function
+ %72 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %73 = OpLoad %float %72
+ %74 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %75 = OpLoad %float %74
+ %76 = OpFUnordLessThan %bool %73 %75
+ OpStore %le_0 %76
+ %78 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %79 = OpLoad %float %78
+ %80 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %81 = OpLoad %float %80
+ %82 = OpFUnordLessThanEqual %bool %79 %81
+ OpStore %leq_0 %82
+ %84 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %87 = OpLoad %float %86
+ %88 = OpFUnordGreaterThan %bool %85 %87
+ OpStore %ge_0 %88
+ %90 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %91 = OpLoad %float %90
+ %92 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %93 = OpLoad %float %92
+ %94 = OpFUnordGreaterThanEqual %bool %91 %93
+ OpStore %geq_0 %94
+ %96 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %97 = OpLoad %float %96
+ %98 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %99 = OpLoad %float %98
+ %100 = OpFUnordEqual %bool %97 %99
+ OpStore %eq_0 %100
+ %102 = OpAccessChain %_ptr_Input_float %A %uint_0
+ %103 = OpLoad %float %102
+ %104 = OpAccessChain %_ptr_Input_float %B %uint_0
+ %105 = OpLoad %float %104
+ %106 = OpFUnordNotEqual %bool %103 %105
+ OpStore %neq_0 %106
+ %107 = OpLoad %bool %le_0
+ %108 = OpSelect %float %107 %float_1 %float_0
+ %109 = OpLoad %bool %leq_0
+ %110 = OpSelect %float %109 %float_1 %float_0
+ %111 = OpFAdd %float %108 %110
+ %112 = OpLoad %bool %ge_0
+ %113 = OpSelect %float %112 %float_1 %float_0
+ %114 = OpFAdd %float %111 %113
+ %115 = OpLoad %bool %geq_0
+ %116 = OpSelect %float %115 %float_1 %float_0
+ %117 = OpFAdd %float %114 %116
+ %118 = OpLoad %bool %eq_0
+ %119 = OpSelect %float %118 %float_1 %float_0
+ %120 = OpFAdd %float %117 %119
+ %121 = OpLoad %bool %neq_0
+ %122 = OpSelect %float %121 %float_1 %float_0
+ %123 = OpFAdd %float %120 %122
+ OpReturnValue %123
+ OpFunctionEnd
diff --git a/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main"
+ OpExecutionMode %main LocalSize 1 1 1
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %SSBO "SSBO"
+ OpMemberName %SSBO 0 "a1"
+ OpMemberName %SSBO 1 "a2"
+ OpMemberName %SSBO 2 "a3"
+ OpMemberName %SSBO 3 "a4"
+ OpMemberName %SSBO 4 "b1"
+ OpMemberName %SSBO 5 "b2"
+ OpMemberName %SSBO 6 "b3"
+ OpMemberName %SSBO 7 "b4"
+ OpMemberName %SSBO 8 "c1"
+ OpMemberName %SSBO 9 "c2"
+ OpMemberName %SSBO 10 "c3"
+ OpMemberName %SSBO 11 "c4"
+ OpName %_ ""
+ OpName %i "i"
+ OpMemberDecorate %SSBO 0 Offset 0
+ OpMemberDecorate %SSBO 1 Offset 8
+ OpMemberDecorate %SSBO 2 Offset 16
+ OpMemberDecorate %SSBO 3 Offset 32
+ OpMemberDecorate %SSBO 4 Offset 48
+ OpMemberDecorate %SSBO 5 Offset 56
+ OpMemberDecorate %SSBO 6 Offset 64
+ OpMemberDecorate %SSBO 7 Offset 80
+ OpMemberDecorate %SSBO 8 Offset 96
+ OpMemberDecorate %SSBO 9 Offset 104
+ OpMemberDecorate %SSBO 10 Offset 112
+ OpMemberDecorate %SSBO 11 Offset 128
+ OpDecorate %SSBO BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 0
+ %void = OpTypeVoid
+ %7 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v2float = OpTypeVector %float 2
+ %v3float = OpTypeVector %float 3
+ %v4float = OpTypeVector %float 4
+ %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+ %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %int_8 = OpConstant %int 8
+ %int_1 = OpConstant %int 1
+ %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+ %int_9 = OpConstant %int 9
+ %int_2 = OpConstant %int 2
+ %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+ %int_10 = OpConstant %int 10
+ %int_3 = OpConstant %int 3
+ %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+ %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+ %bool = OpTypeBool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %main = OpFunction %void None %7
+ %35 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %37 = OpLoad %float %36
+ %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %39 = OpLoad %float %38
+ %40 = OpExtInst %float %1 NMin %37 %39
+ %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %41 %40
+ %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %43 = OpLoad %v2float %42
+ %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %45 = OpLoad %v2float %44
+ %46 = OpExtInst %v2float %1 NMin %43 %45
+ %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %47 %46
+ %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %49 = OpLoad %v3float %48
+ %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %51 = OpLoad %v3float %50
+ %52 = OpExtInst %v3float %1 NMin %49 %51
+ %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %53 %52
+ %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %55 = OpLoad %v4float %54
+ %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %57 = OpLoad %v4float %56
+ %58 = OpExtInst %v4float %1 NMin %55 %57
+ %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %59 %58
+ %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %61 = OpLoad %float %60
+ %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %63 = OpLoad %float %62
+ %64 = OpExtInst %float %1 NMax %61 %63
+ %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %65 %64
+ %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %67 = OpLoad %v2float %66
+ %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %69 = OpLoad %v2float %68
+ %70 = OpExtInst %v2float %1 NMax %67 %69
+ %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %71 %70
+ %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %73 = OpLoad %v3float %72
+ %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %75 = OpLoad %v3float %74
+ %76 = OpExtInst %v3float %1 NMax %73 %75
+ %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %77 %76
+ %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %79 = OpLoad %v4float %78
+ %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %81 = OpLoad %v4float %80
+ %82 = OpExtInst %v4float %1 NMax %79 %81
+ %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %83 %82
+ %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %87 = OpLoad %float %86
+ %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %89 = OpLoad %float %88
+ %90 = OpExtInst %float %1 NClamp %85 %87 %89
+ %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %91 %90
+ %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ %93 = OpLoad %v2float %92
+ %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %95 = OpLoad %v2float %94
+ %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %97 = OpLoad %v2float %96
+ %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+ %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %99 %98
+ %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ %101 = OpLoad %v3float %100
+ %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %103 = OpLoad %v3float %102
+ %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %105 = OpLoad %v3float %104
+ %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+ %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %107 %106
+ %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ %109 = OpLoad %v4float %108
+ %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %111 = OpLoad %v4float %110
+ %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %113 = OpLoad %v4float %112
+ %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+ %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %115 %114
+ OpStore %i %int_0
+ OpBranch %116
+ %116 = OpLabel
+ OpLoopMerge %117 %118 None
+ OpBranch %119
+ %119 = OpLabel
+ %120 = OpLoad %int %i
+ %121 = OpSLessThan %bool %120 %int_2
+ OpBranchConditional %121 %122 %117
+ %122 = OpLabel
+ %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %124 = OpLoad %v2float %123
+ %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %126 = OpLoad %v2float %125
+ %127 = OpExtInst %v2float %1 NMin %124 %126
+ %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %128 %127
+ OpBranch %118
+ %118 = OpLabel
+ %129 = OpLoad %int %i
+ %130 = OpIAdd %int %129 %int_1
+ OpStore %i %130
+ %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %132 = OpLoad %float %131
+ %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+ %134 = OpLoad %float %133
+ %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+ %136 = OpLoad %float %135
+ %137 = OpExtInst %float %1 NClamp %132 %134 %136
+ %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %138 %137
+ OpBranch %116
+ %117 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp
index db98de15..4a62b635 100644
--- a/spirv_cross_c.cpp
+++ b/spirv_cross_c.cpp
@@ -475,6 +475,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT:
options->glsl.ovr_multiview_view_count = value;
break;
+ case SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS:
+ options->glsl.relax_nan_checks = value != 0;
+ break;
#endif
#if SPIRV_CROSS_C_API_HLSL
diff --git a/spirv_cross_c.h b/spirv_cross_c.h
index a590c805..a35a5d65 100644
--- a/spirv_cross_c.h
+++ b/spirv_cross_c.h
@@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 48
+#define SPVC_C_API_VERSION_MINOR 49
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@@ -677,6 +677,8 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT,
+ SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | SPVC_COMPILER_OPTION_COMMON_BIT,
+
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index bacf709c..4c465133 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -7307,6 +7307,63 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool
return farg_str;
}
+Op CompilerGLSL::get_remapped_spirv_op(Op op) const
+{
+ if (options.relax_nan_checks)
+ {
+ switch (op)
+ {
+ case OpFUnordLessThan:
+ op = OpFOrdLessThan;
+ break;
+ case OpFUnordLessThanEqual:
+ op = OpFOrdLessThanEqual;
+ break;
+ case OpFUnordGreaterThan:
+ op = OpFOrdGreaterThan;
+ break;
+ case OpFUnordGreaterThanEqual:
+ op = OpFOrdGreaterThanEqual;
+ break;
+ case OpFUnordEqual:
+ op = OpFOrdEqual;
+ break;
+ case OpFOrdNotEqual:
+ op = OpFUnordNotEqual;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return op;
+}
+
+GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const
+{
+ // Relax to non-NaN aware opcodes.
+ if (options.relax_nan_checks)
+ {
+ switch (std450_op)
+ {
+ case GLSLstd450NClamp:
+ std450_op = GLSLstd450FClamp;
+ break;
+ case GLSLstd450NMin:
+ std450_op = GLSLstd450FMin;
+ break;
+ case GLSLstd450NMax:
+ std450_op = GLSLstd450FMax;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return std450_op;
+}
+
void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
{
auto op = static_cast<GLSLstd450>(eop);
@@ -7319,6 +7376,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
auto int_type = to_signed_basetype(integer_width);
auto uint_type = to_unsigned_basetype(integer_width);
+ op = get_remapped_glsl_op(op);
+
switch (op)
{
// FP fiddling
@@ -10094,6 +10153,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
auto int_type = to_signed_basetype(integer_width);
auto uint_type = to_unsigned_basetype(integer_width);
+ opcode = get_remapped_spirv_op(opcode);
+
switch (opcode)
{
// Dealing with memory
@@ -11220,7 +11281,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpLogicalNotEqual:
case OpFOrdNotEqual:
+ case OpFUnordNotEqual:
{
+ // GLSL is fuzzy on what to do with ordered vs unordered not equal.
+ // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE,
+ // but this means we have no easy way of implementing ordered not equal.
if (expression_type(ops[2]).vecsize > 1)
GLSL_BFOP(notEqual);
else
@@ -12540,7 +12605,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
case OpFUnordEqual:
- case OpFUnordNotEqual:
case OpFUnordLessThan:
case OpFUnordGreaterThan:
case OpFUnordLessThanEqual:
@@ -12563,10 +12627,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
comp_op = "notEqual";
break;
- case OpFUnordNotEqual:
- comp_op = "equal";
- break;
-
case OpFUnordLessThan:
comp_op = "greaterThanEqual";
break;
@@ -12599,10 +12659,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
comp_op = " != ";
break;
- case OpFUnordNotEqual:
- comp_op = " == ";
- break;
-
case OpFUnordLessThan:
comp_op = " >= ";
break;
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index b2073abd..b892e0c3 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -138,6 +138,13 @@ public:
// what happens on legacy GLSL targets for blocks and structs.
bool force_flattened_io_blocks = false;
+ // For opcodes where we have to perform explicit additional nan checks, very ugly code is generated.
+ // If we opt-in, ignore these requirements.
+ // In opcodes like NClamp/NMin/NMax and FP compare, ignore NaN behavior.
+ // Use FClamp/FMin/FMax semantics for clamps and lets implementation choose ordered or unordered
+ // compares.
+ bool relax_nan_checks = false;
+
// If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2.
uint32_t ovr_multiview_view_count = 0;
@@ -362,6 +369,11 @@ protected:
virtual void emit_instruction(const Instruction &instr);
void emit_block_instructions(SPIRBlock &block);
+
+ // For relax_nan_checks.
+ GLSLstd450 get_remapped_glsl_op(GLSLstd450 std450_op) const;
+ spv::Op get_remapped_spirv_op(spv::Op op) const;
+
virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 4e4e4ca7..646fce33 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -3524,6 +3524,8 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
auto int_type = to_signed_basetype(integer_width);
auto uint_type = to_unsigned_basetype(integer_width);
+ op = get_remapped_glsl_op(op);
+
switch (op)
{
case GLSLstd450InverseSqrt:
@@ -4792,6 +4794,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
auto int_type = to_signed_basetype(integer_width);
auto uint_type = to_unsigned_basetype(integer_width);
+ opcode = get_remapped_spirv_op(opcode);
+
switch (opcode)
{
case OpAccessChain:
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index db5ef5d1..4eeb3b7f 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -7632,6 +7632,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
auto ops = stream(instruction);
auto opcode = static_cast<Op>(instruction.op);
+ opcode = get_remapped_spirv_op(opcode);
+
// If we need to do implicit bitcasts, make sure we do it with the correct type.
uint32_t integer_width = get_integer_width_for_instruction(instruction);
auto int_type = to_signed_basetype(integer_width);
@@ -7674,6 +7676,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpLogicalNotEqual:
case OpFOrdNotEqual:
+ // TODO: Should probably negate the == result here.
+ // Typically OrdNotEqual comes from GLSL which itself does not really specify what
+ // happens with NaN.
+ // Consider fixing this if we run into real issues.
MSL_BOP(!=);
break;
@@ -7730,7 +7736,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
case OpFUnordNotEqual:
- MSL_UNORD_BOP(!=);
+ // not equal in MSL generates une opcodes to begin with.
+ // Since unordered not equal is how it works in C, just inherit that behavior.
+ MSL_BOP(!=);
break;
case OpFUnordGreaterThan:
@@ -8993,6 +9001,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
auto int_type = to_signed_basetype(integer_width);
auto uint_type = to_unsigned_basetype(integer_width);
+ op = get_remapped_glsl_op(op);
+
switch (op)
{
case GLSLstd450Sinh:
diff --git a/test_shaders.py b/test_shaders.py
index 640baa23..49038939 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -350,6 +350,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
if '.mask-clip-distance.' in shader:
msl_args.append('--mask-stage-output-builtin')
msl_args.append('ClipDistance')
+ if '.relax-nan.' in shader:
+ msl_args.append('--relax-nan-checks')
subprocess.check_call(msl_args)
@@ -474,6 +476,8 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati
hlsl_args.append('--hlsl-enable-16bit-types')
if '.flatten-matrix-vertex-input.' in shader:
hlsl_args.append('--hlsl-flatten-matrix-vertex-input-semantics')
+ if '.relax-nan.' in shader:
+ hlsl_args.append('--relax-nan-checks')
subprocess.check_call(hlsl_args)
@@ -576,6 +580,8 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
extra_args += ['--force-zero-initialized-variables']
if '.force-flattened-io.' in shader:
extra_args += ['--glsl-force-flattened-io-blocks']
+ if '.relax-nan.' in shader:
+ extra_args.append('--relax-nan-checks')
spirv_cross_path = paths.spirv_cross