Merge pull request #900 from KhronosGroup/fix-899 (2019-03-22)

GLSL/HLSL: Implement NMin/NMax/NClamp.
author: Hans-Kristian Arntzen <post@arntzen-software.no> 2019-03-21 19:17:28 +0300
committer: GitHub <noreply@github.com> 2019-03-21 19:17:28 +0300
commit: 9dbb25783fb5625a67a84b0ef2842b0ed27150bb (patch)
tree: 36a884f438cc96a50f393c2c95d6d8c7d677044c
parent: 5dacfa9dc20d0380a67cd4f84dd17cfd8f96fd5d (diff)
parent: 2a0365c813a453c0232a719a9f41759cf95dded0 (diff)
8 files changed, 721 insertions, 4 deletions
diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..7acd67d0
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,87 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main() // Runs NaN-aware min, max and clamp sequences over the scalar, float2, float3 and float4 values of buffer _4.
+{
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); // Scalar NaN-aware min: if either operand is NaN, the other operand is stored.
+    bool2 _146 = isnan(asfloat(_4.Load2(56))); // Vector form: one isnan() mask per operand, then each component is selected individually.
+    bool2 _147 = isnan(asfloat(_4.Load2(104)));
+    float2 _148 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+    float2 _149 = float2(_146.x ? asfloat(_4.Load2(104)).x : _148.x, _146.y ? asfloat(_4.Load2(104)).y : _148.y);
+    _4.Store2(8, asuint(float2(_147.x ? asfloat(_4.Load2(56)).x : _149.x, _147.y ? asfloat(_4.Load2(56)).y : _149.y)));
+    bool3 _151 = isnan(asfloat(_4.Load3(64)));
+    bool3 _152 = isnan(asfloat(_4.Load3(112)));
+    float3 _153 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+    float3 _154 = float3(_151.x ? asfloat(_4.Load3(112)).x : _153.x, _151.y ? asfloat(_4.Load3(112)).y : _153.y, _151.z ? asfloat(_4.Load3(112)).z : _153.z);
+    _4.Store3(16, asuint(float3(_152.x ? asfloat(_4.Load3(64)).x : _154.x, _152.y ? asfloat(_4.Load3(64)).y : _154.y, _152.z ? asfloat(_4.Load3(64)).z : _154.z)));
+    bool4 _156 = isnan(asfloat(_4.Load4(80)));
+    bool4 _157 = isnan(asfloat(_4.Load4(128)));
+    float4 _158 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+    float4 _159 = float4(_156.x ? asfloat(_4.Load4(128)).x : _158.x, _156.y ? asfloat(_4.Load4(128)).y : _158.y, _156.z ? asfloat(_4.Load4(128)).z : _158.z, _156.w ? asfloat(_4.Load4(128)).w : _158.w);
+    _4.Store4(32, asuint(float4(_157.x ? asfloat(_4.Load4(80)).x : _159.x, _157.y ? asfloat(_4.Load4(80)).y : _159.y, _157.z ? asfloat(_4.Load4(80)).z : _159.z, _157.w ? asfloat(_4.Load4(80)).w : _159.w)));
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); // Same selection pattern as the first statement, with max in place of min.
+    bool2 _166 = isnan(asfloat(_4.Load2(56)));
+    bool2 _167 = isnan(asfloat(_4.Load2(104)));
+    float2 _168 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+    float2 _169 = float2(_166.x ? asfloat(_4.Load2(104)).x : _168.x, _166.y ? asfloat(_4.Load2(104)).y : _168.y);
+    _4.Store2(8, asuint(float2(_167.x ? asfloat(_4.Load2(56)).x : _169.x, _167.y ? asfloat(_4.Load2(56)).y : _169.y)));
+    bool3 _171 = isnan(asfloat(_4.Load3(64)));
+    bool3 _172 = isnan(asfloat(_4.Load3(112)));
+    float3 _173 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+    float3 _174 = float3(_171.x ? asfloat(_4.Load3(112)).x : _173.x, _171.y ? asfloat(_4.Load3(112)).y : _173.y, _171.z ? asfloat(_4.Load3(112)).z : _173.z);
+    _4.Store3(16, asuint(float3(_172.x ? asfloat(_4.Load3(64)).x : _174.x, _172.y ? asfloat(_4.Load3(64)).y : _174.y, _172.z ? asfloat(_4.Load3(64)).z : _174.z)));
+    bool4 _176 = isnan(asfloat(_4.Load4(80)));
+    bool4 _177 = isnan(asfloat(_4.Load4(128)));
+    float4 _178 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+    float4 _179 = float4(_176.x ? asfloat(_4.Load4(128)).x : _178.x, _176.y ? asfloat(_4.Load4(128)).y : _178.y, _176.z ? asfloat(_4.Load4(128)).z : _178.z, _176.w ? asfloat(_4.Load4(128)).w : _178.w);
+    _4.Store4(32, asuint(float4(_177.x ? asfloat(_4.Load4(80)).x : _179.x, _177.y ? asfloat(_4.Load4(80)).y : _179.y, _177.z ? asfloat(_4.Load4(80)).z : _179.z, _177.w ? asfloat(_4.Load4(80)).w : _179.w)));
+    float _180 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48)))); // Clamp, first half: NaN-aware max of the values at offsets 0 and 48.
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _180 : (isnan(_180) ? asfloat(_4.Load(96)) : min(_180, asfloat(_4.Load(96)))))); // Clamp, second half: NaN-aware min of _180 and the value at offset 96.
+    bool2 _193 = isnan(asfloat(_4.Load2(8)));
+    bool2 _194 = isnan(asfloat(_4.Load2(56)));
+    float2 _195 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)));
+    float2 _196 = float2(_193.x ? asfloat(_4.Load2(56)).x : _195.x, _193.y ? asfloat(_4.Load2(56)).y : _195.y);
+    float2 _191 = float2(_194.x ? asfloat(_4.Load2(8)).x : _196.x, _194.y ? asfloat(_4.Load2(8)).y : _196.y);
+    bool2 _198 = isnan(_191);
+    bool2 _199 = isnan(asfloat(_4.Load2(104)));
+    float2 _200 = min(_191, asfloat(_4.Load2(104)));
+    float2 _201 = float2(_198.x ? asfloat(_4.Load2(104)).x : _200.x, _198.y ? asfloat(_4.Load2(104)).y : _200.y);
+    _4.Store2(8, asuint(float2(_199.x ? _191.x : _201.x, _199.y ? _191.y : _201.y)));
+    bool3 _204 = isnan(asfloat(_4.Load3(16)));
+    bool3 _205 = isnan(asfloat(_4.Load3(64)));
+    float3 _206 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)));
+    float3 _207 = float3(_204.x ? asfloat(_4.Load3(64)).x : _206.x, _204.y ? asfloat(_4.Load3(64)).y : _206.y, _204.z ? asfloat(_4.Load3(64)).z : _206.z);
+    float3 _202 = float3(_205.x ? asfloat(_4.Load3(16)).x : _207.x, _205.y ? asfloat(_4.Load3(16)).y : _207.y, _205.z ? asfloat(_4.Load3(16)).z : _207.z);
+    bool3 _209 = isnan(_202);
+    bool3 _210 = isnan(asfloat(_4.Load3(112)));
+    float3 _211 = min(_202, asfloat(_4.Load3(112)));
+    float3 _212 = float3(_209.x ? asfloat(_4.Load3(112)).x : _211.x, _209.y ? asfloat(_4.Load3(112)).y : _211.y, _209.z ? asfloat(_4.Load3(112)).z : _211.z);
+    _4.Store3(16, asuint(float3(_210.x ? _202.x : _212.x, _210.y ? _202.y : _212.y, _210.z ? _202.z : _212.z)));
+    bool4 _215 = isnan(asfloat(_4.Load4(32)));
+    bool4 _216 = isnan(asfloat(_4.Load4(80)));
+    float4 _217 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)));
+    float4 _218 = float4(_215.x ? asfloat(_4.Load4(80)).x : _217.x, _215.y ? asfloat(_4.Load4(80)).y : _217.y, _215.z ? asfloat(_4.Load4(80)).z : _217.z, _215.w ? asfloat(_4.Load4(80)).w : _217.w);
+    float4 _213 = float4(_216.x ? asfloat(_4.Load4(32)).x : _218.x, _216.y ? asfloat(_4.Load4(32)).y : _218.y, _216.z ? asfloat(_4.Load4(32)).z : _218.z, _216.w ? asfloat(_4.Load4(32)).w : _218.w);
+    bool4 _220 = isnan(_213);
+    bool4 _221 = isnan(asfloat(_4.Load4(128)));
+    float4 _222 = min(_213, asfloat(_4.Load4(128)));
+    float4 _223 = float4(_220.x ? asfloat(_4.Load4(128)).x : _222.x, _220.y ? asfloat(_4.Load4(128)).y : _222.y, _220.z ? asfloat(_4.Load4(128)).z : _222.z, _220.w ? asfloat(_4.Load4(128)).w : _222.w);
+    _4.Store4(32, asuint(float4(_221.x ? _213.x : _223.x, _221.y ? _213.y : _223.y, _221.z ? _213.z : _223.z, _221.w ? _213.w : _223.w)));
+    for (int _139 = 0; _139 < 2; ) // No increment clause: the counter is advanced at the end of the body.
+    {
+        bool2 _225 = isnan(asfloat(_4.Load2(56)));
+        bool2 _226 = isnan(asfloat(_4.Load2(104)));
+        float2 _227 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+        float2 _228 = float2(_225.x ? asfloat(_4.Load2(104)).x : _227.x, _225.y ? asfloat(_4.Load2(104)).y : _227.y);
+        _4.Store2(8, asuint(float2(_226.x ? asfloat(_4.Load2(56)).x : _228.x, _226.y ? asfloat(_4.Load2(56)).y : _228.y)));
+        float _229 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56)))); // NaN-aware max of the values at offsets 0 and 56.
+        _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _229 : (isnan(_229) ? asfloat(_4.Load(60)) : min(_229, asfloat(_4.Load(60)))))); // NaN-aware min of _229 and the value at offset 60, written back to offset 0.
+        _139++;
+        continue;
+    }
+}
+
+[numthreads(1, 1, 1)]
+void main() // Entry point; its only statement is the call to comp_main().
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..5ef1bc91
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,47 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO // Buffer block with three groups (a, b, c) of float/vec2/vec3/vec4 members.
+{
+    float a1;
+    vec2 a2;
+    vec3 a3;
+    vec4 a4;
+    float b1;
+    vec2 b2;
+    vec3 b3;
+    vec4 b4;
+    float c1;
+    vec2 c2;
+    vec3 c3;
+    vec4 c4;
+} _4;
+
+void main() // Writes NaN-aware min, max and clamp results into the a members of _4.
+{
+    _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1)); // Scalar form: if either operand is NaN, the other operand is stored.
+    _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); // Vector form: nested mix() calls use the isnan() results as selector arguments.
+    _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+    _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+    _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1)); // Same pattern with max in place of min.
+    _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+    _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+    _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+    float _180 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1)); // Clamp, first half: NaN-aware max of a1 and b1.
+    _4.a1 = isnan(_4.c1) ? _180 : (isnan(_180) ? _4.c1 : min(_180, _4.c1)); // Clamp, second half: NaN-aware min of _180 and c1.
+    vec2 _191 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2));
+    _4.a2 = mix(mix(min(_191, _4.c2), _4.c2, isnan(_191)), _191, isnan(_4.c2));
+    vec3 _202 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3));
+    _4.a3 = mix(mix(min(_202, _4.c3), _4.c3, isnan(_202)), _202, isnan(_4.c3));
+    vec4 _213 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4));
+    _4.a4 = mix(mix(min(_213, _4.c4), _4.c4, isnan(_213)), _213, isnan(_4.c4));
+    for (int _139 = 0; _139 < 2; ) // No increment clause: the counter is advanced at the end of the body.
+    {
+        _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+        float _229 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x)); // NaN-aware max of a1 and b2.x.
+        _4.a1 = isnan(_4.b2.y) ? _229 : (isnan(_229) ? _4.b2.y : min(_229, _4.b2.y)); // NaN-aware min of _229 and b2.y, written back to a1.
+        _139++;
+        continue;
+    }
+}
+
diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..a6e88684
--- /dev/null
+++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,84 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main() // Runs NaN-aware min, max and clamp sequences over the scalar, float2, float3 and float4 values of buffer _4.
+{
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); // Scalar NaN-aware min: if either operand is NaN, the other operand is stored.
+    bool2 _145 = isnan(asfloat(_4.Load2(56))); // Vector form: one isnan() mask per operand, then each component is selected individually.
+    bool2 _146 = isnan(asfloat(_4.Load2(104)));
+    float2 _147 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+    float2 _148 = float2(_145.x ? asfloat(_4.Load2(104)).x : _147.x, _145.y ? asfloat(_4.Load2(104)).y : _147.y);
+    _4.Store2(8, asuint(float2(_146.x ? asfloat(_4.Load2(56)).x : _148.x, _146.y ? asfloat(_4.Load2(56)).y : _148.y)));
+    bool3 _150 = isnan(asfloat(_4.Load3(64)));
+    bool3 _151 = isnan(asfloat(_4.Load3(112)));
+    float3 _152 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+    float3 _153 = float3(_150.x ? asfloat(_4.Load3(112)).x : _152.x, _150.y ? asfloat(_4.Load3(112)).y : _152.y, _150.z ? asfloat(_4.Load3(112)).z : _152.z);
+    _4.Store3(16, asuint(float3(_151.x ? asfloat(_4.Load3(64)).x : _153.x, _151.y ? asfloat(_4.Load3(64)).y : _153.y, _151.z ? asfloat(_4.Load3(64)).z : _153.z)));
+    bool4 _155 = isnan(asfloat(_4.Load4(80)));
+    bool4 _156 = isnan(asfloat(_4.Load4(128)));
+    float4 _157 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+    float4 _158 = float4(_155.x ? asfloat(_4.Load4(128)).x : _157.x, _155.y ? asfloat(_4.Load4(128)).y : _157.y, _155.z ? asfloat(_4.Load4(128)).z : _157.z, _155.w ? asfloat(_4.Load4(128)).w : _157.w);
+    _4.Store4(32, asuint(float4(_156.x ? asfloat(_4.Load4(80)).x : _158.x, _156.y ? asfloat(_4.Load4(80)).y : _158.y, _156.z ? asfloat(_4.Load4(80)).z : _158.z, _156.w ? asfloat(_4.Load4(80)).w : _158.w)));
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); // Same selection pattern as the first statement, with max in place of min.
+    bool2 _165 = isnan(asfloat(_4.Load2(56)));
+    bool2 _166 = isnan(asfloat(_4.Load2(104)));
+    float2 _167 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+    float2 _168 = float2(_165.x ? asfloat(_4.Load2(104)).x : _167.x, _165.y ? asfloat(_4.Load2(104)).y : _167.y);
+    _4.Store2(8, asuint(float2(_166.x ? asfloat(_4.Load2(56)).x : _168.x, _166.y ? asfloat(_4.Load2(56)).y : _168.y)));
+    bool3 _170 = isnan(asfloat(_4.Load3(64)));
+    bool3 _171 = isnan(asfloat(_4.Load3(112)));
+    float3 _172 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+    float3 _173 = float3(_170.x ? asfloat(_4.Load3(112)).x : _172.x, _170.y ? asfloat(_4.Load3(112)).y : _172.y, _170.z ? asfloat(_4.Load3(112)).z : _172.z);
+    _4.Store3(16, asuint(float3(_171.x ? asfloat(_4.Load3(64)).x : _173.x, _171.y ? asfloat(_4.Load3(64)).y : _173.y, _171.z ? asfloat(_4.Load3(64)).z : _173.z)));
+    bool4 _175 = isnan(asfloat(_4.Load4(80)));
+    bool4 _176 = isnan(asfloat(_4.Load4(128)));
+    float4 _177 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+    float4 _178 = float4(_175.x ? asfloat(_4.Load4(128)).x : _177.x, _175.y ? asfloat(_4.Load4(128)).y : _177.y, _175.z ? asfloat(_4.Load4(128)).z : _177.z, _175.w ? asfloat(_4.Load4(128)).w : _177.w);
+    _4.Store4(32, asuint(float4(_176.x ? asfloat(_4.Load4(80)).x : _178.x, _176.y ? asfloat(_4.Load4(80)).y : _178.y, _176.z ? asfloat(_4.Load4(80)).z : _178.z, _176.w ? asfloat(_4.Load4(80)).w : _178.w)));
+    float _179 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48)))); // Clamp, first half: NaN-aware max of the values at offsets 0 and 48.
+    _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _179 : (isnan(_179) ? asfloat(_4.Load(96)) : min(_179, asfloat(_4.Load(96)))))); // Clamp, second half: NaN-aware min of _179 and the value at offset 96.
+    bool2 _192 = isnan(asfloat(_4.Load2(8)));
+    bool2 _193 = isnan(asfloat(_4.Load2(56)));
+    float2 _194 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)));
+    float2 _195 = float2(_192.x ? asfloat(_4.Load2(56)).x : _194.x, _192.y ? asfloat(_4.Load2(56)).y : _194.y);
+    float2 _190 = float2(_193.x ? asfloat(_4.Load2(8)).x : _195.x, _193.y ? asfloat(_4.Load2(8)).y : _195.y);
+    bool2 _197 = isnan(_190);
+    bool2 _198 = isnan(asfloat(_4.Load2(104)));
+    float2 _199 = min(_190, asfloat(_4.Load2(104)));
+    float2 _200 = float2(_197.x ? asfloat(_4.Load2(104)).x : _199.x, _197.y ? asfloat(_4.Load2(104)).y : _199.y);
+    _4.Store2(8, asuint(float2(_198.x ? _190.x : _200.x, _198.y ? _190.y : _200.y)));
+    bool3 _203 = isnan(asfloat(_4.Load3(16)));
+    bool3 _204 = isnan(asfloat(_4.Load3(64)));
+    float3 _205 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)));
+    float3 _206 = float3(_203.x ? asfloat(_4.Load3(64)).x : _205.x, _203.y ? asfloat(_4.Load3(64)).y : _205.y, _203.z ? asfloat(_4.Load3(64)).z : _205.z);
+    float3 _201 = float3(_204.x ? asfloat(_4.Load3(16)).x : _206.x, _204.y ? asfloat(_4.Load3(16)).y : _206.y, _204.z ? asfloat(_4.Load3(16)).z : _206.z);
+    bool3 _208 = isnan(_201);
+    bool3 _209 = isnan(asfloat(_4.Load3(112)));
+    float3 _210 = min(_201, asfloat(_4.Load3(112)));
+    float3 _211 = float3(_208.x ? asfloat(_4.Load3(112)).x : _210.x, _208.y ? asfloat(_4.Load3(112)).y : _210.y, _208.z ? asfloat(_4.Load3(112)).z : _210.z);
+    _4.Store3(16, asuint(float3(_209.x ? _201.x : _211.x, _209.y ? _201.y : _211.y, _209.z ? _201.z : _211.z)));
+    bool4 _214 = isnan(asfloat(_4.Load4(32)));
+    bool4 _215 = isnan(asfloat(_4.Load4(80)));
+    float4 _216 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)));
+    float4 _217 = float4(_214.x ? asfloat(_4.Load4(80)).x : _216.x, _214.y ? asfloat(_4.Load4(80)).y : _216.y, _214.z ? asfloat(_4.Load4(80)).z : _216.z, _214.w ? asfloat(_4.Load4(80)).w : _216.w);
+    float4 _212 = float4(_215.x ? asfloat(_4.Load4(32)).x : _217.x, _215.y ? asfloat(_4.Load4(32)).y : _217.y, _215.z ? asfloat(_4.Load4(32)).z : _217.z, _215.w ? asfloat(_4.Load4(32)).w : _217.w);
+    bool4 _219 = isnan(_212);
+    bool4 _220 = isnan(asfloat(_4.Load4(128)));
+    float4 _221 = min(_212, asfloat(_4.Load4(128)));
+    float4 _222 = float4(_219.x ? asfloat(_4.Load4(128)).x : _221.x, _219.y ? asfloat(_4.Load4(128)).y : _221.y, _219.z ? asfloat(_4.Load4(128)).z : _221.z, _219.w ? asfloat(_4.Load4(128)).w : _221.w);
+    _4.Store4(32, asuint(float4(_220.x ? _212.x : _222.x, _220.y ? _212.y : _222.y, _220.z ? _212.z : _222.z, _220.w ? _212.w : _222.w)));
+    float _223;
+    for (int i = 0; i < 2; i++, _223 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56)))), _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _223 : (isnan(_223) ? asfloat(_4.Load(60)) : min(_223, asfloat(_4.Load(60))))))) // The increment clause also does the scalar clamp: max of offsets 0 and 56, then min with offset 60, stored to offset 0.
+    {
+        bool2 _235 = isnan(asfloat(_4.Load2(56)));
+        bool2 _236 = isnan(asfloat(_4.Load2(104)));
+        float2 _237 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+        float2 _238 = float2(_235.x ? asfloat(_4.Load2(104)).x : _237.x, _235.y ? asfloat(_4.Load2(104)).y : _237.y);
+        _4.Store2(8, asuint(float2(_236.x ? asfloat(_4.Load2(56)).x : _238.x, _236.y ? asfloat(_4.Load2(56)).y : _238.y)));
+    }
+}
+
+[numthreads(1, 1, 1)]
+void main() // Entry point; its only statement is the call to comp_main().
+{
+    comp_main();
+}
diff --git a/reference/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..54c452c5
--- /dev/null
+++ b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,44 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO // Buffer block with three groups (a, b, c) of float/vec2/vec3/vec4 members.
+{
+    float a1;
+    vec2 a2;
+    vec3 a3;
+    vec4 a4;
+    float b1;
+    vec2 b2;
+    vec3 b3;
+    vec4 b4;
+    float c1;
+    vec2 c2;
+    vec3 c3;
+    vec4 c4;
+} _4;
+
+void main() // Writes NaN-aware min, max and clamp results into the a members of _4.
+{
+    _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1)); // Scalar form: if either operand is NaN, the other operand is stored.
+    _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); // Vector form: nested mix() calls use the isnan() results as selector arguments.
+    _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+    _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+    _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1)); // Same pattern with max in place of min.
+    _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+    _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+    _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+    float _179 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1)); // Clamp, first half: NaN-aware max of a1 and b1.
+    _4.a1 = isnan(_4.c1) ? _179 : (isnan(_179) ? _4.c1 : min(_179, _4.c1)); // Clamp, second half: NaN-aware min of _179 and c1.
+    vec2 _190 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2));
+    _4.a2 = mix(mix(min(_190, _4.c2), _4.c2, isnan(_190)), _190, isnan(_4.c2));
+    vec3 _201 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3));
+    _4.a3 = mix(mix(min(_201, _4.c3), _4.c3, isnan(_201)), _201, isnan(_4.c3));
+    vec4 _212 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4));
+    _4.a4 = mix(mix(min(_212, _4.c4), _4.c4, isnan(_212)), _212, isnan(_4.c4));
+    float _223;
+    for (int i = 0; i < 2; i++, _223 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x)), _4.a1 = isnan(_4.b2.y) ? _223 : (isnan(_223) ? _4.b2.y : min(_223, _4.b2.y))) // The increment clause also does the scalar clamp of a1: max with b2.x, then min with b2.y.
+    {
+        _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+    }
+}
+
diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a1"
+               OpMemberName %SSBO 1 "a2"
+               OpMemberName %SSBO 2 "a3"
+               OpMemberName %SSBO 3 "a4"
+               OpMemberName %SSBO 4 "b1"
+               OpMemberName %SSBO 5 "b2"
+               OpMemberName %SSBO 6 "b3"
+               OpMemberName %SSBO 7 "b4"
+               OpMemberName %SSBO 8 "c1"
+               OpMemberName %SSBO 9 "c2"
+               OpMemberName %SSBO 10 "c3"
+               OpMemberName %SSBO 11 "c4"
+               OpName %_ ""
+               OpName %i "i"
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 8
+               OpMemberDecorate %SSBO 2 Offset 16
+               OpMemberDecorate %SSBO 3 Offset 32
+               OpMemberDecorate %SSBO 4 Offset 48
+               OpMemberDecorate %SSBO 5 Offset 56
+               OpMemberDecorate %SSBO 6 Offset 64
+               OpMemberDecorate %SSBO 7 Offset 80
+               OpMemberDecorate %SSBO 8 Offset 96
+               OpMemberDecorate %SSBO 9 Offset 104
+               OpMemberDecorate %SSBO 10 Offset 112
+               OpMemberDecorate %SSBO 11 Offset 128
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_8 = OpConstant %int 8
+      %int_1 = OpConstant %int 1
+      %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+      %int_9 = OpConstant %int 9
+      %int_2 = OpConstant %int 2
+      %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %int_10 = OpConstant %int 10
+      %int_3 = OpConstant %int 3
+      %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+     %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+       %bool = OpTypeBool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+     %uint_1 = OpConstant %uint 1
+       %main = OpFunction %void None %7 ; Function exercising the NMin, NMax and NClamp extended instructions on SSBO members.
+         %35 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %37 = OpLoad %float %36
+         %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %39 = OpLoad %float %38
+         %40 = OpExtInst %float %1 NMin %37 %39 ; Operands are members 4 and 8; the result is stored to member 0 below.
+         %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %41 %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %43 = OpLoad %v2float %42
+         %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %45 = OpLoad %v2float %44
+         %46 = OpExtInst %v2float %1 NMin %43 %45
+         %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %47 %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %49 = OpLoad %v3float %48
+         %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %51 = OpLoad %v3float %50
+         %52 = OpExtInst %v3float %1 NMin %49 %51
+         %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %53 %52
+         %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %55 = OpLoad %v4float %54
+         %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %57 = OpLoad %v4float %56
+         %58 = OpExtInst %v4float %1 NMin %55 %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %59 %58
+         %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %61 = OpLoad %float %60
+         %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %63 = OpLoad %float %62
+         %64 = OpExtInst %float %1 NMax %61 %63 ; Same member operands as the scalar NMin above, now with NMax.
+         %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %65 %64
+         %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %67 = OpLoad %v2float %66
+         %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %69 = OpLoad %v2float %68
+         %70 = OpExtInst %v2float %1 NMax %67 %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %71 %70
+         %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %73 = OpLoad %v3float %72
+         %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %75 = OpLoad %v3float %74
+         %76 = OpExtInst %v3float %1 NMax %73 %75
+         %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %77 %76
+         %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %79 = OpLoad %v4float %78
+         %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %81 = OpLoad %v4float %80
+         %82 = OpExtInst %v4float %1 NMax %79 %81
+         %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %83 %82
+         %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %87 = OpLoad %float %86
+         %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %89 = OpLoad %float %88
+         %90 = OpExtInst %float %1 NClamp %85 %87 %89 ; Operands are members 0, 4 and 8; the result is stored back to member 0.
+         %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %91 %90
+         %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+         %93 = OpLoad %v2float %92
+         %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %95 = OpLoad %v2float %94
+         %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %97 = OpLoad %v2float %96
+         %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+         %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %99 %98
+        %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+        %101 = OpLoad %v3float %100
+        %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+        %103 = OpLoad %v3float %102
+        %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+        %105 = OpLoad %v3float %104
+        %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+        %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %107 %106
+        %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+        %109 = OpLoad %v4float %108
+        %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+        %111 = OpLoad %v4float %110
+        %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+        %113 = OpLoad %v4float %112
+        %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+        %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %115 %114
+               OpStore %i %int_0
+               OpBranch %116
+        %116 = OpLabel
+               OpLoopMerge %117 %118 None ; %117 is the merge block and %118 the continue block of this loop.
+               OpBranch %119
+        %119 = OpLabel
+        %120 = OpLoad %int %i
+        %121 = OpSLessThan %bool %120 %int_2
+               OpBranchConditional %121 %122 %117
+        %122 = OpLabel
+        %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+        %124 = OpLoad %v2float %123
+        %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+        %126 = OpLoad %v2float %125
+        %127 = OpExtInst %v2float %1 NMin %124 %126
+        %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %128 %127
+               OpBranch %118
+        %118 = OpLabel
+        %129 = OpLoad %int %i
+        %130 = OpIAdd %int %129 %int_1
+               OpStore %i %130
+        %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+        %132 = OpLoad %float %131
+        %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+        %134 = OpLoad %float %133
+        %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+        %136 = OpLoad %float %135
+        %137 = OpExtInst %float %1 NClamp %132 %134 %136 ; In the continue block: member 0 clamped using components 0 and 1 of member 5.
+        %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %138 %137
+               OpBranch %116
+        %117 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/nmin-max-clamp.asm.comp b/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "a1"
+               OpMemberName %SSBO 1 "a2"
+               OpMemberName %SSBO 2 "a3"
+               OpMemberName %SSBO 3 "a4"
+               OpMemberName %SSBO 4 "b1"
+               OpMemberName %SSBO 5 "b2"
+               OpMemberName %SSBO 6 "b3"
+               OpMemberName %SSBO 7 "b4"
+               OpMemberName %SSBO 8 "c1"
+               OpMemberName %SSBO 9 "c2"
+               OpMemberName %SSBO 10 "c3"
+               OpMemberName %SSBO 11 "c4"
+               OpName %_ ""
+               OpName %i "i"
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 8
+               OpMemberDecorate %SSBO 2 Offset 16
+               OpMemberDecorate %SSBO 3 Offset 32
+               OpMemberDecorate %SSBO 4 Offset 48
+               OpMemberDecorate %SSBO 5 Offset 56
+               OpMemberDecorate %SSBO 6 Offset 64
+               OpMemberDecorate %SSBO 7 Offset 80
+               OpMemberDecorate %SSBO 8 Offset 96
+               OpMemberDecorate %SSBO 9 Offset 104
+               OpMemberDecorate %SSBO 10 Offset 112
+               OpMemberDecorate %SSBO 11 Offset 128
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+       %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+      %int_8 = OpConstant %int 8
+      %int_1 = OpConstant %int 1
+      %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+      %int_9 = OpConstant %int 9
+      %int_2 = OpConstant %int 2
+      %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %int_10 = OpConstant %int 10
+      %int_3 = OpConstant %int 3
+      %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+     %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+       %bool = OpTypeBool
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+     %uint_1 = OpConstant %uint 1
+       %main = OpFunction %void None %7
+         %35 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %37 = OpLoad %float %36
+         %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %39 = OpLoad %float %38
+         %40 = OpExtInst %float %1 NMin %37 %39
+         %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %41 %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %43 = OpLoad %v2float %42
+         %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %45 = OpLoad %v2float %44
+         %46 = OpExtInst %v2float %1 NMin %43 %45
+         %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %47 %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %49 = OpLoad %v3float %48
+         %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %51 = OpLoad %v3float %50
+         %52 = OpExtInst %v3float %1 NMin %49 %51
+         %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %53 %52
+         %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %55 = OpLoad %v4float %54
+         %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %57 = OpLoad %v4float %56
+         %58 = OpExtInst %v4float %1 NMin %55 %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %59 %58
+         %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %61 = OpLoad %float %60
+         %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %63 = OpLoad %float %62
+         %64 = OpExtInst %float %1 NMax %61 %63
+         %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %65 %64
+         %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %67 = OpLoad %v2float %66
+         %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %69 = OpLoad %v2float %68
+         %70 = OpExtInst %v2float %1 NMax %67 %69
+         %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %71 %70
+         %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+         %73 = OpLoad %v3float %72
+         %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+         %75 = OpLoad %v3float %74
+         %76 = OpExtInst %v3float %1 NMax %73 %75
+         %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %77 %76
+         %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+         %79 = OpLoad %v4float %78
+         %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+         %81 = OpLoad %v4float %80
+         %82 = OpExtInst %v4float %1 NMax %79 %81
+         %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %83 %82
+         %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+         %85 = OpLoad %float %84
+         %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+         %87 = OpLoad %float %86
+         %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+         %89 = OpLoad %float %88
+         %90 = OpExtInst %float %1 NClamp %85 %87 %89
+         %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %91 %90
+         %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+         %93 = OpLoad %v2float %92
+         %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+         %95 = OpLoad %v2float %94
+         %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+         %97 = OpLoad %v2float %96
+         %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+         %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %99 %98
+        %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+        %101 = OpLoad %v3float %100
+        %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+        %103 = OpLoad %v3float %102
+        %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+        %105 = OpLoad %v3float %104
+        %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+        %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+               OpStore %107 %106
+        %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+        %109 = OpLoad %v4float %108
+        %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+        %111 = OpLoad %v4float %110
+        %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+        %113 = OpLoad %v4float %112
+        %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+        %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+               OpStore %115 %114
+               OpStore %i %int_0
+               OpBranch %116
+        %116 = OpLabel
+               OpLoopMerge %117 %118 None
+               OpBranch %119
+        %119 = OpLabel
+        %120 = OpLoad %int %i
+        %121 = OpSLessThan %bool %120 %int_2
+               OpBranchConditional %121 %122 %117
+        %122 = OpLabel
+        %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+        %124 = OpLoad %v2float %123
+        %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+        %126 = OpLoad %v2float %125
+        %127 = OpExtInst %v2float %1 NMin %124 %126
+        %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+               OpStore %128 %127
+               OpBranch %118
+        %118 = OpLabel
+        %129 = OpLoad %int %i
+        %130 = OpIAdd %int %129 %int_1
+               OpStore %i %130
+        %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+        %132 = OpLoad %float %131
+        %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+        %134 = OpLoad %float %133
+        %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+        %136 = OpLoad %float %135
+        %137 = OpExtInst %float %1 NClamp %132 %134 %136
+        %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+               OpStore %138 %137
+               OpBranch %116
+        %117 = OpLabel
+               OpReturn
+               OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index d04adae4..9d7939c5 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -4935,14 +4935,27 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		break;
 
 	case GLSLstd450NMin:
-		emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmin");
-		break;
 	case GLSLstd450NMax:
-		emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmax");
+	{
+		emit_nminmax_op(result_type, id, args[0], args[1], op);
 		break;
+	}
+
 	case GLSLstd450NClamp:
-		emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nclamp");
+	{
+		// Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
+		// IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
+		uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
+		if (!max_id)
+			max_id = ir.increase_bound_by(1);
+
+		// Inherit precision qualifiers.
+		ir.meta[max_id] = ir.meta[id];
+
+		emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
+		emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
 		break;
+	}
 
 	default:
 		statement("// unimplemented GLSL op ", eop);
@@ -4950,6 +4963,35 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 }
 
+void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
+{
+	// Emulates GLSL.std.450 NMin/NMax with isnan() tests, a plain min()/max() and two selects; the final value is emitted as id.
+	uint32_t &ids = extra_sub_expressions[id]; // Base of the helper IDs for this result; cached so they are only allocated once.
+	if (!ids)
+	{
+		ids = ir.increase_bound_by(5); // One boolean type ID followed by four temporary expression IDs.
+		auto btype = get<SPIRType>(result_type); // Copy of the result type ...
+		btype.basetype = SPIRType::Boolean; // ... with its base type swapped to bool, used as the type of the isnan() results.
+		set<SPIRType>(ids, btype);
+	}
+
+	uint32_t btype_id = ids + 0; // The boolean type registered above.
+	uint32_t left_nan_id = ids + 1; // isnan(op0)
+	uint32_t right_nan_id = ids + 2; // isnan(op1)
+	uint32_t tmp_id = ids + 3; // Plain min(op0, op1) or max(op0, op1).
+	uint32_t mixed_first_id = ids + 4; // tmp_id with op1 selected where op0 is NaN.
+
+	// Copy the result's metadata onto the intermediates so they inherit its precision qualifiers.
+	ir.meta[tmp_id] = ir.meta[id];
+	ir.meta[mixed_first_id] = ir.meta[id];
+
+	emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
+	emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
+	emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); // Any op other than NMin emits max().
+	emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); // Where op0 is NaN, take op1 instead of the min/max.
+	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); // Where op1 is NaN, take op0; this select is applied last.
+}
+
 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                  uint32_t)
 {
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 57576cf2..5d773195 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -18,6 +18,7 @@
 #define SPIRV_CROSS_GLSL_HPP
 
 #include "spirv_cross.hpp"
+#include "GLSL.std.450.h"
 #include <sstream>
 #include <unordered_map>
 #include <unordered_set>
@@ -421,6 +422,7 @@ protected:
 	bool should_dereference(uint32_t id);
 	bool should_forward(uint32_t id);
 	void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
+	void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op);
 	bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp);
 	void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                             uint32_t op3, const char *op);
@@ -552,6 +554,11 @@ protected:
 	std::vector<std::string> forced_extensions;
 	std::vector<std::string> header_lines;
 
+	// Used when expressions emit extra opcodes with their own unique IDs,
+	// and we need to reuse the IDs across recompilation loops.
+	// Currently used by NMin/Max/Clamp implementations.
+	std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
+
 	uint32_t statement_count;
 
 	inline bool is_legacy() const
author	Hans-Kristian Arntzen <post@arntzen-software.no>	2019-03-21 19:17:28 +0300
committer	GitHub <noreply@github.com>	2019-03-21 19:17:28 +0300
commit	9dbb25783fb5625a67a84b0ef2842b0ed27150bb (patch)
tree	36a884f438cc96a50f393c2c95d6d8c7d677044c
parent	5dacfa9dc20d0380a67cd4f84dd17cfd8f96fd5d (diff)
parent	2a0365c813a453c0232a719a9f41759cf95dded0 (diff)