Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/KhronosGroup/SPIRV-Cross.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Hans-Kristian Arntzen <post@arntzen-software.no> 2019-03-21 19:17:28 +0300
committer GitHub <noreply@github.com> 2019-03-21 19:17:28 +0300
commit 9dbb25783fb5625a67a84b0ef2842b0ed27150bb (patch)
tree 36a884f438cc96a50f393c2c95d6d8c7d677044c
parent 5dacfa9dc20d0380a67cd4f84dd17cfd8f96fd5d (diff)
parent 2a0365c813a453c0232a719a9f41759cf95dded0 (diff)
Merge pull request #900 from KhronosGroup/fix-899 2019-03-22
GLSL/HLSL: Implement NMin/NMax/NClamp.
-rw-r--r-- reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp 87
-rw-r--r-- reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp 47
-rw-r--r-- reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp 84
-rw-r--r-- reference/shaders/asm/comp/nmin-max-clamp.asm.comp 44
-rw-r--r-- shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp 203
-rw-r--r-- shaders/asm/comp/nmin-max-clamp.asm.comp 203
-rw-r--r-- spirv_glsl.cpp 50
-rw-r--r-- spirv_glsl.hpp 7
8 files changed, 721 insertions, 4 deletions
diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..7acd67d0
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,87 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))));
+ bool2 _146 = isnan(asfloat(_4.Load2(56)));
+ bool2 _147 = isnan(asfloat(_4.Load2(104)));
+ float2 _148 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _149 = float2(_146.x ? asfloat(_4.Load2(104)).x : _148.x, _146.y ? asfloat(_4.Load2(104)).y : _148.y);
+ _4.Store2(8, asuint(float2(_147.x ? asfloat(_4.Load2(56)).x : _149.x, _147.y ? asfloat(_4.Load2(56)).y : _149.y)));
+ bool3 _151 = isnan(asfloat(_4.Load3(64)));
+ bool3 _152 = isnan(asfloat(_4.Load3(112)));
+ float3 _153 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+ float3 _154 = float3(_151.x ? asfloat(_4.Load3(112)).x : _153.x, _151.y ? asfloat(_4.Load3(112)).y : _153.y, _151.z ? asfloat(_4.Load3(112)).z : _153.z);
+ _4.Store3(16, asuint(float3(_152.x ? asfloat(_4.Load3(64)).x : _154.x, _152.y ? asfloat(_4.Load3(64)).y : _154.y, _152.z ? asfloat(_4.Load3(64)).z : _154.z)));
+ bool4 _156 = isnan(asfloat(_4.Load4(80)));
+ bool4 _157 = isnan(asfloat(_4.Load4(128)));
+ float4 _158 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+ float4 _159 = float4(_156.x ? asfloat(_4.Load4(128)).x : _158.x, _156.y ? asfloat(_4.Load4(128)).y : _158.y, _156.z ? asfloat(_4.Load4(128)).z : _158.z, _156.w ? asfloat(_4.Load4(128)).w : _158.w);
+ _4.Store4(32, asuint(float4(_157.x ? asfloat(_4.Load4(80)).x : _159.x, _157.y ? asfloat(_4.Load4(80)).y : _159.y, _157.z ? asfloat(_4.Load4(80)).z : _159.z, _157.w ? asfloat(_4.Load4(80)).w : _159.w)));
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))));
+ bool2 _166 = isnan(asfloat(_4.Load2(56)));
+ bool2 _167 = isnan(asfloat(_4.Load2(104)));
+ float2 _168 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _169 = float2(_166.x ? asfloat(_4.Load2(104)).x : _168.x, _166.y ? asfloat(_4.Load2(104)).y : _168.y);
+ _4.Store2(8, asuint(float2(_167.x ? asfloat(_4.Load2(56)).x : _169.x, _167.y ? asfloat(_4.Load2(56)).y : _169.y)));
+ bool3 _171 = isnan(asfloat(_4.Load3(64)));
+ bool3 _172 = isnan(asfloat(_4.Load3(112)));
+ float3 _173 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+ float3 _174 = float3(_171.x ? asfloat(_4.Load3(112)).x : _173.x, _171.y ? asfloat(_4.Load3(112)).y : _173.y, _171.z ? asfloat(_4.Load3(112)).z : _173.z);
+ _4.Store3(16, asuint(float3(_172.x ? asfloat(_4.Load3(64)).x : _174.x, _172.y ? asfloat(_4.Load3(64)).y : _174.y, _172.z ? asfloat(_4.Load3(64)).z : _174.z)));
+ bool4 _176 = isnan(asfloat(_4.Load4(80)));
+ bool4 _177 = isnan(asfloat(_4.Load4(128)));
+ float4 _178 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+ float4 _179 = float4(_176.x ? asfloat(_4.Load4(128)).x : _178.x, _176.y ? asfloat(_4.Load4(128)).y : _178.y, _176.z ? asfloat(_4.Load4(128)).z : _178.z, _176.w ? asfloat(_4.Load4(128)).w : _178.w);
+ _4.Store4(32, asuint(float4(_177.x ? asfloat(_4.Load4(80)).x : _179.x, _177.y ? asfloat(_4.Load4(80)).y : _179.y, _177.z ? asfloat(_4.Load4(80)).z : _179.z, _177.w ? asfloat(_4.Load4(80)).w : _179.w)));
+ float _180 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48))));
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _180 : (isnan(_180) ? asfloat(_4.Load(96)) : min(_180, asfloat(_4.Load(96))))));
+ bool2 _193 = isnan(asfloat(_4.Load2(8)));
+ bool2 _194 = isnan(asfloat(_4.Load2(56)));
+ float2 _195 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)));
+ float2 _196 = float2(_193.x ? asfloat(_4.Load2(56)).x : _195.x, _193.y ? asfloat(_4.Load2(56)).y : _195.y);
+ float2 _191 = float2(_194.x ? asfloat(_4.Load2(8)).x : _196.x, _194.y ? asfloat(_4.Load2(8)).y : _196.y);
+ bool2 _198 = isnan(_191);
+ bool2 _199 = isnan(asfloat(_4.Load2(104)));
+ float2 _200 = min(_191, asfloat(_4.Load2(104)));
+ float2 _201 = float2(_198.x ? asfloat(_4.Load2(104)).x : _200.x, _198.y ? asfloat(_4.Load2(104)).y : _200.y);
+ _4.Store2(8, asuint(float2(_199.x ? _191.x : _201.x, _199.y ? _191.y : _201.y)));
+ bool3 _204 = isnan(asfloat(_4.Load3(16)));
+ bool3 _205 = isnan(asfloat(_4.Load3(64)));
+ float3 _206 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)));
+ float3 _207 = float3(_204.x ? asfloat(_4.Load3(64)).x : _206.x, _204.y ? asfloat(_4.Load3(64)).y : _206.y, _204.z ? asfloat(_4.Load3(64)).z : _206.z);
+ float3 _202 = float3(_205.x ? asfloat(_4.Load3(16)).x : _207.x, _205.y ? asfloat(_4.Load3(16)).y : _207.y, _205.z ? asfloat(_4.Load3(16)).z : _207.z);
+ bool3 _209 = isnan(_202);
+ bool3 _210 = isnan(asfloat(_4.Load3(112)));
+ float3 _211 = min(_202, asfloat(_4.Load3(112)));
+ float3 _212 = float3(_209.x ? asfloat(_4.Load3(112)).x : _211.x, _209.y ? asfloat(_4.Load3(112)).y : _211.y, _209.z ? asfloat(_4.Load3(112)).z : _211.z);
+ _4.Store3(16, asuint(float3(_210.x ? _202.x : _212.x, _210.y ? _202.y : _212.y, _210.z ? _202.z : _212.z)));
+ bool4 _215 = isnan(asfloat(_4.Load4(32)));
+ bool4 _216 = isnan(asfloat(_4.Load4(80)));
+ float4 _217 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)));
+ float4 _218 = float4(_215.x ? asfloat(_4.Load4(80)).x : _217.x, _215.y ? asfloat(_4.Load4(80)).y : _217.y, _215.z ? asfloat(_4.Load4(80)).z : _217.z, _215.w ? asfloat(_4.Load4(80)).w : _217.w);
+ float4 _213 = float4(_216.x ? asfloat(_4.Load4(32)).x : _218.x, _216.y ? asfloat(_4.Load4(32)).y : _218.y, _216.z ? asfloat(_4.Load4(32)).z : _218.z, _216.w ? asfloat(_4.Load4(32)).w : _218.w);
+ bool4 _220 = isnan(_213);
+ bool4 _221 = isnan(asfloat(_4.Load4(128)));
+ float4 _222 = min(_213, asfloat(_4.Load4(128)));
+ float4 _223 = float4(_220.x ? asfloat(_4.Load4(128)).x : _222.x, _220.y ? asfloat(_4.Load4(128)).y : _222.y, _220.z ? asfloat(_4.Load4(128)).z : _222.z, _220.w ? asfloat(_4.Load4(128)).w : _222.w);
+ _4.Store4(32, asuint(float4(_221.x ? _213.x : _223.x, _221.y ? _213.y : _223.y, _221.z ? _213.z : _223.z, _221.w ? _213.w : _223.w)));
+ for (int _139 = 0; _139 < 2; )
+ {
+ bool2 _225 = isnan(asfloat(_4.Load2(56)));
+ bool2 _226 = isnan(asfloat(_4.Load2(104)));
+ float2 _227 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _228 = float2(_225.x ? asfloat(_4.Load2(104)).x : _227.x, _225.y ? asfloat(_4.Load2(104)).y : _227.y);
+ _4.Store2(8, asuint(float2(_226.x ? asfloat(_4.Load2(56)).x : _228.x, _226.y ? asfloat(_4.Load2(56)).y : _228.y)));
+ float _229 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56))));
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _229 : (isnan(_229) ? asfloat(_4.Load(60)) : min(_229, asfloat(_4.Load(60))))));
+ _139++;
+ continue;
+ }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+ comp_main();
+}
diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..5ef1bc91
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,47 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+ float a1;
+ vec2 a2;
+ vec3 a3;
+ vec4 a4;
+ float b1;
+ vec2 b2;
+ vec3 b3;
+ vec4 b4;
+ float c1;
+ vec2 c2;
+ vec3 c3;
+ vec4 c4;
+} _4;
+
+void main()
+{
+ _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1));
+ _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+ _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+ _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1));
+ _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+ _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+ float _180 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1));
+ _4.a1 = isnan(_4.c1) ? _180 : (isnan(_180) ? _4.c1 : min(_180, _4.c1));
+ vec2 _191 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2));
+ _4.a2 = mix(mix(min(_191, _4.c2), _4.c2, isnan(_191)), _191, isnan(_4.c2));
+ vec3 _202 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3));
+ _4.a3 = mix(mix(min(_202, _4.c3), _4.c3, isnan(_202)), _202, isnan(_4.c3));
+ vec4 _213 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4));
+ _4.a4 = mix(mix(min(_213, _4.c4), _4.c4, isnan(_213)), _213, isnan(_4.c4));
+ for (int _139 = 0; _139 < 2; )
+ {
+ _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ float _229 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x));
+ _4.a1 = isnan(_4.b2.y) ? _229 : (isnan(_229) ? _4.b2.y : min(_229, _4.b2.y));
+ _139++;
+ continue;
+ }
+}
+
diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..a6e88684
--- /dev/null
+++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,84 @@
+RWByteAddressBuffer _4 : register(u0);
+
+void comp_main()
+{
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))));
+ bool2 _145 = isnan(asfloat(_4.Load2(56)));
+ bool2 _146 = isnan(asfloat(_4.Load2(104)));
+ float2 _147 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _148 = float2(_145.x ? asfloat(_4.Load2(104)).x : _147.x, _145.y ? asfloat(_4.Load2(104)).y : _147.y);
+ _4.Store2(8, asuint(float2(_146.x ? asfloat(_4.Load2(56)).x : _148.x, _146.y ? asfloat(_4.Load2(56)).y : _148.y)));
+ bool3 _150 = isnan(asfloat(_4.Load3(64)));
+ bool3 _151 = isnan(asfloat(_4.Load3(112)));
+ float3 _152 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+ float3 _153 = float3(_150.x ? asfloat(_4.Load3(112)).x : _152.x, _150.y ? asfloat(_4.Load3(112)).y : _152.y, _150.z ? asfloat(_4.Load3(112)).z : _152.z);
+ _4.Store3(16, asuint(float3(_151.x ? asfloat(_4.Load3(64)).x : _153.x, _151.y ? asfloat(_4.Load3(64)).y : _153.y, _151.z ? asfloat(_4.Load3(64)).z : _153.z)));
+ bool4 _155 = isnan(asfloat(_4.Load4(80)));
+ bool4 _156 = isnan(asfloat(_4.Load4(128)));
+ float4 _157 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+ float4 _158 = float4(_155.x ? asfloat(_4.Load4(128)).x : _157.x, _155.y ? asfloat(_4.Load4(128)).y : _157.y, _155.z ? asfloat(_4.Load4(128)).z : _157.z, _155.w ? asfloat(_4.Load4(128)).w : _157.w);
+ _4.Store4(32, asuint(float4(_156.x ? asfloat(_4.Load4(80)).x : _158.x, _156.y ? asfloat(_4.Load4(80)).y : _158.y, _156.z ? asfloat(_4.Load4(80)).z : _158.z, _156.w ? asfloat(_4.Load4(80)).w : _158.w)));
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))));
+ bool2 _165 = isnan(asfloat(_4.Load2(56)));
+ bool2 _166 = isnan(asfloat(_4.Load2(104)));
+ float2 _167 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _168 = float2(_165.x ? asfloat(_4.Load2(104)).x : _167.x, _165.y ? asfloat(_4.Load2(104)).y : _167.y);
+ _4.Store2(8, asuint(float2(_166.x ? asfloat(_4.Load2(56)).x : _168.x, _166.y ? asfloat(_4.Load2(56)).y : _168.y)));
+ bool3 _170 = isnan(asfloat(_4.Load3(64)));
+ bool3 _171 = isnan(asfloat(_4.Load3(112)));
+ float3 _172 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112)));
+ float3 _173 = float3(_170.x ? asfloat(_4.Load3(112)).x : _172.x, _170.y ? asfloat(_4.Load3(112)).y : _172.y, _170.z ? asfloat(_4.Load3(112)).z : _172.z);
+ _4.Store3(16, asuint(float3(_171.x ? asfloat(_4.Load3(64)).x : _173.x, _171.y ? asfloat(_4.Load3(64)).y : _173.y, _171.z ? asfloat(_4.Load3(64)).z : _173.z)));
+ bool4 _175 = isnan(asfloat(_4.Load4(80)));
+ bool4 _176 = isnan(asfloat(_4.Load4(128)));
+ float4 _177 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128)));
+ float4 _178 = float4(_175.x ? asfloat(_4.Load4(128)).x : _177.x, _175.y ? asfloat(_4.Load4(128)).y : _177.y, _175.z ? asfloat(_4.Load4(128)).z : _177.z, _175.w ? asfloat(_4.Load4(128)).w : _177.w);
+ _4.Store4(32, asuint(float4(_176.x ? asfloat(_4.Load4(80)).x : _178.x, _176.y ? asfloat(_4.Load4(80)).y : _178.y, _176.z ? asfloat(_4.Load4(80)).z : _178.z, _176.w ? asfloat(_4.Load4(80)).w : _178.w)));
+ float _179 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48))));
+ _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _179 : (isnan(_179) ? asfloat(_4.Load(96)) : min(_179, asfloat(_4.Load(96))))));
+ bool2 _192 = isnan(asfloat(_4.Load2(8)));
+ bool2 _193 = isnan(asfloat(_4.Load2(56)));
+ float2 _194 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)));
+ float2 _195 = float2(_192.x ? asfloat(_4.Load2(56)).x : _194.x, _192.y ? asfloat(_4.Load2(56)).y : _194.y);
+ float2 _190 = float2(_193.x ? asfloat(_4.Load2(8)).x : _195.x, _193.y ? asfloat(_4.Load2(8)).y : _195.y);
+ bool2 _197 = isnan(_190);
+ bool2 _198 = isnan(asfloat(_4.Load2(104)));
+ float2 _199 = min(_190, asfloat(_4.Load2(104)));
+ float2 _200 = float2(_197.x ? asfloat(_4.Load2(104)).x : _199.x, _197.y ? asfloat(_4.Load2(104)).y : _199.y);
+ _4.Store2(8, asuint(float2(_198.x ? _190.x : _200.x, _198.y ? _190.y : _200.y)));
+ bool3 _203 = isnan(asfloat(_4.Load3(16)));
+ bool3 _204 = isnan(asfloat(_4.Load3(64)));
+ float3 _205 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)));
+ float3 _206 = float3(_203.x ? asfloat(_4.Load3(64)).x : _205.x, _203.y ? asfloat(_4.Load3(64)).y : _205.y, _203.z ? asfloat(_4.Load3(64)).z : _205.z);
+ float3 _201 = float3(_204.x ? asfloat(_4.Load3(16)).x : _206.x, _204.y ? asfloat(_4.Load3(16)).y : _206.y, _204.z ? asfloat(_4.Load3(16)).z : _206.z);
+ bool3 _208 = isnan(_201);
+ bool3 _209 = isnan(asfloat(_4.Load3(112)));
+ float3 _210 = min(_201, asfloat(_4.Load3(112)));
+ float3 _211 = float3(_208.x ? asfloat(_4.Load3(112)).x : _210.x, _208.y ? asfloat(_4.Load3(112)).y : _210.y, _208.z ? asfloat(_4.Load3(112)).z : _210.z);
+ _4.Store3(16, asuint(float3(_209.x ? _201.x : _211.x, _209.y ? _201.y : _211.y, _209.z ? _201.z : _211.z)));
+ bool4 _214 = isnan(asfloat(_4.Load4(32)));
+ bool4 _215 = isnan(asfloat(_4.Load4(80)));
+ float4 _216 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)));
+ float4 _217 = float4(_214.x ? asfloat(_4.Load4(80)).x : _216.x, _214.y ? asfloat(_4.Load4(80)).y : _216.y, _214.z ? asfloat(_4.Load4(80)).z : _216.z, _214.w ? asfloat(_4.Load4(80)).w : _216.w);
+ float4 _212 = float4(_215.x ? asfloat(_4.Load4(32)).x : _217.x, _215.y ? asfloat(_4.Load4(32)).y : _217.y, _215.z ? asfloat(_4.Load4(32)).z : _217.z, _215.w ? asfloat(_4.Load4(32)).w : _217.w);
+ bool4 _219 = isnan(_212);
+ bool4 _220 = isnan(asfloat(_4.Load4(128)));
+ float4 _221 = min(_212, asfloat(_4.Load4(128)));
+ float4 _222 = float4(_219.x ? asfloat(_4.Load4(128)).x : _221.x, _219.y ? asfloat(_4.Load4(128)).y : _221.y, _219.z ? asfloat(_4.Load4(128)).z : _221.z, _219.w ? asfloat(_4.Load4(128)).w : _221.w);
+ _4.Store4(32, asuint(float4(_220.x ? _212.x : _222.x, _220.y ? _212.y : _222.y, _220.z ? _212.z : _222.z, _220.w ? _212.w : _222.w)));
+ float _223;
+ for (int i = 0; i < 2; i++, _223 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56)))), _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _223 : (isnan(_223) ? asfloat(_4.Load(60)) : min(_223, asfloat(_4.Load(60)))))))
+ {
+ bool2 _235 = isnan(asfloat(_4.Load2(56)));
+ bool2 _236 = isnan(asfloat(_4.Load2(104)));
+ float2 _237 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104)));
+ float2 _238 = float2(_235.x ? asfloat(_4.Load2(104)).x : _237.x, _235.y ? asfloat(_4.Load2(104)).y : _237.y);
+ _4.Store2(8, asuint(float2(_236.x ? asfloat(_4.Load2(56)).x : _238.x, _236.y ? asfloat(_4.Load2(56)).y : _238.y)));
+ }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+ comp_main();
+}
diff --git a/reference/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..54c452c5
--- /dev/null
+++ b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,44 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+ float a1;
+ vec2 a2;
+ vec3 a3;
+ vec4 a4;
+ float b1;
+ vec2 b2;
+ vec3 b3;
+ vec4 b4;
+ float c1;
+ vec2 c2;
+ vec3 c3;
+ vec4 c4;
+} _4;
+
+void main()
+{
+ _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1));
+ _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+ _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+ _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1));
+ _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3));
+ _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4));
+ float _179 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1));
+ _4.a1 = isnan(_4.c1) ? _179 : (isnan(_179) ? _4.c1 : min(_179, _4.c1));
+ vec2 _190 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2));
+ _4.a2 = mix(mix(min(_190, _4.c2), _4.c2, isnan(_190)), _190, isnan(_4.c2));
+ vec3 _201 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3));
+ _4.a3 = mix(mix(min(_201, _4.c3), _4.c3, isnan(_201)), _201, isnan(_4.c3));
+ vec4 _212 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4));
+ _4.a4 = mix(mix(min(_212, _4.c4), _4.c4, isnan(_212)), _212, isnan(_4.c4));
+ float _223;
+ for (int i = 0; i < 2; i++, _223 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x)), _4.a1 = isnan(_4.b2.y) ? _223 : (isnan(_223) ? _4.b2.y : min(_223, _4.b2.y)))
+ {
+ _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2));
+ }
+}
+
diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main"
+ OpExecutionMode %main LocalSize 1 1 1
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %SSBO "SSBO"
+ OpMemberName %SSBO 0 "a1"
+ OpMemberName %SSBO 1 "a2"
+ OpMemberName %SSBO 2 "a3"
+ OpMemberName %SSBO 3 "a4"
+ OpMemberName %SSBO 4 "b1"
+ OpMemberName %SSBO 5 "b2"
+ OpMemberName %SSBO 6 "b3"
+ OpMemberName %SSBO 7 "b4"
+ OpMemberName %SSBO 8 "c1"
+ OpMemberName %SSBO 9 "c2"
+ OpMemberName %SSBO 10 "c3"
+ OpMemberName %SSBO 11 "c4"
+ OpName %_ ""
+ OpName %i "i"
+ OpMemberDecorate %SSBO 0 Offset 0
+ OpMemberDecorate %SSBO 1 Offset 8
+ OpMemberDecorate %SSBO 2 Offset 16
+ OpMemberDecorate %SSBO 3 Offset 32
+ OpMemberDecorate %SSBO 4 Offset 48
+ OpMemberDecorate %SSBO 5 Offset 56
+ OpMemberDecorate %SSBO 6 Offset 64
+ OpMemberDecorate %SSBO 7 Offset 80
+ OpMemberDecorate %SSBO 8 Offset 96
+ OpMemberDecorate %SSBO 9 Offset 104
+ OpMemberDecorate %SSBO 10 Offset 112
+ OpMemberDecorate %SSBO 11 Offset 128
+ OpDecorate %SSBO BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 0
+ %void = OpTypeVoid
+ %7 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v2float = OpTypeVector %float 2
+ %v3float = OpTypeVector %float 3
+ %v4float = OpTypeVector %float 4
+ %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+ %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %int_8 = OpConstant %int 8
+ %int_1 = OpConstant %int 1
+ %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+ %int_9 = OpConstant %int 9
+ %int_2 = OpConstant %int 2
+ %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+ %int_10 = OpConstant %int 10
+ %int_3 = OpConstant %int 3
+ %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+ %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+ %bool = OpTypeBool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %main = OpFunction %void None %7
+ %35 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %37 = OpLoad %float %36
+ %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %39 = OpLoad %float %38
+ %40 = OpExtInst %float %1 NMin %37 %39
+ %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %41 %40
+ %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %43 = OpLoad %v2float %42
+ %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %45 = OpLoad %v2float %44
+ %46 = OpExtInst %v2float %1 NMin %43 %45
+ %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %47 %46
+ %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %49 = OpLoad %v3float %48
+ %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %51 = OpLoad %v3float %50
+ %52 = OpExtInst %v3float %1 NMin %49 %51
+ %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %53 %52
+ %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %55 = OpLoad %v4float %54
+ %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %57 = OpLoad %v4float %56
+ %58 = OpExtInst %v4float %1 NMin %55 %57
+ %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %59 %58
+ %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %61 = OpLoad %float %60
+ %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %63 = OpLoad %float %62
+ %64 = OpExtInst %float %1 NMax %61 %63
+ %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %65 %64
+ %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %67 = OpLoad %v2float %66
+ %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %69 = OpLoad %v2float %68
+ %70 = OpExtInst %v2float %1 NMax %67 %69
+ %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %71 %70
+ %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %73 = OpLoad %v3float %72
+ %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %75 = OpLoad %v3float %74
+ %76 = OpExtInst %v3float %1 NMax %73 %75
+ %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %77 %76
+ %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %79 = OpLoad %v4float %78
+ %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %81 = OpLoad %v4float %80
+ %82 = OpExtInst %v4float %1 NMax %79 %81
+ %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %83 %82
+ %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %87 = OpLoad %float %86
+ %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %89 = OpLoad %float %88
+ %90 = OpExtInst %float %1 NClamp %85 %87 %89
+ %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %91 %90
+ %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ %93 = OpLoad %v2float %92
+ %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %95 = OpLoad %v2float %94
+ %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %97 = OpLoad %v2float %96
+ %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+ %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %99 %98
+ %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ %101 = OpLoad %v3float %100
+ %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %103 = OpLoad %v3float %102
+ %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %105 = OpLoad %v3float %104
+ %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+ %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %107 %106
+ %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ %109 = OpLoad %v4float %108
+ %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %111 = OpLoad %v4float %110
+ %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %113 = OpLoad %v4float %112
+ %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+ %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %115 %114
+ OpStore %i %int_0
+ OpBranch %116
+ %116 = OpLabel
+ OpLoopMerge %117 %118 None
+ OpBranch %119
+ %119 = OpLabel
+ %120 = OpLoad %int %i
+ %121 = OpSLessThan %bool %120 %int_2
+ OpBranchConditional %121 %122 %117
+ %122 = OpLabel
+ %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %124 = OpLoad %v2float %123
+ %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %126 = OpLoad %v2float %125
+ %127 = OpExtInst %v2float %1 NMin %124 %126
+ %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %128 %127
+ OpBranch %118
+ %118 = OpLabel
+ %129 = OpLoad %int %i
+ %130 = OpIAdd %int %129 %int_1
+ OpStore %i %130
+ %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %132 = OpLoad %float %131
+ %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+ %134 = OpLoad %float %133
+ %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+ %136 = OpLoad %float %135
+ %137 = OpExtInst %float %1 NClamp %132 %134 %136
+ %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %138 %137
+ OpBranch %116
+ %117 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders/asm/comp/nmin-max-clamp.asm.comp b/shaders/asm/comp/nmin-max-clamp.asm.comp
new file mode 100644
index 00000000..6c060eed
--- /dev/null
+++ b/shaders/asm/comp/nmin-max-clamp.asm.comp
@@ -0,0 +1,203 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 139
+; Schema: 0
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint GLCompute %main "main"
+ OpExecutionMode %main LocalSize 1 1 1
+ OpSource GLSL 450
+ OpName %main "main"
+ OpName %SSBO "SSBO"
+ OpMemberName %SSBO 0 "a1"
+ OpMemberName %SSBO 1 "a2"
+ OpMemberName %SSBO 2 "a3"
+ OpMemberName %SSBO 3 "a4"
+ OpMemberName %SSBO 4 "b1"
+ OpMemberName %SSBO 5 "b2"
+ OpMemberName %SSBO 6 "b3"
+ OpMemberName %SSBO 7 "b4"
+ OpMemberName %SSBO 8 "c1"
+ OpMemberName %SSBO 9 "c2"
+ OpMemberName %SSBO 10 "c3"
+ OpMemberName %SSBO 11 "c4"
+ OpName %_ ""
+ OpName %i "i"
+ OpMemberDecorate %SSBO 0 Offset 0
+ OpMemberDecorate %SSBO 1 Offset 8
+ OpMemberDecorate %SSBO 2 Offset 16
+ OpMemberDecorate %SSBO 3 Offset 32
+ OpMemberDecorate %SSBO 4 Offset 48
+ OpMemberDecorate %SSBO 5 Offset 56
+ OpMemberDecorate %SSBO 6 Offset 64
+ OpMemberDecorate %SSBO 7 Offset 80
+ OpMemberDecorate %SSBO 8 Offset 96
+ OpMemberDecorate %SSBO 9 Offset 104
+ OpMemberDecorate %SSBO 10 Offset 112
+ OpMemberDecorate %SSBO 11 Offset 128
+ OpDecorate %SSBO BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 0
+ %void = OpTypeVoid
+ %7 = OpTypeFunction %void
+ %float = OpTypeFloat 32
+ %v2float = OpTypeVector %float 2
+ %v3float = OpTypeVector %float 3
+ %v4float = OpTypeVector %float 4
+ %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+ %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %int_4 = OpConstant %int 4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %int_8 = OpConstant %int 8
+ %int_1 = OpConstant %int 1
+ %int_5 = OpConstant %int 5
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+ %int_9 = OpConstant %int 9
+ %int_2 = OpConstant %int 2
+ %int_6 = OpConstant %int 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+ %int_10 = OpConstant %int 10
+ %int_3 = OpConstant %int 3
+ %int_7 = OpConstant %int 7
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+ %int_11 = OpConstant %int 11
+%_ptr_Function_int = OpTypePointer Function %int
+ %bool = OpTypeBool
+ %uint = OpTypeInt 32 0
+ %uint_0 = OpConstant %uint 0
+ %uint_1 = OpConstant %uint 1
+ %main = OpFunction %void None %7
+ %35 = OpLabel
+ %i = OpVariable %_ptr_Function_int Function
+ %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %37 = OpLoad %float %36
+ %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %39 = OpLoad %float %38
+ %40 = OpExtInst %float %1 NMin %37 %39
+ %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %41 %40
+ %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %43 = OpLoad %v2float %42
+ %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %45 = OpLoad %v2float %44
+ %46 = OpExtInst %v2float %1 NMin %43 %45
+ %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %47 %46
+ %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %49 = OpLoad %v3float %48
+ %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %51 = OpLoad %v3float %50
+ %52 = OpExtInst %v3float %1 NMin %49 %51
+ %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %53 %52
+ %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %55 = OpLoad %v4float %54
+ %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %57 = OpLoad %v4float %56
+ %58 = OpExtInst %v4float %1 NMin %55 %57
+ %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %59 %58
+ %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %61 = OpLoad %float %60
+ %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %63 = OpLoad %float %62
+ %64 = OpExtInst %float %1 NMax %61 %63
+ %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %65 %64
+ %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %67 = OpLoad %v2float %66
+ %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %69 = OpLoad %v2float %68
+ %70 = OpExtInst %v2float %1 NMax %67 %69
+ %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %71 %70
+ %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %73 = OpLoad %v3float %72
+ %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %75 = OpLoad %v3float %74
+ %76 = OpExtInst %v3float %1 NMax %73 %75
+ %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %77 %76
+ %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %79 = OpLoad %v4float %78
+ %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %81 = OpLoad %v4float %80
+ %82 = OpExtInst %v4float %1 NMax %79 %81
+ %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %83 %82
+ %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %85 = OpLoad %float %84
+ %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4
+ %87 = OpLoad %float %86
+ %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8
+ %89 = OpLoad %float %88
+ %90 = OpExtInst %float %1 NClamp %85 %87 %89
+ %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %91 %90
+ %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ %93 = OpLoad %v2float %92
+ %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %95 = OpLoad %v2float %94
+ %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %97 = OpLoad %v2float %96
+ %98 = OpExtInst %v2float %1 NClamp %93 %95 %97
+ %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %99 %98
+ %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ %101 = OpLoad %v3float %100
+ %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6
+ %103 = OpLoad %v3float %102
+ %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10
+ %105 = OpLoad %v3float %104
+ %106 = OpExtInst %v3float %1 NClamp %101 %103 %105
+ %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2
+ OpStore %107 %106
+ %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ %109 = OpLoad %v4float %108
+ %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7
+ %111 = OpLoad %v4float %110
+ %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11
+ %113 = OpLoad %v4float %112
+ %114 = OpExtInst %v4float %1 NClamp %109 %111 %113
+ %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3
+ OpStore %115 %114
+ OpStore %i %int_0
+ OpBranch %116
+ %116 = OpLabel
+ OpLoopMerge %117 %118 None
+ OpBranch %119
+ %119 = OpLabel
+ %120 = OpLoad %int %i
+ %121 = OpSLessThan %bool %120 %int_2
+ OpBranchConditional %121 %122 %117
+ %122 = OpLabel
+ %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5
+ %124 = OpLoad %v2float %123
+ %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9
+ %126 = OpLoad %v2float %125
+ %127 = OpExtInst %v2float %1 NMin %124 %126
+ %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1
+ OpStore %128 %127
+ OpBranch %118
+ %118 = OpLabel
+ %129 = OpLoad %int %i
+ %130 = OpIAdd %int %129 %int_1
+ OpStore %i %130
+ %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ %132 = OpLoad %float %131
+ %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0
+ %134 = OpLoad %float %133
+ %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1
+ %136 = OpLoad %float %135
+ %137 = OpExtInst %float %1 NClamp %132 %134 %136
+ %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0
+ OpStore %138 %137
+ OpBranch %116
+ %117 = OpLabel
+ OpReturn
+ OpFunctionEnd
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index d04adae4..9d7939c5 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -4935,14 +4935,27 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
case GLSLstd450NMin:
- emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmin");
- break;
case GLSLstd450NMax:
- emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmax");
+ {
+ emit_nminmax_op(result_type, id, args[0], args[1], op);
break;
+ }
+
case GLSLstd450NClamp:
- emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nclamp");
+ {
+ // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
+ // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
+ uint32_t &max_id = extra_sub_expressions[id | 0x80000000u];
+ if (!max_id)
+ max_id = ir.increase_bound_by(1);
+
+ // Inherit precision qualifiers.
+ ir.meta[max_id] = ir.meta[id];
+
+ emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
+ emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
break;
+ }
default:
statement("// unimplemented GLSL op ", eop);
@@ -4950,6 +4963,35 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
}
}
+void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
+{
+ // Need to emulate this call: emit isnan() on both operands, a plain min()/max(), and two selects which fall back to the other operand when one operand is NaN.
+ uint32_t &ids = extra_sub_expressions[id]; // First helper ID cached for this result ID; zero means nothing has been allocated for it yet.
+ if (!ids)
+ {
+ ids = ir.increase_bound_by(5); // Reserve five IDs (used below as ids + 0 .. ids + 4): one type plus four temporaries.
+ auto btype = get<SPIRType>(result_type); // Work on a copy so the result type itself is not modified.
+ btype.basetype = SPIRType::Boolean; // Same type description as the result, but with a boolean base type for the isnan() results.
+ set<SPIRType>(ids, btype); // Register the boolean type under the first reserved ID.
+ }
+
+ uint32_t btype_id = ids + 0; // Boolean type registered above.
+ uint32_t left_nan_id = ids + 1; // isnan(op0)
+ uint32_t right_nan_id = ids + 2; // isnan(op1)
+ uint32_t tmp_id = ids + 3; // Plain min()/max() of both operands.
+ uint32_t mixed_first_id = ids + 4; // Result after handling a NaN in op0.
+
+ // Inherit precision qualifiers: the two temporaries of the result type get a copy of the result ID's metadata.
+ ir.meta[tmp_id] = ir.meta[id];
+ ir.meta[mixed_first_id] = ir.meta[id];
+
+ emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
+ emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
+ emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); // Any op other than NMin is emitted as max().
+ emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); // Where op0 is NaN take op1, otherwise keep the min()/max() value.
+ emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); // Where op1 is NaN take op0, otherwise keep the previous selection; this is the final result.
+}
+
void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
uint32_t)
{
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 57576cf2..5d773195 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -18,6 +18,7 @@
#define SPIRV_CROSS_GLSL_HPP
#include "spirv_cross.hpp"
+#include "GLSL.std.450.h"
#include <sstream>
#include <unordered_map>
#include <unordered_set>
@@ -421,6 +422,7 @@ protected:
bool should_dereference(uint32_t id);
bool should_forward(uint32_t id);
void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
+ void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op);
bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp);
void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
uint32_t op3, const char *op);
@@ -552,6 +554,11 @@ protected:
std::vector<std::string> forced_extensions;
std::vector<std::string> header_lines;
+ // Used when expressions emit extra opcodes with their own unique IDs,
+ // and we need to reuse the IDs across recompilation loops.
+ // Currently used by NMin/Max/Clamp implementations.
+ std::unordered_map<uint32_t, uint32_t> extra_sub_expressions;
+
uint32_t statement_count;
inline bool is_legacy() const