From 2a0365c813a453c0232a719a9f41759cf95dded0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 21 Mar 2019 15:26:46 +0100 Subject: GLSL/HLSL: Implement NMin/NMax/NClamp. Need to emulate these calls for correctness. --- .../shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp | 87 +++++++++ .../opt/shaders/asm/comp/nmin-max-clamp.asm.comp | 47 +++++ .../shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp | 84 +++++++++ reference/shaders/asm/comp/nmin-max-clamp.asm.comp | 44 +++++ shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp | 203 +++++++++++++++++++++ shaders/asm/comp/nmin-max-clamp.asm.comp | 203 +++++++++++++++++++++ spirv_glsl.cpp | 50 ++++- spirv_glsl.hpp | 7 + 8 files changed, 721 insertions(+), 4 deletions(-) create mode 100644 reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp create mode 100644 reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp create mode 100644 reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp create mode 100644 reference/shaders/asm/comp/nmin-max-clamp.asm.comp create mode 100644 shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp create mode 100644 shaders/asm/comp/nmin-max-clamp.asm.comp diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..7acd67d0 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,87 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); + bool2 _146 = isnan(asfloat(_4.Load2(56))); + bool2 _147 = isnan(asfloat(_4.Load2(104))); + float2 _148 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _149 = float2(_146.x ? asfloat(_4.Load2(104)).x : _148.x, _146.y ? asfloat(_4.Load2(104)).y : _148.y); + _4.Store2(8, asuint(float2(_147.x ? asfloat(_4.Load2(56)).x : _149.x, _147.y ? asfloat(_4.Load2(56)).y : _149.y))); + bool3 _151 = isnan(asfloat(_4.Load3(64))); + bool3 _152 = isnan(asfloat(_4.Load3(112))); + float3 _153 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))); + float3 _154 = float3(_151.x ? asfloat(_4.Load3(112)).x : _153.x, _151.y ? asfloat(_4.Load3(112)).y : _153.y, _151.z ? asfloat(_4.Load3(112)).z : _153.z); + _4.Store3(16, asuint(float3(_152.x ? asfloat(_4.Load3(64)).x : _154.x, _152.y ? asfloat(_4.Load3(64)).y : _154.y, _152.z ? asfloat(_4.Load3(64)).z : _154.z))); + bool4 _156 = isnan(asfloat(_4.Load4(80))); + bool4 _157 = isnan(asfloat(_4.Load4(128))); + float4 _158 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))); + float4 _159 = float4(_156.x ? asfloat(_4.Load4(128)).x : _158.x, _156.y ? asfloat(_4.Load4(128)).y : _158.y, _156.z ? asfloat(_4.Load4(128)).z : _158.z, _156.w ? asfloat(_4.Load4(128)).w : _158.w); + _4.Store4(32, asuint(float4(_157.x ? asfloat(_4.Load4(80)).x : _159.x, _157.y ? asfloat(_4.Load4(80)).y : _159.y, _157.z ? asfloat(_4.Load4(80)).z : _159.z, _157.w ? asfloat(_4.Load4(80)).w : _159.w))); + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); + bool2 _166 = isnan(asfloat(_4.Load2(56))); + bool2 _167 = isnan(asfloat(_4.Load2(104))); + float2 _168 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _169 = float2(_166.x ? asfloat(_4.Load2(104)).x : _168.x, _166.y ? asfloat(_4.Load2(104)).y : _168.y); + _4.Store2(8, asuint(float2(_167.x ? asfloat(_4.Load2(56)).x : _169.x, _167.y ? asfloat(_4.Load2(56)).y : _169.y))); + bool3 _171 = isnan(asfloat(_4.Load3(64))); + bool3 _172 = isnan(asfloat(_4.Load3(112))); + float3 _173 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))); + float3 _174 = float3(_171.x ? asfloat(_4.Load3(112)).x : _173.x, _171.y ? asfloat(_4.Load3(112)).y : _173.y, _171.z ? asfloat(_4.Load3(112)).z : _173.z); + _4.Store3(16, asuint(float3(_172.x ? asfloat(_4.Load3(64)).x : _174.x, _172.y ? asfloat(_4.Load3(64)).y : _174.y, _172.z ? asfloat(_4.Load3(64)).z : _174.z))); + bool4 _176 = isnan(asfloat(_4.Load4(80))); + bool4 _177 = isnan(asfloat(_4.Load4(128))); + float4 _178 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))); + float4 _179 = float4(_176.x ? asfloat(_4.Load4(128)).x : _178.x, _176.y ? asfloat(_4.Load4(128)).y : _178.y, _176.z ? asfloat(_4.Load4(128)).z : _178.z, _176.w ? asfloat(_4.Load4(128)).w : _178.w); + _4.Store4(32, asuint(float4(_177.x ? asfloat(_4.Load4(80)).x : _179.x, _177.y ? asfloat(_4.Load4(80)).y : _179.y, _177.z ? asfloat(_4.Load4(80)).z : _179.z, _177.w ? asfloat(_4.Load4(80)).w : _179.w))); + float _180 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48)))); + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _180 : (isnan(_180) ? asfloat(_4.Load(96)) : min(_180, asfloat(_4.Load(96)))))); + bool2 _193 = isnan(asfloat(_4.Load2(8))); + bool2 _194 = isnan(asfloat(_4.Load2(56))); + float2 _195 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56))); + float2 _196 = float2(_193.x ? asfloat(_4.Load2(56)).x : _195.x, _193.y ? asfloat(_4.Load2(56)).y : _195.y); + float2 _191 = float2(_194.x ? asfloat(_4.Load2(8)).x : _196.x, _194.y ? asfloat(_4.Load2(8)).y : _196.y); + bool2 _198 = isnan(_191); + bool2 _199 = isnan(asfloat(_4.Load2(104))); + float2 _200 = min(_191, asfloat(_4.Load2(104))); + float2 _201 = float2(_198.x ? asfloat(_4.Load2(104)).x : _200.x, _198.y ? asfloat(_4.Load2(104)).y : _200.y); + _4.Store2(8, asuint(float2(_199.x ? _191.x : _201.x, _199.y ? _191.y : _201.y))); + bool3 _204 = isnan(asfloat(_4.Load3(16))); + bool3 _205 = isnan(asfloat(_4.Load3(64))); + float3 _206 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64))); + float3 _207 = float3(_204.x ? asfloat(_4.Load3(64)).x : _206.x, _204.y ? asfloat(_4.Load3(64)).y : _206.y, _204.z ? asfloat(_4.Load3(64)).z : _206.z); + float3 _202 = float3(_205.x ? asfloat(_4.Load3(16)).x : _207.x, _205.y ? asfloat(_4.Load3(16)).y : _207.y, _205.z ? asfloat(_4.Load3(16)).z : _207.z); + bool3 _209 = isnan(_202); + bool3 _210 = isnan(asfloat(_4.Load3(112))); + float3 _211 = min(_202, asfloat(_4.Load3(112))); + float3 _212 = float3(_209.x ? asfloat(_4.Load3(112)).x : _211.x, _209.y ? asfloat(_4.Load3(112)).y : _211.y, _209.z ? asfloat(_4.Load3(112)).z : _211.z); + _4.Store3(16, asuint(float3(_210.x ? _202.x : _212.x, _210.y ? _202.y : _212.y, _210.z ? _202.z : _212.z))); + bool4 _215 = isnan(asfloat(_4.Load4(32))); + bool4 _216 = isnan(asfloat(_4.Load4(80))); + float4 _217 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80))); + float4 _218 = float4(_215.x ? asfloat(_4.Load4(80)).x : _217.x, _215.y ? asfloat(_4.Load4(80)).y : _217.y, _215.z ? asfloat(_4.Load4(80)).z : _217.z, _215.w ? asfloat(_4.Load4(80)).w : _217.w); + float4 _213 = float4(_216.x ? asfloat(_4.Load4(32)).x : _218.x, _216.y ? asfloat(_4.Load4(32)).y : _218.y, _216.z ? asfloat(_4.Load4(32)).z : _218.z, _216.w ? asfloat(_4.Load4(32)).w : _218.w); + bool4 _220 = isnan(_213); + bool4 _221 = isnan(asfloat(_4.Load4(128))); + float4 _222 = min(_213, asfloat(_4.Load4(128))); + float4 _223 = float4(_220.x ? asfloat(_4.Load4(128)).x : _222.x, _220.y ? asfloat(_4.Load4(128)).y : _222.y, _220.z ? asfloat(_4.Load4(128)).z : _222.z, _220.w ? asfloat(_4.Load4(128)).w : _222.w); + _4.Store4(32, asuint(float4(_221.x ? _213.x : _223.x, _221.y ? _213.y : _223.y, _221.z ? _213.z : _223.z, _221.w ? _213.w : _223.w))); + for (int _139 = 0; _139 < 2; ) + { + bool2 _225 = isnan(asfloat(_4.Load2(56))); + bool2 _226 = isnan(asfloat(_4.Load2(104))); + float2 _227 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _228 = float2(_225.x ? asfloat(_4.Load2(104)).x : _227.x, _225.y ? asfloat(_4.Load2(104)).y : _227.y); + _4.Store2(8, asuint(float2(_226.x ? asfloat(_4.Load2(56)).x : _228.x, _226.y ? asfloat(_4.Load2(56)).y : _228.y))); + float _229 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56)))); + _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _229 : (isnan(_229) ? asfloat(_4.Load(60)) : min(_229, asfloat(_4.Load(60)))))); + _139++; + continue; + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..5ef1bc91 --- /dev/null +++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,47 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1)); + _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3)); + _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4)); + _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1)); + _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3)); + _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4)); + float _180 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1)); + _4.a1 = isnan(_4.c1) ? _180 : (isnan(_180) ? _4.c1 : min(_180, _4.c1)); + vec2 _191 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2)); + _4.a2 = mix(mix(min(_191, _4.c2), _4.c2, isnan(_191)), _191, isnan(_4.c2)); + vec3 _202 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3)); + _4.a3 = mix(mix(min(_202, _4.c3), _4.c3, isnan(_202)), _202, isnan(_4.c3)); + vec4 _213 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4)); + _4.a4 = mix(mix(min(_213, _4.c4), _4.c4, isnan(_213)), _213, isnan(_4.c4)); + for (int _139 = 0; _139 < 2; ) + { + _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + float _229 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x)); + _4.a1 = isnan(_4.b2.y) ? _229 : (isnan(_229) ? _4.b2.y : min(_229, _4.b2.y)); + _139++; + continue; + } +} + diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..a6e88684 --- /dev/null +++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,84 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : min(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); + bool2 _145 = isnan(asfloat(_4.Load2(56))); + bool2 _146 = isnan(asfloat(_4.Load2(104))); + float2 _147 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _148 = float2(_145.x ? asfloat(_4.Load2(104)).x : _147.x, _145.y ? asfloat(_4.Load2(104)).y : _147.y); + _4.Store2(8, asuint(float2(_146.x ? asfloat(_4.Load2(56)).x : _148.x, _146.y ? asfloat(_4.Load2(56)).y : _148.y))); + bool3 _150 = isnan(asfloat(_4.Load3(64))); + bool3 _151 = isnan(asfloat(_4.Load3(112))); + float3 _152 = min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))); + float3 _153 = float3(_150.x ? asfloat(_4.Load3(112)).x : _152.x, _150.y ? asfloat(_4.Load3(112)).y : _152.y, _150.z ? asfloat(_4.Load3(112)).z : _152.z); + _4.Store3(16, asuint(float3(_151.x ? asfloat(_4.Load3(64)).x : _153.x, _151.y ? asfloat(_4.Load3(64)).y : _153.y, _151.z ? asfloat(_4.Load3(64)).z : _153.z))); + bool4 _155 = isnan(asfloat(_4.Load4(80))); + bool4 _156 = isnan(asfloat(_4.Load4(128))); + float4 _157 = min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))); + float4 _158 = float4(_155.x ? asfloat(_4.Load4(128)).x : _157.x, _155.y ? asfloat(_4.Load4(128)).y : _157.y, _155.z ? asfloat(_4.Load4(128)).z : _157.z, _155.w ? asfloat(_4.Load4(128)).w : _157.w); + _4.Store4(32, asuint(float4(_156.x ? asfloat(_4.Load4(80)).x : _158.x, _156.y ? asfloat(_4.Load4(80)).y : _158.y, _156.z ? asfloat(_4.Load4(80)).z : _158.z, _156.w ? asfloat(_4.Load4(80)).w : _158.w))); + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? asfloat(_4.Load(48)) : (isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(96)) : max(asfloat(_4.Load(48)), asfloat(_4.Load(96)))))); + bool2 _165 = isnan(asfloat(_4.Load2(56))); + bool2 _166 = isnan(asfloat(_4.Load2(104))); + float2 _167 = max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _168 = float2(_165.x ? asfloat(_4.Load2(104)).x : _167.x, _165.y ? asfloat(_4.Load2(104)).y : _167.y); + _4.Store2(8, asuint(float2(_166.x ? asfloat(_4.Load2(56)).x : _168.x, _166.y ? asfloat(_4.Load2(56)).y : _168.y))); + bool3 _170 = isnan(asfloat(_4.Load3(64))); + bool3 _171 = isnan(asfloat(_4.Load3(112))); + float3 _172 = max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))); + float3 _173 = float3(_170.x ? asfloat(_4.Load3(112)).x : _172.x, _170.y ? asfloat(_4.Load3(112)).y : _172.y, _170.z ? asfloat(_4.Load3(112)).z : _172.z); + _4.Store3(16, asuint(float3(_171.x ? asfloat(_4.Load3(64)).x : _173.x, _171.y ? asfloat(_4.Load3(64)).y : _173.y, _171.z ? asfloat(_4.Load3(64)).z : _173.z))); + bool4 _175 = isnan(asfloat(_4.Load4(80))); + bool4 _176 = isnan(asfloat(_4.Load4(128))); + float4 _177 = max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))); + float4 _178 = float4(_175.x ? asfloat(_4.Load4(128)).x : _177.x, _175.y ? asfloat(_4.Load4(128)).y : _177.y, _175.z ? asfloat(_4.Load4(128)).z : _177.z, _175.w ? asfloat(_4.Load4(128)).w : _177.w); + _4.Store4(32, asuint(float4(_176.x ? asfloat(_4.Load4(80)).x : _178.x, _176.y ? asfloat(_4.Load4(80)).y : _178.y, _176.z ? asfloat(_4.Load4(80)).z : _178.z, _176.w ? asfloat(_4.Load4(80)).w : _178.w))); + float _179 = isnan(asfloat(_4.Load(48))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(48)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(48)))); + _4.Store(0, asuint(isnan(asfloat(_4.Load(96))) ? _179 : (isnan(_179) ? asfloat(_4.Load(96)) : min(_179, asfloat(_4.Load(96)))))); + bool2 _192 = isnan(asfloat(_4.Load2(8))); + bool2 _193 = isnan(asfloat(_4.Load2(56))); + float2 _194 = max(asfloat(_4.Load2(8)), asfloat(_4.Load2(56))); + float2 _195 = float2(_192.x ? asfloat(_4.Load2(56)).x : _194.x, _192.y ? asfloat(_4.Load2(56)).y : _194.y); + float2 _190 = float2(_193.x ? asfloat(_4.Load2(8)).x : _195.x, _193.y ? asfloat(_4.Load2(8)).y : _195.y); + bool2 _197 = isnan(_190); + bool2 _198 = isnan(asfloat(_4.Load2(104))); + float2 _199 = min(_190, asfloat(_4.Load2(104))); + float2 _200 = float2(_197.x ? asfloat(_4.Load2(104)).x : _199.x, _197.y ? asfloat(_4.Load2(104)).y : _199.y); + _4.Store2(8, asuint(float2(_198.x ? _190.x : _200.x, _198.y ? _190.y : _200.y))); + bool3 _203 = isnan(asfloat(_4.Load3(16))); + bool3 _204 = isnan(asfloat(_4.Load3(64))); + float3 _205 = max(asfloat(_4.Load3(16)), asfloat(_4.Load3(64))); + float3 _206 = float3(_203.x ? asfloat(_4.Load3(64)).x : _205.x, _203.y ? asfloat(_4.Load3(64)).y : _205.y, _203.z ? asfloat(_4.Load3(64)).z : _205.z); + float3 _201 = float3(_204.x ? asfloat(_4.Load3(16)).x : _206.x, _204.y ? asfloat(_4.Load3(16)).y : _206.y, _204.z ? asfloat(_4.Load3(16)).z : _206.z); + bool3 _208 = isnan(_201); + bool3 _209 = isnan(asfloat(_4.Load3(112))); + float3 _210 = min(_201, asfloat(_4.Load3(112))); + float3 _211 = float3(_208.x ? asfloat(_4.Load3(112)).x : _210.x, _208.y ? asfloat(_4.Load3(112)).y : _210.y, _208.z ? asfloat(_4.Load3(112)).z : _210.z); + _4.Store3(16, asuint(float3(_209.x ? _201.x : _211.x, _209.y ? _201.y : _211.y, _209.z ? _201.z : _211.z))); + bool4 _214 = isnan(asfloat(_4.Load4(32))); + bool4 _215 = isnan(asfloat(_4.Load4(80))); + float4 _216 = max(asfloat(_4.Load4(32)), asfloat(_4.Load4(80))); + float4 _217 = float4(_214.x ? asfloat(_4.Load4(80)).x : _216.x, _214.y ? asfloat(_4.Load4(80)).y : _216.y, _214.z ? asfloat(_4.Load4(80)).z : _216.z, _214.w ? asfloat(_4.Load4(80)).w : _216.w); + float4 _212 = float4(_215.x ? asfloat(_4.Load4(32)).x : _217.x, _215.y ? asfloat(_4.Load4(32)).y : _217.y, _215.z ? asfloat(_4.Load4(32)).z : _217.z, _215.w ? asfloat(_4.Load4(32)).w : _217.w); + bool4 _219 = isnan(_212); + bool4 _220 = isnan(asfloat(_4.Load4(128))); + float4 _221 = min(_212, asfloat(_4.Load4(128))); + float4 _222 = float4(_219.x ? asfloat(_4.Load4(128)).x : _221.x, _219.y ? asfloat(_4.Load4(128)).y : _221.y, _219.z ? asfloat(_4.Load4(128)).z : _221.z, _219.w ? asfloat(_4.Load4(128)).w : _221.w); + _4.Store4(32, asuint(float4(_220.x ? _212.x : _222.x, _220.y ? _212.y : _222.y, _220.z ? _212.z : _222.z, _220.w ? _212.w : _222.w))); + float _223; + for (int i = 0; i < 2; i++, _223 = isnan(asfloat(_4.Load(56))) ? asfloat(_4.Load(0)) : (isnan(asfloat(_4.Load(0))) ? asfloat(_4.Load(56)) : max(asfloat(_4.Load(0)), asfloat(_4.Load(56)))), _4.Store(0, asuint(isnan(asfloat(_4.Load(60))) ? _223 : (isnan(_223) ? asfloat(_4.Load(60)) : min(_223, asfloat(_4.Load(60))))))) + { + bool2 _235 = isnan(asfloat(_4.Load2(56))); + bool2 _236 = isnan(asfloat(_4.Load2(104))); + float2 _237 = min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))); + float2 _238 = float2(_235.x ? asfloat(_4.Load2(104)).x : _237.x, _235.y ? asfloat(_4.Load2(104)).y : _237.y); + _4.Store2(8, asuint(float2(_236.x ? asfloat(_4.Load2(56)).x : _238.x, _236.y ? asfloat(_4.Load2(56)).y : _238.y))); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders/asm/comp/nmin-max-clamp.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..54c452c5 --- /dev/null +++ b/reference/shaders/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,44 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : min(_4.b1, _4.c1)); + _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + _4.a3 = mix(mix(min(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3)); + _4.a4 = mix(mix(min(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4)); + _4.a1 = isnan(_4.c1) ? _4.b1 : (isnan(_4.b1) ? _4.c1 : max(_4.b1, _4.c1)); + _4.a2 = mix(mix(max(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + _4.a3 = mix(mix(max(_4.b3, _4.c3), _4.c3, isnan(_4.b3)), _4.b3, isnan(_4.c3)); + _4.a4 = mix(mix(max(_4.b4, _4.c4), _4.c4, isnan(_4.b4)), _4.b4, isnan(_4.c4)); + float _179 = isnan(_4.b1) ? _4.a1 : (isnan(_4.a1) ? _4.b1 : max(_4.a1, _4.b1)); + _4.a1 = isnan(_4.c1) ? _179 : (isnan(_179) ? _4.c1 : min(_179, _4.c1)); + vec2 _190 = mix(mix(max(_4.a2, _4.b2), _4.b2, isnan(_4.a2)), _4.a2, isnan(_4.b2)); + _4.a2 = mix(mix(min(_190, _4.c2), _4.c2, isnan(_190)), _190, isnan(_4.c2)); + vec3 _201 = mix(mix(max(_4.a3, _4.b3), _4.b3, isnan(_4.a3)), _4.a3, isnan(_4.b3)); + _4.a3 = mix(mix(min(_201, _4.c3), _4.c3, isnan(_201)), _201, isnan(_4.c3)); + vec4 _212 = mix(mix(max(_4.a4, _4.b4), _4.b4, isnan(_4.a4)), _4.a4, isnan(_4.b4)); + _4.a4 = mix(mix(min(_212, _4.c4), _4.c4, isnan(_212)), _212, isnan(_4.c4)); + float _223; + for (int i = 0; i < 2; i++, _223 = isnan(_4.b2.x) ? _4.a1 : (isnan(_4.a1) ? _4.b2.x : max(_4.a1, _4.b2.x)), _4.a1 = isnan(_4.b2.y) ? _223 : (isnan(_223) ? _4.b2.y : min(_223, _4.b2.y))) + { + _4.a2 = mix(mix(min(_4.b2, _4.c2), _4.c2, isnan(_4.b2)), _4.b2, isnan(_4.c2)); + } +} + diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..6c060eed --- /dev/null +++ b/shaders-hlsl/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/nmin-max-clamp.asm.comp b/shaders/asm/comp/nmin-max-clamp.asm.comp new file mode 100644 index 00000000..6c060eed --- /dev/null +++ b/shaders/asm/comp/nmin-max-clamp.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index d04adae4..9d7939c5 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -4935,14 +4935,27 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450NMin: - emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmin"); - break; case GLSLstd450NMax: - emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nmax"); + { + emit_nminmax_op(result_type, id, args[0], args[1], op); break; + } + case GLSLstd450NClamp: - emit_binary_func_op(result_type, id, args[0], args[1], "unsupported_glsl450_nclamp"); + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); break; + } default: statement("// unimplemented GLSL op ", eop); @@ -4950,6 +4963,35 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, } } +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get(result_type); + btype.basetype = SPIRType::Boolean; + set(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. + ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 57576cf2..5d773195 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -18,6 +18,7 @@ #define SPIRV_CROSS_GLSL_HPP #include "spirv_cross.hpp" +#include "GLSL.std.450.h" #include #include #include @@ -421,6 +422,7 @@ protected: bool should_dereference(uint32_t id); bool should_forward(uint32_t id); void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); + void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op); @@ -552,6 +554,11 @@ protected: std::vector forced_extensions; std::vector header_lines; + // Used when expressions emit extra opcodes with their own unique IDs, + // and we need to reuse the IDs across recompilation loops. + // Currently used by NMin/Max/Clamp implementations. + std::unordered_map extra_sub_expressions; + uint32_t statement_count; inline bool is_legacy() const -- cgit v1.2.3