From 8216e87f02d010bc0e0233addf01541e6332b126 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 28 Jun 2021 11:10:55 +0200 Subject: Handle SPIR-V 1.4 selection constructs. Fix bug in to_trivial_mix_op, where we made a pre-1.4 assumption that component count of selector is equal to value component count. --- .../shaders-hlsl/asm/comp/bitcast_icmp.asm.comp | 24 ++++++++-------------- .../opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp | 16 +++++++-------- .../asm/frag/sample-mask-not-array.asm.frag | 2 +- .../asm/tesc/hs-incorrect-base-type.asm.tesc | 2 +- .../asm/tesc/hs-input-array-access.asm.tesc | 2 +- .../asm/tesc/hs-texcoord-array.asm.tesc | 2 +- .../opt/shaders/asm/comp/bitcast_icmp.asm.comp | 16 +++++++-------- .../opt/shaders/asm/comp/bitcast_iequal.asm.comp | 16 +++++++-------- reference/opt/shaders/comp/casts.comp | 2 +- 9 files changed, 37 insertions(+), 45 deletions(-) (limited to 'reference/opt') diff --git a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp index 35143a48..da499c3b 100644 --- a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp +++ b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -3,22 +3,14 @@ RWByteAddressBuffer _6 : register(u1); void comp_main() { - bool4 _31 = bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w); - bool4 _32 = bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w); - bool4 _33 = bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)); - bool4 _34 = bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)); - bool4 _35 = bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w); - bool4 _36 = bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w); - bool4 _37 = bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)); - bool4 _38 = bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)); - _6.Store4(0, uint4(_31.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _31.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _31.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _31.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_32.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _32.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _32.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _32.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_33.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _33.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _33.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _33.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_34.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _34.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _34.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _34.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_35.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _35.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _35.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _35.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_36.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _36.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _36.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _36.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_37.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _37.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _37.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _37.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); - _6.Store4(0, uint4(_38.x ? uint4(1u, 1u, 1u, 1u).x : uint4(0u, 0u, 0u, 0u).x, _38.y ? uint4(1u, 1u, 1u, 1u).y : uint4(0u, 0u, 0u, 0u).y, _38.z ? uint4(1u, 1u, 1u, 1u).z : uint4(0u, 0u, 0u, 0u).z, _38.w ? uint4(1u, 1u, 1u, 1u).w : uint4(0u, 0u, 0u, 0u).w)); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)))); } [numthreads(1, 1, 1)] diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp index 31c71daa..bc5d3e64 100644 --- a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -17,13 +17,13 @@ struct _4 kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]]) { - _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) < _5._m0); - _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) <= _5._m0); - _6._m0 = select(uint4(0u), uint4(1u), _5._m1 < uint4(_5._m0)); - _6._m0 = select(uint4(0u), uint4(1u), _5._m1 <= uint4(_5._m0)); - _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) > _5._m0); - _6._m0 = select(uint4(0u), uint4(1u), int4(_5._m1) >= _5._m0); - _6._m0 = select(uint4(0u), uint4(1u), _5._m1 > uint4(_5._m0)); - _6._m0 = select(uint4(0u), uint4(1u), _5._m1 >= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) < _5._m0); + _6._m0 = uint4(int4(_5._m1) <= _5._m0); + _6._m0 = uint4(_5._m1 < uint4(_5._m0)); + _6._m0 = uint4(_5._m1 <= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) > _5._m0); + _6._m0 = uint4(int4(_5._m1) >= _5._m0); + _6._m0 = uint4(_5._m1 > uint4(_5._m0)); + _6._m0 = uint4(_5._m1 >= uint4(_5._m0)); } diff --git a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag index 479471d2..e7b96e7a 100644 --- a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag +++ b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -470,7 +470,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float3 _245; if (any(abs(_142 - View_PrimitiveSceneData._m0[_222 + 5u].xyz) > (View_PrimitiveSceneData._m0[_222 + 19u].xyz + float3(1.0)))) { - _245 = mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), select(float3(0.0), float3(1.0), float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); + _245 = mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), float3(float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); } else { diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.asm.tesc index ec5d9960..ad66d550 100644 --- a/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.asm.tesc +++ b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.asm.tesc @@ -369,7 +369,7 @@ kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0) float4 _537 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); float3 _538 = _537.xyz; float _540 = _537.w; - if (any((((select(int3(0), int3(1), (_495 - _496) < float3(_498 + _499)) + (int3(2) * select(int3(0), int3(1), (_495 + _496) > float3((-_498) - _499)))) | (select(int3(0), int3(1), (_517 - _496) < float3(_519 + _499)) + (int3(2) * select(int3(0), int3(1), (_517 + _496) > float3((-_519) - _499))))) | (select(int3(0), int3(1), (_538 - _496) < float3(_540 + _499)) + (int3(2) * select(int3(0), int3(1), (_538 + _496) > float3((-_540) - _499))))) != int3(3))) + if (any((((int3((_495 - _496) < float3(_498 + _499)) + (int3(2) * int3((_495 + _496) > float3((-_498) - _499)))) | (int3((_517 - _496) < float3(_519 + _499)) + (int3(2) * int3((_517 + _496) > float3((-_519) - _499))))) | (int3((_538 - _496) < float3(_540 + _499)) + (int3(2) * int3((_538 + _496) > float3((-_540) - _499))))) != int3(3))) { _589 = float4(0.0); break; diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.asm.tesc index 848aa9ab..23d82864 100644 --- a/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.asm.tesc +++ b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.asm.tesc @@ -437,7 +437,7 @@ kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0) float4 _548 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); float3 _549 = _548.xyz; float _551 = _548.w; - if (any((((select(int3(0), int3(1), (_506 - _507) < float3(_509 + _510)) + (int3(2) * select(int3(0), int3(1), (_506 + _507) > float3((-_509) - _510)))) | (select(int3(0), int3(1), (_528 - _507) < float3(_530 + _510)) + (int3(2) * select(int3(0), int3(1), (_528 + _507) > float3((-_530) - _510))))) | (select(int3(0), int3(1), (_549 - _507) < float3(_551 + _510)) + (int3(2) * select(int3(0), int3(1), (_549 + _507) > float3((-_551) - _510))))) != int3(3))) + if (any((((int3((_506 - _507) < float3(_509 + _510)) + (int3(2) * int3((_506 + _507) > float3((-_509) - _510)))) | (int3((_528 - _507) < float3(_530 + _510)) + (int3(2) * int3((_528 + _507) > float3((-_530) - _510))))) | (int3((_549 - _507) < float3(_551 + _510)) + (int3(2) * int3((_549 + _507) > float3((-_551) - _510))))) != int3(3))) { _600 = float4(0.0); break; diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.asm.tesc index 674992ca..247fc011 100644 --- a/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.asm.tesc +++ b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.asm.tesc @@ -381,7 +381,7 @@ kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0) float4 _472 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); float3 _473 = _472.xyz; float _475 = _472.w; - if (any((((select(int3(0), int3(1), (_430 - _431) < float3(_433 + _434)) + (int3(2) * select(int3(0), int3(1), (_430 + _431) > float3((-_433) - _434)))) | (select(int3(0), int3(1), (_452 - _431) < float3(_454 + _434)) + (int3(2) * select(int3(0), int3(1), (_452 + _431) > float3((-_454) - _434))))) | (select(int3(0), int3(1), (_473 - _431) < float3(_475 + _434)) + (int3(2) * select(int3(0), int3(1), (_473 + _431) > float3((-_475) - _434))))) != int3(3))) + if (any((((int3((_430 - _431) < float3(_433 + _434)) + (int3(2) * int3((_430 + _431) > float3((-_433) - _434)))) | (int3((_452 - _431) < float3(_454 + _434)) + (int3(2) * int3((_452 + _431) > float3((-_454) - _434))))) | (int3((_473 - _431) < float3(_475 + _434)) + (int3(2) * int3((_473 + _431) > float3((-_475) - _434))))) != int3(3))) { _524 = float4(0.0); break; diff --git a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp index bed3b90a..8d59fcc8 100644 --- a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp +++ b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp @@ -15,13 +15,13 @@ layout(binding = 1, std430) restrict buffer _4_6 void main() { - _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(ivec4(_5._m1), _5._m0)); - _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(ivec4(_5._m1), _5._m0)); - _6._m0 = mix(uvec4(0u), uvec4(1u), lessThan(_5._m1, uvec4(_5._m0))); - _6._m0 = mix(uvec4(0u), uvec4(1u), lessThanEqual(_5._m1, uvec4(_5._m0))); - _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(ivec4(_5._m1), _5._m0)); - _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(ivec4(_5._m1), _5._m0)); - _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThan(_5._m1, uvec4(_5._m0))); - _6._m0 = mix(uvec4(0u), uvec4(1u), greaterThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThanEqual(_5._m1, uvec4(_5._m0))); } diff --git a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp index bdb3eeb9..8a552dba 100644 --- a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp +++ b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp @@ -21,13 +21,13 @@ void main() bvec4 _35 = equal(_30, ivec4(_31)); bvec4 _36 = equal(_31, _31); bvec4 _37 = equal(_30, _30); - _6._m0 = mix(uvec4(0u), uvec4(1u), _34); - _6._m0 = mix(uvec4(0u), uvec4(1u), _35); - _6._m0 = mix(uvec4(0u), uvec4(1u), _36); - _6._m0 = mix(uvec4(0u), uvec4(1u), _37); - _6._m1 = mix(ivec4(0), ivec4(1), _34); - _6._m1 = mix(ivec4(0), ivec4(1), _35); - _6._m1 = mix(ivec4(0), ivec4(1), _36); - _6._m1 = mix(ivec4(0), ivec4(1), _37); + _6._m0 = uvec4(_34); + _6._m0 = uvec4(_35); + _6._m0 = uvec4(_36); + _6._m0 = uvec4(_37); + _6._m1 = ivec4(_34); + _6._m1 = ivec4(_35); + _6._m1 = ivec4(_36); + _6._m1 = ivec4(_37); } diff --git a/reference/opt/shaders/comp/casts.comp b/reference/opt/shaders/comp/casts.comp index 11ef3628..12cf1788 100644 --- a/reference/opt/shaders/comp/casts.comp +++ b/reference/opt/shaders/comp/casts.comp @@ -13,6 +13,6 @@ layout(binding = 0, std430) buffer SSBO0 void main() { - _21.outputs[gl_GlobalInvocationID.x] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); + _21.outputs[gl_GlobalInvocationID.x] = ivec4(notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); } -- cgit v1.2.3