From c8765a75f2f4d431bc7c3d8d71d21f73770c949a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 11 Dec 2020 12:24:34 +0100 Subject: GLSL: Fix KHR subgroup extension table for subgroups. --- .../frag/nonuniform-constructor.sm51.fxconly.frag | 32 ------------- ...niform-constructor.sm51.nonuniformresource.frag | 32 +++++++++++++ .../asm/frag/subgroup-ballot-only.vk.asm.frag | 32 +++++++++++++ .../asm/frag/subgroup-ballot-only.vk.asm.frag.vk | 15 +++++++ .../frag/nonuniform-constructor.sm51.fxconly.frag | 14 ------ ...niform-constructor.sm51.nonuniformresource.frag | 14 ++++++ .../asm/frag/subgroup-ballot-only.vk.asm.frag | 52 ++++++++++++++++++++++ spirv_glsl.cpp | 14 +++--- spirv_glsl.hpp | 32 ++++++------- 9 files changed, 168 insertions(+), 69 deletions(-) delete mode 100644 reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag create mode 100644 reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag create mode 100644 reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag create mode 100644 reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk delete mode 100644 shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag create mode 100644 shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag create mode 100644 shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag diff --git a/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag deleted file mode 100644 index ca9a116f..00000000 --- a/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag +++ /dev/null @@ -1,32 +0,0 @@ -Texture2D uTex[] : register(t0, space0); -SamplerState Immut : register(s0, space1); - -static float4 FragColor; -static int vIndex; -static float2 vUV; - -struct SPIRV_Cross_Input -{ - float2 vUV : TEXCOORD0; - nointerpolation int vIndex : TEXCOORD1; -}; - -struct SPIRV_Cross_Output -{ - float4 FragColor : SV_Target0; -}; - -void frag_main() -{ - FragColor = uTex[NonUniformResourceIndex(vIndex)].Sample(Immut, vUV); -} - -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) -{ - vIndex = stage_input.vIndex; - vUV = stage_input.vUV; - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.FragColor = FragColor; - return stage_output; -} diff --git a/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000..ca9a116f --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,32 @@ +Texture2D uTex[] : register(t0, space0); +SamplerState Immut : register(s0, space1); + +static float4 FragColor; +static int vIndex; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; + nointerpolation int vIndex : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = uTex[NonUniformResourceIndex(vIndex)].Sample(Immut, vUV); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000..8a918c03 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,32 @@ +#version 450 + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } +#elif defined(GL_ARB_shader_ballot) +uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } +#endif + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk new file mode 100644 index 00000000..ed5933f3 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag deleted file mode 100644 index 452aa953..00000000 --- a/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.fxconly.frag +++ /dev/null @@ -1,14 +0,0 @@ -#version 450 -#extension GL_EXT_nonuniform_qualifier : require - -layout(location = 0) out vec4 FragColor; -layout(location = 0) in vec2 vUV; -layout(location = 1) flat in int vIndex; - -layout(set = 0, binding = 0) uniform texture2D uTex[]; -layout(set = 1, binding = 0) uniform sampler Immut; - -void main() -{ - FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); -} diff --git a/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000..452aa953 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000..39f4d066 --- /dev/null +++ b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,52 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 31 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %INDEX %SV_Target + OpExecutionMode %main OriginUpperLeft + OpName %main "main" + OpName %INDEX "INDEX" + OpName %SV_Target "SV_Target" + OpDecorate %INDEX Flat + OpDecorate %INDEX Location 0 + OpDecorate %SV_Target Location 0 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %INDEX = OpVariable %_ptr_Input_uint Input + %v4uint = OpTypeVector %uint 4 +%_ptr_Output_v4uint = OpTypePointer Output %v4uint + %SV_Target = OpVariable %_ptr_Output_v4uint Output + %bool = OpTypeBool + %uint_100 = OpConstant %uint 100 + %uint_3 = OpConstant %uint 3 +%_ptr_Output_uint = OpTypePointer Output %uint + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %29 + %29 = OpLabel + %11 = OpLoad %uint %INDEX + %13 = OpULessThan %bool %11 %uint_100 + %15 = OpGroupNonUniformBallot %v4uint %uint_3 %13 + %17 = OpCompositeExtract %uint %15 0 + %18 = OpCompositeExtract %uint %15 1 + %19 = OpCompositeExtract %uint %15 2 + %20 = OpCompositeExtract %uint %15 3 + %22 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_0 + OpStore %22 %17 + %24 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_1 + OpStore %24 %18 + %26 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_2 + OpStore %26 %19 + %28 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_3 + OpStore %28 %20 + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 12d72dc6..ca68e87f 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -3569,9 +3569,9 @@ void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) statement(""); } - if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBrodcast_First)) + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First)) { - auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBrodcast_First, result); + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result); for (auto &e : exts) { @@ -7380,7 +7380,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: - request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBrodcast_First); + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First); break; case OpGroupNonUniformShuffle: @@ -15150,7 +15150,7 @@ CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSub switch (feature) { case SubgroupAllEqualT: - return { SubgroupBrodcast_First, SubgroupAll_Any_AllEqualBool }; + return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; case SubgroupElect: return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; case SubgroupInverseBallot_InclBitCount_ExclBitCout: @@ -15185,8 +15185,8 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgrou static const Candidate extensions[FeatureCount] = { KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, - KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, - KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot }; return extensions[feature]; @@ -15260,7 +15260,7 @@ CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderS return { KHR_shader_subgroup_basic, NV_shader_thread_group }; case NumSubgroups: return { KHR_shader_subgroup_basic, NV_shader_thread_group }; - case SubgroupBrodcast_First: + case SubgroupBroadcast_First: return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; case SubgroupBallotFindLSB_MSB: return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index add549aa..c382db67 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -275,22 +275,22 @@ protected: enum Feature { - SubgroupMask, - SubgroupSize, - SubgroupInvocationID, - SubgroupID, - NumSubgroups, - SubgroupBrodcast_First, - SubgroupBallotFindLSB_MSB, - SubgroupAll_Any_AllEqualBool, - SubgroupAllEqualT, - SubgroupElect, - SubgroupBarrier, - SubgroupMemBarrier, - SubgroupBallot, - SubgroupInverseBallot_InclBitCount_ExclBitCout, - SubgroupBallotBitExtract, - SubgroupBallotBitCount, + SubgroupMask = 0, + SubgroupSize = 1, + SubgroupInvocationID = 2, + SubgroupID = 3, + NumSubgroups = 4, + SubgroupBroadcast_First = 5, + SubgroupBallotFindLSB_MSB = 6, + SubgroupAll_Any_AllEqualBool = 7, + SubgroupAllEqualT = 8, + SubgroupElect = 9, + SubgroupBarrier = 10, + SubgroupMemBarrier = 11, + SubgroupBallot = 12, + SubgroupInverseBallot_InclBitCount_ExclBitCout = 13, + SubgroupBallotBitExtract = 14, + SubgroupBallotBitCount = 15, FeatureCount }; -- cgit v1.2.3