diff options
author | Jan Sikorski <jsikorski@codeweavers.com> | 2020-10-29 12:02:07 +0300 |
---|---|---|
committer | Jan Sikorski <jsikorski@codeweavers.com> | 2020-11-09 13:23:01 +0300 |
commit | f0239bce05b343281215d3a17edc4fc73a407b09 (patch) | |
tree | e0382cf22c0251cf9945b3e21d77d6de6e1367ff | |
parent | 6fc2a0581ab84189d948486822bd1bf045a0111e (diff) |
MSL: extract global variables from subgroup ballot operations
Fixes #1513.
-rw-r--r-- | reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag | 86 | ||||
-rw-r--r-- | shaders-msl/frag/subgroup-globals-extract.msl22.frag | 30 | ||||
-rw-r--r-- | spirv_msl.cpp | 31 |
3 files changed, 147 insertions, 0 deletions
```diff
diff --git a/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag
new file mode 100644
index 00000000..ccd83bb5
--- /dev/null
+++ b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag
@@ -0,0 +1,86 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    uint2 FragColor [[color(0)]];
+};
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+    ballot &= mask;
+    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+    ballot &= mask;
+    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvPopCount4(uint4 ballot)
+{
+    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+    return spvPopCount4(ballot & mask);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    return spvPopCount4(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    return spvPopCount4(ballot & mask);
+}
+
+static inline __attribute__((always_inline))
+uint sub1(thread uint& gl_SubgroupSize)
+{
+    return spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub2(thread uint& gl_SubgroupSize)
+{
+    return spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub3(thread uint& gl_SubgroupSize)
+{
+    return spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub4(thread uint& gl_SubgroupInvocationID)
+{
+    return spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID);
+}
+
+static inline __attribute__((always_inline))
+uint sub5(thread uint& gl_SubgroupInvocationID)
+{
+    return spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID);
+}
+
+fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]])
+{
+    main0_out out = {};
+    out.FragColor.x = (((sub1(gl_SubgroupSize) + sub2(gl_SubgroupSize)) + sub3(gl_SubgroupSize)) + sub4(gl_SubgroupInvocationID)) + sub5(gl_SubgroupInvocationID);
+    return out;
+}
+
diff --git a/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/shaders-msl/frag/subgroup-globals-extract.msl22.frag
new file mode 100644
index 00000000..f763163d
--- /dev/null
+++ b/shaders-msl/frag/subgroup-globals-extract.msl22.frag
@@ -0,0 +1,30 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+layout(location = 0) out uvec2 FragColor;
+
+uint sub1() {
+    return subgroupBallotFindLSB(uvec4(1,2,3,4));
+}
+
+uint sub2() {
+    return subgroupBallotFindMSB(uvec4(1,2,3,4));
+}
+
+uint sub3() {
+    return subgroupBallotBitCount(uvec4(1,2,3,4));
+}
+
+uint sub4() {
+    return subgroupBallotInclusiveBitCount(uvec4(1,2,3,4));
+}
+
+uint sub5() {
+    return subgroupBallotExclusiveBitCount(uvec4(1,2,3,4));
+}
+
+void main()
+{
+    FragColor.x = sub1() + sub2() + sub3() + sub4() + sub5();
+}
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 360c0451..39389266 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -1491,6 +1491,37 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 				}
 			}
 
+			case OpGroupNonUniformInverseBallot:
+			{
+				added_arg_ids.insert(builtin_subgroup_invocation_id_id);
+				break;
+			}
+
+			case OpGroupNonUniformBallotFindLSB:
+			case OpGroupNonUniformBallotFindMSB:
+			{
+				added_arg_ids.insert(builtin_subgroup_size_id);
+				break;
+			}
+
+			case OpGroupNonUniformBallotBitCount:
+			{
+				auto operation = static_cast<GroupOperation>(ops[3]);
+				switch (operation)
+				{
+				case GroupOperationReduce:
+					added_arg_ids.insert(builtin_subgroup_size_id);
+					break;
+				case GroupOperationInclusiveScan:
+				case GroupOperationExclusiveScan:
+					added_arg_ids.insert(builtin_subgroup_invocation_id_id);
+					break;
+				default:
+					break;
+				}
+				break;
+			}
+
 			default:
 				break;
 			}
```