Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/KhronosGroup/SPIRV-Cross.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jan Sikorski <jsikorski@codeweavers.com> 2020-10-29 12:02:07 +0300
committer Jan Sikorski <jsikorski@codeweavers.com> 2020-11-09 13:23:01 +0300
commit f0239bce05b343281215d3a17edc4fc73a407b09 (patch)
tree e0382cf22c0251cf9945b3e21d77d6de6e1367ff
parent 6fc2a0581ab84189d948486822bd1bf045a0111e (diff)
MSL: extract global variables from subgroup ballot operations
Fixes #1513.
-rw-r--r-- reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag 86
-rw-r--r-- shaders-msl/frag/subgroup-globals-extract.msl22.frag 30
-rw-r--r-- spirv_msl.cpp 31
3 files changed, 147 insertions, 0 deletions
diff --git a/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag
new file mode 100644
index 00000000..ccd83bb5
--- /dev/null
+++ b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag
@@ -0,0 +1,86 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+ uint2 FragColor [[color(0)]];
+};
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)
+{
+ uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+ ballot &= mask;
+ return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)
+{
+ uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+ ballot &= mask;
+ return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvPopCount4(uint4 ballot)
+{
+ return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)
+{
+ uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));
+ return spvPopCount4(ballot & mask);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+ uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+ return spvPopCount4(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+ uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+ return spvPopCount4(ballot & mask);
+}
+
+static inline __attribute__((always_inline))
+uint sub1(thread uint& gl_SubgroupSize)
+{
+ return spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub2(thread uint& gl_SubgroupSize)
+{
+ return spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub3(thread uint& gl_SubgroupSize)
+{
+ return spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize);
+}
+
+static inline __attribute__((always_inline))
+uint sub4(thread uint& gl_SubgroupInvocationID)
+{
+ return spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID);
+}
+
+static inline __attribute__((always_inline))
+uint sub5(thread uint& gl_SubgroupInvocationID)
+{
+ return spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID);
+}
+
+fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]])
+{
+ main0_out out = {};
+ out.FragColor.x = (((sub1(gl_SubgroupSize) + sub2(gl_SubgroupSize)) + sub3(gl_SubgroupSize)) + sub4(gl_SubgroupInvocationID)) + sub5(gl_SubgroupInvocationID);
+ return out;
+}
+
diff --git a/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/shaders-msl/frag/subgroup-globals-extract.msl22.frag
new file mode 100644
index 00000000..f763163d
--- /dev/null
+++ b/shaders-msl/frag/subgroup-globals-extract.msl22.frag
@@ -0,0 +1,30 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+layout(location = 0) out uvec2 FragColor;
+
+uint sub1() {
+ return subgroupBallotFindLSB(uvec4(1,2,3,4));
+}
+
+uint sub2() {
+ return subgroupBallotFindMSB(uvec4(1,2,3,4));
+}
+
+uint sub3() {
+ return subgroupBallotBitCount(uvec4(1,2,3,4));
+}
+
+uint sub4() {
+ return subgroupBallotInclusiveBitCount(uvec4(1,2,3,4));
+}
+
+uint sub5() {
+ return subgroupBallotExclusiveBitCount(uvec4(1,2,3,4));
+}
+
+void main()
+{
+ FragColor.x = sub1() + sub2() + sub3() + sub4() + sub5();
+}
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 360c0451..39389266 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -1491,6 +1491,37 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
}
}
+ case OpGroupNonUniformInverseBallot:
+ {
+ added_arg_ids.insert(builtin_subgroup_invocation_id_id);
+ break;
+ }
+
+ case OpGroupNonUniformBallotFindLSB:
+ case OpGroupNonUniformBallotFindMSB:
+ {
+ added_arg_ids.insert(builtin_subgroup_size_id);
+ break;
+ }
+
+ case OpGroupNonUniformBallotBitCount:
+ {
+ auto operation = static_cast<GroupOperation>(ops[3]);
+ switch (operation)
+ {
+ case GroupOperationReduce:
+ added_arg_ids.insert(builtin_subgroup_size_id);
+ break;
+ case GroupOperationInclusiveScan:
+ case GroupOperationExclusiveScan:
+ added_arg_ids.insert(builtin_subgroup_invocation_id_id);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
default:
break;
}