MSL: Emit multiple threadgroup slices for multi-patch.

Multiple patches can run in the same workgroup when using multi-patch mode, so we need to allocate enough storage to avoid false sharing.
author: Hans-Kristian Arntzen <post@arntzen-software.no> 2021-04-14 14:13:13 +0300
committer: Hans-Kristian Arntzen <post@arntzen-software.no> 2021-04-19 13:10:49 +0300
commit: 23da445bd44afe588d6ccf61dc871b6ddac21867 (patch)
tree: 5f461faecca651151a87dad59093646f4fbac726 /reference/opt/shaders-msl
parent: b442500204b2dd6e0aa97f0dfb250901ff82a871 (diff)
6 files changed, 387 insertions, 0 deletions
diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc
new file mode 100644
index 00000000..ca025cdb
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc
@@ -0,0 +1,44 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_a;
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_b;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup P spvStorage_11[8];
+    threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    _11.a = 1.0;
+    patchOut.P_b = 2.0;
+    gl_out[gl_InvocationID].C_a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc
new file mode 100644
index 00000000..700e3fc5
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc
@@ -0,0 +1,44 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct P
+{
+    float a;
+    float b;
+};
+
+struct C
+{
+    float a;
+    float b;
+};
+
+struct main0_out
+{
+    float C_b;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    float P_a;
+    float P_b;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup C spvStoragec[8][4];
+    threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    patchOut.P_a = 1.0;
+    patchOut.P_b = 2.0;
+    c[gl_InvocationID].a = 3.0;
+    gl_out[gl_InvocationID].C_b = 4.0;
+    gl_out[gl_InvocationID].gl_Position = float4(1.0);
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc
new file mode 100644
index 00000000..d20b7d78
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc
@@ -0,0 +1,81 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct main0_out
+{
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup float4 spvStoragev0[8][4];
+    threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    v0[gl_InvocationID] = float4(1.0);
+    v0[gl_InvocationID].z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc
new file mode 100644
index 00000000..2831008f
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc
@@ -0,0 +1,40 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    threadgroup float4 spvStoragev1[8][2];
+    threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        v1[0] = float4(2.0);
+        ((threadgroup float*)&v1[0])[0u] = 3.0;
+        v1[1] = float4(2.0);
+        ((threadgroup float*)&v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc
new file mode 100644
index 00000000..21360341
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc
@@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float4 gl_Position;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out_masked[gl_InvocationID].gl_PointSize = 40.0;
+}
+
diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc
new file mode 100644
index 00000000..3aea5798
--- /dev/null
+++ b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc
@@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct gl_PerVertex
+{
+    float4 gl_Position;
+    float gl_PointSize;
+    float gl_ClipDistance[1];
+    float gl_CullDistance[1];
+};
+
+struct main0_out
+{
+    float4 v0;
+    float gl_PointSize;
+};
+
+struct main0_patchOut
+{
+    spvUnsafeArray<float4, 2> v1;
+    float4 v3;
+};
+
+kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]])
+{
+    device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4];
+    threadgroup gl_PerVertex spvStoragegl_out_masked[8][4];
+    threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8];
+    device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4];
+    uint gl_InvocationID = gl_GlobalInvocationID.x % 4;
+    uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]);
+    gl_out[gl_InvocationID].v0 = float4(1.0);
+    gl_out[gl_InvocationID].v0.z = 3.0;
+    if (gl_InvocationID == 0)
+    {
+        patchOut.v1[0] = float4(2.0);
+        ((device float*)&patchOut.v1[0])[0u] = 3.0;
+        patchOut.v1[1] = float4(2.0);
+        ((device float*)&patchOut.v1[1])[0u] = 5.0;
+    }
+    patchOut.v3 = float4(5.0);
+    gl_out_masked[gl_InvocationID].gl_Position = float4(10.0);
+    gl_out_masked[gl_InvocationID].gl_Position.z = 20.0;
+    gl_out[gl_InvocationID].gl_PointSize = 40.0;
+}
+
author	Hans-Kristian Arntzen <post@arntzen-software.no>	2021-04-14 14:13:13 +0300
committer	Hans-Kristian Arntzen <post@arntzen-software.no>	2021-04-19 13:10:49 +0300
commit	23da445bd44afe588d6ccf61dc871b6ddac21867 (patch)
tree	5f461faecca651151a87dad59093646f4fbac726 /reference/opt/shaders-msl
parent	b442500204b2dd6e0aa97f0dfb250901ff82a871 (diff)