diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2021-04-08 12:47:35 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2021-04-19 13:10:49 +0300 |
commit | 46c48ee6b5d07d25b2ca54e0eebfc2fd3fd15a30 (patch) | |
tree | ae30c9e59b711496d6b5e3f520bb27941ca7fcd3 /reference/opt | |
parent | 425e968720cfa1c2ec99e3bb5f4d3bef3c248f3f (diff) |
MSL: Rewrite how IO blocks are emitted in multi-patch mode.
Firstly, never flatten inputs or outputs in multi-patch mode.
The main scenario where we do need to care is Block IO.
In this case, we should only flatten the top-level member, and after
that we use access chains as normal.
Using structs in Input storage class is now possible as well. We don't
need to consider per-location fixups at all here. In Vulkan, IO structs
must match exactly. Only plain vectors can have smaller vector sizes as
a special case.
Diffstat (limited to 'reference/opt')
6 files changed, 112 insertions, 21 deletions
diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc index e47d56a2..c5e309ad 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -16,8 +16,7 @@ struct main0_out struct main0_in { - float3 Boo_a; - uint3 Boo_b; + Boo vInput; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -26,8 +25,7 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - Boo _26 = Boo{ gl_in[gl_InvocationID].Boo_a, gl_in[gl_InvocationID].Boo_b }; - gl_out[gl_InvocationID].vVertex = _26; + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert index 6c98e774..497cf2d5 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 out.gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert index c26f7f2b..aaa41d42 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 gl_Position; float gl_PointSize; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 out.gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert index e6a6ff9e..3142d14c 100644 --- a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -9,8 +9,6 @@ struct main0_out float4 v1; float4 gl_Position; float gl_ClipDistance[2]; - float gl_ClipDistance_0; - float gl_ClipDistance_1; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) @@ -25,7 +23,5 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_PointSize = 4.0; out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 0.5; - out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; - out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; } diff --git a/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000..d4a59bb7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,107 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include <metal_stdlib> +#include <simd/simd.h> + +using namespace metal; + +template<typename T, size_t Num> +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray<float, 2> a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray<Meep, 2> meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray<float, 2> a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray<Meep, 2> meeps; + spvUnsafeArray<float, 2> Block_a; + float Block_b; + float2x2 Block_m; + Meep Block_meep; + spvUnsafeArray<Meep, 2> Block_meeps; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + patchOut.a[0] = 1.0; + patchOut.a[1] = 2.0; + patchOut.b = 3.0; + patchOut.m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + patchOut.meep.a = 4.0; + patchOut.meep.b = 5.0; + patchOut.meeps[0].a = 6.0; + patchOut.meeps[0].b = 7.0; + patchOut.meeps[1].a = 8.0; + patchOut.meeps[1].b = 9.0; + patchOut.Block_a[0] = 1.0; + patchOut.Block_a[1] = 2.0; + patchOut.Block_b = 3.0; + patchOut.Block_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0)); + patchOut.Block_meep.a = 4.0; + patchOut.Block_meep.b = 5.0; + patchOut.Block_meeps[0].a = 6.0; + patchOut.Block_meeps[0].b = 7.0; + patchOut.Block_meeps[1].a = 8.0; + patchOut.Block_meeps[1].b = 9.0; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc index 8bd5515b..add59f69 100644 --- a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -58,9 +58,7 @@ struct main0_out struct main0_in { - float4x4 VertexData_a; - spvUnsafeArray<float4, 2> VertexData_b; - float4 VertexData_c; + VertexData vInputs; }; kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) @@ -69,10 +67,10 @@ kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], devic device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; uint gl_InvocationID = gl_GlobalInvocationID.x % 4; uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1]); - spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ VertexData{ gl_in[0].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[0].VertexData_b[0], gl_in[0].VertexData_b[1] }), gl_in[0].VertexData_c }, VertexData{ gl_in[1].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[1].VertexData_b[0], gl_in[1].VertexData_b[1] }), gl_in[1].VertexData_c }, VertexData{ gl_in[2].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[2].VertexData_b[0], gl_in[2].VertexData_b[1] }), gl_in[2].VertexData_c }, VertexData{ gl_in[3].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[3].VertexData_b[0], gl_in[3].VertexData_b[1] }), gl_in[3].VertexData_c }, VertexData{ gl_in[4].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[4].VertexData_b[0], gl_in[4].VertexData_b[1] }), gl_in[4].VertexData_c }, VertexData{ gl_in[5].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[5].VertexData_b[0], gl_in[5].VertexData_b[1] }), gl_in[5].VertexData_c }, VertexData{ gl_in[6].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[6].VertexData_b[0], gl_in[6].VertexData_b[1] }), gl_in[6].VertexData_c }, VertexData{ gl_in[7].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[7].VertexData_b[0], gl_in[7].VertexData_b[1] }), gl_in[7].VertexData_c }, VertexData{ gl_in[8].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[8].VertexData_b[0], gl_in[8].VertexData_b[1] }), gl_in[8].VertexData_c }, VertexData{ gl_in[9].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[9].VertexData_b[0], gl_in[9].VertexData_b[1] }), gl_in[9].VertexData_c }, VertexData{ gl_in[10].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[10].VertexData_b[0], gl_in[10].VertexData_b[1] }), gl_in[10].VertexData_c }, VertexData{ gl_in[11].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[11].VertexData_b[0], gl_in[11].VertexData_b[1] }), gl_in[11].VertexData_c }, VertexData{ gl_in[12].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[12].VertexData_b[0], gl_in[12].VertexData_b[1] }), gl_in[12].VertexData_c }, VertexData{ gl_in[13].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[13].VertexData_b[0], gl_in[13].VertexData_b[1] }), gl_in[13].VertexData_c }, VertexData{ gl_in[14].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[14].VertexData_b[0], gl_in[14].VertexData_b[1] }), gl_in[14].VertexData_c }, VertexData{ gl_in[15].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[15].VertexData_b[0], gl_in[15].VertexData_b[1] }), gl_in[15].VertexData_c }, VertexData{ gl_in[16].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[16].VertexData_b[0], gl_in[16].VertexData_b[1] }), gl_in[16].VertexData_c }, VertexData{ gl_in[17].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[17].VertexData_b[0], gl_in[17].VertexData_b[1] }), gl_in[17].VertexData_c }, VertexData{ gl_in[18].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[18].VertexData_b[0], gl_in[18].VertexData_b[1] }), gl_in[18].VertexData_c }, VertexData{ gl_in[19].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[19].VertexData_b[0], gl_in[19].VertexData_b[1] }), gl_in[19].VertexData_c }, VertexData{ gl_in[20].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[20].VertexData_b[0], gl_in[20].VertexData_b[1] }), gl_in[20].VertexData_c }, VertexData{ gl_in[21].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[21].VertexData_b[0], gl_in[21].VertexData_b[1] }), gl_in[21].VertexData_c }, VertexData{ gl_in[22].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[22].VertexData_b[0], gl_in[22].VertexData_b[1] }), gl_in[22].VertexData_c }, VertexData{ gl_in[23].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[23].VertexData_b[0], gl_in[23].VertexData_b[1] }), gl_in[23].VertexData_c }, VertexData{ gl_in[24].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[24].VertexData_b[0], gl_in[24].VertexData_b[1] }), gl_in[24].VertexData_c }, VertexData{ gl_in[25].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[25].VertexData_b[0], gl_in[25].VertexData_b[1] }), gl_in[25].VertexData_c }, VertexData{ gl_in[26].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[26].VertexData_b[0], gl_in[26].VertexData_b[1] }), gl_in[26].VertexData_c }, VertexData{ gl_in[27].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[27].VertexData_b[0], gl_in[27].VertexData_b[1] }), gl_in[27].VertexData_c }, VertexData{ gl_in[28].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[28].VertexData_b[0], gl_in[28].VertexData_b[1] }), gl_in[28].VertexData_c }, VertexData{ gl_in[29].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[29].VertexData_b[0], gl_in[29].VertexData_b[1] }), gl_in[29].VertexData_c }, VertexData{ gl_in[30].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[30].VertexData_b[0], gl_in[30].VertexData_b[1] }), gl_in[30].VertexData_c }, VertexData{ gl_in[31].VertexData_a, spvUnsafeArray<float4, 2>({ gl_in[31].VertexData_b[0], gl_in[31].VertexData_b[1] }), gl_in[31].VertexData_c } }); + spvUnsafeArray<VertexData, 32> _19 = spvUnsafeArray<VertexData, 32>({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); spvUnsafeArray<VertexData, 32> tmp; tmp = _19; int _27 = gl_InvocationID ^ 1; - gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].VertexData_c; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + gl_in[_27].vInputs.c; } |