github.com/KhronosGroup/SPIRV-Cross.git
author    Hans-Kristian Arntzen <post@arntzen-software.no>    2019-09-04 15:43:08 +0300
committer GitHub <noreply@github.com>    2019-09-04 15:43:08 +0300
commit    f24654db8c6da93855803fa7fa5bed0ae3263ee5 (patch)
tree      147ea08b1fca7e788df2cc5941ed08396996c043
parent    a06997a6a496662c49fefc02bb2421d391b45309 (diff)
parent    0286442906c7dd34b58251d82f977456b78527be (diff)
Merge pull request #1143 from KhronosGroup/interlock    2019-09-04
Merge SPV_EXT_fragment_shader_interlock with fixes
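
A minimal GLSL sketch of the input pattern this merge adds support for (the buffer name and binding are illustrative); the test shaders added below exercise the same pixel_interlock_ordered layout and begin/end interlock calls:

    #version 450
    #extension GL_ARB_fragment_shader_interlock : require
    layout(pixel_interlock_ordered) in;

    // Illustrative SSBO; any resource touched inside the critical section follows the same pattern.
    layout(binding = 0, std430) buffer SSBO { uint counter; };

    void main()
    {
        beginInvocationInterlockARB();  // enter the per-pixel critical section
        counter += 1u;                  // ordered read-modify-write for overlapping fragments
        endInvocationInterlockARB();    // leave the critical section
    }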
-rw-r--r--  reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag | 24
-rw-r--r--  reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag | 43
-rw-r--r--  reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag | 33
-rw-r--r--  reference/opt/shaders/frag/pixel-interlock-ordered.frag | 23
-rw-r--r--  reference/opt/shaders/frag/pixel-interlock-unordered.frag | 23
-rw-r--r--  reference/opt/shaders/frag/sample-interlock-ordered.frag | 23
-rw-r--r--  reference/opt/shaders/frag/sample-interlock-unordered.frag | 23
-rw-r--r--  reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag | 32
-rw-r--r--  reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag | 42
-rw-r--r--  reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag | 42
-rw-r--r--  reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag | 32
-rw-r--r--  reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag | 24
-rw-r--r--  reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag | 35
-rw-r--r--  reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag | 49
-rw-r--r--  reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag | 45
-rw-r--r--  reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag | 35
-rw-r--r--  reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag | 43
-rw-r--r--  reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag | 33
-rw-r--r--  reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag | 39
-rw-r--r--  reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag | 53
-rw-r--r--  reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag | 49
-rw-r--r--  reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag | 34
-rw-r--r--  reference/shaders/frag/pixel-interlock-ordered.frag | 23
-rw-r--r--  reference/shaders/frag/pixel-interlock-unordered.frag | 23
-rw-r--r--  reference/shaders/frag/sample-interlock-ordered.frag | 23
-rw-r--r--  reference/shaders/frag/sample-interlock-unordered.frag | 23
-rw-r--r--  shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag | 89
-rw-r--r--  shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag | 121
-rw-r--r--  shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag | 102
-rw-r--r--  shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag | 31
-rw-r--r--  shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag | 36
-rw-r--r--  shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag | 89
-rw-r--r--  shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag | 121
-rw-r--r--  shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag | 102
-rw-r--r--  shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag | 31
-rw-r--r--  shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag | 36
-rw-r--r--  shaders-msl/frag/pixel-interlock-ordered.msl2.frag | 36
-rw-r--r--  shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag | 89
-rw-r--r--  shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag | 121
-rw-r--r--  shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag | 102
-rw-r--r--  shaders-no-opt/frag/pixel-interlock-simple-callstack.frag | 31
-rw-r--r--  shaders/frag/pixel-interlock-ordered.frag | 22
-rw-r--r--  shaders/frag/pixel-interlock-unordered.frag | 22
-rw-r--r--  shaders/frag/sample-interlock-ordered.frag | 22
-rw-r--r--  shaders/frag/sample-interlock-unordered.frag | 22
-rw-r--r--  spirv_cross.cpp | 321
-rw-r--r--  spirv_cross.hpp | 67
-rw-r--r--  spirv_glsl.cpp | 85
-rw-r--r--  spirv_hlsl.cpp | 24
-rw-r--r--  spirv_msl.cpp | 38
50 files changed, 2618 insertions, 13 deletions
diff --git a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 00000000..8923f96a
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+ _9.Store(0, uint(0));
+ img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+ img[int2(0, 0)] = img3[int2(0, 0)];
+ uint _39;
+ InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+ _42.Store(0, uint(int(_42.Load(0)) + 42));
+ uint _55;
+ _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+ frag_main();
+}
diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 00000000..f77b8ed9
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+ int baz;
+};
+
+struct Buffer
+{
+ int foo;
+ uint bar;
+};
+
+struct Buffer2
+{
+ uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+ device Buffer3* m_9 [[id(0)]];
+ texture2d<float, access::write> img4 [[id(1)]];
+ texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+ texture2d<float> img3 [[id(3), raster_order_group(0)]];
+ volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+ device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+ (*spvDescriptorSet0.m_9).baz = 0;
+ spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+ spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+ (*spvDescriptorSet0.m_34).foo += 42;
+ uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 00000000..803416c6
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+ int baz;
+};
+
+struct Buffer
+{
+ int foo;
+ uint bar;
+};
+
+struct Buffer2
+{
+ uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+ _9.baz = 0;
+ img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+ img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+ _34.foo += 42;
+ uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
diff --git a/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/reference/opt/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 00000000..46cca96c
--- /dev/null
+++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/reference/opt/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 00000000..d60cd145
--- /dev/null
+++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/sample-interlock-ordered.frag b/reference/opt/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 00000000..67ca5560
--- /dev/null
+++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/sample-interlock-unordered.frag b/reference/opt/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 00000000..ea74397d
--- /dev/null
+++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..3268995c
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
@@ -0,0 +1,32 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RWByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+ float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+ int _31 = int(gl_FragCoord.x);
+ _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+ int _39 = int(gl_FragCoord.x);
+ _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u);
+ callee2();
+}
+
+void frag_main()
+{
+ callee();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+ gl_FragCoord = stage_input.gl_FragCoord;
+ frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..69277121
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
@@ -0,0 +1,42 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RWByteAddressBuffer _13 : register(u2, space0);
+RasterizerOrderedByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+ float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+ int _44 = int(gl_FragCoord.x);
+ _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+ int _52 = int(gl_FragCoord.x);
+ _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u);
+ callee2();
+ if (true)
+ {
+ }
+}
+
+void _35()
+{
+ _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u);
+}
+
+void frag_main()
+{
+ callee();
+ _35();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+ gl_FragCoord = stage_input.gl_FragCoord;
+ frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..bd963a74
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
@@ -0,0 +1,42 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RasterizerOrderedByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+ float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+ int _37 = int(gl_FragCoord.x);
+ _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+ int _45 = int(gl_FragCoord.x);
+ _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u);
+ callee2();
+}
+
+void _29()
+{
+}
+
+void _31()
+{
+}
+
+void frag_main()
+{
+ callee();
+ _29();
+ _31();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+ gl_FragCoord = stage_input.gl_FragCoord;
+ frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
new file mode 100644
index 00000000..55b71de2
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
@@ -0,0 +1,32 @@
+RasterizerOrderedByteAddressBuffer _14 : register(u1, space0);
+RasterizerOrderedByteAddressBuffer _35 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+ float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+ int _25 = int(gl_FragCoord.x);
+ _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+ int _38 = int(gl_FragCoord.x);
+ _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u);
+ callee2();
+}
+
+void frag_main()
+{
+ callee();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+ gl_FragCoord = stage_input.gl_FragCoord;
+ frag_main();
+}
diff --git a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 00000000..8923f96a
--- /dev/null
+++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+ _9.Store(0, uint(0));
+ img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+ img[int2(0, 0)] = img3[int2(0, 0)];
+ uint _39;
+ InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+ _42.Store(0, uint(int(_42.Load(0)) + 42));
+ uint _55;
+ _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+ frag_main();
+}
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
new file mode 100644
index 00000000..1b6af2a3
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
@@ -0,0 +1,35 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+ uint values1[1];
+};
+
+struct SSBO0
+{
+ uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+ int _31 = int(gl_FragCoord.x);
+ v_7.values1[_31]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+ int _39 = int(gl_FragCoord.x);
+ v_9.values0[_39]++;
+ callee2(gl_FragCoord, v_7);
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]])
+{
+ callee(gl_FragCoord, v_7, v_9);
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
new file mode 100644
index 00000000..dded6a14
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+ uint values1[1];
+};
+
+struct _12
+{
+ uint _m0[1];
+};
+
+struct SSBO0
+{
+ uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+ int _44 = int(gl_FragCoord.x);
+ v_7.values1[_44]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+ int _52 = int(gl_FragCoord.x);
+ v_9.values0[_52]++;
+ callee2(gl_FragCoord, v_7);
+ if (true)
+ {
+ }
+}
+
+inline void _35(thread float4& gl_FragCoord, device _12& v_13)
+{
+ v_13._m0[int(gl_FragCoord.x)] = 4u;
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+ callee(gl_FragCoord, v_7, v_9);
+ _35(gl_FragCoord, v_13);
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
new file mode 100644
index 00000000..5fe65f3b
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
@@ -0,0 +1,45 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+ uint values1[1];
+};
+
+struct SSBO0
+{
+ uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+ int _37 = int(gl_FragCoord.x);
+ v_7.values1[_37]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+ int _45 = int(gl_FragCoord.x);
+ v_9.values0[_45]++;
+ callee2(gl_FragCoord, v_7);
+}
+
+inline void _29()
+{
+}
+
+inline void _31()
+{
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+ callee(gl_FragCoord, v_7, v_9);
+ _29();
+ _31();
+}
+
diff --git a/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
new file mode 100644
index 00000000..716ba251
--- /dev/null
+++ b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
@@ -0,0 +1,35 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+ uint values1[1];
+};
+
+struct SSBO0
+{
+ uint values0[1];
+};
+
+inline void callee2(device SSBO1& v_14, thread float4& gl_FragCoord)
+{
+ int _25 = int(gl_FragCoord.x);
+ v_14.values1[_25]++;
+}
+
+inline void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35)
+{
+ int _38 = int(gl_FragCoord.x);
+ v_35.values0[_38]++;
+ callee2(v_14, gl_FragCoord);
+}
+
+fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+ callee(v_14, gl_FragCoord, v_35);
+}
+
diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 00000000..f77b8ed9
--- /dev/null
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+ int baz;
+};
+
+struct Buffer
+{
+ int foo;
+ uint bar;
+};
+
+struct Buffer2
+{
+ uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+ device Buffer3* m_9 [[id(0)]];
+ texture2d<float, access::write> img4 [[id(1)]];
+ texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+ texture2d<float> img3 [[id(3), raster_order_group(0)]];
+ volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+ device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+ (*spvDescriptorSet0.m_9).baz = 0;
+ spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+ spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+ (*spvDescriptorSet0.m_34).foo += 42;
+ uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 00000000..803416c6
--- /dev/null
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+ int baz;
+};
+
+struct Buffer
+{
+ int foo;
+ uint bar;
+};
+
+struct Buffer2
+{
+ uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+ _9.baz = 0;
+ img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+ img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+ _34.foo += 42;
+ uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
new file mode 100644
index 00000000..948803c6
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
@@ -0,0 +1,39 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+} _7;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+} _9;
+
+void callee2()
+{
+ int _31 = int(gl_FragCoord.x);
+ _7.values1[_31]++;
+}
+
+void callee()
+{
+ int _39 = int(gl_FragCoord.x);
+ _9.values0[_39]++;
+ callee2();
+}
+
+void spvMainInterlockedBody()
+{
+ callee();
+}
+
+void main()
+{
+ // Interlocks were used in a way not compatible with GLSL, this is very slow.
+ beginInvocationInterlockARB();
+ spvMainInterlockedBody();
+ endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
new file mode 100644
index 00000000..72dca0d7
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
@@ -0,0 +1,53 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+} _7;
+
+layout(binding = 2, std430) buffer _12_13
+{
+ uint _m0[];
+} _13;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+} _9;
+
+void callee2()
+{
+ int _44 = int(gl_FragCoord.x);
+ _7.values1[_44]++;
+}
+
+void callee()
+{
+ int _52 = int(gl_FragCoord.x);
+ _9.values0[_52]++;
+ callee2();
+ if (true)
+ {
+ }
+}
+
+void _35()
+{
+ _13._m0[int(gl_FragCoord.x)] = 4u;
+}
+
+void spvMainInterlockedBody()
+{
+ callee();
+ _35();
+}
+
+void main()
+{
+ // Interlocks were used in a way not compatible with GLSL, this is very slow.
+ beginInvocationInterlockARB();
+ spvMainInterlockedBody();
+ endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
new file mode 100644
index 00000000..b09eb666
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
@@ -0,0 +1,49 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+} _7;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+} _9;
+
+void callee2()
+{
+ int _37 = int(gl_FragCoord.x);
+ _7.values1[_37]++;
+}
+
+void callee()
+{
+ int _45 = int(gl_FragCoord.x);
+ _9.values0[_45]++;
+ callee2();
+}
+
+void _29()
+{
+}
+
+void _31()
+{
+}
+
+void spvMainInterlockedBody()
+{
+ callee();
+ _29();
+ _31();
+}
+
+void main()
+{
+ // Interlocks were used in a way not compatible with GLSL, this is very slow.
+ beginInvocationInterlockARB();
+ spvMainInterlockedBody();
+ endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
new file mode 100644
index 00000000..151ed01d
--- /dev/null
+++ b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
@@ -0,0 +1,34 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+} _14;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+} _35;
+
+void callee2()
+{
+ int _25 = int(gl_FragCoord.x);
+ _14.values1[_25]++;
+}
+
+void callee()
+{
+ int _38 = int(gl_FragCoord.x);
+ _35.values0[_38]++;
+ callee2();
+}
+
+void main()
+{
+ beginInvocationInterlockARB();
+ callee();
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/pixel-interlock-ordered.frag b/reference/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 00000000..46cca96c
--- /dev/null
+++ b/reference/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/pixel-interlock-unordered.frag b/reference/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 00000000..d60cd145
--- /dev/null
+++ b/reference/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/sample-interlock-ordered.frag b/reference/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 00000000..67ca5560
--- /dev/null
+++ b/reference/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+ endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/sample-interlock-unordered.frag b/reference/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 00000000..ea74397d
--- /dev/null
+++ b/reference/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+ uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+ _30.foo += 42;
+ uint _41 = atomicAnd(_30.bar, 255u);
+ endInvocationInterlockARB();
+}
+
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..ebd8d6ba
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ OpReturn
+ OpFunctionEnd
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ OpBeginInvocationInterlockEXT
+ %43 = OpFunctionCall %void %callee2_
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..69b8f911
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+
+ OpMemberDecorate %SSBO2 0 Offset 0
+ OpDecorate %SSBO2 BufferBlock
+ OpDecorate %ssbo2 DescriptorSet 0
+ OpDecorate %ssbo2 Binding 2
+
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+ %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %uint_4 = OpConstant %uint 4
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %bool = OpTypeBool
+ %true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %callee3_res = OpFunctionCall %void %callee3_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %calle3_block = OpLabel
+ %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+ %frag_coord_int = OpConvertFToS %int %frag_coord_x
+ %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int
+ OpStore %ssbo_ptr %uint_4
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+
+ OpSelectionMerge %merged_block None
+ OpBranchConditional %true %dummy_block %merged_block
+ %dummy_block = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpEndInvocationInterlockEXT
+ OpBranch %merged_block
+
+ %merged_block = OpLabel
+ OpReturn
+
+ OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
new file mode 100644
index 00000000..7c0fe9a2
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %call3res = OpFunctionCall %void %callee3_
+ %call4res = OpFunctionCall %void %callee4_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %begin3 = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee4_ = OpFunction %void None %3
+ %begin4 = OpLabel
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
new file mode 100644
index 00000000..59079fe5
--- /dev/null
+++ b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+};
+
+void callee2()
+{
+ values1[int(gl_FragCoord.x)] += 1;
+}
+
+void callee()
+{
+ values0[int(gl_FragCoord.x)] += 1;
+ callee2();
+}
+
+void main()
+{
+ beginInvocationInterlockARB();
+ callee();
+ endInvocationInterlockARB();
+}
diff --git a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 00000000..ceac8cc5
--- /dev/null
+++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+ uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+ int baz;
+};
+
+void main()
+{
+ // Deliberately outside the critical section to test usage tracking.
+ baz = 0;
+ imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+ imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, quux);
+ endInvocationInterlockARB();
+}
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
new file mode 100644
index 00000000..ebd8d6ba
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ OpReturn
+ OpFunctionEnd
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ OpBeginInvocationInterlockEXT
+ %43 = OpFunctionCall %void %callee2_
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
new file mode 100644
index 00000000..69b8f911
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+
+ OpMemberDecorate %SSBO2 0 Offset 0
+ OpDecorate %SSBO2 BufferBlock
+ OpDecorate %ssbo2 DescriptorSet 0
+ OpDecorate %ssbo2 Binding 2
+
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+ %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %uint_4 = OpConstant %uint 4
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %bool = OpTypeBool
+ %true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %callee3_res = OpFunctionCall %void %callee3_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %calle3_block = OpLabel
+ %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+ %frag_coord_int = OpConvertFToS %int %frag_coord_x
+ %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int
+ OpStore %ssbo_ptr %uint_4
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+
+ OpSelectionMerge %merged_block None
+ OpBranchConditional %true %dummy_block %merged_block
+ %dummy_block = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpEndInvocationInterlockEXT
+ OpBranch %merged_block
+
+ %merged_block = OpLabel
+ OpReturn
+
+ OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
new file mode 100644
index 00000000..7c0fe9a2
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %call3res = OpFunctionCall %void %callee3_
+ %call4res = OpFunctionCall %void %callee4_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %begin3 = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee4_ = OpFunction %void None %3
+ %begin4 = OpLabel
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
new file mode 100644
index 00000000..59079fe5
--- /dev/null
+++ b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+};
+
+void callee2()
+{
+ values1[int(gl_FragCoord.x)] += 1;
+}
+
+void callee()
+{
+ values0[int(gl_FragCoord.x)] += 1;
+ callee2();
+}
+
+void main()
+{
+ beginInvocationInterlockARB();
+ callee();
+ endInvocationInterlockARB();
+}
diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 00000000..04886a67
--- /dev/null
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+ uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+ int baz;
+};
+
+void main()
+{
+ // Deliberately outside the critical section to test usage tracking.
+ baz = 0;
+ imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+ //imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, quux);
+ endInvocationInterlockARB();
+}
diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 00000000..04886a67
--- /dev/null
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+ uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+ int baz;
+};
+
+void main()
+{
+ // Deliberately outside the critical section to test usage tracking.
+ baz = 0;
+ imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+ //imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, quux);
+ endInvocationInterlockARB();
+}
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
new file mode 100644
index 00000000..ebd8d6ba
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ OpReturn
+ OpFunctionEnd
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ OpBeginInvocationInterlockEXT
+ %43 = OpFunctionCall %void %callee2_
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
new file mode 100644
index 00000000..69b8f911
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+
+ OpMemberDecorate %SSBO2 0 Offset 0
+ OpDecorate %SSBO2 BufferBlock
+ OpDecorate %ssbo2 DescriptorSet 0
+ OpDecorate %ssbo2 Binding 2
+
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+ %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %uint_4 = OpConstant %uint 4
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+ %bool = OpTypeBool
+ %true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %callee3_res = OpFunctionCall %void %callee3_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %calle3_block = OpLabel
+ %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+ %frag_coord_int = OpConvertFToS %int %frag_coord_x
+ %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int
+ OpStore %ssbo_ptr %uint_4
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+
+ OpSelectionMerge %merged_block None
+ OpBranchConditional %true %dummy_block %merged_block
+ %dummy_block = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpEndInvocationInterlockEXT
+ OpBranch %merged_block
+
+ %merged_block = OpLabel
+ OpReturn
+
+ OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
new file mode 100644
index 00000000..7c0fe9a2
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+ OpCapability Shader
+ OpCapability FragmentShaderPixelInterlockEXT
+ OpExtension "SPV_EXT_fragment_shader_interlock"
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %main "main" %gl_FragCoord
+ OpExecutionMode %main OriginUpperLeft
+ OpExecutionMode %main PixelInterlockOrderedEXT
+ OpSource GLSL 450
+ OpSourceExtension "GL_ARB_fragment_shader_interlock"
+ OpName %main "main"
+ OpName %callee2_ "callee2("
+ OpName %callee_ "callee("
+ OpName %SSBO1 "SSBO1"
+ OpMemberName %SSBO1 0 "values1"
+ OpName %_ ""
+ OpName %gl_FragCoord "gl_FragCoord"
+ OpName %SSBO0 "SSBO0"
+ OpMemberName %SSBO0 0 "values0"
+ OpName %__0 ""
+ OpDecorate %_runtimearr_uint ArrayStride 4
+ OpMemberDecorate %SSBO1 0 Offset 0
+ OpDecorate %SSBO1 BufferBlock
+ OpDecorate %_ DescriptorSet 0
+ OpDecorate %_ Binding 1
+ OpDecorate %gl_FragCoord BuiltIn FragCoord
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4
+ OpMemberDecorate %SSBO0 0 Offset 0
+ OpDecorate %SSBO0 BufferBlock
+ OpDecorate %__0 DescriptorSet 0
+ OpDecorate %__0 Binding 0
+ %void = OpTypeVoid
+ %3 = OpTypeFunction %void
+ %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+ %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+ %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+ %int = OpTypeInt 32 1
+ %int_0 = OpConstant %int 0
+ %float = OpTypeFloat 32
+ %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+ %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+ %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+ %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+ %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+ %main = OpFunction %void None %3
+ %5 = OpLabel
+ %44 = OpFunctionCall %void %callee_
+ %call3res = OpFunctionCall %void %callee3_
+ %call4res = OpFunctionCall %void %callee4_
+ OpReturn
+ OpFunctionEnd
+
+ %callee3_ = OpFunction %void None %3
+ %begin3 = OpLabel
+ OpBeginInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee4_ = OpFunction %void None %3
+ %begin4 = OpLabel
+ OpEndInvocationInterlockEXT
+ OpReturn
+ OpFunctionEnd
+
+ %callee2_ = OpFunction %void None %3
+ %7 = OpLabel
+ %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %24 = OpLoad %float %23
+ %25 = OpConvertFToS %int %24
+ %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ %29 = OpLoad %uint %28
+ %30 = OpIAdd %uint %29 %uint_1
+ %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+ OpStore %31 %30
+ OpReturn
+ OpFunctionEnd
+ %callee_ = OpFunction %void None %3
+ %9 = OpLabel
+ %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+ %37 = OpLoad %float %36
+ %38 = OpConvertFToS %int %37
+ %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ %40 = OpLoad %uint %39
+ %41 = OpIAdd %uint %40 %uint_1
+ %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+ OpStore %42 %41
+ %43 = OpFunctionCall %void %callee2_
+ OpReturn
+ OpFunctionEnd
diff --git a/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
new file mode 100644
index 00000000..59079fe5
--- /dev/null
+++ b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+ uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+ uint values1[];
+};
+
+void callee2()
+{
+ values1[int(gl_FragCoord.x)] += 1;
+}
+
+void callee()
+{
+ values0[int(gl_FragCoord.x)] += 1;
+ callee2();
+}
+
+void main()
+{
+ beginInvocationInterlockARB();
+ callee();
+ endInvocationInterlockARB();
+}
diff --git a/shaders/frag/pixel-interlock-ordered.frag b/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 00000000..4439f067
--- /dev/null
+++ b/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+ imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, 0xff);
+ endInvocationInterlockARB();
+}
diff --git a/shaders/frag/pixel-interlock-unordered.frag b/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 00000000..f8fd468c
--- /dev/null
+++ b/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+ imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, 0xff);
+ endInvocationInterlockARB();
+}
diff --git a/shaders/frag/sample-interlock-ordered.frag b/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 00000000..fa80dc9f
--- /dev/null
+++ b/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+ imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, gl_SampleMaskIn[0]);
+ endInvocationInterlockARB();
+}
diff --git a/shaders/frag/sample-interlock-unordered.frag b/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 00000000..6fe5437f
--- /dev/null
+++ b/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+ int foo;
+ uint bar;
+};
+
+void main()
+{
+ beginInvocationInterlockARB();
+ imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+ imageAtomicAdd(img2, ivec2(0, 0), 1u);
+ foo += 42;
+ atomicAnd(bar, 0xff);
+ endInvocationInterlockARB();
+}
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index a83e14e2..c4913ecd 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -1507,6 +1507,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
handler.set_current_block(block);
+ handler.rearm_current_block(block);
// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
@@ -1530,6 +1531,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand
return false;
if (!handler.end_function_scope(ops, i.length))
return false;
+
+ handler.rearm_current_block(block);
}
}
}
@@ -3798,7 +3801,12 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin
const CFG &Compiler::get_cfg_for_current_function() const
{
assert(current_function);
- auto cfg_itr = function_cfgs.find(current_function->self);
+ return get_cfg_for_function(current_function->self);
+}
+
+const CFG &Compiler::get_cfg_for_function(uint32_t id) const
+{
+ auto cfg_itr = function_cfgs.find(id);
assert(cfg_itr != end(function_cfgs));
assert(cfg_itr->second);
return *cfg_itr->second;
@@ -4249,6 +4257,317 @@ void Compiler::analyze_non_block_pointer_types()
sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
}
+bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
+{
+ if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
+ {
+ if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
+ {
+			// Most complex case: we have no sensible way of dealing with this
+			// other than taking the 100% conservative approach. Exit early.
+ split_function_case = true;
+ return false;
+ }
+ else
+ {
+ interlock_function_id = call_stack.back();
+ // If this call is performed inside control flow we have a problem.
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id);
+
+ uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
+ bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
+ if (!outside_control_flow)
+ control_flow_interlock = true;
+ }
+ }
+ return true;
+}
+
+void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
+{
+ current_block_id = block.self;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+ if (length < 3)
+ return false;
+ call_stack.push_back(args[2]);
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+ call_stack.pop_back();
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+ if (length < 3)
+ return false;
+
+ if (args[2] == interlock_function_id)
+ call_stack_is_interlocked = true;
+
+ call_stack.push_back(args[2]);
+ return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+ if (call_stack.back() == interlock_function_id)
+ call_stack_is_interlocked = false;
+
+ call_stack.pop_back();
+ return true;
+}
+
+void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
+{
+ if ((use_critical_section && in_crit_sec) ||
+ (control_flow_interlock && call_stack_is_interlocked) ||
+ split_function_case)
+ {
+ compiler.interlocked_resources.insert(id);
+ }
+}
+
+bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+	// Only care about critical section analysis if we have the simple case.
+ if (use_critical_section)
+ {
+ if (opcode == OpBeginInvocationInterlockEXT)
+ {
+ in_crit_sec = true;
+ return true;
+ }
+
+ if (opcode == OpEndInvocationInterlockEXT)
+ {
+ // End critical section--nothing more to do.
+ return false;
+ }
+ }
+
+ // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
+ switch (opcode)
+ {
+ case OpLoad:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+
+ // We're only concerned with buffer and image memory here.
+ if (!var)
+ break;
+
+ switch (var->storage)
+ {
+ default:
+ break;
+
+ case StorageClassUniformConstant:
+ {
+ uint32_t result_type = args[0];
+ uint32_t id = args[1];
+ compiler.set<SPIRExpression>(id, "", result_type, true);
+ compiler.register_read(id, ptr, true);
+ break;
+ }
+
+ case StorageClassUniform:
+ // Must have BufferBlock; we only care about SSBOs.
+ if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+ break;
+ // fallthrough
+ case StorageClassStorageBuffer:
+ access_potential_resource(var->self);
+ break;
+ }
+ break;
+ }
+
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ case OpPtrAccessChain:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t result_type = args[0];
+
+ auto &type = compiler.get<SPIRType>(result_type);
+ if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+ type.storage == StorageClassStorageBuffer)
+ {
+ uint32_t id = args[1];
+ uint32_t ptr = args[2];
+ compiler.set<SPIRExpression>(id, "", result_type, true);
+ compiler.register_read(id, ptr, true);
+ compiler.ir.ids[id].set_allow_type_rewrite();
+ }
+ break;
+ }
+
+ case OpImageTexelPointer:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t result_type = args[0];
+ uint32_t id = args[1];
+ uint32_t ptr = args[2];
+ auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var)
+ e.loaded_from = var->self;
+ break;
+ }
+
+ case OpStore:
+ case OpImageWrite:
+ case OpAtomicStore:
+ {
+ if (length < 1)
+ return false;
+
+ uint32_t ptr = args[0];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+ var->storage == StorageClassStorageBuffer))
+ {
+ access_potential_resource(var->self);
+ }
+
+ break;
+ }
+
+ case OpCopyMemory:
+ {
+ if (length < 2)
+ return false;
+
+ uint32_t dst = args[0];
+ uint32_t src = args[1];
+ auto *dst_var = compiler.maybe_get_backing_variable(dst);
+ auto *src_var = compiler.maybe_get_backing_variable(src);
+
+ if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
+ access_potential_resource(dst_var->self);
+
+ if (src_var)
+ {
+ if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
+ break;
+
+ if (src_var->storage == StorageClassUniform &&
+ !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
+ {
+ break;
+ }
+
+ access_potential_resource(src_var->self);
+ }
+
+ break;
+ }
+
+ case OpImageRead:
+ case OpAtomicLoad:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+
+ // We're only concerned with buffer and image memory here.
+ if (!var)
+ break;
+
+ switch (var->storage)
+ {
+ default:
+ break;
+
+ case StorageClassUniform:
+ // Must have BufferBlock; we only care about SSBOs.
+ if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+ break;
+ // fallthrough
+ case StorageClassUniformConstant:
+ case StorageClassStorageBuffer:
+ access_potential_resource(var->self);
+ break;
+ }
+ break;
+ }
+
+ case OpAtomicExchange:
+ case OpAtomicCompareExchange:
+ case OpAtomicIIncrement:
+ case OpAtomicIDecrement:
+ case OpAtomicIAdd:
+ case OpAtomicISub:
+ case OpAtomicSMin:
+ case OpAtomicUMin:
+ case OpAtomicSMax:
+ case OpAtomicUMax:
+ case OpAtomicAnd:
+ case OpAtomicOr:
+ case OpAtomicXor:
+ {
+ if (length < 3)
+ return false;
+
+ uint32_t ptr = args[2];
+ auto *var = compiler.maybe_get_backing_variable(ptr);
+ if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+ var->storage == StorageClassStorageBuffer))
+ {
+ access_potential_resource(var->self);
+ }
+
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+void Compiler::analyze_interlocked_resource_usage()
+{
+ if (get_execution_model() == ExecutionModelFragment &&
+ (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+ get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+ get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+ get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
+ {
+ InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
+ traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);
+
+ InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
+ handler.interlock_function_id = prepass_handler.interlock_function_id;
+ handler.split_function_case = prepass_handler.split_function_case;
+ handler.control_flow_interlock = prepass_handler.control_flow_interlock;
+ handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
+
+ traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+
+		// For GLSL. If we hit any of these cases, we have to fall back to the conservative approach.
+ interlocked_is_complex = !handler.use_critical_section ||
+ handler.interlock_function_id != ir.default_entry_point;
+ }
+}
+
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
if (!type.pointer)
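The three flags computed by the prepass interact in one specific way: only when neither split_function_case nor control_flow_interlock is set can the second pass restrict its tracking to the instructions between the begin/end pair. A minimal, self-contained sketch of that decision, using hypothetical names rather than code from this patch:

// Sketch only: models how the prepass results gate the "simple" critical-section
// tracking used by InterlockedResourceAccessHandler above.
#include <cstdio>

struct InterlockAnalysisResult
{
	bool split_function_case = false;    // begin/end observed in different functions
	bool control_flow_interlock = false; // begin/end reached under non-trivial control flow

	// Mirrors: handler.use_critical_section = !split_function_case && !control_flow_interlock;
	bool use_critical_section() const
	{
		return !split_function_case && !control_flow_interlock;
	}
};

int main()
{
	InterlockAnalysisResult simple{};
	InterlockAnalysisResult split{};
	split.split_function_case = true;
	std::printf("simple shader uses critical section: %d\n", simple.use_critical_section());
	std::printf("split-function shader uses critical section: %d\n", split.use_critical_section());
	return 0;
}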
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index 90cc9956..28ccba67 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -710,6 +710,13 @@ protected:
{
}
+	// Called after returning from a function or when entering a block.
+	// This may be called multiple times per block, whereas set_current_block
+	// is only called once, on block entry.
+ virtual void rearm_current_block(const SPIRBlock &)
+ {
+ }
+
virtual bool begin_function_scope(const uint32_t *, uint32_t)
{
return true;
@@ -884,10 +891,11 @@ protected:
void build_function_control_flow_graphs_and_analyze();
std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs;
const CFG &get_cfg_for_current_function() const;
+ const CFG &get_cfg_for_function(uint32_t id) const;
struct CFGBuilder : OpcodeHandler
{
- CFGBuilder(Compiler &compiler_);
+ explicit CFGBuilder(Compiler &compiler_);
bool follow_function_call(const SPIRFunction &func) override;
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
@@ -932,7 +940,7 @@ protected:
struct PhysicalStorageBufferPointerHandler : OpcodeHandler
{
- PhysicalStorageBufferPointerHandler(Compiler &compiler_);
+ explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_);
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
Compiler &compiler;
std::unordered_set<uint32_t> types;
@@ -945,6 +953,61 @@ protected:
bool single_function);
bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
+	// Finds all resources that are accessed (read or written) inside the critical section, if present.
+ // The critical section is delimited by OpBeginInvocationInterlockEXT and
+ // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
+ // while inside the critical section must be placed in a raster order group.
+ struct InterlockedResourceAccessHandler : OpcodeHandler
+ {
+ InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id)
+ : compiler(compiler_)
+ {
+ call_stack.push_back(entry_point_id);
+ }
+
+ bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+ bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+ bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+ Compiler &compiler;
+ bool in_crit_sec = false;
+
+ uint32_t interlock_function_id = 0;
+ bool split_function_case = false;
+ bool control_flow_interlock = false;
+ bool use_critical_section = false;
+ bool call_stack_is_interlocked = false;
+ SmallVector<uint32_t> call_stack;
+
+ void access_potential_resource(uint32_t id);
+ };
+
+ struct InterlockedResourceAccessPrepassHandler : OpcodeHandler
+ {
+ InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id)
+ : compiler(compiler_)
+ {
+ call_stack.push_back(entry_point_id);
+ }
+
+ void rearm_current_block(const SPIRBlock &block) override;
+ bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+ bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+ bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+ Compiler &compiler;
+ uint32_t interlock_function_id = 0;
+ uint32_t current_block_id = 0;
+ bool split_function_case = false;
+ bool control_flow_interlock = false;
+ SmallVector<uint32_t> call_stack;
+ };
+
+ void analyze_interlocked_resource_usage();
+ // The set of all resources written while inside the critical section, if present.
+ std::unordered_set<uint32_t> interlocked_resources;
+ bool interlocked_is_complex = false;
+
void make_constant_null(uint32_t id, uint32_t type);
std::unordered_map<uint32_t, std::string> declared_block_names;
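Once interlocked_resources and interlocked_is_complex are populated, each backend consumes them differently, as the spirv_glsl.cpp, spirv_hlsl.cpp and spirv_msl.cpp changes below show. An illustrative summary only, with a hypothetical helper name and return values drawn from those changes:

// Not part of the patch: summarizes the per-resource handling applied when a
// variable ID appears in interlocked_resources.
#include <string>

std::string interlock_annotation(const std::string &backend)
{
	if (backend == "msl")
		return "raster_order_group(0)"; // appended inside [[buffer(n)]] / [[texture(n)]] attributes
	if (backend == "hlsl")
		return "RasterizerOrdered";     // type prefix, e.g. RasterizerOrderedByteAddressBuffer
	return "";                          // GLSL adds no per-resource decoration; begin/end calls remain
}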
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 3619f09c..a8f7a867 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -511,6 +511,7 @@ string CompilerGLSL::compile()
fixup_image_load_store_access();
update_active_builtins();
analyze_image_and_sampler_usage();
+ analyze_interlocked_resource_usage();
// Shaders might cast unrelated data to pointers of non-block types.
// Find all such instances and make sure we can cast the pointers to a synthesized block type.
@@ -535,6 +536,25 @@ string CompilerGLSL::compile()
pass_count++;
} while (is_forcing_recompilation());
+ // Implement the interlocked wrapper function at the end.
+ // The body was implemented in lieu of main().
+ if (interlocked_is_complex)
+ {
+ statement("void main()");
+ begin_scope();
+ statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
+ if (options.es)
+ statement("beginInvocationInterlockNV();");
+ else
+ statement("beginInvocationInterlockARB();");
+ statement("spvMainInterlockedBody();");
+ if (options.es)
+ statement("endInvocationInterlockNV();");
+ else
+ statement("endInvocationInterlockARB();");
+ end_scope();
+ }
+
// Entry point in GLSL is always main().
get_entry_point().name = "main";
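For cases the analysis flags as complex (for example the callstack and split-function tests above, where the begin/end instructions do not both sit directly in the entry point), the emitted GLSL wraps the whole shader in the interlock. A sketch of the shape of that output, reconstructed from the statement() calls above rather than copied from the reference files:

// Hypothetical sketch of the fallback GLSL shape produced when interlocked_is_complex is set;
// the real outputs live under reference/shaders-no-opt/ and are not reproduced here.
static const char *expected_fallback_shape = R"(
void spvMainInterlockedBody()
{
    // ... original main() body; the inner begin/end interlock calls are dropped ...
}

void main()
{
    // Interlocks were used in a way not compatible with GLSL, this is very slow.
    beginInvocationInterlockARB();
    spvMainInterlockedBody();
    endInvocationInterlockARB();
}
)";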
@@ -605,6 +625,26 @@ void CompilerGLSL::emit_header()
if (execution.flags.get(ExecutionModePostDepthCoverage))
require_extension_internal("GL_ARB_post_depth_coverage");
+ // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
+ if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+ execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+ execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+ execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+ {
+ if (options.es)
+ {
+ if (options.version < 310)
+ SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
+ require_extension_internal("GL_NV_fragment_shader_interlock");
+ }
+ else
+ {
+ if (options.version < 420)
+ require_extension_internal("GL_ARB_shader_image_load_store");
+ require_extension_internal("GL_ARB_fragment_shader_interlock");
+ }
+ }
+
for (auto &ext : forced_extensions)
{
if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@@ -784,6 +824,15 @@ void CompilerGLSL::emit_header()
if (execution.flags.get(ExecutionModePostDepthCoverage))
inputs.push_back("post_depth_coverage");
+ if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
+ inputs.push_back("pixel_interlock_ordered");
+ else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
+ inputs.push_back("pixel_interlock_unordered");
+ else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
+ inputs.push_back("sample_interlock_ordered");
+ else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+ inputs.push_back("sample_interlock_unordered");
+
if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
statement("layout(depth_greater) out float gl_FragDepth;");
else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
@@ -10109,6 +10158,34 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
break;
+ case OpBeginInvocationInterlockEXT:
+ // If the interlock is complex, we emit this elsewhere.
+ if (!interlocked_is_complex)
+ {
+ if (options.es)
+ statement("beginInvocationInterlockNV();");
+ else
+ statement("beginInvocationInterlockARB();");
+
+ flush_all_active_variables();
+ // Make sure forwarding doesn't propagate outside interlock region.
+ }
+ break;
+
+ case OpEndInvocationInterlockEXT:
+ // If the interlock is complex, we emit this elsewhere.
+ if (!interlocked_is_complex)
+ {
+ if (options.es)
+ statement("endInvocationInterlockNV();");
+ else
+ statement("endInvocationInterlockARB();");
+
+ flush_all_active_variables();
+ // Make sure forwarding doesn't propagate outside interlock region.
+ }
+ break;
+
default:
statement("// unimplemented op ", instruction.op);
break;
@@ -11022,7 +11099,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret
if (func.self == ir.default_entry_point)
{
- decl += "main";
+ // If we need complex fallback in GLSL, we just wrap main() in a function
+ // and interlock the entire shader ...
+ if (interlocked_is_complex)
+ decl += "spvMainInterlockedBody";
+ else
+ decl += "main";
+
processing_entry_point = true;
}
else
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 9658d119..0df89065 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
}
}
-string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
+string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
auto &imagetype = get<SPIRType>(type.image.type);
const char *dim = nullptr;
@@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
if (type.image.sampled == 1)
return join("Buffer<", type_to_glsl(imagetype), components, ">");
else if (type.image.sampled == 2)
+ {
+ if (interlocked_resources.count(id))
+ return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
+ ">");
return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
+ }
else
SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
case DimSubpassData:
@@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
const char *arrayed = type.image.arrayed ? "Array" : "";
const char *ms = type.image.ms ? "MS" : "";
const char *rw = typed_load ? "RW" : "";
+ if (typed_load && interlocked_resources.count(id))
+ rw = "RasterizerOrdered";
return join(rw, "Texture", dim, ms, arrayed, "<",
typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
join(type_to_glsl(imagetype), components),
@@ -1848,9 +1855,13 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
Bitset flags = ir.get_buffer_block_flags(var);
bool is_readonly = flags.get(DecorationNonWritable);
bool is_coherent = flags.get(DecorationCoherent);
+ bool is_interlocked = interlocked_resources.count(var.self) > 0;
+ const char *type_name = "ByteAddressBuffer ";
+ if (!is_readonly)
+ type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
add_resource_name(var.self);
- statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
- to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
+ statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
+ to_resource_binding(var), ";");
}
else
{
@@ -4673,6 +4684,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpIsHelperInvocationEXT:
SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");
+ case OpBeginInvocationInterlockEXT:
+ case OpEndInvocationInterlockEXT:
+ if (hlsl_options.shader_model < 51)
+ SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
+ break; // Nothing to do in the body
+
default:
CompilerGLSL::emit_instruction(instruction);
break;
@@ -4850,6 +4867,7 @@ string CompilerHLSL::compile()
validate_shader_model();
update_active_builtins();
analyze_image_and_sampler_usage();
+ analyze_interlocked_resource_usage();
// Subpass input needs SV_Position.
if (need_subpass_input)
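The net effect on HLSL output is that any UAV found in interlocked_resources switches to its rasterizer-ordered variant (hence the Shader Model 5.1 requirement above). A sketch of the expected declaration shape, assumed for illustration rather than copied from the reference outputs:

// Hypothetical HLSL declaration shape for a coherent SSBO present in interlocked_resources
// (the name and register binding are assumptions).
static const char *expected_hlsl_shape =
	"globallycoherent RasterizerOrderedByteAddressBuffer _9 : register(u1);";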
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index f4ba6a40..88199522 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -852,6 +852,7 @@ string CompilerMSL::compile()
update_active_builtins();
analyze_image_and_sampler_usage();
analyze_sampled_image_usage();
+ analyze_interlocked_resource_usage();
preprocess_op_codes();
build_implicit_builtins();
@@ -5541,6 +5542,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
break;
+ case OpBeginInvocationInterlockEXT:
+ case OpEndInvocationInterlockEXT:
+ if (!msl_options.supports_msl_version(2, 0))
+ SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
+ break; // Nothing to do in the body
+
default:
CompilerGLSL::emit_instruction(instruction);
break;
@@ -7436,8 +7443,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
bool is_builtin = is_member_builtin(type, index, &builtin);
if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
- return join(" [[id(",
- get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]");
+ {
+ string quals = join(
+ " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+ if (interlocked_resources.count(
+ get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+ quals += ", raster_order_group(0)";
+ quals += "]]";
+ return quals;
+ }
// Vertex function inputs
if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@@ -8239,7 +8253,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", ";
ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
r.name + "_" + convert_to_string(i);
- ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
+ ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
}
}
else
@@ -8248,7 +8265,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", ";
ep_args +=
get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
- ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+ ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
}
break;
}
@@ -8264,7 +8284,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += image_type_glsl(type, var_id) + " " + r.name;
if (r.plane > 0)
ep_args += join(plane_name_suffix, r.plane);
- ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
+ ep_args += " [[texture(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
break;
default:
if (!ep_args.empty())
@@ -8274,7 +8297,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
type_to_glsl(type, var_id) + "& " + r.name;
else
ep_args += type_to_glsl(type, var_id) + " " + r.name;
- ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+ ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+ if (interlocked_resources.count(var_id))
+ ep_args += ", raster_order_group(0)";
+ ep_args += "]]";
break;
}
}
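On the MSL side, the same resource set feeds raster_order_group(0) into the entry-point argument attributes emitted above. A sketch of one resulting argument, with the type, name and buffer index assumed for illustration:

// Hypothetical fragment of the MSL entry-point argument list for an interlocked SSBO,
// based on the [[buffer(...)]] attribute changes above.
static const char *expected_msl_argument =
	"device SSBO1& v_9 [[buffer(1), raster_order_group(0)]]";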