Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/HansKristian-Work/vkd3d-proton.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hans-Kristian Arntzen <post@arntzen-software.no> 2022-09-07 13:47:03 +0300
committer: Hans-Kristian Arntzen <post@arntzen-software.no> 2022-09-08 17:52:11 +0300
commit: 5711cee3991a6f065ac6a7b455930812bc7eb6f6 (patch)
tree: 6b9acf515f30dc25d338fd44f3d7f9e6a321b36f
parent: 5e57db6a7690e21ae40ae5eaeceb93d8a198a92b (diff)
tests: Add test for in-group UAV coherence.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
-rw-r--r-- tests/d3d12_shaders.c 260
-rw-r--r-- tests/d3d12_tests.h 2
2 files changed, 262 insertions, 0 deletions
diff --git a/tests/d3d12_shaders.c b/tests/d3d12_shaders.c
index f1d0aaf3..d2ac73b3 100644
--- a/tests/d3d12_shaders.c
+++ b/tests/d3d12_shaders.c
@@ -14252,3 +14252,263 @@ void test_constant_buffer_dxil(void)
test_constant_buffers(true);
}
+static void test_memory_model_uav_coherent_thread_group(bool use_dxil)
+{
+ /* From D3D11 functional spec:
+ * If a UAV is not declared as "globally coherent", it is only "group coherent",
+ * which means loads can only see data written by stores and atomics in other threads in the same thread group.
+ * The affected hardware knows it can make use of its thread-group specific caching for loads,
+ * since writes to the memory only came from the current thread group.
+ * A UAV tagged as "globally coherent" is also inherently obviously "group coherent",
+ * although the affected hardware would not use its local cache.
+ * As such, the "globally coherent" flag should only be specified when necessary.
+ *
+ * As a reminder though, to guarantee coherency on UAV accesses on all implementations,
+ * not only must shaders make the global vs group scope distinction discussed here upon UAV declaration,
+ * but they must also make appropriate use of memory and/or thread barriers ("sync_*" in the IL)
+ * as needed within in the shader to enforce proper ordering of operations by individual threads as seen by others.
+ * In addition, the "sync" operation has options for memory barriers that also distinguish between global vs group scope,
+ * but that control is separate from the topic of this section, and may not be exposed until a later time,
+ * as discussed in the sync instruction definition.
+ */
+
+ /* Vulkan does not guarantee this, so we need to promote to coherent to make this work. */
+
+#define NUM_ITERATIONS 64
+#define WORKGROUP_SIZE 256
+ struct vec4 expected_result[WORKGROUP_SIZE] = {{ 0.0f }};
+ uint32_t work_list_data[WORKGROUP_SIZE * NUM_ITERATIONS];
+ float input_data[WORKGROUP_SIZE * 4 * NUM_ITERATIONS];
+ D3D12_ROOT_PARAMETER root_parameters[4];
+ D3D12_ROOT_SIGNATURE_DESC rs_desc;
+ struct resource_readback rb;
+ struct test_context context;
+ ID3D12Resource *work_list;
+ unsigned int i, j, iter;
+ ID3D12Resource *output;
+ ID3D12Resource *inputs;
+ const struct vec4 *val;
+ uint32_t addr_xor;
+
+#if 0
+ RWStructuredBuffer<float4> RW : register(u0);
+ StructuredBuffer<uint> WorkList : register(t0);
+ StructuredBuffer<float4> RO : register(t1);
+
+ cbuffer Constants : register(b0) { uint count; };
+
+ [numthreads(256, 1, 1)]
+ void main(uint id : SV_GroupIndex)
+ {
+ // Prime the L0 caches.
+ RW[id] = 0.0.xxxx;
+
+ for (uint iter = 0; iter < count; iter++)
+ {
+ uint increment_offset = WorkList[256 * iter + id];
+ // globallycoherent is not needed when sharing data between threads in the same group.
+ DeviceMemoryBarrierWithGroupSync();
+
+ // If caches are incoherent here in the workgroup, this will break hard.
+ if (increment_offset < 256)
+ RW[increment_offset] += RO[256 * iter + id];
+ }
+ }
+#endif
+
+ static const BYTE cs_code_dxil[] =
+ {
+ 0x44, 0x58, 0x42, 0x43, 0x62, 0x39, 0x82, 0x27, 0xb8, 0xcd, 0x82, 0x14, 0xb5, 0x36, 0xa6, 0xeb, 0x8d, 0x59, 0x48, 0x54, 0x01, 0x00, 0x00, 0x00, 0x10, 0x09, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+ 0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfd, 0x8b, 0x29, 0x97, 0x70, 0x0f, 0x94, 0xc6, 0xdb, 0x49, 0xcc, 0x4e, 0x9e, 0x39, 0x0c, 0x1a, 0x44, 0x58, 0x49, 0x4c, 0xd4, 0x07, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00,
+ 0xf5, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xbc, 0x07, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0xec, 0x01, 0x00, 0x00,
+ 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19,
+ 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5,
+ 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+ 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0xc0, 0x00, 0xd2, 0x06, 0x63, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x80,
+ 0x6a, 0x03, 0x41, 0xfc, 0xff, 0xff, 0xff, 0xff, 0x00, 0x48, 0x00, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x86, 0x09, 0x01, 0x01, 0x00,
+ 0x89, 0x20, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04, 0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8c, 0x8c,
+ 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0x98, 0xc1, 0x08, 0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x08, 0xc3, 0x40, 0x44, 0x19, 0x80, 0x61, 0x20, 0xe3, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24,
+ 0x9f, 0xdb, 0xa8, 0x62, 0x25, 0x26, 0x1f, 0xb9, 0x6d, 0x44, 0x0c, 0xc3, 0x30, 0xcc, 0x11, 0x20, 0x94, 0xdc, 0x33, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0x87, 0x40, 0x33, 0x2c, 0x04, 0x0a, 0x94,
+ 0x42, 0x20, 0xc3, 0x32, 0x10, 0x33, 0x47, 0x10, 0x14, 0x63, 0x19, 0x92, 0x61, 0x70, 0xe8, 0x99, 0x01, 0xb8, 0x69, 0xb8, 0xfc, 0x09, 0x7b, 0x08, 0xc9, 0x5f, 0x09, 0x69, 0x25, 0x26, 0xbf, 0xb8,
+ 0x6d, 0x54, 0x44, 0x51, 0x14, 0x0d, 0xa5, 0x90, 0x86, 0x65, 0x18, 0x68, 0x2a, 0x0a, 0x30, 0x2c, 0xc3, 0x10, 0x45, 0x51, 0x94, 0x50, 0x75, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12,
+ 0xd2, 0x4a, 0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0x4a, 0x71, 0x0d, 0xcb, 0x30, 0x10, 0x36, 0x10, 0x30, 0x13, 0x19, 0x8c, 0x03, 0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xcc,
+ 0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0x41, 0x3d, 0xb8, 0xc3, 0x3c, 0xa4, 0xc3, 0x39,
+ 0xb8, 0x43, 0x39, 0x90, 0x03, 0x18, 0xa4, 0x83, 0x3b, 0xd0, 0x83, 0x1f, 0xa0, 0x60, 0xa0, 0x6d, 0x18, 0x81, 0x88, 0x66, 0x4a, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x83, 0x59,
+ 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xb0, 0x87, 0x72, 0x18, 0x07, 0x7a, 0x78, 0x07,
+ 0x79, 0xe0, 0x03, 0x73, 0x60, 0x87, 0x77, 0x08, 0x07, 0x7a, 0x60, 0x03, 0x30, 0xa0, 0x03, 0x3f, 0x00, 0x03, 0x3f, 0x40, 0x41, 0x47, 0xde, 0x4c, 0x6b, 0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30,
+ 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30, 0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xf6,
+ 0x50, 0x0e, 0xe3, 0x40, 0x0f, 0xef, 0x20, 0x0f, 0x7c, 0x60, 0x0e, 0xec, 0xf0, 0x0e, 0xe1, 0x40, 0x0f, 0x6c, 0x00, 0x06, 0x74, 0xe0, 0x07, 0x60, 0xe0, 0x07, 0x28, 0xe8, 0x08, 0xbc, 0x84, 0x73,
+ 0x1a, 0x69, 0x02, 0x9a, 0x49, 0x42, 0xc1, 0x40, 0xe2, 0x1c, 0x01, 0x28, 0x4c, 0x01, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
+ 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
+ 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90,
+ 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6,
+ 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x0c, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x79, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x33, 0x01, 0x01,
+ 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x06, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x59, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0x60, 0x04, 0xa0,
+ 0x18, 0x0a, 0xa3, 0x10, 0x0a, 0xa4, 0x00, 0x09, 0xc8, 0x19, 0x01, 0x20, 0xb2, 0x40, 0xe8, 0x9b, 0x01, 0xa0, 0x70, 0x06, 0x80, 0xc6, 0x19, 0x00, 0xe2, 0x66, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00,
+ 0x49, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41,
+ 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x01, 0x99, 0x20, 0x0c,
+ 0xc9, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb2, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x1c, 0x45, 0x60, 0x82, 0x30, 0x2c, 0x13, 0x84, 0x81, 0xd9, 0x20, 0x0c,
+ 0xcd, 0x86, 0x84, 0x50, 0x16, 0x82, 0x18, 0x18, 0xc2, 0x99, 0x20, 0x7c, 0xd1, 0x04, 0x61, 0x68, 0x36, 0x08, 0x43, 0xb4, 0x21, 0x19, 0xa0, 0x85, 0x18, 0x06, 0x86, 0x90, 0x36, 0x08, 0xcf, 0x34,
+ 0x41, 0x08, 0x03, 0x69, 0x82, 0xe0, 0x3c, 0x1b, 0x16, 0xa2, 0x5a, 0x08, 0x62, 0x60, 0x2c, 0xcb, 0x92, 0x36, 0x04, 0xd7, 0x04, 0x61, 0x0c, 0xa6, 0x0d, 0x08, 0x91, 0x2d, 0x04, 0x31, 0x34, 0xc0,
+ 0x86, 0x40, 0xdb, 0x40, 0x50, 0xd8, 0x06, 0x4c, 0x10, 0x04, 0x80, 0x44, 0x5b, 0x58, 0x9a, 0xdb, 0x04, 0x81, 0x0c, 0xa0, 0x09, 0xc2, 0xe0, 0x6c, 0x18, 0xc0, 0x60, 0x18, 0x36, 0x10, 0xc4, 0xd7,
+ 0x84, 0xc1, 0x86, 0xa2, 0xf3, 0x00, 0x4e, 0x0c, 0xaa, 0xb0, 0xb1, 0xd9, 0xb5, 0xb9, 0xa4, 0x91, 0x95, 0xb9, 0xd1, 0x4d, 0x09, 0x82, 0x2a, 0x64, 0x78, 0x2e, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f,
+ 0x6e, 0x53, 0x02, 0xa2, 0x09, 0x19, 0x9e, 0x8b, 0x5d, 0x18, 0x9b, 0x5d, 0x99, 0xdc, 0x94, 0xc0, 0xa8, 0x43, 0x86, 0xe7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xd7, 0xf4, 0x46, 0x56, 0xc6, 0x36,
+ 0x25, 0x40, 0xca, 0x90, 0xe1, 0xb9, 0xc8, 0x95, 0xcd, 0xbd, 0xd5, 0xc9, 0x8d, 0x95, 0xcd, 0x4d, 0x09, 0xb6, 0x3a, 0x64, 0x78, 0x2e, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x50, 0x6f, 0x69, 0x6e, 0x74,
+ 0x73, 0x53, 0x02, 0x31, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
+ 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
+ 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
+ 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
+ 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
+ 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
+ 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
+ 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
+ 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
+ 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81,
+ 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x26, 0xd0, 0x0c, 0x97,
+ 0xef, 0x3c, 0x7e, 0x00, 0x44, 0x11, 0x42, 0x44, 0x56, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc5, 0x6d, 0xdb, 0x01, 0x35, 0x5c, 0xbe, 0xf3, 0xf8,
+ 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x23, 0xb7, 0x6d, 0x06, 0xd5, 0x70, 0xf9, 0xce, 0xe3, 0x07, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x17, 0xb7, 0x6d, 0x03, 0xdb, 0x70,
+ 0xf9, 0xce, 0xe3, 0x0b, 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0x50, 0x12, 0x06, 0x20, 0x60, 0x3e, 0x72, 0xdb, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40,
+ 0x33, 0x2c, 0x84, 0x05, 0x88, 0xc1, 0x70, 0xf9, 0xce, 0xe3, 0x17, 0x0b, 0x30, 0x4d, 0x44, 0x43, 0x0c, 0xed, 0x11, 0x11, 0xc0, 0x20, 0x0e, 0x62, 0x03, 0x46, 0x0e, 0xf5, 0xf8, 0xc8, 0x6d, 0x1b,
+ 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x13, 0x04, 0x47, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x34, 0x94, 0x5c, 0x21,
+ 0x06, 0x94, 0x62, 0x40, 0xd9, 0x15, 0x70, 0x40, 0x41, 0x14, 0x68, 0x40, 0x19, 0x50, 0x32, 0x02, 0x50, 0x1e, 0x25, 0x50, 0x04, 0x14, 0x8d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00,
+ 0x82, 0x60, 0xf0, 0x78, 0xcd, 0x90, 0x65, 0xd3, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x9f, 0x63, 0x6c, 0x1b, 0x35, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x0f, 0x18, 0x3c, 0xc7, 0xb6, 0x55,
+ 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xf0, 0x84, 0x01, 0x54, 0x70, 0x9c, 0x35, 0x62, 0x50, 0x00, 0x20, 0x08, 0x06, 0x44, 0x19, 0x34, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0x85, 0x01, 0x54,
+ 0x04, 0x9e, 0x61, 0x18, 0x46, 0x32, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x19, 0x3c, 0x82, 0x37, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0x18, 0x80, 0xc1, 0x2c, 0x83, 0x11, 0x04, 0xb3, 0x04,
+ 0xc2, 0x40, 0xc5, 0x90, 0x06, 0x84, 0x28, 0x04, 0x15, 0x44, 0x57, 0x81, 0x01, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0x99, 0xc1, 0x95, 0x04, 0x64, 0x30, 0x9a, 0x10, 0x00, 0x23, 0x06, 0x06,
+ 0x00, 0x82, 0x60, 0x60, 0xb4, 0x01, 0x35, 0x0d, 0x37, 0x04, 0x61, 0x40, 0x06, 0xb3, 0x0c, 0x03, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb4, 0x06, 0x5a, 0x43, 0x9c, 0xc1, 0x68, 0x42,
+ 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x4c, 0x70, 0xf0, 0x4d, 0x07, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c,
+ 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x18, 0x91, 0xc0, 0xc7, 0x88, 0x04, 0x3e, 0x46, 0x24, 0xf0, 0x31, 0x22, 0x81, 0xcf, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0x78, 0x70, 0x06, 0x1c, 0x54,
+ 0x07, 0xc4, 0x20, 0x04, 0x60, 0x30, 0x4b, 0x40, 0x14, 0x65, 0x07, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x1f, 0x9c, 0x81, 0x66, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x30, 0x04, 0x64,
+ 0x30, 0xcb, 0x20, 0x14, 0xc1, 0x2c, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
+
+ static const DWORD cs_code_dxbc[] =
+ {
+ 0x43425844, 0xa353d5fd, 0x80bdd567, 0xce3392ef, 0xd7210985, 0x00000001, 0x00000270, 0x00000003,
+ 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
+ 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000021c, 0x00050050, 0x00000087, 0x0100086a,
+ 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x040000a2, 0x00107000, 0x00000000, 0x00000004,
+ 0x040000a2, 0x00107000, 0x00000001, 0x00000010, 0x0400009e, 0x0011e000, 0x00000000, 0x00000010,
+ 0x0200005f, 0x00024000, 0x02000068, 0x00000003, 0x0400009b, 0x00000100, 0x00000001, 0x00000001,
+ 0x0b0000a8, 0x0011e0f2, 0x00000000, 0x0002400a, 0x00004001, 0x00000000, 0x00004002, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x05000036, 0x00100012, 0x00000000, 0x00004001, 0x00000000,
+ 0x01000030, 0x08000050, 0x00100022, 0x00000000, 0x0010000a, 0x00000000, 0x0020800a, 0x00000000,
+ 0x00000000, 0x03040003, 0x0010001a, 0x00000000, 0x0a00008c, 0x00100022, 0x00000000, 0x00004001,
+ 0x00000018, 0x00004001, 0x00000008, 0x0010000a, 0x00000000, 0x0002400a, 0x8b0000a7, 0x80002302,
+ 0x00199983, 0x00100042, 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00000000, 0x00107006,
+ 0x00000000, 0x010048be, 0x0700004f, 0x00100082, 0x00000000, 0x0010002a, 0x00000000, 0x00004001,
+ 0x00000100, 0x0304001f, 0x0010003a, 0x00000000, 0x8b0000a7, 0x80008302, 0x00199983, 0x001000f2,
+ 0x00000001, 0x0010001a, 0x00000000, 0x00004001, 0x00000000, 0x00107e46, 0x00000001, 0x8b0000a7,
+ 0x80008302, 0x00199983, 0x001000f2, 0x00000002, 0x0010002a, 0x00000000, 0x00004001, 0x00000000,
+ 0x0011ee46, 0x00000000, 0x07000000, 0x001000f2, 0x00000001, 0x00100e46, 0x00000001, 0x00100e46,
+ 0x00000002, 0x090000a8, 0x0011e0f2, 0x00000000, 0x0010002a, 0x00000000, 0x00004001, 0x00000000,
+ 0x00100e46, 0x00000001, 0x01000015, 0x0700001e, 0x00100012, 0x00000000, 0x0010000a, 0x00000000,
+ 0x00004001, 0x00000001, 0x01000016, 0x0100003e,
+ };
+
+ static const D3D12_SHADER_BYTECODE code_dxil = SHADER_BYTECODE(cs_code_dxil);
+ static const D3D12_SHADER_BYTECODE code_dxbc = SHADER_BYTECODE(cs_code_dxbc);
+
+ if (!init_compute_test_context(&context))
+ return;
+
+ if (use_dxil && !context_supports_dxil(&context))
+ {
+ skip("Context does not support DXIL.\n");
+ destroy_test_context(&context);
+ return;
+ }
+
+ memset(root_parameters, 0, sizeof(root_parameters));
+ memset(&rs_desc, 0, sizeof(rs_desc));
+
+ rs_desc.pParameters = root_parameters;
+ rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
+ root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
+ root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+ root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
+ root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+ root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
+ root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+ root_parameters[2].Descriptor.ShaderRegister = 1;
+ root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+ root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
+ root_parameters[3].Constants.Num32BitValues = 1;
+
+ create_root_signature(context.device, &rs_desc, &context.root_signature);
+ context.pipeline_state = create_compute_pipeline_state(context.device,
+ context.root_signature, use_dxil ? code_dxil : code_dxbc);
+
+ output = create_default_buffer(context.device, WORKGROUP_SIZE * sizeof(float) * 4,
+ D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
+
+ srand(1337);
+
+ for (iter = 0; iter < NUM_ITERATIONS; iter++)
+ {
+ /* Ensures unique write indices per iteration. */
+ addr_xor = rand() % WORKGROUP_SIZE;
+
+ for (i = 0; i < WORKGROUP_SIZE; i++)
+ {
+ work_list_data[WORKGROUP_SIZE * iter + i] = (rand() % 4) == 0 ? (i ^ addr_xor) : ~0u;
+
+ for (j = 0; j < 4; j++)
+ {
+ input_data[4 * (WORKGROUP_SIZE * iter + i) + j] = (float)(rand() % 16) + 1.0f;
+ if (work_list_data[WORKGROUP_SIZE * iter + i] < WORKGROUP_SIZE)
+ (&expected_result[i ^ addr_xor].x)[j] += input_data[4 * (WORKGROUP_SIZE * iter + i) + j];
+ }
+ }
+ }
+
+ work_list = create_upload_buffer(context.device, sizeof(work_list_data), work_list_data);
+ inputs = create_upload_buffer(context.device, sizeof(input_data), input_data);
+
+ ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
+ ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
+ ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(output));
+ ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1, ID3D12Resource_GetGPUVirtualAddress(work_list));
+ ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 2, ID3D12Resource_GetGPUVirtualAddress(inputs));
+ ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(context.list, 3, NUM_ITERATIONS, 0);
+ ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
+
+ transition_resource_state(context.list, output, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
+ get_buffer_readback_with_command_list(output, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, context.queue, context.list);
+
+ for (i = 0; i < 256; i++)
+ {
+ val = get_readback_vec4(&rb, i, 0);
+ ok(compare_vec4(val, &expected_result[i], 0),
+ "Failed output index %u. Expected {%f, %f, %f, %f}, got {%f, %f, %f, %f}.\n", i,
+ expected_result[i].x, expected_result[i].y, expected_result[i].z, expected_result[i].w,
+ val->x, val->y, val->z, val->w);
+ }
+
+ ID3D12Resource_Release(output);
+ ID3D12Resource_Release(inputs);
+ ID3D12Resource_Release(work_list);
+ release_resource_readback(&rb);
+ destroy_test_context(&context);
+}
+
+/* Test entry point: runs the thread-group UAV coherence test with use_dxil = false (DXBC blob). */
+void test_memory_model_uav_coherence_thread_group_dxbc(void)
+{
+    const bool use_dxil = false;
+
+    test_memory_model_uav_coherent_thread_group(use_dxil);
+}
+
+/* Test entry point: runs the thread-group UAV coherence test with use_dxil = true (DXIL blob). */
+void test_memory_model_uav_coherence_thread_group_dxil(void)
+{
+    const bool use_dxil = true;
+
+    test_memory_model_uav_coherent_thread_group(use_dxil);
+}
diff --git a/tests/d3d12_tests.h b/tests/d3d12_tests.h
index c2a00658..e6861191 100644
--- a/tests/d3d12_tests.h
+++ b/tests/d3d12_tests.h
@@ -322,3 +322,5 @@ decl_test(test_fence_wait_robustness_shared);
decl_test(test_root_signature_empty_blob);
decl_test(test_sparse_buffer_memory_lifetime);
decl_test(test_rendering_no_attachments_layers);
+decl_test(test_memory_model_uav_coherence_thread_group_dxbc);
+decl_test(test_memory_model_uav_coherence_thread_group_dxil);