diff options
author | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-09-07 13:47:03 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <post@arntzen-software.no> | 2022-09-08 17:52:11 +0300 |
commit | 5711cee3991a6f065ac6a7b455930812bc7eb6f6 (patch) | |
tree | 6b9acf515f30dc25d338fd44f3d7f9e6a321b36f | |
parent | 5e57db6a7690e21ae40ae5eaeceb93d8a198a92b (diff) |
tests: Add test for in-group UAV coherence.
Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
-rw-r--r-- | tests/d3d12_shaders.c | 260 | ||||
-rw-r--r-- | tests/d3d12_tests.h | 2 |
2 files changed, 262 insertions, 0 deletions
diff --git a/tests/d3d12_shaders.c b/tests/d3d12_shaders.c index f1d0aaf3..d2ac73b3 100644 --- a/tests/d3d12_shaders.c +++ b/tests/d3d12_shaders.c @@ -14252,3 +14252,263 @@ void test_constant_buffer_dxil(void) test_constant_buffers(true); } +static void test_memory_model_uav_coherent_thread_group(bool use_dxil) +{ + /* From D3D11 functional spec: + * If a UAV is not declared as "globally coherent", it is only "group coherent", + * which means loads can only see data written by stores and atomics in other threads in the same thread group. + * The affected hardware knows it can make use of its thread-group specific caching for loads, + * since writes to the memory only came from the current thread group. + * A UAV tagged as "globally coherent" is also inherently obviously "group coherent", + * although the affected hardware would not use its local cache. + * As such, the "globally coherent" flag should only be specified when necessary. + * + * As a reminder though, to guarantee coherency on UAV accesses on all implementations, + * not only must shaders make the global vs group scope distinction discussed here upon UAV declaration, + * but they must also make appropriate use of memory and/or thread barriers ("sync_*" in the IL) + * as needed within in the shader to enforce proper ordering of operations by individual threads as seen by others. + * In addition, the "sync" operation has options for memory barriers that also distinguish between global vs group scope, + * but that control is separate from the topic of this section, and may not be exposed until a later time, + * as discussed in the sync instruction definition. + */ + + /* Vulkan does not guarantee this, so we need to promote to coherent to make this work. */ + +#define NUM_ITERATIONS 64 +#define WORKGROUP_SIZE 256 + struct vec4 expected_result[WORKGROUP_SIZE] = {{ 0.0f }}; + uint32_t work_list_data[WORKGROUP_SIZE * NUM_ITERATIONS]; + float input_data[WORKGROUP_SIZE * 4 * NUM_ITERATIONS]; + D3D12_ROOT_PARAMETER root_parameters[4]; + D3D12_ROOT_SIGNATURE_DESC rs_desc; + struct resource_readback rb; + struct test_context context; + ID3D12Resource *work_list; + unsigned int i, j, iter; + ID3D12Resource *output; + ID3D12Resource *inputs; + const struct vec4 *val; + uint32_t addr_xor; + +#if 0 + RWStructuredBuffer<float4> RW : register(u0); + StructuredBuffer<uint> WorkList : register(t0); + StructuredBuffer<float4> RO : register(t1); + + cbuffer Constants : register(b0) { uint count; }; + + [numthreads(256, 1, 1)] + void main(uint id : SV_GroupIndex) + { + // Prime the L0 caches. + RW[id] = 0.0.xxxx; + + for (uint iter = 0; iter < count; iter++) + { + uint increment_offset = WorkList[256 * iter + id]; + // globallycoherent is not needed when sharing data between threads in the same group. + DeviceMemoryBarrierWithGroupSync(); + + // If caches are incoherent here in the workgroup, this will break hard. + if (increment_offset < 256) + RW[increment_offset] += RO[256 * iter + id]; + } + } +#endif + + static const BYTE cs_code_dxil[] = + { + 0x44, 0x58, 0x42, 0x43, 0x62, 0x39, 0x82, 0x27, 0xb8, 0xcd, 0x82, 0x14, 0xb5, 0x36, 0xa6, 0xeb, 0x8d, 0x59, 0x48, 0x54, 0x01, 0x00, 0x00, 0x00, 0x10, 0x09, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x34, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0xa8, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfd, 0x8b, 0x29, 0x97, 0x70, 0x0f, 0x94, 0xc6, 0xdb, 0x49, 0xcc, 0x4e, 0x9e, 0x39, 0x0c, 0x1a, 0x44, 0x58, 0x49, 0x4c, 0xd4, 0x07, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, + 0xf5, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xbc, 0x07, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0xec, 0x01, 0x00, 0x00, + 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, + 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x18, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xc4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x62, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, + 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x11, 0x23, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x31, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0xc0, 0x00, 0xd2, 0x06, 0x63, 0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x80, + 0x6a, 0x03, 0x41, 0xfc, 0xff, 0xff, 0xff, 0xff, 0x00, 0x48, 0x00, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x86, 0x09, 0x01, 0x01, 0x00, + 0x89, 0x20, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x32, 0x22, 0x88, 0x09, 0x20, 0x64, 0x85, 0x04, 0x13, 0x23, 0xa4, 0x84, 0x04, 0x13, 0x23, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8c, 0x8c, + 0x0b, 0x84, 0xc4, 0x4c, 0x10, 0x98, 0xc1, 0x08, 0x40, 0x09, 0x00, 0x0a, 0xe6, 0x08, 0xc0, 0xa0, 0x08, 0xc3, 0x40, 0x44, 0x19, 0x80, 0x61, 0x20, 0xe3, 0xa8, 0xe1, 0xf2, 0x27, 0xec, 0x21, 0x24, + 0x9f, 0xdb, 0xa8, 0x62, 0x25, 0x26, 0x1f, 0xb9, 0x6d, 0x44, 0x0c, 0xc3, 0x30, 0xcc, 0x11, 0x20, 0x94, 0xdc, 0x33, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0x87, 0x40, 0x33, 0x2c, 0x04, 0x0a, 0x94, + 0x42, 0x20, 0xc3, 0x32, 0x10, 0x33, 0x47, 0x10, 0x14, 0x63, 0x19, 0x92, 0x61, 0x70, 0xe8, 0x99, 0x01, 0xb8, 0x69, 0xb8, 0xfc, 0x09, 0x7b, 0x08, 0xc9, 0x5f, 0x09, 0x69, 0x25, 0x26, 0xbf, 0xb8, + 0x6d, 0x54, 0x44, 0x51, 0x14, 0x0d, 0xa5, 0x90, 0x86, 0x65, 0x18, 0x68, 0x2a, 0x0a, 0x30, 0x2c, 0xc3, 0x10, 0x45, 0x51, 0x94, 0x50, 0x75, 0xd3, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xbf, 0x12, + 0xd2, 0x4a, 0x4c, 0x3e, 0x72, 0xdb, 0xa8, 0x18, 0x86, 0x61, 0x18, 0x4a, 0x71, 0x0d, 0xcb, 0x30, 0x10, 0x36, 0x10, 0x30, 0x13, 0x19, 0x8c, 0x03, 0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xcc, + 0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0x41, 0x3d, 0xb8, 0xc3, 0x3c, 0xa4, 0xc3, 0x39, + 0xb8, 0x43, 0x39, 0x90, 0x03, 0x18, 0xa4, 0x83, 0x3b, 0xd0, 0x83, 0x1f, 0xa0, 0x60, 0xa0, 0x6d, 0x18, 0x81, 0x88, 0x66, 0x4a, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x83, 0x59, + 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xb0, 0x87, 0x72, 0x18, 0x07, 0x7a, 0x78, 0x07, + 0x79, 0xe0, 0x03, 0x73, 0x60, 0x87, 0x77, 0x08, 0x07, 0x7a, 0x60, 0x03, 0x30, 0xa0, 0x03, 0x3f, 0x00, 0x03, 0x3f, 0x40, 0x41, 0x47, 0xde, 0x4c, 0x6b, 0x30, 0x0e, 0xec, 0x10, 0x0e, 0xf3, 0x30, + 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30, 0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e, 0xf2, 0xc0, 0x07, 0xf6, + 0x50, 0x0e, 0xe3, 0x40, 0x0f, 0xef, 0x20, 0x0f, 0x7c, 0x60, 0x0e, 0xec, 0xf0, 0x0e, 0xe1, 0x40, 0x0f, 0x6c, 0x00, 0x06, 0x74, 0xe0, 0x07, 0x60, 0xe0, 0x07, 0x28, 0xe8, 0x08, 0xbc, 0x84, 0x73, + 0x1a, 0x69, 0x02, 0x9a, 0x49, 0x42, 0xc1, 0x40, 0xe2, 0x1c, 0x01, 0x28, 0x4c, 0x01, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, + 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, + 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, + 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, + 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x0c, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x79, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x33, 0x01, 0x01, + 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0xa7, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x06, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x59, 0x20, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x4a, 0x60, 0x04, 0xa0, + 0x18, 0x0a, 0xa3, 0x10, 0x0a, 0xa4, 0x00, 0x09, 0xc8, 0x19, 0x01, 0x20, 0xb2, 0x40, 0xe8, 0x9b, 0x01, 0xa0, 0x70, 0x06, 0x80, 0xc6, 0x19, 0x00, 0xe2, 0x66, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, + 0x49, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, + 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0x01, 0x99, 0x20, 0x0c, + 0xc9, 0x06, 0x61, 0x20, 0x26, 0x08, 0x83, 0xb2, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x1c, 0x45, 0x60, 0x82, 0x30, 0x2c, 0x13, 0x84, 0x81, 0xd9, 0x20, 0x0c, + 0xcd, 0x86, 0x84, 0x50, 0x16, 0x82, 0x18, 0x18, 0xc2, 0x99, 0x20, 0x7c, 0xd1, 0x04, 0x61, 0x68, 0x36, 0x08, 0x43, 0xb4, 0x21, 0x19, 0xa0, 0x85, 0x18, 0x06, 0x86, 0x90, 0x36, 0x08, 0xcf, 0x34, + 0x41, 0x08, 0x03, 0x69, 0x82, 0xe0, 0x3c, 0x1b, 0x16, 0xa2, 0x5a, 0x08, 0x62, 0x60, 0x2c, 0xcb, 0x92, 0x36, 0x04, 0xd7, 0x04, 0x61, 0x0c, 0xa6, 0x0d, 0x08, 0x91, 0x2d, 0x04, 0x31, 0x34, 0xc0, + 0x86, 0x40, 0xdb, 0x40, 0x50, 0xd8, 0x06, 0x4c, 0x10, 0x04, 0x80, 0x44, 0x5b, 0x58, 0x9a, 0xdb, 0x04, 0x81, 0x0c, 0xa0, 0x09, 0xc2, 0xe0, 0x6c, 0x18, 0xc0, 0x60, 0x18, 0x36, 0x10, 0xc4, 0xd7, + 0x84, 0xc1, 0x86, 0xa2, 0xf3, 0x00, 0x4e, 0x0c, 0xaa, 0xb0, 0xb1, 0xd9, 0xb5, 0xb9, 0xa4, 0x91, 0x95, 0xb9, 0xd1, 0x4d, 0x09, 0x82, 0x2a, 0x64, 0x78, 0x2e, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x53, 0x02, 0xa2, 0x09, 0x19, 0x9e, 0x8b, 0x5d, 0x18, 0x9b, 0x5d, 0x99, 0xdc, 0x94, 0xc0, 0xa8, 0x43, 0x86, 0xe7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xd7, 0xf4, 0x46, 0x56, 0xc6, 0x36, + 0x25, 0x40, 0xca, 0x90, 0xe1, 0xb9, 0xc8, 0x95, 0xcd, 0xbd, 0xd5, 0xc9, 0x8d, 0x95, 0xcd, 0x4d, 0x09, 0xb6, 0x3a, 0x64, 0x78, 0x2e, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x50, 0x6f, 0x69, 0x6e, 0x74, + 0x73, 0x53, 0x02, 0x31, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, + 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, + 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, + 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, + 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, + 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, + 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, + 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, + 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, + 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x30, 0x83, 0x81, + 0xc8, 0x01, 0x1f, 0xdc, 0x40, 0x1c, 0xe4, 0xa1, 0x1c, 0xc2, 0x61, 0x1d, 0xdc, 0x40, 0x1c, 0xe4, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x26, 0xd0, 0x0c, 0x97, + 0xef, 0x3c, 0x7e, 0x00, 0x44, 0x11, 0x42, 0x44, 0x56, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc5, 0x6d, 0xdb, 0x01, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, + 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0xe0, 0x23, 0xb7, 0x6d, 0x06, 0xd5, 0x70, 0xf9, 0xce, 0xe3, 0x07, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x17, 0xb7, 0x6d, 0x03, 0xdb, 0x70, + 0xf9, 0xce, 0xe3, 0x0b, 0x01, 0x55, 0x14, 0x44, 0x54, 0x3a, 0xc0, 0x50, 0x12, 0x06, 0x20, 0x60, 0x3e, 0x72, 0xdb, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, + 0x33, 0x2c, 0x84, 0x05, 0x88, 0xc1, 0x70, 0xf9, 0xce, 0xe3, 0x17, 0x0b, 0x30, 0x4d, 0x44, 0x43, 0x0c, 0xed, 0x11, 0x11, 0xc0, 0x20, 0x0e, 0x62, 0x03, 0x46, 0x0e, 0xf5, 0xf8, 0xc8, 0x6d, 0x1b, + 0x00, 0xc1, 0x00, 0x48, 0x03, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x13, 0x04, 0x47, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x34, 0x94, 0x5c, 0x21, + 0x06, 0x94, 0x62, 0x40, 0xd9, 0x15, 0x70, 0x40, 0x41, 0x14, 0x68, 0x40, 0x19, 0x50, 0x32, 0x02, 0x50, 0x1e, 0x25, 0x50, 0x04, 0x14, 0x8d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, + 0x82, 0x60, 0xf0, 0x78, 0xcd, 0x90, 0x65, 0xd3, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x9f, 0x63, 0x6c, 0x1b, 0x35, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x0f, 0x18, 0x3c, 0xc7, 0xb6, 0x55, + 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xf0, 0x84, 0x01, 0x54, 0x70, 0x9c, 0x35, 0x62, 0x50, 0x00, 0x20, 0x08, 0x06, 0x44, 0x19, 0x34, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x50, 0x85, 0x01, 0x54, + 0x04, 0x9e, 0x61, 0x18, 0x46, 0x32, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x19, 0x3c, 0x82, 0x37, 0x9a, 0x10, 0x00, 0xc3, 0x0d, 0x01, 0x18, 0x80, 0xc1, 0x2c, 0x83, 0x11, 0x04, 0xb3, 0x04, + 0xc2, 0x40, 0xc5, 0x90, 0x06, 0x84, 0x28, 0x04, 0x15, 0x44, 0x57, 0x81, 0x01, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x80, 0x99, 0xc1, 0x95, 0x04, 0x64, 0x30, 0x9a, 0x10, 0x00, 0x23, 0x06, 0x06, + 0x00, 0x82, 0x60, 0x60, 0xb4, 0x01, 0x35, 0x0d, 0x37, 0x04, 0x61, 0x40, 0x06, 0xb3, 0x0c, 0x03, 0x11, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xb4, 0x06, 0x5a, 0x43, 0x9c, 0xc1, 0x68, 0x42, + 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x68, 0xc2, 0x20, 0x8c, 0x26, 0x10, 0xc3, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x4c, 0x70, 0xf0, 0x4d, 0x07, 0x1b, 0x8c, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, + 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x18, 0x91, 0xc0, 0xc7, 0x88, 0x04, 0x3e, 0x46, 0x24, 0xf0, 0x31, 0x22, 0x81, 0xcf, 0x88, 0x41, 0x03, 0x80, 0x20, 0x18, 0x54, 0x78, 0x70, 0x06, 0x1c, 0x54, + 0x07, 0xc4, 0x20, 0x04, 0x60, 0x30, 0x4b, 0x40, 0x14, 0x65, 0x07, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x1f, 0x9c, 0x81, 0x66, 0x07, 0xa3, 0x09, 0x01, 0x30, 0xdc, 0x30, 0x04, 0x64, + 0x30, 0xcb, 0x20, 0x14, 0xc1, 0x2c, 0x81, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + + static const DWORD cs_code_dxbc[] = + { + 0x43425844, 0xa353d5fd, 0x80bdd567, 0xce3392ef, 0xd7210985, 0x00000001, 0x00000270, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000021c, 0x00050050, 0x00000087, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x040000a2, 0x00107000, 0x00000000, 0x00000004, + 0x040000a2, 0x00107000, 0x00000001, 0x00000010, 0x0400009e, 0x0011e000, 0x00000000, 0x00000010, + 0x0200005f, 0x00024000, 0x02000068, 0x00000003, 0x0400009b, 0x00000100, 0x00000001, 0x00000001, + 0x0b0000a8, 0x0011e0f2, 0x00000000, 0x0002400a, 0x00004001, 0x00000000, 0x00004002, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x05000036, 0x00100012, 0x00000000, 0x00004001, 0x00000000, + 0x01000030, 0x08000050, 0x00100022, 0x00000000, 0x0010000a, 0x00000000, 0x0020800a, 0x00000000, + 0x00000000, 0x03040003, 0x0010001a, 0x00000000, 0x0a00008c, 0x00100022, 0x00000000, 0x00004001, + 0x00000018, 0x00004001, 0x00000008, 0x0010000a, 0x00000000, 0x0002400a, 0x8b0000a7, 0x80002302, + 0x00199983, 0x00100042, 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00000000, 0x00107006, + 0x00000000, 0x010048be, 0x0700004f, 0x00100082, 0x00000000, 0x0010002a, 0x00000000, 0x00004001, + 0x00000100, 0x0304001f, 0x0010003a, 0x00000000, 0x8b0000a7, 0x80008302, 0x00199983, 0x001000f2, + 0x00000001, 0x0010001a, 0x00000000, 0x00004001, 0x00000000, 0x00107e46, 0x00000001, 0x8b0000a7, + 0x80008302, 0x00199983, 0x001000f2, 0x00000002, 0x0010002a, 0x00000000, 0x00004001, 0x00000000, + 0x0011ee46, 0x00000000, 0x07000000, 0x001000f2, 0x00000001, 0x00100e46, 0x00000001, 0x00100e46, + 0x00000002, 0x090000a8, 0x0011e0f2, 0x00000000, 0x0010002a, 0x00000000, 0x00004001, 0x00000000, + 0x00100e46, 0x00000001, 0x01000015, 0x0700001e, 0x00100012, 0x00000000, 0x0010000a, 0x00000000, + 0x00004001, 0x00000001, 0x01000016, 0x0100003e, + }; + + static const D3D12_SHADER_BYTECODE code_dxil = SHADER_BYTECODE(cs_code_dxil); + static const D3D12_SHADER_BYTECODE code_dxbc = SHADER_BYTECODE(cs_code_dxbc); + + if (!init_compute_test_context(&context)) + return; + + if (use_dxil && !context_supports_dxil(&context)) + { + skip("Context does not support DXIL.\n"); + destroy_test_context(&context); + return; + } + + memset(root_parameters, 0, sizeof(root_parameters)); + memset(&rs_desc, 0, sizeof(rs_desc)); + + rs_desc.pParameters = root_parameters; + rs_desc.NumParameters = ARRAY_SIZE(root_parameters); + root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_parameters[2].Descriptor.ShaderRegister = 1; + root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + root_parameters[3].Constants.Num32BitValues = 1; + + create_root_signature(context.device, &rs_desc, &context.root_signature); + context.pipeline_state = create_compute_pipeline_state(context.device, + context.root_signature, use_dxil ? code_dxil : code_dxbc); + + output = create_default_buffer(context.device, WORKGROUP_SIZE * sizeof(float) * 4, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + srand(1337); + + for (iter = 0; iter < NUM_ITERATIONS; iter++) + { + /* Ensures unique write indices per iteration. */ + addr_xor = rand() % WORKGROUP_SIZE; + + for (i = 0; i < WORKGROUP_SIZE; i++) + { + work_list_data[WORKGROUP_SIZE * iter + i] = (rand() % 4) == 0 ? (i ^ addr_xor) : ~0u; + + for (j = 0; j < 4; j++) + { + input_data[4 * (WORKGROUP_SIZE * iter + i) + j] = (float)(rand() % 16) + 1.0f; + if (work_list_data[WORKGROUP_SIZE * iter + i] < WORKGROUP_SIZE) + (&expected_result[i ^ addr_xor].x)[j] += input_data[4 * (WORKGROUP_SIZE * iter + i) + j]; + } + } + } + + work_list = create_upload_buffer(context.device, sizeof(work_list_data), work_list_data); + inputs = create_upload_buffer(context.device, sizeof(input_data), input_data); + + ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature); + ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state); + ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(output)); + ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1, ID3D12Resource_GetGPUVirtualAddress(work_list)); + ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 2, ID3D12Resource_GetGPUVirtualAddress(inputs)); + ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(context.list, 3, NUM_ITERATIONS, 0); + ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1); + + transition_resource_state(context.list, output, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + get_buffer_readback_with_command_list(output, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, context.queue, context.list); + + for (i = 0; i < 256; i++) + { + val = get_readback_vec4(&rb, i, 0); + ok(compare_vec4(val, &expected_result[i], 0), + "Failed output index %u. Expected {%f, %f, %f, %f}, got {%f, %f, %f, %f}.\n", i, + expected_result[i].x, expected_result[i].y, expected_result[i].z, expected_result[i].w, + val->x, val->y, val->z, val->w); + } + + ID3D12Resource_Release(output); + ID3D12Resource_Release(inputs); + ID3D12Resource_Release(work_list); + release_resource_readback(&rb); + destroy_test_context(&context); +} + +void test_memory_model_uav_coherence_thread_group_dxbc(void) +{ + test_memory_model_uav_coherent_thread_group(false); +} + +void test_memory_model_uav_coherence_thread_group_dxil(void) +{ + test_memory_model_uav_coherent_thread_group(true); +} diff --git a/tests/d3d12_tests.h b/tests/d3d12_tests.h index c2a00658..e6861191 100644 --- a/tests/d3d12_tests.h +++ b/tests/d3d12_tests.h @@ -322,3 +322,5 @@ decl_test(test_fence_wait_robustness_shared); decl_test(test_root_signature_empty_blob); decl_test(test_sparse_buffer_memory_lifetime); decl_test(test_rendering_no_attachments_layers); +decl_test(test_memory_model_uav_coherence_thread_group_dxbc); +decl_test(test_memory_model_uav_coherence_thread_group_dxil); |