diff options
author | Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com> | 2018-01-09 14:07:07 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com> | 2018-01-09 14:07:07 +0300 |
commit | 9c72aa00c9d1e65add75bc218811a694eccb8806 (patch) | |
tree | aa46dd471e2d4416a6f92e324312d27e9a8a0d8d | |
parent | 23f0abf1122c0dc3dd107d3f5587987b3f4d06df (diff) |
Overhaul barrier handling in GLSL.
-rw-r--r-- | reference/opt/shaders/comp/shared.comp | 4 | ||||
-rw-r--r-- | reference/shaders/comp/shared.comp | 4 | ||||
-rw-r--r-- | spirv_glsl.cpp | 124 | ||||
-rw-r--r-- | spirv_glsl.hpp | 1 | ||||
-rw-r--r-- | spirv_hlsl.cpp | 10 | ||||
-rw-r--r-- | spirv_hlsl.hpp | 1 |
6 files changed, 107 insertions, 37 deletions
diff --git a/reference/opt/shaders/comp/shared.comp b/reference/opt/shaders/comp/shared.comp index e0bc0554..694a0793 100644 --- a/reference/opt/shaders/comp/shared.comp +++ b/reference/opt/shaders/comp/shared.comp @@ -16,8 +16,8 @@ shared float sShared[4]; void main() { sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x]; - memoryBarrier(); - memoryBarrier(); + memoryBarrierShared(); + memoryBarrierShared(); barrier(); _44.out_data[gl_GlobalInvocationID.x] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp index 08215c4f..d287991a 100644 --- a/reference/shaders/comp/shared.comp +++ b/reference/shaders/comp/shared.comp @@ -18,8 +18,8 @@ void main() uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; sShared[gl_LocalInvocationIndex] = idata; - memoryBarrier(); - memoryBarrier(); + memoryBarrierShared(); + memoryBarrierShared(); barrier(); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 6529760c..0283769f 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -25,6 +25,16 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static uint32_t mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | + MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | + MemorySemanticsSubgroupMemoryMask); +} + static bool packing_is_vec4_padded(BufferPackingStandard packing) { switch (packing) @@ -6629,37 +6639,97 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Compute case OpControlBarrier: + case OpMemoryBarrier: { - // Ignore execution and memory scope. 
- if (get_entry_point().model == ExecutionModelGLCompute) + if (get_entry_point().model == ExecutionModelTessellationControl) { - uint32_t mem = get<SPIRConstant>(ops[2]).scalar(); + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } - // We cannot forward any loads beyond the memory barrier. - if (mem) - flush_all_active_variables(); + uint32_t memory; + uint32_t semantics; - if (mem == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (mem) - statement("memoryBarrier();"); + if (opcode == OpMemoryBarrier) + { + memory = get<SPIRConstant>(ops[0]).scalar(); + semantics = get<SPIRConstant>(ops[1]).scalar(); + } + else + { + memory = get<SPIRConstant>(ops[1]).scalar(); + semantics = get<SPIRConstant>(ops[2]).scalar(); } - statement("barrier();"); - break; - } - case OpMemoryBarrier: - { - uint32_t mem = get<SPIRConstant>(ops[1]).scalar(); + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar(); + uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar(); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + // If we have the same memory scope, and all memory types are covered, we're good. + if (next_memory == memory && (semantics & next_semantics) == semantics) + break; + } + } - // We cannot forward any loads beyond the memory barrier. 
- if (mem) + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) flush_all_active_variables(); - if (mem == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (mem) - statement("memoryBarrier();"); + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else + { + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask | + MemorySemanticsAtomicCounterMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + if (semantics & MemorySemanticsAtomicCounterMemoryMask) + statement("memoryBarrierAtomicCounter();"); + } + } + + if (opcode == OpControlBarrier) + statement("barrier();"); break; } @@ -8350,3 +8420,13 @@ void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_ } } } + +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. 
+ auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return &current_emitting_block->ops[offset + 1]; + else + return nullptr; +} diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 98682d08..d5a04ddb 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -509,6 +509,7 @@ protected: static std::string sanitize_underscores(const std::string &str); bool can_use_io_location(spv::StorageClass storage); + const Instruction *get_next_instruction_in_block(const Instruction &instr); private: void init() diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index cdacff78..54e39c2d 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -3017,16 +3017,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) register_read(ops[1], ops[2], should_forward(ops[2])); } -const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr) -{ - // FIXME: This is kind of hacky. There should be a cleaner way. - uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data()); - if ((offset + 1) < current_emitting_block->ops.size()) - return &current_emitting_block->ops[offset + 1]; - else - return nullptr; -} - void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 95c0c360..7144b5cd 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -107,7 +107,6 @@ private: void write_access_chain(const SPIRAccessChain &chain, uint32_t value); void emit_store(const Instruction &instruction); void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); - const Instruction *get_next_instruction_in_block(const Instruction &instr); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier) override; |