diff options
author | Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com> | 2018-01-09 14:07:07 +0300 |
---|---|---|
committer | Hans-Kristian Arntzen <hans-kristian.arntzen@arm.com> | 2018-01-09 14:07:07 +0300 |
commit | 9c72aa00c9d1e65add75bc218811a694eccb8806 (patch) | |
tree | aa46dd471e2d4416a6f92e324312d27e9a8a0d8d | |
parent | 23f0abf1122c0dc3dd107d3f5587987b3f4d06df (diff) |
Overhaul barrier handling in GLSL.
-rw-r--r-- | reference/opt/shaders/comp/shared.comp | 4 | ||||
-rw-r--r-- | reference/shaders/comp/shared.comp | 4 | ||||
-rw-r--r-- | spirv_glsl.cpp | 124 | ||||
-rw-r--r-- | spirv_glsl.hpp | 1 | ||||
-rw-r--r-- | spirv_hlsl.cpp | 10 | ||||
-rw-r--r-- | spirv_hlsl.hpp | 1 |
6 files changed, 107 insertions, 37 deletions
diff --git a/reference/opt/shaders/comp/shared.comp b/reference/opt/shaders/comp/shared.comp index e0bc0554..694a0793 100644 --- a/reference/opt/shaders/comp/shared.comp +++ b/reference/opt/shaders/comp/shared.comp @@ -16,8 +16,8 @@ shared float sShared[4]; void main() { sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x]; - memoryBarrier(); - memoryBarrier(); + memoryBarrierShared(); + memoryBarrierShared(); barrier(); _44.out_data[gl_GlobalInvocationID.x] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp index 08215c4f..d287991a 100644 --- a/reference/shaders/comp/shared.comp +++ b/reference/shaders/comp/shared.comp @@ -18,8 +18,8 @@ void main() uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; sShared[gl_LocalInvocationIndex] = idata; - memoryBarrier(); - memoryBarrier(); + memoryBarrierShared(); + memoryBarrierShared(); barrier(); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 6529760c..0283769f 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -25,6 +25,16 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static uint32_t mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | + MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | + MemorySemanticsSubgroupMemoryMask); +} + static bool packing_is_vec4_padded(BufferPackingStandard packing) { switch (packing) @@ -6629,37 +6639,97 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Compute case OpControlBarrier: + case OpMemoryBarrier: { - // Ignore execution and memory scope. 
- if (get_entry_point().model == ExecutionModelGLCompute) + if (get_entry_point().model == ExecutionModelTessellationControl) { - uint32_t mem = get<SPIRConstant>(ops[2]).scalar(); + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } - // We cannot forward any loads beyond the memory barrier. - if (mem) - flush_all_active_variables(); + uint32_t memory; + uint32_t semantics; - if (mem == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (mem) - statement("memoryBarrier();"); + if (opcode == OpMemoryBarrier) + { + memory = get<SPIRConstant>(ops[0]).scalar(); + semantics = get<SPIRConstant>(ops[1]).scalar(); + } + else + { + memory = get<SPIRConstant>(ops[1]).scalar(); + semantics = get<SPIRConstant>(ops[2]).scalar(); } - statement("barrier();"); - break; - } - case OpMemoryBarrier: - { - uint32_t mem = get<SPIRConstant>(ops[1]).scalar(); + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar(); + uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar(); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + // If we have the same memory scope, and all memory types are covered, we're good. + if (next_memory == memory && (semantics & next_semantics) == semantics) + break; + } + } - // We cannot forward any loads beyond the memory barrier. 
- if (mem) + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) flush_all_active_variables(); - if (mem == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (mem) - statement("memoryBarrier();"); + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else + { + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask | + MemorySemanticsAtomicCounterMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + if (semantics & MemorySemanticsAtomicCounterMemoryMask) + statement("memoryBarrierAtomicCounter();"); + } + } + + if (opcode == OpControlBarrier) + statement("barrier();"); break; } @@ -8350,3 +8420,13 @@ void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_ } } } + +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. 
+ auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return &current_emitting_block->ops[offset + 1]; + else + return nullptr; +} diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 98682d08..d5a04ddb 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -509,6 +509,7 @@ protected: static std::string sanitize_underscores(const std::string &str); bool can_use_io_location(spv::StorageClass storage); + const Instruction *get_next_instruction_in_block(const Instruction &instr); private: void init() diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index cdacff78..54e39c2d 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -3017,16 +3017,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) register_read(ops[1], ops[2], should_forward(ops[2])); } -const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr) -{ - // FIXME: This is kind of hacky. There should be a cleaner way. - uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data()); - if ((offset + 1) < current_emitting_block->ops.size()) - return &current_emitting_block->ops[offset + 1]; - else - return nullptr; -} - void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 95c0c360..7144b5cd 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -107,7 +107,6 @@ private: void write_access_chain(const SPIRAccessChain &chain, uint32_t value); void emit_store(const Instruction &instruction); void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); - const Instruction *get_next_instruction_in_block(const Instruction &instr); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier) override; |