
git.blender.org/blender.git
author    Clément Foucault <foucault.clem@gmail.com>	2018-06-07 12:58:15 +0300
committer Clément Foucault <foucault.clem@gmail.com>	2018-06-07 13:02:42 +0300
commit    cdbda1c3d8017b994366b101b34c79a5df58902f (patch)
tree      8b2f18917c0fb847629af39233e1c20e0739caaf /source/blender/gpu/intern/gpu_codegen.c
parent    366ac88d685d569e5eaa19ce3fc43764683b60fd (diff)
GPUPass: Refactor gpupass caching system to allow fast gpumaterial creation.
This is part of the work needed to refactor the material parameters update. The gpupass cache is now queried before adding the gpumaterial to the deferred compilation queue. GPUPasses are stored in a single linked list, grouped by their hashes. This is not the most efficient layout, but it can be improved upon later.
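
To make the grouping scheme concrete, here is a minimal, self-contained C sketch of a hash-grouped singly linked list. The names (Pass, cache_lookup, cache_insert) are hypothetical, not the Blender code itself: equal hashes stay adjacent in the list, so a lookup returns the first match and a collision is detected by checking whether the next node carries the same hash.

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-in for GPUPass: nodes with equal hashes are kept
 * adjacent in a single singly linked list. */
typedef struct Pass {
	struct Pass *next;
	uint32_t hash;
} Pass;

static Pass *cache = NULL;

/* Return the first pass with a matching hash. The caller detects a
 * collision via (p->next && p->next->hash == hash). */
static Pass *cache_lookup(uint32_t hash)
{
	for (Pass *p = cache; p; p = p->next) {
		if (p->hash == hash) {
			return p;
		}
	}
	return NULL;
}

/* Insert so that equal hashes stay grouped: either right after an
 * existing pass with the same hash, or at the head of the list. */
static void cache_insert(Pass *p)
{
	Pass *group = cache_lookup(p->hash);
	if (group) {
		p->next = group->next;
		group->next = p;
	}
	else {
		p->next = cache;
		cache = p;
	}
}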
Diffstat (limited to 'source/blender/gpu/intern/gpu_codegen.c')
-rw-r--r--  source/blender/gpu/intern/gpu_codegen.c | 191
1 file changed, 129 insertions(+), 62 deletions(-)
diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c
index 22665e2c0bf..91d0d9dbecc 100644
--- a/source/blender/gpu/intern/gpu_codegen.c
+++ b/source/blender/gpu/intern/gpu_codegen.c
@@ -40,10 +40,11 @@
#include "BLI_blenlib.h"
#include "BLI_hash_mm2a.h"
-#include "BLI_linklist.h"
+#include "BLI_link_utils.h"
#include "BLI_utildefines.h"
#include "BLI_dynstr.h"
#include "BLI_ghash.h"
+#include "BLI_threads.h"
#include "PIL_time.h"
@@ -75,39 +76,54 @@ static char *glsl_material_library = NULL;
* same for 2 different Materials. Unused GPUPasses are freed by garbage collection.
**/
-static LinkNode *pass_cache = NULL; /* GPUPass */
+/* Only use one linked list that contains the GPUPasses, grouped by hash. */
+static GPUPass *pass_cache = NULL;
+static SpinLock pass_cache_spin;
-static uint32_t gpu_pass_hash(const char *vert, const char *geom, const char *frag, const char *defs)
+static uint32_t gpu_pass_hash(const char *frag_gen, const char *defs)
{
BLI_HashMurmur2A hm2a;
BLI_hash_mm2a_init(&hm2a, 0);
- BLI_hash_mm2a_add(&hm2a, (unsigned char *)frag, strlen(frag));
- BLI_hash_mm2a_add(&hm2a, (unsigned char *)vert, strlen(vert));
+ BLI_hash_mm2a_add(&hm2a, (unsigned char *)frag_gen, strlen(frag_gen));
if (defs)
BLI_hash_mm2a_add(&hm2a, (unsigned char *)defs, strlen(defs));
- if (geom)
- BLI_hash_mm2a_add(&hm2a, (unsigned char *)geom, strlen(geom));
return BLI_hash_mm2a_end(&hm2a);
}
-/* Search by hash then by exact string match. */
-static GPUPass *gpu_pass_cache_lookup(
- const char *vert, const char *geom, const char *frag, const char *defs, uint32_t hash)
+/* Search by hash only. Return first pass with the same hash.
+ * There is hash collision if (pass->next && pass->next->hash == hash) */
+static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
{
- for (LinkNode *ln = pass_cache; ln; ln = ln->next) {
- GPUPass *pass = (GPUPass *)ln->link;
+ BLI_spin_lock(&pass_cache_spin);
+ /* Could be optimized with a Lookup table. */
+ for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
if (pass->hash == hash) {
- /* Note: Could be made faster if that becomes a real bottleneck. */
- if ((defs != NULL) && (strcmp(pass->defines, defs) != 0)) { /* Pass */ }
- else if ((geom != NULL) && (strcmp(pass->geometrycode, geom) != 0)) { /* Pass */ }
- else if ((strcmp(pass->fragmentcode, frag) == 0) &&
- (strcmp(pass->vertexcode, vert) == 0))
- {
- return pass;
- }
+ BLI_spin_unlock(&pass_cache_spin);
+ return pass;
}
}
+ BLI_spin_unlock(&pass_cache_spin);
+ return NULL;
+}
+
+/* Check all possible passes with the same hash. */
+static GPUPass *gpu_pass_cache_resolve_collision(
+ GPUPass *pass, const char *vert, const char *geom, const char *frag, const char *defs, uint32_t hash)
+{
+ BLI_spin_lock(&pass_cache_spin);
+ /* Collision, need to strcmp the whole shader. */
+ for (; pass && (pass->hash == hash); pass = pass->next) {
+ if ((defs != NULL) && (strcmp(pass->defines, defs) != 0)) { /* Pass */ }
+ else if ((geom != NULL) && (strcmp(pass->geometrycode, geom) != 0)) { /* Pass */ }
+ else if ((strcmp(pass->fragmentcode, frag) == 0) &&
+ (strcmp(pass->vertexcode, vert) == 0))
+ {
+ BLI_spin_unlock(&pass_cache_spin);
+ return pass;
+ }
+ }
+ BLI_spin_unlock(&pass_cache_spin);
return NULL;
}
@@ -1099,12 +1115,12 @@ void GPU_code_generate_glsl_lib(void)
/* GPU pass binding/unbinding */
-GPUShader *GPU_pass_shader(GPUPass *pass)
+GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
-static void gpu_nodes_extract_dynamic_inputs(GPUShader *shader, ListBase *inputs, ListBase *nodes)
+void GPU_nodes_extract_dynamic_inputs(GPUShader *shader, ListBase *inputs, ListBase *nodes)
{
GPUNode *node;
GPUInput *next, *input;
@@ -1896,16 +1912,25 @@ void GPU_nodes_prune(ListBase *nodes, GPUNodeLink *outlink)
}
}
+static bool gpu_pass_is_valid(GPUPass *pass)
+{
+ /* Shader is not null if compilation is successful,
+ * refcount is positive if compilation has not yet been done. */
+ return (pass->shader != NULL || pass->refcount > 0);
+}
+
GPUPass *GPU_generate_pass_new(
GPUMaterial *material,
- GPUNodeLink *frag_outlink, struct GPUVertexAttribs *attribs,
- ListBase *nodes, ListBase *inputs,
- const char *vert_code, const char *geom_code,
- const char *frag_lib, const char *defines)
+ GPUNodeLink *frag_outlink,
+ struct GPUVertexAttribs *attribs,
+ ListBase *nodes,
+ const char *vert_code,
+ const char *geom_code,
+ const char *frag_lib,
+ const char *defines)
{
char *vertexcode, *geometrycode, *fragmentcode;
- GPUShader *shader;
- GPUPass *pass;
+ GPUPass *pass = NULL, *pass_hash = NULL;
/* prune unused nodes */
GPU_nodes_prune(nodes, frag_outlink);
@@ -1914,6 +1939,24 @@ GPUPass *GPU_generate_pass_new(
/* generate code */
char *fragmentgen = code_generate_fragment(material, nodes, frag_outlink->output);
+
+ /* Cache lookup: Reuse shaders already compiled */
+ uint32_t hash = gpu_pass_hash(fragmentgen, defines);
+ pass_hash = gpu_pass_cache_lookup(hash);
+
+ if (pass_hash && (pass_hash->next == NULL || pass_hash->next->hash != hash)) {
+ /* No collision, just return the pass. */
+ MEM_freeN(fragmentgen);
+ if (!gpu_pass_is_valid(pass_hash)) {
+ /* Shader has already been created but failed to compile. */
+ return NULL;
+ }
+ pass_hash->refcount += 1;
+ return pass_hash;
+ }
+
+ /* Either the shader is not compiled or there is a hash collision...
+ * continue generating the shader strings. */
char *tmp = BLI_strdupcat(frag_lib, glsl_material_library);
vertexcode = code_generate_vertex(nodes, vert_code, (geom_code != NULL));
@@ -1923,51 +1966,62 @@ GPUPass *GPU_generate_pass_new(
MEM_freeN(fragmentgen);
MEM_freeN(tmp);
- /* Cache lookup: Reuse shaders already compiled */
- uint32_t hash = gpu_pass_hash(vertexcode, geometrycode, fragmentcode, defines);
- pass = gpu_pass_cache_lookup(vertexcode, geometrycode, fragmentcode, defines, hash);
+ if (pass_hash) {
+ /* Cache lookup: Reuse shaders already compiled */
+ pass = gpu_pass_cache_resolve_collision(pass_hash, vertexcode, geometrycode, fragmentcode, defines, hash);
+ }
+
if (pass) {
/* Cache hit. Reuse the same GPUPass and GPUShader. */
- shader = pass->shader;
- pass->refcount += 1;
+ if (!gpu_pass_is_valid(pass)) {
+ /* Shader has already been created but failed to compile. */
+ return NULL;
+ }
MEM_SAFE_FREE(vertexcode);
MEM_SAFE_FREE(fragmentcode);
MEM_SAFE_FREE(geometrycode);
+
+ pass->refcount += 1;
}
else {
- /* Cache miss. (Re)compile the shader. */
- shader = GPU_shader_create(vertexcode,
- fragmentcode,
- geometrycode,
- NULL,
- defines);
-
/* We still create a pass even if shader compilation
* fails to avoid trying to compile again and again. */
pass = MEM_callocN(sizeof(GPUPass), "GPUPass");
- pass->shader = shader;
+ pass->shader = NULL;
pass->refcount = 1;
pass->hash = hash;
pass->vertexcode = vertexcode;
pass->fragmentcode = fragmentcode;
pass->geometrycode = geometrycode;
- pass->libcode = glsl_material_library;
pass->defines = (defines) ? BLI_strdup(defines) : NULL;
+ pass->compiled = false;
- BLI_linklist_prepend(&pass_cache, pass);
+ BLI_spin_lock(&pass_cache_spin);
+ if (pass_hash != NULL) {
+ /* Add after the first pass having the same hash. */
+ pass->next = pass_hash->next;
+ pass_hash->next = pass;
+ }
+ else {
+ /* No other pass has the same hash, just prepend to the list. */
+ BLI_LINKS_PREPEND(pass_cache, pass);
+ }
+ BLI_spin_unlock(&pass_cache_spin);
}
- /* did compilation failed ? */
- if (!shader) {
- gpu_nodes_free(nodes);
- /* Pass will not be used. Don't increment refcount. */
- pass->refcount--;
- return NULL;
- }
- else {
- gpu_nodes_extract_dynamic_inputs(shader, inputs, nodes);
- return pass;
+ return pass;
+}
+
+void GPU_pass_compile(GPUPass *pass)
+{
+ if (!pass->compiled) {
+ pass->shader = GPU_shader_create(pass->vertexcode,
+ pass->fragmentcode,
+ pass->geometrycode,
+ NULL,
+ pass->defines);
+ pass->compiled = true;
}
}
@@ -2006,23 +2060,36 @@ void GPU_pass_cache_garbage_collect(void)
lasttime = ctime;
- LinkNode *next, **prev_ln = &pass_cache;
- for (LinkNode *ln = pass_cache; ln; ln = next) {
- GPUPass *pass = (GPUPass *)ln->link;
- next = ln->next;
+ BLI_spin_lock(&pass_cache_spin);
+ GPUPass *next, **prev_pass = &pass_cache;
+ for (GPUPass *pass = pass_cache; pass; pass = next) {
+ next = pass->next;
if (pass->refcount == 0) {
- gpu_pass_free(pass);
/* Remove from list */
- MEM_freeN(ln);
- *prev_ln = next;
+ *prev_pass = next;
+ gpu_pass_free(pass);
}
else {
- prev_ln = &ln->next;
+ prev_pass = &pass->next;
}
}
+ BLI_spin_unlock(&pass_cache_spin);
+}
+
+void GPU_pass_cache_init(void)
+{
+ BLI_spin_init(&pass_cache_spin);
}
void GPU_pass_cache_free(void)
{
- BLI_linklist_free(pass_cache, (LinkNodeFreeFP)gpu_pass_free);
+ BLI_spin_lock(&pass_cache_spin);
+ while (pass_cache) {
+ GPUPass *next = pass_cache->next;
+ gpu_pass_free(pass_cache);
+ pass_cache = next;
+ }
+ BLI_spin_unlock(&pass_cache_spin);
+
+ BLI_spin_end(&pass_cache_spin);
}