Fix crash when editing shaders on Intel HD 4000.

In the Intel HD 4000 driver, a shader has to be deleted in the same context in which it was created. However, a rendering context cannot be used on different threads, so to keep multithreaded compilation working the solution is to use `GL_ARB_get_program_binary`: the binary generated for the shader is copied, and a new shader is created from that binary on the main context. This workaround is limited to the Intel HD 4000 on Windows. Reviewers: fclem Reviewed By: fclem Differential Revision: https://developer.blender.org/D5019
author: mano-wii <germano.costa@ig.com.br> 2019-06-05 19:06:11 +0300
committer: mano-wii <germano.costa@ig.com.br> 2019-06-05 19:50:58 +0300
commit: ce66b22c427defa3db498d2d69ee615b3c913c5f (patch)
tree: 5b0b947f7c0cd22d100af33c23d3ad60eee2133c /source/blender/gpu/intern
parent: dd81efa4a343ecbb7a208e06ea854ce673e20504 (diff)
5 files changed, 119 insertions, 38 deletions
diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c
index 12c35d76ac4..0c751808489 100644
--- a/source/blender/gpu/intern/gpu_codegen.c
+++ b/source/blender/gpu/intern/gpu_codegen.c
@@ -2105,17 +2105,17 @@ static int count_active_texture_sampler(GPUShader *shader, char *source)
   return sampler_len;
 }
 
-static bool gpu_pass_shader_validate(GPUPass *pass)
+static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 {
-  if (pass->shader == NULL) {
+  if (shader == NULL) {
     return false;
   }
 
   /* NOTE: The only drawback of this method is that it will count a sampler
    * used in the fragment shader and only declared (but not used) in the vertex
    * shader as used by both. But this corner case is not happening for now. */
-  int vert_samplers_len = count_active_texture_sampler(pass->shader, pass->vertexcode);
-  int frag_samplers_len = count_active_texture_sampler(pass->shader, pass->fragmentcode);
+  int vert_samplers_len = count_active_texture_sampler(shader, pass->vertexcode);
+  int frag_samplers_len = count_active_texture_sampler(shader, pass->fragmentcode);
 
   int total_samplers_len = vert_samplers_len + frag_samplers_len;
 
@@ -2126,7 +2126,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   }
 
   if (pass->geometrycode) {
-    int geom_samplers_len = count_active_texture_sampler(pass->shader, pass->geometrycode);
+    int geom_samplers_len = count_active_texture_sampler(shader, pass->geometrycode);
     total_samplers_len += geom_samplers_len;
     if (geom_samplers_len > GPU_max_textures_geom()) {
       return false;
@@ -2136,30 +2136,40 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   return (total_samplers_len <= GPU_max_textures());
 }
 
-void GPU_pass_compile(GPUPass *pass, const char *shname)
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
+  bool success = true;
   if (!pass->compiled) {
-    pass->shader = GPU_shader_create(
+    GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);
 
     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
-    if (!gpu_pass_shader_validate(pass)) {
-      if (pass->shader != NULL) {
+    if (!gpu_pass_shader_validate(pass, shader)) {
+      success = false;
+      if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
-        GPU_shader_free(pass->shader);
+        GPU_shader_free(shader);
+        shader = NULL;
       }
-      pass->shader = NULL;
     }
-    else if (!BLI_thread_is_main()) {
-      /* For some Intel drivers, you must use the program at least once
-       * in the rendering context that it is linked. */
-      glUseProgram(GPU_shader_get_program(pass->shader));
-      glUseProgram(0);
+    else if (!BLI_thread_is_main() && GPU_context_local_shaders_workaround()) {
+      pass->binary.content = GPU_shader_get_binary(
+          shader, &pass->binary.format, &pass->binary.len);
+      GPU_shader_free(shader);
+      shader = NULL;
     }
 
+    pass->shader = shader;
     pass->compiled = true;
   }
+  else if (pass->binary.content && BLI_thread_is_main()) {
+    pass->shader = GPU_shader_load_from_binary(
+        pass->binary.content, pass->binary.format, pass->binary.len, shname);
+    MEM_SAFE_FREE(pass->binary.content);
+  }
+  /* Succeed only if a shader, or a binary awaiting load on the main thread, exists. */
+  return success && (pass->shader != NULL || pass->binary.content != NULL);
 }
 
 void GPU_pass_release(GPUPass *pass)
@@ -2178,6 +2188,9 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_SAFE_FREE(pass->geometrycode);
   MEM_SAFE_FREE(pass->vertexcode);
   MEM_SAFE_FREE(pass->defines);
+  if (pass->binary.content) {
+    MEM_freeN(pass->binary.content);
+  }
   MEM_freeN(pass);
 }
 
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index d1bb3f26920..eac5df7e348 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -164,6 +164,11 @@ struct GPUPass {
   char *defines;
   uint refcount; /* Orphaned GPUPasses gets freed by the garbage collector. */
   uint32_t hash; /* Identity hash generated from all GLSL code. */
+  struct {
+    char *content;
+    int format;
+    int len;
+  } binary;
   bool compiled; /* Did we already tried to compile the attached GPUShader. */
 };
 
@@ -185,7 +190,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs
 void GPU_nodes_get_vertex_attrs(ListBase *nodes, struct GPUVertAttrLayers *attrs);
 void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);
 
-void GPU_pass_compile(GPUPass *pass, const char *shname);
+bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
 void GPU_pass_free_nodes(ListBase *nodes);
 
diff --git a/source/blender/gpu/intern/gpu_extensions.c b/source/blender/gpu/intern/gpu_extensions.c
index 8cd554cc9d5..58efe3dc5c4 100644
--- a/source/blender/gpu/intern/gpu_extensions.c
+++ b/source/blender/gpu/intern/gpu_extensions.c
@@ -89,6 +89,9 @@ static struct GPUGlobal {
   /* Crappy driver don't know how to map framebuffer slot to output vars...
    * We need to have no "holes" in the output buffer slots. */
   bool unused_fb_slot_workaround;
+  /* Some crappy Intel drivers don't work well with shaders created in different
+   * rendering contexts. */
+  bool context_local_shaders_workaround;
 } GG = {1, 0};
 
 static void gpu_detect_mip_render_workaround(void)
@@ -209,6 +212,11 @@ bool GPU_unused_fb_slot_workaround(void)
   return GG.unused_fb_slot_workaround;
 }
 
+bool GPU_context_local_shaders_workaround(void)
+{
+  return GG.context_local_shaders_workaround;
+}
+
 bool GPU_crappy_amd_driver(void)
 {
   /* Currently are the same drivers with the `unused_fb_slot` problem. */
@@ -347,6 +355,7 @@ void gpu_extensions_init(void)
     GG.mip_render_workaround = true;
     GG.depth_blitting_workaround = true;
     GG.unused_fb_slot_workaround = true;
+    GG.context_local_shaders_workaround = true;
   }
 
   /* df/dy calculation factors, those are dependent on driver */
@@ -354,19 +363,24 @@ void gpu_extensions_init(void)
     GG.dfdyfactors[0] = 1.0;
     GG.dfdyfactors[1] = -1.0;
   }
-  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN) &&
-           (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3186") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3165") ||
-            strstr(version, "3.1.0 - Build 9.17.10.3347") ||
-            strstr(version, "3.1.0 - Build 9.17.10.4101") ||
-            strstr(version, "3.3.0 - Build 8.15.10.2618"))) {
-    GG.dfdyfactors[0] = -1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
-  else {
-    GG.dfdyfactors[0] = 1.0;
-    GG.dfdyfactors[1] = 1.0;
+  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN)) {
+    if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3186") ||
+        strstr(version, "4.0.0 - Build 9.18.10.3165") ||
+        strstr(version, "3.1.0 - Build 9.17.10.3347") ||
+        strstr(version, "3.1.0 - Build 9.17.10.4101") ||
+        strstr(version, "3.3.0 - Build 8.15.10.2618")) {
+      GG.dfdyfactors[0] = -1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+    else {
+      GG.dfdyfactors[0] = 1.0;
+      GG.dfdyfactors[1] = 1.0;
+    }
+
+    if (strstr(renderer, "HD Graphics 4000")) {
+      GG.context_local_shaders_workaround = true;
+    }
   }
 
   GPU_invalid_tex_init();
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index f3df10af518..6f1b8d2d0c6 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -733,23 +733,25 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 
 void GPU_material_compile(GPUMaterial *mat)
 {
-  /* Only run once! */
+  bool sucess;
+
   BLI_assert(mat->status == GPU_MAT_QUEUED);
   BLI_assert(mat->pass);
 
   /* NOTE: The shader may have already been compiled here since we are
    * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
 #ifndef NDEBUG
-  GPU_pass_compile(mat->pass, mat->name);
+  sucess = GPU_pass_compile(mat->pass, mat->name);
 #else
-  GPU_pass_compile(mat->pass, __func__);
+  sucess = GPU_pass_compile(mat->pass, __func__);
 #endif
 
-  GPUShader *sh = GPU_pass_shader_get(mat->pass);
-
-  if (sh != NULL) {
-    mat->status = GPU_MAT_SUCCESS;
-    GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+  if (sucess) {
+    GPUShader *sh = GPU_pass_shader_get(mat->pass);
+    if (sh != NULL) {
+      mat->status = GPU_MAT_SUCCESS;
+      GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
+    }
   }
   else {
     mat->status = GPU_MAT_FAILED;
diff --git a/source/blender/gpu/intern/gpu_shader.c b/source/blender/gpu/intern/gpu_shader.c
index a7eb6726364..047cabda9f8 100644
--- a/source/blender/gpu/intern/gpu_shader.c
+++ b/source/blender/gpu/intern/gpu_shader.c
@@ -292,6 +292,36 @@ GPUShader *GPU_shader_create(const char *vertexcode,
       vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname);
 }
 
+/* Build a GPUShader from a program binary (as produced by GPU_shader_get_binary).
+ * Returns NULL, after deleting the GL program, when the link status is false. */
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname)
+{
+  /* Check the runtime GLEW flag; the GL_ARB_* token is a constant header define. */
+  BLI_assert(GLEW_ARB_get_program_binary);
+  int success = 0; /* Stays false if glGetProgramiv errors out without writing. */
+  int program = glCreateProgram();
+  glProgramBinary(program, binary_format, binary, binary_len);
+  glGetProgramiv(program, GL_LINK_STATUS, &success);
+
+  if (success) {
+    GPUShader *shader = MEM_callocN(sizeof(*shader), __func__);
+    shader->interface = GPU_shaderinterface_create(program);
+    shader->program = program;
+#ifndef NDEBUG
+    BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++);
+#else
+    UNUSED_VARS(shname);
+#endif
+    return shader;
+  }
+
+  glDeleteProgram(program);
+  return NULL;
+}
+
 #define DEBUG_SHADER_NONE ""
 #define DEBUG_SHADER_VERTEX "vert"
 #define DEBUG_SHADER_FRAGMENT "frag"
@@ -815,6 +845,23 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name)
   return attr ? attr->location : -1;
 }
 
+/* Copy the program binary of `shader` into a MEM_mallocN buffer and return it. */
+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len)
+{
+  BLI_assert(GLEW_ARB_get_program_binary);
+  char *r_binary;
+  int binary_len = 0;
+  glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len);
+  r_binary = MEM_mallocN(binary_len, __func__);
+  glGetProgramBinary(shader->program, binary_len, NULL, r_binary_format, r_binary);
+  /* The length output is optional; r_binary_format is handed straight to GL. */
+  if (r_binary_len) {
+    *r_binary_len = binary_len;
+  }
+
+  return r_binary;
+}
+
 static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = {
     [GPU_SHADER_TEXT] =
         {
author	mano-wii <germano.costa@ig.com.br>	2019-06-05 19:06:11 +0300
committer	mano-wii <germano.costa@ig.com.br>	2019-06-05 19:50:58 +0300
commit	ce66b22c427defa3db498d2d69ee615b3c913c5f (patch)
tree	5b0b947f7c0cd22d100af33c23d3ad60eee2133c /source/blender/gpu/intern
parent	dd81efa4a343ecbb7a208e06ea854ce673e20504 (diff)