diff options
Diffstat (limited to 'source/blender/gpu/intern/gpu_material.c')
-rw-r--r-- | source/blender/gpu/intern/gpu_material.c | 302 |
1 file changed, 281 insertions, 21 deletions
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index a4842ef0e43..6d0779797b0 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -34,6 +34,8 @@ #include "DRW_engine.h" +#include "PIL_time.h" + #include "gpu_codegen.h" #include "gpu_node_graph.h" @@ -41,16 +43,54 @@ /* Structs */ #define MAX_COLOR_BAND 128 +#define MAX_GPU_SKIES 8 + +/** Whether the optimized variant of the #GPUPass should be created asynchronously. + * Usage of this depends on whether there are possible threading challenges of doing so. + * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader + * compilation, though this option exists in case any potential scenarios for material graph + * optimization cause a slow down on the main thread. + * + * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously, + * this flag controls whether shader node graph source serialization happens on the compilation + * worker thread. */ +#define ASYNC_OPTIMIZED_PASS_CREATION 0 typedef struct GPUColorBandBuilder { float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4]; int current_layer; } GPUColorBandBuilder; +typedef struct GPUSkyBuilder { + float pixels[MAX_GPU_SKIES][GPU_SKY_WIDTH * GPU_SKY_HEIGHT][4]; + int current_layer; +} GPUSkyBuilder; + struct GPUMaterial { - /* Contains GPUShader and source code for deferred compilation. - * Can be shared between similar material (i.e: sharing same nodetree topology). */ + /* Contains #GPUShader and source code for deferred compilation. + * Can be shared between similar material (i.e: sharing same node-tree topology). */ GPUPass *pass; + /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance. + * This shader variant bakes dynamic uniform data as constant. This variant will not use + * the ubo, and instead bake constants directly into the shader source. 
*/ + GPUPass *optimized_pass; + /* Optimization status. + * We also use this status to determine whether this material should be considered for + * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations. + * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization. + * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit + * performance to do so, based on the heuristic. + */ + eGPUMaterialOptimizationStatus optimization_status; + double creation_time; +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + struct DeferredOptimizePass { + GPUCodegenCallbackFn callback; + void *thunk; + } DeferredOptimizePass; + struct DeferredOptimizePass optimize_pass_info; +#endif + /** UBOs for this material parameters. */ GPUUniformBuf *ubo; /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */ @@ -73,6 +113,10 @@ struct GPUMaterial { GPUTexture *coba_tex; /** Builder for coba_tex. */ GPUColorBandBuilder *coba_builder; + /** 2D Texture array containing all sky textures. */ + GPUTexture *sky_tex; + /** Builder for sky_tex. */ + GPUSkyBuilder *sky_builder; /* Low level node graph(s). Also contains resources needed by the material. */ GPUNodeGraph graph; @@ -91,11 +135,42 @@ struct GPUMaterial { #ifndef NDEBUG char name[64]; +#else + char name[16]; #endif }; /* Functions */ +GPUTexture **gpu_material_sky_texture_layer_set( + GPUMaterial *mat, int width, int height, const float *pixels, float *row) +{ + /* In order to put all sky textures into one 2D array texture, + * we need them to be the same size. 
*/ + BLI_assert(width == GPU_SKY_WIDTH); + BLI_assert(height == GPU_SKY_HEIGHT); + UNUSED_VARS_NDEBUG(width, height); + + if (mat->sky_builder == NULL) { + mat->sky_builder = MEM_mallocN(sizeof(GPUSkyBuilder), "GPUSkyBuilder"); + mat->sky_builder->current_layer = 0; + } + + int layer = mat->sky_builder->current_layer; + *row = (float)layer; + + if (*row == MAX_GPU_SKIES) { + printf("Too many sky textures in shader!\n"); + } + else { + float *dst = (float *)mat->sky_builder->pixels[layer]; + memcpy(dst, pixels, sizeof(float) * GPU_SKY_WIDTH * GPU_SKY_HEIGHT * 4); + mat->sky_builder->current_layer += 1; + } + + return &mat->sky_tex; +} + GPUTexture **gpu_material_ramp_texture_row_set(GPUMaterial *mat, int size, float *pixels, @@ -141,6 +216,24 @@ static void gpu_material_ramp_texture_build(GPUMaterial *mat) mat->coba_builder = NULL; } +static void gpu_material_sky_texture_build(GPUMaterial *mat) +{ + if (mat->sky_builder == NULL) { + return; + } + + mat->sky_tex = GPU_texture_create_2d_array("mat_sky", + GPU_SKY_WIDTH, + GPU_SKY_HEIGHT, + mat->sky_builder->current_layer, + 1, + GPU_RGBA32F, + (float *)mat->sky_builder->pixels); + + MEM_freeN(mat->sky_builder); + mat->sky_builder = NULL; +} + void GPU_material_free_single(GPUMaterial *material) { bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0; @@ -150,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material) gpu_node_graph_free(&material->graph); + if (material->optimized_pass != NULL) { + GPU_pass_release(material->optimized_pass); + } if (material->pass != NULL) { GPU_pass_release(material->pass); } @@ -159,6 +255,9 @@ void GPU_material_free_single(GPUMaterial *material) if (material->coba_tex != NULL) { GPU_texture_free(material->coba_tex); } + if (material->sky_tex != NULL) { + GPU_texture_free(material->sky_tex); + } if (material->sss_profile != NULL) { GPU_uniformbuf_free(material->sss_profile); } @@ -185,12 +284,20 @@ Scene *GPU_material_scene(GPUMaterial *material) GPUPass 
*GPU_material_get_pass(GPUMaterial *material) { - return material->pass; + return (material->optimized_pass) ? material->optimized_pass : material->pass; } GPUShader *GPU_material_get_shader(GPUMaterial *material) { - return material->pass ? GPU_pass_shader_get(material->pass) : NULL; + /* First attempt to select optimized shader. If not available, fetch original. */ + GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) : + NULL; + return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL); +} + +const char *GPU_material_get_name(GPUMaterial *material) +{ + return material->name; } Material *GPU_material_get_material(GPUMaterial *material) @@ -205,12 +312,7 @@ GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) { -#ifndef NDEBUG - const char *name = material->name; -#else - const char *name = "Material"; -#endif - material->ubo = GPU_uniformbuf_create_from_list(inputs, name); + material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name); } ListBase GPU_material_attributes(GPUMaterial *material) @@ -223,9 +325,9 @@ ListBase GPU_material_textures(GPUMaterial *material) return material->graph.textures; } -GPUUniformAttrList *GPU_material_uniform_attributes(GPUMaterial *material) +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material) { - GPUUniformAttrList *attrs = &material->graph.uniform_attrs; + const GPUUniformAttrList *attrs = &material->graph.uniform_attrs; return attrs->count > 0 ? 
attrs : NULL; } @@ -603,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status) mat->status = status; } +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) +{ + return mat->optimization_status; +} + +void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status) +{ + mat->optimization_status = status; + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { + /* Reset creation timer to delay optimization pass. */ + mat->creation_time = PIL_check_seconds_timer(); + } +} + +bool GPU_material_optimization_ready(GPUMaterial *mat) +{ + /* Timer threshold before optimizations will be queued. + * When materials are frequently being modified, optimization + * can incur CPU overhead from excessive compilation. */ + const double optimization_time_threshold_s = 5.0; + return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s); +} + /* Code generation */ bool GPU_material_has_surface_output(GPUMaterial *mat) @@ -668,15 +793,12 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, mat->uuid = shader_uuid; mat->flag = GPU_MATFLAG_UPDATED; mat->status = GPU_MAT_CREATED; + mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; mat->is_volume_shader = is_volume_shader; mat->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); mat->refcount = 1; -#ifndef NDEBUG STRNCPY(mat->name, name); -#else - UNUSED_VARS(name); -#endif if (is_lookdev) { mat->flag |= GPU_MATFLAG_LOOKDEV_HACK; } @@ -686,10 +808,11 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, ntreeGPUMaterialNodes(localtree, mat); gpu_material_ramp_texture_build(mat); + gpu_material_sky_texture_build(mat); { /* Create source code and search pass cache for an already compiled version. 
*/ - mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk); + mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false); if (mat->pass == NULL) { /* We had a cache hit and the shader has already failed to compile. */ @@ -697,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, gpu_node_graph_free(&mat->graph); } else { + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. */ + if (GPU_pass_should_optimize(mat->pass)) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + } + GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { /* We had a cache hit and the shader is already compiled. */ mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + gpu_node_graph_free_nodes(&mat->graph); + } + } + + /* Generate optimized pass. */ + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + mat->optimized_pass = NULL; + mat->optimize_pass_info.callback = callback; + mat->optimize_pass_info.thunk = thunk; +#else + mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true); + if (mat->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. 
*/ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif } } } @@ -752,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat) GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph nodes if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&mat->graph); + } } else { mat->status = GPU_MAT_FAILED; @@ -766,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat) } } +void GPU_material_optimize(GPUMaterial *mat) +{ + /* If shader is flagged for skipping optimization or has already been successfully + * optimized, skip. */ + if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) { + return; + } + + /* If original shader has not been fully compiled, we are not + * ready to perform optimization. */ + if (mat->status != GPU_MAT_SUCCESS) { + /* Reset optimization status. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + return; + } + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + /* If the optimized pass is not valid, first generate optimized pass. + * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when + * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more + * optimal, as these do not benefit from caching, due to baked constants. However, this could + * possibly be cause for concern for certain cases. */ + if (!mat->optimized_pass) { + mat->optimized_pass = GPU_generate_pass( + mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true); + BLI_assert(mat->optimized_pass); + } +#else + if (!mat->optimized_pass) { + /* Optimized pass has not been created, skip future optimization attempts. 
*/ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + return; + } +#endif + + bool success; + /* NOTE: The shader may have already been compiled here since we are + * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ +#ifndef NDEBUG + success = GPU_pass_compile(mat->optimized_pass, mat->name); +#else + success = GPU_pass_compile(mat->optimized_pass, __func__); +#endif + + if (success) { + GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass); + if (sh != NULL) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + else { + /* Optimized pass failed to compile. Disable any future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + } + else { + /* Optimization pass generation failed. Disable future attempts to optimize. */ + GPU_pass_release(mat->optimized_pass); + mat->optimized_pass = NULL; + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + + /* Release node graph as no longer needed. */ + gpu_node_graph_free_nodes(&mat->graph); +} + void GPU_materials_free(Main *bmain) { LISTBASE_FOREACH (Material *, ma, &bmain->materials) { @@ -789,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi material->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); material->refcount = 1; + material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; + material->optimized_pass = NULL; /* Construct the material graph by adding and linking the necessary GPU material nodes. */ construct_function_cb(thunk, material); @@ -797,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi gpu_material_ramp_texture_build(material); /* Lookup an existing pass in the cache or generate a new one. 
*/ - material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk); + material->pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, false); + material->optimized_pass = NULL; /* The pass already exists in the pass cache but its shader already failed to compile. */ if (material->pass == NULL) { @@ -806,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi return material; } + /* Generate optimized pass. */ + if (GPU_pass_should_optimize(material->pass)) { + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + material->optimized_pass = NULL; + material->optimize_pass_info.callback = generate_code_function_cb; + material->optimize_pass_info.thunk = thunk; + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY); +#else + material->optimized_pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, true); + + if (material->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&material->graph); + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. */ + gpu_node_graph_free_nodes(&material->graph); + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif + } + /* The pass already exists in the pass cache and its shader is already compiled. */ GPUShader *shader = GPU_pass_shader_get(material->pass); if (shader != NULL) { material->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&material->graph); + if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&material->graph); + } return material; } |