author | Clément Foucault <foucault.clem@gmail.com> | 2022-09-27 01:00:21 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2022-09-27 01:00:28 +0300 |
commit | cd1caa5853e4a4e87a13029b49976c61c780a697 (patch) | |
tree | 6dc9f826abc32bf35177b7bf8c1dbdc5e9e59457 /source/blender | |
parent | 66a863e30197990e20a1e9a45923af6331935b21 (diff) |
GPU: Revert part of D16017 that was accidentally committed
This code slipped through the final review step, most likely because of a faulty merge.
Fixes T101372 Regression: World shader setup crashes Blender in rendered view
Regression introduced by rB697b447c2069bbbbaa9929aab0ea1f66ef8bf4d0
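For reviewers auditing call sites: with this revert, `GPU_generate_pass()` loses its trailing `optimize_graph` flag, and the optimization entry points (`DRW_shader_queue_optimize_material()`, `GPU_material_optimize()`, `gpu_node_graph_optimize()`) are removed entirely. A condensed before/after sketch of a typical caller, drawn from the hunks below (illustrative only, not an exhaustive list of affected call sites):

    /* Before: create the base pass, then queue the optimized variant. */
    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
    DRW_shader_queue_optimize_material(mat);

    /* After this revert: a single pass, no optimization queue. */
    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);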
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/draw/DRW_engine.h | 1 |
-rw-r--r-- | source/blender/draw/engines/eevee_next/eevee_shader.cc | 2 |
-rw-r--r-- | source/blender/draw/intern/DRW_render.h | 1 |
-rw-r--r-- | source/blender/draw/intern/draw_manager_shader.c | 190 |
-rw-r--r-- | source/blender/gpu/GPU_material.h | 8 |
-rw-r--r-- | source/blender/gpu/intern/gpu_codegen.cc | 104 |
-rw-r--r-- | source/blender/gpu/intern/gpu_codegen.h | 4 |
-rw-r--r-- | source/blender/gpu/intern/gpu_material.c | 215 |
-rw-r--r-- | source/blender/gpu/intern/gpu_node_graph.c | 19 |
-rw-r--r-- | source/blender/gpu/intern/gpu_node_graph.h | 14 |
10 files changed, 61 insertions, 497 deletions
diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h
index 04e3bddfb6c..dec7a22aadb 100644
--- a/source/blender/draw/DRW_engine.h
+++ b/source/blender/draw/DRW_engine.h
@@ -201,7 +201,6 @@ void DRW_gpu_render_context_enable(void *re_gpu_context);
 void DRW_gpu_render_context_disable(void *re_gpu_context);
 
 void DRW_deferred_shader_remove(struct GPUMaterial *mat);
-void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat);
 
 /**
  * Get DrawData from the given ID-block. In order for this to work, we assume that
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc
index 05ff06e7435..64b1d4891a9 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc
@@ -471,8 +471,6 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
                                        this);
   GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
   GPU_material_compile(gpumat);
-  /* Queue deferred material optimization. */
-  DRW_shader_queue_optimize_material(gpumat);
   return gpumat;
 }
 
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h
index 4bdef577e44..7b80ffd2b88 100644
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -251,7 +251,6 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
                                              bool deferred,
                                              GPUCodegenCallbackFn callback,
                                              void *thunk);
-void DRW_shader_queue_optimize_material(struct GPUMaterial *mat);
 void DRW_shader_free(struct GPUShader *shader);
 #define DRW_SHADER_FREE_SAFE(shader) \
   do { \
diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c
index f452cd47cb7..04a9f3fdd2d 100644
--- a/source/blender/draw/intern/draw_manager_shader.c
+++ b/source/blender/draw/intern/draw_manager_shader.c
@@ -55,9 +55,6 @@ typedef struct DRWShaderCompiler {
   ListBase queue; /* GPUMaterial */
   SpinLock list_lock;
 
-  /** Optimization queue. */
-  ListBase optimize_queue; /* GPUMaterial */
-
   void *gl_context;
   GPUContext *gpu_context;
   bool own_context;
@@ -113,29 +110,8 @@ static void drw_deferred_shader_compilation_exec(
       MEM_freeN(link);
     }
     else {
-      /* Check for Material Optimization job once there are no more
-       * shaders to compile. */
-      BLI_spin_lock(&comp->list_lock);
-      /* Pop tail because it will be less likely to lock the main thread
-       * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-      link = (LinkData *)BLI_poptail(&comp->optimize_queue);
-      GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : NULL;
-      if (optimize_mat) {
-        /* Avoid another thread freeing the material during optimization. */
-        GPU_material_acquire(optimize_mat);
-      }
-      BLI_spin_unlock(&comp->list_lock);
-
-      if (optimize_mat) {
-        /* Compile optimized material shader. */
-        GPU_material_optimize(optimize_mat);
-        GPU_material_release(optimize_mat);
-        MEM_freeN(link);
-      }
-      else {
-        /* No more materials to optimize, or shaders to compile. */
-        break;
-      }
+      /* No more materials to optimize, or shaders to compile. */
+      break;
     }
 
     if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
@@ -157,7 +133,6 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
 
   BLI_spin_lock(&comp->list_lock);
   BLI_freelistN(&comp->queue);
-  BLI_freelistN(&comp->optimize_queue);
   BLI_spin_unlock(&comp->list_lock);
 
   if (comp->own_context) {
@@ -173,13 +148,34 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
   MEM_freeN(comp);
 }
 
-/**
- * Append either shader compilation or optimization job to deferred queue and
- * ensure shader compilation worker is active.
- * We keep two separate queue's to ensure core compilations always complete before optimization.
- */
-static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
+static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
 {
+  if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
+    return;
+  }
+
+  /* Do not defer the compilation if we are rendering for image.
+   * deferred rendering is only possible when `evil_C` is available */
+  if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
+    deferred = false;
+  }
+
+  if (!deferred) {
+    DRW_deferred_shader_remove(mat);
+    /* Shaders could already be compiling. Have to wait for compilation to finish. */
+    while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
+      PIL_sleep_ms(20);
+    }
+    if (GPU_material_status(mat) == GPU_MAT_CREATED) {
+      GPU_material_compile(mat);
+    }
+    return;
+  }
+
+  /* Don't add material to the queue twice. */
+  if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
+    return;
+  }
+
   const bool use_main_context = GPU_use_main_context_workaround();
   const bool job_own_context = !use_main_context;
@@ -200,7 +196,6 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
   if (old_comp) {
     BLI_spin_lock(&old_comp->list_lock);
     BLI_movelisttolist(&comp->queue, &old_comp->queue);
-    BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
     BLI_spin_unlock(&old_comp->list_lock);
     /* Do not recreate context, just pass ownership. */
     if (old_comp->gl_context) {
@@ -211,18 +206,9 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
     }
   }
 
-  /* Add to either compilation or optimization queue. */
-  if (is_optimization_job) {
-    BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
-    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->optimize_queue, node);
-  }
-  else {
-    GPU_material_status_set(mat, GPU_MAT_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->queue, node);
-  }
+  GPU_material_status_set(mat, GPU_MAT_QUEUED);
+  LinkData *node = BLI_genericNodeN(mat);
+  BLI_addtail(&comp->queue, node);
 
   /* Create only one context. */
   if (comp->gl_context == NULL) {
@@ -251,39 +237,6 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
   WM_jobs_start(wm, wm_job);
 }
 
-static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
-{
-  if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
-    return;
-  }
-
-  /* Do not defer the compilation if we are rendering for image.
-   * deferred rendering is only possible when `evil_C` is available */
-  if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
-    deferred = false;
-  }
-
-  if (!deferred) {
-    DRW_deferred_shader_remove(mat);
-    /* Shaders could already be compiling. Have to wait for compilation to finish. */
-    while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
-      PIL_sleep_ms(20);
-    }
-    if (GPU_material_status(mat) == GPU_MAT_CREATED) {
-      GPU_material_compile(mat);
-    }
-    return;
-  }
-
-  /* Don't add material to the queue twice. */
-  if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
-    return;
-  }
-
-  /* Add deferred shader compilation to queue. */
-  drw_deferred_queue_append(mat, false);
-}
-
 void DRW_deferred_shader_remove(GPUMaterial *mat)
 {
   LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
@@ -299,42 +252,9 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
           BLI_remlink(&comp->queue, link);
           GPU_material_status_set(link->data, GPU_MAT_CREATED);
         }
-
-        MEM_SAFE_FREE(link);
-
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
-        }
         BLI_spin_unlock(&comp->list_lock);
 
-        MEM_SAFE_FREE(opti_link);
-      }
-    }
-  }
-}
-
-void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
-{
-  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
-    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
-      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
-          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
-      if (comp != NULL) {
-        BLI_spin_lock(&comp->list_lock);
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
-        }
-        BLI_spin_unlock(&comp->list_lock);
-
-        MEM_SAFE_FREE(opti_link);
+        MEM_SAFE_FREE(link);
       }
     }
   }
@@ -468,7 +388,6 @@ GPUMaterial *DRW_shader_from_world(World *wo,
   }
 
   drw_deferred_shader_add(mat, deferred);
-  DRW_shader_queue_optimize_material(mat);
   return mat;
 }
 
@@ -498,52 +417,9 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
   }
 
   drw_deferred_shader_add(mat, deferred);
-  DRW_shader_queue_optimize_material(mat);
   return mat;
 }
 
-void DRW_shader_queue_optimize_material(GPUMaterial *mat)
-{
-  /* Do not perform deferred optimization if performing render.
-   * De-queue any queued optimization jobs. */
-  if (DRW_state_is_image_render()) {
-    if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
-      /* Remove from pending optimization job queue. */
-      DRW_deferred_shader_optimize_remove(mat);
-      /* If optimization job had already started, wait for it to complete. */
-      while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
-        PIL_sleep_ms(20);
-      }
-    }
-    return;
-  }
-
-  /* We do not need to perform optimization on the material if it is already compiled or in the
-   * optimization queue. If optimization is not required, the status will be flagged as
-   * `GPU_MAT_OPTIMIZATION_SKIP`.
-   * We can also skip cases which have already been queued up. */
-  if (ELEM(GPU_material_optimization_status(mat),
-           GPU_MAT_OPTIMIZATION_SKIP,
-           GPU_MAT_OPTIMIZATION_SUCCESS,
-           GPU_MAT_OPTIMIZATION_QUEUED)) {
-    return;
-  }
-
-  /* Only queue optimization once the original shader has been successfully compiled. */
-  if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
-    return;
-  }
-
-  /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
-   * recompilation for shaders which are being actively modified. */
-  if (!GPU_material_optimization_ready(mat)) {
-    return;
-  }
-
-  /* Add deferred shader compilation to queue. */
-  drw_deferred_queue_append(mat, true);
-}
-
 void DRW_shader_free(GPUShader *shader)
 {
   GPU_shader_free(shader);
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 11500f5af60..1c9bdc77bac 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -257,14 +257,6 @@ struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
 const char *GPU_material_get_name(GPUMaterial *material);
 
 /**
- * Material Optimization.
- * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
- * This operation should always be deferred until existing compilations have completed.
- * Default un-optimized materials will still exist for interactive material editing performance.
- */
-void GPU_material_optimize(GPUMaterial *mat);
-
-/**
  * Return can be NULL if it's a world material.
  */
 struct Material *GPU_material_get_material(GPUMaterial *material);
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 51644bc9307..994a30f0944 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,9 +95,6 @@ struct GPUPass {
   uint32_t hash;
   /** Did we already tried to compile the attached GPUShader. */
   bool compiled;
-  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
-   * during pass code generation. */
-  bool should_optimize;
 };
 
 /* -------------------------------------------------------------------- */
@@ -204,8 +201,7 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
 }
 
 /* Trick type to change overload and keep a somewhat nice syntax. */
-struct GPUConstant : public GPUInput {
-};
+struct GPUConstant : public GPUInput {};
 
 /* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
 static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
@@ -245,11 +241,6 @@ class GPUCodegen {
   ListBase ubo_inputs_ = {nullptr, nullptr};
   GPUInput *cryptomatte_input_ = nullptr;
 
-  /** Cache parameters for complexity heuristic. */
-  uint nodes_total_ = 0;
-  uint textures_total_ = 0;
-  uint uniforms_total_ = 0;
-
  public:
   GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
   {
@@ -290,14 +281,6 @@ class GPUCodegen {
     return hash_;
   }
 
-  /* Heuristic determined during pass codegen for whether a
-   * more optimal variant of this material should be compiled. */
-  bool should_optimize_heuristic() const
-  {
-    bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
-    return do_optimize;
-  }
-
  private:
   void set_unique_ids();
@@ -419,9 +402,6 @@ void GPUCodegen::generate_resources()
     }
   }
 
-  /* Increment heuristic. */
-  textures_total_ = slot;
-
   if (!BLI_listbase_is_empty(&ubo_inputs_)) {
     /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
     ss << "struct NodeTree {\n";
@@ -459,16 +439,11 @@ void GPUCodegen::generate_library()
   GPUCodegenCreateInfo &info = *create_info;
 
   void *value;
-  /* Iterate over libraries. We need to keep this struct intact in case
-   * it is required for the optimization an pass. */
-  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
-  while (!BLI_ghashIterator_done(ihash)) {
-    value = BLI_ghashIterator_getKey(ihash);
+  GSetIterState pop_state = {};
+  while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
     auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
     info.dependencies_generated.extend_non_duplicates(deps);
-    BLI_ghashIterator_step(ihash);
   }
-  BLI_ghashIterator_free(ihash);
 }
 
@@ -536,9 +511,6 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
     }
   }
   eval_ss << ");\n\n";
-
-  /* Increment heuristic. */
-  nodes_total_++;
 }
 
 char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -602,7 +574,6 @@ void GPUCodegen::generate_uniform_buffer()
       if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
         /* We handle the UBO uniforms separately. */
         BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
-        uniforms_total_++;
       }
     }
   }
@@ -630,7 +601,6 @@ void GPUCodegen::generate_graphs()
 {
   set_unique_ids();
 
-  /* Serialize graph. */
   output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
   output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
   output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -666,17 +636,10 @@ void GPUCodegen::generate_graphs()
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk,
-                           bool optimize_graph)
+                           void *thunk)
 {
   gpu_node_graph_prune_unused(graph);
 
-  /* If Optimize flag is passed in, we are generating an optimized
-   * variant of the GPUMaterial's GPUPass. */
-  if (optimize_graph) {
-    gpu_node_graph_optimize(graph);
-  }
-
   /* Extract attributes before compiling so the generated VBOs are ready to accept the future
    * shader. */
   gpu_node_graph_finalize_uniform_attrs(graph);
@@ -684,33 +647,23 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   GPUCodegen codegen(material, graph);
   codegen.generate_graphs();
   codegen.generate_cryptomatte();
+  codegen.generate_uniform_buffer();
 
-  GPUPass *pass_hash = nullptr;
-
-  if (!optimize_graph) {
-    /* The optimized version of the shader should not re-generate a UBO.
-     * The UBO will not be used for this variant. */
-    codegen.generate_uniform_buffer();
-
-    /** Cache lookup: Reuse shaders already compiled.
-     * NOTE: We only perform cache look-up for non-optimized shader
-     * graphs, as baked constant data among other optimizations will generate too many
-     * shader source permutations, with minimal re-usability. */
-    pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
-    /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
-     * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
-    if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
-      if (!gpu_pass_is_valid(pass_hash)) {
-        /* Shader has already been created but failed to compile. */
-        return nullptr;
-      }
-      /* No collision, just return the pass. */
-      BLI_spin_lock(&pass_cache_spin);
-      pass_hash->refcount += 1;
-      BLI_spin_unlock(&pass_cache_spin);
-      return pass_hash;
+  /* Cache lookup: Reuse shaders already compiled. */
+  GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
+
+  /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
+   * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
+  if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
+    if (!gpu_pass_is_valid(pass_hash)) {
+      /* Shader has already been created but failed to compile. */
+      return nullptr;
     }
+    /* No collision, just return the pass. */
+    BLI_spin_lock(&pass_cache_spin);
+    pass_hash->refcount += 1;
+    BLI_spin_unlock(&pass_cache_spin);
+    return pass_hash;
   }
 
   /* Either the shader is not compiled or there is a hash collision...
@@ -748,31 +701,14 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
     pass->create_info = codegen.create_info;
     pass->hash = codegen.hash_get();
     pass->compiled = false;
-    /* Only flag pass optimization hint if this is the first generated pass for a material.
-     * Optimized passes cannot be optimized further, even if the heuristic is still not
-     * favorable. */
-    pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
 
     codegen.create_info = nullptr;
 
-    /* Only insert non-optimized graphs into cache.
-     * Optimized graphs will continuously be recompiled with new unique source during material
-     * editing, and thus causing the cache to fill up quickly with materials offering minimal
-     * re-use. */
-    if (!optimize_graph) {
-      gpu_pass_cache_insert_after(pass_hash, pass);
-    }
+    gpu_pass_cache_insert_after(pass_hash, pass);
   }
   return pass;
 }
 
-bool GPU_pass_should_optimize(GPUPass *pass)
-{
-  /* Returns optimization heuristic prepared during
-   * initial codegen. */
-  return pass->should_optimize;
-}
-
 /** \} */
 
 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index aabdf1ac003..95a672c0400 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,12 +25,10 @@ typedef struct GPUPass GPUPass;
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            struct GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk,
-                           bool optimize_graph);
+                           void *thunk);
 GPUShader *GPU_pass_shader_get(GPUPass *pass);
 bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
-bool GPU_pass_should_optimize(GPUPass *pass);
 
 /* Module */
 
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 6d0779797b0..0f9dc8be9c5 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,8 +34,6 @@
 
 #include "DRW_engine.h"
 
-#include "PIL_time.h"
-
 #include "gpu_codegen.h"
 #include "gpu_node_graph.h"
 
@@ -45,17 +43,6 @@
 #define MAX_COLOR_BAND 128
 #define MAX_GPU_SKIES 8
 
-/** Whether the optimized variant of the #GPUPass should be created asynchronously.
- * Usage of this depends on whether there are possible threading challenges of doing so.
- * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
- * compilation, though this option exists in case any potential scenarios for material graph
- * optimization cause a slow down on the main thread.
- *
- * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
- * this flag controls whether shader node graph source serialization happens on the compilation
- * worker thread. */
-#define ASYNC_OPTIMIZED_PASS_CREATION 0
-
 typedef struct GPUColorBandBuilder {
   float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
   int current_layer;
@@ -70,27 +57,6 @@ struct GPUMaterial {
   /* Contains #GPUShader and source code for deferred compilation.
    * Can be shared between similar material (i.e: sharing same node-tree topology). */
   GPUPass *pass;
-  /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
-   * This shader variant bakes dynamic uniform data as constant. This variant will not use
-   * the ubo, and instead bake constants directly into the shader source. */
-  GPUPass *optimized_pass;
-  /* Optimization status.
-   * We also use this status to determine whether this material should be considered for
-   * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
-   * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
-   * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
-   * performance to do so, based on the heuristic.
-   */
-  eGPUMaterialOptimizationStatus optimization_status;
-  double creation_time;
-#if ASYNC_OPTIMIZED_PASS_CREATION == 1
-  struct DeferredOptimizePass {
-    GPUCodegenCallbackFn callback;
-    void *thunk;
-  } DeferredOptimizePass;
-  struct DeferredOptimizePass optimize_pass_info;
-#endif
-
   /** UBOs for this material parameters. */
   GPUUniformBuf *ubo;
   /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
@@ -243,9 +209,6 @@ void GPU_material_free_single(GPUMaterial *material)
 
   gpu_node_graph_free(&material->graph);
 
-  if (material->optimized_pass != NULL) {
-    GPU_pass_release(material->optimized_pass);
-  }
   if (material->pass != NULL) {
     GPU_pass_release(material->pass);
   }
@@ -284,15 +247,12 @@ Scene *GPU_material_scene(GPUMaterial *material)
 
 GPUPass *GPU_material_get_pass(GPUMaterial *material)
 {
-  return (material->optimized_pass) ? material->optimized_pass : material->pass;
+  return material->pass;
 }
 
 GPUShader *GPU_material_get_shader(GPUMaterial *material)
 {
-  /* First attempt to select optimized shader. If not available, fetch original. */
-  GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
-                                                   NULL;
-  return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
+  return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
 }
 
 const char *GPU_material_get_name(GPUMaterial *material)
@@ -705,29 +665,6 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
   mat->status = status;
 }
 
-eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
-{
-  return mat->optimization_status;
-}
-
-void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
-{
-  mat->optimization_status = status;
-  if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
-    /* Reset creation timer to delay optimization pass. */
-    mat->creation_time = PIL_check_seconds_timer();
-  }
-}
-
-bool GPU_material_optimization_ready(GPUMaterial *mat)
-{
-  /* Timer threshold before optimizations will be queued.
-   * When materials are frequently being modified, optimization
-   * can incur CPU overhead from excessive compilation. */
-  const double optimization_time_threshold_s = 5.0;
-  return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
-}
-
 /* Code generation */
 
 bool GPU_material_has_surface_output(GPUMaterial *mat)
@@ -793,7 +730,6 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
   mat->uuid = shader_uuid;
   mat->flag = GPU_MATFLAG_UPDATED;
   mat->status = GPU_MAT_CREATED;
-  mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
   mat->is_volume_shader = is_volume_shader;
   mat->graph.used_libraries = BLI_gset_new(
       BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
@@ -812,7 +748,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 
   {
     /* Create source code and search pass cache for an already compiled version. */
-    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
+    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
 
     if (mat->pass == NULL) {
       /* We had a cache hit and the shader has already failed to compile. */
@@ -820,44 +756,11 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
       gpu_node_graph_free(&mat->graph);
     }
    else {
-      /* Determine whether we should generate an optimized variant of the graph.
-       * Heuristic is based on complexity of default material pass and shader node graph. */
-      if (GPU_pass_should_optimize(mat->pass)) {
-        GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
-      }
-
       GPUShader *sh = GPU_pass_shader_get(mat->pass);
       if (sh != NULL) {
         /* We had a cache hit and the shader is already compiled. */
         mat->status = GPU_MAT_SUCCESS;
-
-        if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
-          gpu_node_graph_free_nodes(&mat->graph);
-        }
-      }
-
-      /* Generate optimized pass. */
-      if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
-#if ASYNC_OPTIMIZED_PASS_CREATION == 1
-        mat->optimized_pass = NULL;
-        mat->optimize_pass_info.callback = callback;
-        mat->optimize_pass_info.thunk = thunk;
-#else
-        mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
-        if (mat->optimized_pass == NULL) {
-          /* Failed to create optimized pass. */
-          gpu_node_graph_free_nodes(&mat->graph);
-          GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
-        }
-        else {
-          GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
-          if (optimized_sh != NULL) {
-            /* Optimized shader already available. */
-            gpu_node_graph_free_nodes(&mat->graph);
-            GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
-          }
-        }
-#endif
+        gpu_node_graph_free_nodes(&mat->graph);
       }
     }
   }
@@ -908,11 +811,7 @@ void GPU_material_compile(GPUMaterial *mat)
     GPUShader *sh = GPU_pass_shader_get(mat->pass);
     if (sh != NULL) {
       mat->status = GPU_MAT_SUCCESS;
-
-      if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
-        /* Only free node graph nodes if not required by secondary optimization pass. */
-        gpu_node_graph_free_nodes(&mat->graph);
-      }
+      gpu_node_graph_free_nodes(&mat->graph);
     }
     else {
       mat->status = GPU_MAT_FAILED;
@@ -926,71 +825,6 @@ void GPU_material_compile(GPUMaterial *mat)
   }
 }
 
-void GPU_material_optimize(GPUMaterial *mat)
-{
-  /* If shader is flagged for skipping optimization or has already been successfully
-   * optimized, skip. */
-  if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
-    return;
-  }
-
-  /* If original shader has not been fully compiled, we are not
-   * ready to perform optimization. */
-  if (mat->status != GPU_MAT_SUCCESS) {
-    /* Reset optimization status. */
-    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
-    return;
-  }
-
-#if ASYNC_OPTIMIZED_PASS_CREATION == 1
-  /* If the optimized pass is not valid, first generate optimized pass.
-   * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
-   * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
-   * optimal, as these do not benefit from caching, due to baked constants. However, this could
-   * possibly be cause for concern for certain cases. */
-  if (!mat->optimized_pass) {
-    mat->optimized_pass = GPU_generate_pass(
-        mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
-    BLI_assert(mat->optimized_pass);
-  }
-#else
-  if (!mat->optimized_pass) {
-    /* Optimized pass has not been created, skip future optimization attempts. */
-    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
-    return;
-  }
-#endif
-
-  bool success;
-  /* NOTE: The shader may have already been compiled here since we are
-   * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
-#ifndef NDEBUG
-  success = GPU_pass_compile(mat->optimized_pass, mat->name);
-#else
-  success = GPU_pass_compile(mat->optimized_pass, __func__);
-#endif
-
-  if (success) {
-    GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
-    if (sh != NULL) {
-      GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
-    }
-    else {
-      /* Optimized pass failed to compile. Disable any future optimization attempts. */
-      GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
-    }
-  }
-  else {
-    /* Optimization pass generation failed. Disable future attempts to optimize. */
-    GPU_pass_release(mat->optimized_pass);
-    mat->optimized_pass = NULL;
-    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
-  }
-
-  /* Release node graph as no longer needed. */
-  gpu_node_graph_free_nodes(&mat->graph);
-}
-
 void GPU_materials_free(Main *bmain)
 {
   LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
@@ -1014,8 +848,6 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
   material->graph.used_libraries = BLI_gset_new(
       BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
   material->refcount = 1;
-  material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
-  material->optimized_pass = NULL;
 
   /* Construct the material graph by adding and linking the necessary GPU material nodes. */
   construct_function_cb(thunk, material);
@@ -1024,9 +856,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
   gpu_material_ramp_texture_build(material);
 
   /* Lookup an existing pass in the cache or generate a new one. */
-  material->pass = GPU_generate_pass(
-      material, &material->graph, generate_code_function_cb, thunk, false);
-  material->optimized_pass = NULL;
+  material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
 
   /* The pass already exists in the pass cache but its shader already failed to compile. */
   if (material->pass == NULL) {
@@ -1035,42 +865,11 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
     return material;
   }
 
-  /* Generate optimized pass. */
-  if (GPU_pass_should_optimize(material->pass)) {
-
-#if ASYNC_OPTIMIZED_PASS_CREATION == 1
-    mmaterial->optimized_pass = NULL;
-    material->optimize_pass_info.callback = generate_code_function_cb;
-    material->optimize_pass_info.thunk = thunk;
-    GPU_material_optimization_status_set(GPU_MAT_OPTIMIZATION_READY);
-#else
-    material->optimized_pass = GPU_generate_pass(
-        material, &material->graph, generate_code_function_cb, thunk, true);
-
-    if (material->optimized_pass == NULL) {
-      /* Failed to create optimized pass. */
-      gpu_node_graph_free_nodes(&material->graph);
-      GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
-    }
-    else {
-      GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
-      if (optimized_sh != NULL) {
-        /* Optimized shader already available. */
-        gpu_node_graph_free_nodes(&material->graph);
-        GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
-      }
-    }
-#endif
-  }
-
   /* The pass already exists in the pass cache and its shader is already compiled. */
   GPUShader *shader = GPU_pass_shader_get(material->pass);
   if (shader != NULL) {
     material->status = GPU_MAT_SUCCESS;
-    if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
-      /* Only free node graph if not required by secondary optimization pass. */
-      gpu_node_graph_free_nodes(&material->graph);
-    }
+    gpu_node_graph_free_nodes(&material->graph);
     return material;
   }
 
diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index f02633621db..510c16dfb02 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,22 +914,3 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
     }
   }
 }
-
-void gpu_node_graph_optimize(GPUNodeGraph *graph)
-{
-  /* Replace all uniform node links with constant. */
-  LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
-    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
-      if (input->link) {
-        if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
-          input->link->link_type = GPU_NODE_LINK_CONSTANT;
-        }
-      }
-      if (input->source == GPU_SOURCE_UNIFORM) {
-        input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
-      }
-    }
-  }
-
-  /* TODO: Consider performing other node graph optimizations here. */
-}
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 75ca05ffaea..7db22151f86 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -181,20 +181,6 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
 void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
 
 /**
- * Optimize node graph for optimized material shader path.
- * Once the base material has been generated, we can modify the shader
- * node graph to create one which will produce an optimally performing shader.
- * This currently involves baking uniform data into constant data to enable
- * aggressive constant folding by the compiler in order to reduce complexity and
- * shader core memory pressure.
- *
- * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
- * more frequently, however, the default material pass will always exist to fall
- * back on.
- */
-void gpu_node_graph_optimize(GPUNodeGraph *graph);
-
-/**
  * Free intermediate node graph.
  */
 void gpu_node_graph_free_nodes(GPUNodeGraph *graph);