diff options
Diffstat (limited to 'source/blender/gpu/intern/gpu_material.c')
-rw-r--r-- | source/blender/gpu/intern/gpu_material.c | 302 |
1 file changed, 281 insertions, 21 deletions
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index a4842ef0e43..6d0779797b0 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -34,6 +34,8 @@ #include "DRW_engine.h" +#include "PIL_time.h" + #include "gpu_codegen.h" #include "gpu_node_graph.h" @@ -41,16 +43,54 @@ /* Structs */ #define MAX_COLOR_BAND 128 +#define MAX_GPU_SKIES 8 + +/** Whether the optimized variant of the #GPUPass should be created asynchronously. + * Usage of this depends on whether there are possible threading challenges of doing so. + * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader + * compilation, though this option exists in case any potential scenarios for material graph + * optimization cause a slow down on the main thread. + * + * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously, + * this flag controls whether shader node graph source serialization happens on the compilation + * worker thread. */ +#define ASYNC_OPTIMIZED_PASS_CREATION 0 typedef struct GPUColorBandBuilder { float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4]; int current_layer; } GPUColorBandBuilder; +typedef struct GPUSkyBuilder { + float pixels[MAX_GPU_SKIES][GPU_SKY_WIDTH * GPU_SKY_HEIGHT][4]; + int current_layer; +} GPUSkyBuilder; + struct GPUMaterial { - /* Contains GPUShader and source code for deferred compilation. - * Can be shared between similar material (i.e: sharing same nodetree topology). */ + /* Contains #GPUShader and source code for deferred compilation. + * Can be shared between similar material (i.e: sharing same node-tree topology). */ GPUPass *pass; + /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance. + * This shader variant bakes dynamic uniform data as constant. This variant will not use + * the ubo, and instead bake constants directly into the shader source. 
*/ + GPUPass *optimized_pass; + /* Optimization status. + * We also use this status to determine whether this material should be considered for + * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations. + * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization. + * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit + * performance to do so, based on the heuristic. + */ + eGPUMaterialOptimizationStatus optimization_status; + double creation_time; +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + struct DeferredOptimizePass { + GPUCodegenCallbackFn callback; + void *thunk; + } DeferredOptimizePass; + struct DeferredOptimizePass optimize_pass_info; +#endif + /** UBOs for this material parameters. */ GPUUniformBuf *ubo; /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */ @@ -73,6 +113,10 @@ struct GPUMaterial { GPUTexture *coba_tex; /** Builder for coba_tex. */ GPUColorBandBuilder *coba_builder; + /** 2D Texture array containing all sky textures. */ + GPUTexture *sky_tex; + /** Builder for sky_tex. */ + GPUSkyBuilder *sky_builder; /* Low level node graph(s). Also contains resources needed by the material. */ GPUNodeGraph graph; @@ -91,11 +135,42 @@ struct GPUMaterial { #ifndef NDEBUG char name[64]; +#else + char name[16]; #endif }; /* Functions */ +GPUTexture **gpu_material_sky_texture_layer_set( + GPUMaterial *mat, int width, int height, const float *pixels, float *row) +{ + /* In order to put all sky textures into one 2D array texture, + * we need them to be the same size. 
*/ + BLI_assert(width == GPU_SKY_WIDTH); + BLI_assert(height == GPU_SKY_HEIGHT); + UNUSED_VARS_NDEBUG(width, height); + + if (mat->sky_builder == NULL) { + mat->sky_builder = MEM_mallocN(sizeof(GPUSkyBuilder), "GPUSkyBuilder"); + mat->sky_builder->current_layer = 0; + } + + int layer = mat->sky_builder->current_layer; + *row = (float)layer; + + if (*row == MAX_GPU_SKIES) { + printf("Too many sky textures in shader!\n"); + } + else { + float *dst = (float *)mat->sky_builder->pixels[layer]; + memcpy(dst, pixels, sizeof(float) * GPU_SKY_WIDTH * GPU_SKY_HEIGHT * 4); + mat->sky_builder->current_layer += 1; + } + + return &mat->sky_tex; +} + GPUTexture **gpu_material_ramp_texture_row_set(GPUMaterial *mat, int size, float *pixels, @@ -141,6 +216,24 @@ static void gpu_material_ramp_texture_build(GPUMaterial *mat) mat->coba_builder = NULL; } +static void gpu_material_sky_texture_build(GPUMaterial *mat) +{ + if (mat->sky_builder == NULL) { + return; + } + + mat->sky_tex = GPU_texture_create_2d_array("mat_sky", + GPU_SKY_WIDTH, + GPU_SKY_HEIGHT, + mat->sky_builder->current_layer, + 1, + GPU_RGBA32F, + (float *)mat->sky_builder->pixels); + + MEM_freeN(mat->sky_builder); + mat->sky_builder = NULL; +} + void GPU_material_free_single(GPUMaterial *material) { bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0; @@ -150,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material) gpu_node_graph_free(&material->graph); + if (material->optimized_pass != NULL) { + GPU_pass_release(material->optimized_pass); + } if (material->pass != NULL) { GPU_pass_release(material->pass); } @@ -159,6 +255,9 @@ void GPU_material_free_single(GPUMaterial *material) if (material->coba_tex != NULL) { GPU_texture_free(material->coba_tex); } + if (material->sky_tex != NULL) { + GPU_texture_free(material->sky_tex); + } if (material->sss_profile != NULL) { GPU_uniformbuf_free(material->sss_profile); } @@ -185,12 +284,20 @@ Scene *GPU_material_scene(GPUMaterial *material) GPUPass 
*GPU_material_get_pass(GPUMaterial *material) { - return material->pass; + return (material->optimized_pass) ? material->optimized_pass : material->pass; } GPUShader *GPU_material_get_shader(GPUMaterial *material) { - return material->pass ? GPU_pass_shader_get(material->pass) : NULL; + /* First attempt to select optimized shader. If not available, fetch original. */ + GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) : + NULL; + return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL); +} + +const char *GPU_material_get_name(GPUMaterial *material) +{ + return material->name; } Material *GPU_material_get_material(GPUMaterial *material) @@ -205,12 +312,7 @@ GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) { -#ifndef NDEBUG - const char *name = material->name; -#else - const char *name = "Material"; -#endif - material->ubo = GPU_uniformbuf_create_from_list(inputs, name); + material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name); } ListBase GPU_material_attributes(GPUMaterial *material) @@ -223,9 +325,9 @@ ListBase GPU_material_textures(GPUMaterial *material) return material->graph.textures; } -GPUUniformAttrList *GPU_material_uniform_attributes(GPUMaterial *material) +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material) { - GPUUniformAttrList *attrs = &material->graph.uniform_attrs; + const GPUUniformAttrList *attrs = &material->graph.uniform_attrs; return attrs->count > 0 ? 
attrs : NULL; } @@ -603,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status) mat->status = status; } +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) +{ + return mat->optimization_status; +} + +void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status) +{ + mat->optimization_status = status; + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { + /* Reset creation timer to delay optimization pass. */ + mat->creation_time = PIL_check_seconds_timer(); + } +} + +bool GPU_material_optimization_ready(GPUMaterial *mat) +{ + /* Timer threshold before optimizations will be queued. + * When materials are frequently being modified, optimization + * can incur CPU overhead from excessive compilation. */ + const double optimization_time_threshold_s = 5.0; + return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s); +} + /* Code generation */ bool GPU_material_has_surface_output(GPUMaterial *mat) @@ -668,15 +793,12 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, mat->uuid = shader_uuid; mat->flag = GPU_MATFLAG_UPDATED; mat->status = GPU_MAT_CREATED; + mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; mat->is_volume_shader = is_volume_shader; mat->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); mat->refcount = 1; -#ifndef NDEBUG STRNCPY(mat->name, name); -#else - UNUSED_VARS(name); -#endif if (is_lookdev) { mat->flag |= GPU_MATFLAG_LOOKDEV_HACK; } @@ -686,10 +808,11 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, ntreeGPUMaterialNodes(localtree, mat); gpu_material_ramp_texture_build(mat); + gpu_material_sky_texture_build(mat); { /* Create source code and search pass cache for an already compiled version. 
*/ - mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk); + mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false); if (mat->pass == NULL) { /* We had a cache hit and the shader has already failed to compile. */ @@ -697,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, gpu_node_graph_free(&mat->graph); } else { + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. */ + if (GPU_pass_should_optimize(mat->pass)) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + } + GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { /* We had a cache hit and the shader is already compiled. */ mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + gpu_node_graph_free_nodes(&mat->graph); + } + } + + /* Generate optimized pass. */ + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + mat->optimized_pass = NULL; + mat->optimize_pass_info.callback = callback; + mat->optimize_pass_info.thunk = thunk; +#else + mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true); + if (mat->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. 
*/ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif } } } @@ -752,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat) GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph nodes if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&mat->graph); + } } else { mat->status = GPU_MAT_FAILED; @@ -766,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat) } } +void GPU_material_optimize(GPUMaterial *mat) +{ + /* If shader is flagged for skipping optimization or has already been successfully + * optimized, skip. */ + if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) { + return; + } + + /* If original shader has not been fully compiled, we are not + * ready to perform optimization. */ + if (mat->status != GPU_MAT_SUCCESS) { + /* Reset optimization status. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + return; + } + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + /* If the optimized pass is not valid, first generate optimized pass. + * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when + * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more + * optimal, as these do not benefit from caching, due to baked constants. However, this could + * possibly be cause for concern for certain cases. */ + if (!mat->optimized_pass) { + mat->optimized_pass = GPU_generate_pass( + mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true); + BLI_assert(mat->optimized_pass); + } +#else + if (!mat->optimized_pass) { + /* Optimized pass has not been created, skip future optimization attempts. 
*/ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + return; + } +#endif + + bool success; + /* NOTE: The shader may have already been compiled here since we are + * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ +#ifndef NDEBUG + success = GPU_pass_compile(mat->optimized_pass, mat->name); +#else + success = GPU_pass_compile(mat->optimized_pass, __func__); +#endif + + if (success) { + GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass); + if (sh != NULL) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + else { + /* Optimized pass failed to compile. Disable any future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + } + else { + /* Optimization pass generation failed. Disable future attempts to optimize. */ + GPU_pass_release(mat->optimized_pass); + mat->optimized_pass = NULL; + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + + /* Release node graph as no longer needed. */ + gpu_node_graph_free_nodes(&mat->graph); +} + void GPU_materials_free(Main *bmain) { LISTBASE_FOREACH (Material *, ma, &bmain->materials) { @@ -789,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi material->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); material->refcount = 1; + material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; + material->optimized_pass = NULL; /* Construct the material graph by adding and linking the necessary GPU material nodes. */ construct_function_cb(thunk, material); @@ -797,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi gpu_material_ramp_texture_build(material); /* Lookup an existing pass in the cache or generate a new one. 
*/ - material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk); + material->pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, false); + material->optimized_pass = NULL; /* The pass already exists in the pass cache but its shader already failed to compile. */ if (material->pass == NULL) { @@ -806,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi return material; } + /* Generate optimized pass. */ + if (GPU_pass_should_optimize(material->pass)) { + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + material->optimized_pass = NULL; + material->optimize_pass_info.callback = generate_code_function_cb; + material->optimize_pass_info.thunk = thunk; + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY); +#else + material->optimized_pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, true); + + if (material->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&material->graph); + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. */ + gpu_node_graph_free_nodes(&material->graph); + GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif + } + /* The pass already exists in the pass cache and its shader is already compiled. */ GPUShader *shader = GPU_pass_shader_get(material->pass); if (shader != NULL) { material->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&material->graph); + if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&material->graph); + } return material; } |