diff options
Diffstat (limited to 'source/blender/gpu/intern/gpu_codegen.cc')
-rw-r--r-- | source/blender/gpu/intern/gpu_codegen.cc | 825 |
1 files changed, 825 insertions, 0 deletions
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc new file mode 100644 index 00000000000..8963fa45c96 --- /dev/null +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -0,0 +1,825 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2005 Blender Foundation. */ + +/** \file + * \ingroup gpu + * + * Convert material node-trees to GLSL. + */ + +#include "MEM_guardedalloc.h" + +#include "DNA_customdata_types.h" +#include "DNA_image_types.h" + +#include "BLI_blenlib.h" +#include "BLI_dynstr.h" +#include "BLI_ghash.h" +#include "BLI_hash_mm2a.h" +#include "BLI_link_utils.h" +#include "BLI_threads.h" +#include "BLI_utildefines.h" + +#include "PIL_time.h" + +#include "BKE_material.h" +#include "BKE_world.h" + +#include "GPU_capabilities.h" +#include "GPU_material.h" +#include "GPU_shader.h" +#include "GPU_uniform_buffer.h" +#include "GPU_vertex_format.h" + +#include "BLI_sys_types.h" /* for intptr_t support */ + +#include "gpu_codegen.h" +#include "gpu_material_library.h" +#include "gpu_node_graph.h" +#include "gpu_shader_create_info.hh" +#include "gpu_shader_dependency_private.h" + +#include <stdarg.h> +#include <string.h> + +#include <sstream> +#include <string> + +using namespace blender::gpu::shader; + +struct GPUCodegenCreateInfo : ShaderCreateInfo { + struct NameBuffer { + char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1]; + char var_names[16][8]; + }; + + /** Optional generated interface. */ + StageInterfaceInfo *interface_generated = nullptr; + /** Optional name buffer containing names referenced by StringRefNull. */ + NameBuffer *name_buffer = nullptr; + + GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){}; + ~GPUCodegenCreateInfo() + { + delete interface_generated; + MEM_delete(name_buffer); + }; +}; + +struct GPUPass { + struct GPUPass *next; + + GPUShader *shader; + GPUCodegenCreateInfo *create_info = nullptr; + /** Orphaned GPUPasses gets freed by the garbage collector. */ + uint refcount; + /** Identity hash generated from all GLSL code. */ + uint32_t hash; + /** Did we already tried to compile the attached GPUShader. */ + bool compiled; +}; + +/* -------------------------------------------------------------------- */ +/** \name GPUPass Cache + * + * Internal shader cache: This prevent the shader recompilation / stall when + * using undo/redo AND also allows for GPUPass reuse if the Shader code is the + * same for 2 different Materials. Unused GPUPasses are free by Garbage collection. + */ + +/* Only use one linklist that contains the GPUPasses grouped by hash. */ +static GPUPass *pass_cache = nullptr; +static SpinLock pass_cache_spin; + +/* Search by hash only. Return first pass with the same hash. + * There is hash collision if (pass->next && pass->next->hash == hash) */ +static GPUPass *gpu_pass_cache_lookup(uint32_t hash) +{ + BLI_spin_lock(&pass_cache_spin); + /* Could be optimized with a Lookup table. */ + for (GPUPass *pass = pass_cache; pass; pass = pass->next) { + if (pass->hash == hash) { + BLI_spin_unlock(&pass_cache_spin); + return pass; + } + } + BLI_spin_unlock(&pass_cache_spin); + return nullptr; +} + +static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass) +{ + BLI_spin_lock(&pass_cache_spin); + if (node != nullptr) { + /* Add after the first pass having the same hash. */ + pass->next = node->next; + node->next = pass; + } + else { + /* No other pass have same hash, just prepend to the list. */ + BLI_LINKS_PREPEND(pass_cache, pass); + } + BLI_spin_unlock(&pass_cache_spin); +} + +/* Check all possible passes with the same hash. */ +static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass, + GPUShaderCreateInfo *info, + uint32_t hash) +{ + BLI_spin_lock(&pass_cache_spin); + for (; pass && (pass->hash == hash); pass = pass->next) { + if (*reinterpret_cast<ShaderCreateInfo *>(info) == + *reinterpret_cast<ShaderCreateInfo *>(pass->create_info)) { + BLI_spin_unlock(&pass_cache_spin); + return pass; + } + } + BLI_spin_unlock(&pass_cache_spin); + return nullptr; +} + +static bool gpu_pass_is_valid(GPUPass *pass) +{ + /* Shader is not null if compilation is successful. */ + return (pass->compiled == false || pass->shader != nullptr); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Type > string conversion + * \{ */ + +static std::ostream &operator<<(std::ostream &stream, const GPUInput *input) +{ + switch (input->source) { + case GPU_SOURCE_FUNCTION_CALL: + case GPU_SOURCE_OUTPUT: + return stream << "tmp" << input->id; + case GPU_SOURCE_CONSTANT: + return stream << "cons" << input->id; + case GPU_SOURCE_UNIFORM: + return stream << "node_tree.u" << input->id; + case GPU_SOURCE_ATTR: + return stream << "var_attrs.v" << input->attr->id; + case GPU_SOURCE_UNIFORM_ATTR: + return stream << "unf_attrs[resource_id].attr" << input->uniform_attr->id; + case GPU_SOURCE_STRUCT: + return stream << "strct" << input->id; + case GPU_SOURCE_TEX: + return stream << input->texture->sampler_name; + case GPU_SOURCE_TEX_TILED_MAPPING: + return stream << input->texture->tiled_mapping_name; + case GPU_SOURCE_VOLUME_GRID: + return stream << input->volume_grid->sampler_name; + case GPU_SOURCE_VOLUME_GRID_TRANSFORM: + return stream << input->volume_grid->transform_name; + default: + BLI_assert(0); + return stream; + } +} + +static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output) +{ + return stream << "tmp" << output->id; +} + +/* Trick type to change overload and keep a somewhat nice syntax. */ +struct GPUConstant : public GPUInput { +}; + +/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */ +static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input) +{ + stream << input->type << "("; + for (int i = 0; i < input->type; i++) { + char formated_float[32]; + /* Print with the maximum precision for single precision float using scientific notation. + * See https://stackoverflow.com/questions/16839658/#answer-21162120 */ + SNPRINTF(formated_float, "%.9g", input->vec[i]); + stream << formated_float; + if (i < input->type - 1) { + stream << ", "; + } + } + stream << ")"; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name GLSL code generation + * \{ */ + +class GPUCodegen { + public: + GPUMaterial &mat; + GPUNodeGraph &graph; + GPUCodegenOutput output = {}; + GPUCodegenCreateInfo *create_info = nullptr; + + private: + uint32_t hash_ = 0; + BLI_HashMurmur2A hm2a_; + ListBase ubo_inputs_ = {nullptr, nullptr}; + + public: + GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_) + { + BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat)); + BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat)); + create_info = new GPUCodegenCreateInfo("codegen"); + output.create_info = reinterpret_cast<GPUShaderCreateInfo *>( + static_cast<ShaderCreateInfo *>(create_info)); + + if (GPU_material_flag_get(mat_, GPU_MATFLAG_OBJECT_INFO)) { + create_info->additional_info("draw_object_infos"); + } + } + + ~GPUCodegen() + { + MEM_SAFE_FREE(output.attr_load); + MEM_SAFE_FREE(output.surface); + MEM_SAFE_FREE(output.volume); + MEM_SAFE_FREE(output.thickness); + MEM_SAFE_FREE(output.displacement); + MEM_SAFE_FREE(output.material_functions); + delete create_info; + BLI_freelistN(&ubo_inputs_); + }; + + void generate_graphs(); + void generate_uniform_buffer(); + void generate_attribs(); + void generate_resources(); + void generate_library(); + + uint32_t hash_get() const + { + return hash_; + } + + private: + void set_unique_ids(); + + void node_serialize(std::stringstream &eval_ss, const GPUNode *node); + char *graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link); + + static char *extract_c_str(std::stringstream &stream) + { + auto string = stream.str(); + return BLI_strdup(string.c_str()); + } +}; + +static char attr_prefix_get(CustomDataType type) +{ + switch (type) { + case CD_MTFACE: + return 'u'; + case CD_TANGENT: + return 't'; + case CD_MCOL: + case CD_MLOOPCOL: + return 'c'; + case CD_PROP_COLOR: + return 'c'; + case CD_AUTO_FROM_NAME: + return 'a'; + case CD_HAIRLENGTH: + return 'l'; + default: + BLI_assert_msg(0, "GPUVertAttr Prefix type not found : This should not happen!"); + return '\0'; + } +} + +void GPUCodegen::generate_attribs() +{ + if (BLI_listbase_is_empty(&graph.attributes)) { + output.attr_load = nullptr; + return; + } + + GPUCodegenCreateInfo &info = *create_info; + + info.name_buffer = MEM_new<GPUCodegenCreateInfo::NameBuffer>("info.name_buffer"); + info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs"); + StageInterfaceInfo &iface = *info.interface_generated; + info.vertex_out(iface); + + /* Input declaration, loading / assignment to interface and geometry shader passthrough. */ + std::stringstream decl_ss, iface_ss, load_ss; + + int slot = 15; + LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) { + + /* NOTE: Replicate changes to mesh_render_data_create() in draw_cache_impl_mesh.c */ + if (attr->type == CD_ORCO) { + /* OPTI: orco is computed from local positions, but only if no modifier is present. */ + STRNCPY(info.name_buffer->attr_names[slot], "orco"); + } + else { + char *name = info.name_buffer->attr_names[slot]; + name[0] = attr_prefix_get(static_cast<CustomDataType>(attr->type)); + name[1] = '\0'; + if (attr->name[0] != '\0') { + /* XXX FIXME: see notes in mesh_render_data_create() */ + GPU_vertformat_safe_attr_name(attr->name, &name[1], GPU_MAX_SAFE_ATTR_NAME); + } + } + SNPRINTF(info.name_buffer->var_names[slot], "v%d", attr->id); + + blender::StringRefNull attr_name = info.name_buffer->attr_names[slot]; + blender::StringRefNull var_name = info.name_buffer->var_names[slot]; + + eGPUType input_type, iface_type; + + load_ss << "var_attrs." << var_name; + switch (attr->type) { + case CD_ORCO: + /* Need vec4 to detect usage of default attribute. */ + input_type = GPU_VEC4; + iface_type = GPU_VEC3; + load_ss << " = attr_load_orco(" << attr_name << ");\n"; + break; + case CD_HAIRLENGTH: + iface_type = input_type = GPU_FLOAT; + load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n"; + break; + case CD_TANGENT: + iface_type = input_type = GPU_VEC4; + load_ss << " = attr_load_tangent(" << attr_name << ");\n"; + break; + case CD_MTFACE: + iface_type = input_type = GPU_VEC3; + load_ss << " = attr_load_uv(" << attr_name << ");\n"; + break; + case CD_MCOL: + iface_type = input_type = GPU_VEC4; + load_ss << " = attr_load_color(" << attr_name << ");\n"; + break; + default: + iface_type = input_type = GPU_VEC4; + load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n"; + break; + } + + info.vertex_in(slot--, to_type(input_type), attr_name); + iface.smooth(to_type(iface_type), var_name); + } + + output.attr_load = extract_c_str(load_ss); +} + +void GPUCodegen::generate_resources() +{ + GPUCodegenCreateInfo &info = *create_info; + + std::stringstream ss; + + /* Textures. */ + LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) { + if (tex->colorband) { + info.sampler(0, ImageType::FLOAT_1D_ARRAY, tex->sampler_name, Frequency::BATCH); + } + else if (tex->tiled_mapping_name[0] != '\0') { + info.sampler(0, ImageType::FLOAT_2D_ARRAY, tex->sampler_name, Frequency::BATCH); + info.sampler(0, ImageType::FLOAT_1D_ARRAY, tex->tiled_mapping_name, Frequency::BATCH); + } + else { + info.sampler(0, ImageType::FLOAT_2D, tex->sampler_name, Frequency::BATCH); + } + } + /* Volume Grids. */ + LISTBASE_FOREACH (GPUMaterialVolumeGrid *, grid, &graph.volume_grids) { + info.sampler(0, ImageType::FLOAT_3D, grid->sampler_name, Frequency::BATCH); + /* TODO(@fclem): Global uniform. To put in an UBO. */ + info.push_constant(Type::MAT4, grid->transform_name); + } + + if (!BLI_listbase_is_empty(&ubo_inputs_)) { + /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */ + ss << "struct NodeTree {\n"; + LISTBASE_FOREACH (LinkData *, link, &ubo_inputs_) { + GPUInput *input = (GPUInput *)(link->data); + ss << input->type << " u" << input->id << ";\n"; + } + ss << "};\n\n"; + + info.uniform_buf(0, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH); + } + + if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) { + ss << "struct UniformAttrs {\n"; + LISTBASE_FOREACH (GPUUniformAttr *, attr, &graph.uniform_attrs.list) { + ss << "vec4 attr" << attr->id << ";\n"; + } + ss << "};\n\n"; + + /* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */ + /* DRW_RESOURCE_CHUNK_LEN = 512 */ + info.uniform_buf(0, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH); + } + + info.typedef_source_generated = ss.str(); +} + +void GPUCodegen::generate_library() +{ + GPUCodegenCreateInfo &info = *create_info; + + void *value; + GSetIterState pop_state = {}; + while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) { + auto deps = gpu_shader_dependency_get_resolved_source((const char *)value); + info.dependencies_generated.extend_non_duplicates(deps); + } +} + +void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node) +{ + /* Declare constants. */ + LISTBASE_FOREACH (GPUInput *, input, &node->inputs) { + switch (input->source) { + case GPU_SOURCE_FUNCTION_CALL: + eval_ss << input->type << " " << input << "; " << input->function_call << input << ");\n"; + break; + case GPU_SOURCE_STRUCT: + eval_ss << input->type << " " << input << " = CLOSURE_DEFAULT;\n"; + break; + case GPU_SOURCE_CONSTANT: + eval_ss << input->type << " " << input << " = " << (GPUConstant *)input << ";\n"; + break; + default: + break; + } + } + /* Declare temporary variables for node output storage. */ + LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) { + eval_ss << output->type << " " << output << ";\n"; + } + + /* Function call. */ + eval_ss << node->name << "("; + /* Input arguments. */ + LISTBASE_FOREACH (GPUInput *, input, &node->inputs) { + switch (input->source) { + case GPU_SOURCE_OUTPUT: + case GPU_SOURCE_ATTR: { + /* These inputs can have non matching types. Do conversion. */ + eGPUType to = input->type; + eGPUType from = (input->source == GPU_SOURCE_ATTR) ? input->attr->gputype : + input->link->output->type; + if (from != to) { + /* Use defines declared inside codegen_lib (i.e: vec4_from_float). */ + eval_ss << to << "_from_" << from << "("; + } + + if (input->source == GPU_SOURCE_ATTR) { + eval_ss << input; + } + else { + eval_ss << input->link->output; + } + + if (from != to) { + eval_ss << ")"; + } + break; + } + default: + eval_ss << input; + break; + } + eval_ss << ", "; + } + /* Output arguments. */ + LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) { + eval_ss << output; + if (output->next) { + eval_ss << ", "; + } + } + eval_ss << ");\n\n"; +} + +char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link) +{ + if (output_link == nullptr) { + return nullptr; + } + + std::stringstream eval_ss; + /* NOTE: The node order is already top to bottom (or left to right in node editor) + * because of the evaluation order inside ntreeExecGPUNodes(). */ + LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) { + if ((node->tag & tree_tag) == 0) { + continue; + } + node_serialize(eval_ss, node); + } + eval_ss << "return " << output_link->output << ";\n"; + + char *eval_c_str = extract_c_str(eval_ss); + BLI_hash_mm2a_add(&hm2a_, (uchar *)eval_c_str, eval_ss.str().size()); + return eval_c_str; +} + +void GPUCodegen::generate_uniform_buffer() +{ + /* Extract uniform inputs. */ + LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) { + LISTBASE_FOREACH (GPUInput *, input, &node->inputs) { + if (input->source == GPU_SOURCE_UNIFORM && !input->link) { + /* We handle the UBO uniforms separately. */ + BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input)); + } + } + } + if (!BLI_listbase_is_empty(&ubo_inputs_)) { + /* This sorts the inputs based on size. */ + GPU_material_uniform_buffer_create(&mat, &ubo_inputs_); + } +} + +/* Sets id for unique names for all inputs, resources and temp variables. */ +void GPUCodegen::set_unique_ids() +{ + int id = 1; + LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) { + LISTBASE_FOREACH (GPUInput *, input, &node->inputs) { + input->id = id++; + } + LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) { + output->id = id++; + } + } +} + +void GPUCodegen::generate_graphs() +{ + set_unique_ids(); + + output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface); + output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume); + output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement); + output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness); + + if (!BLI_listbase_is_empty(&graph.material_functions)) { + std::stringstream eval_ss; + eval_ss << "\n/* Generated Functions */\n\n"; + LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) { + char *fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink); + eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n"; + MEM_SAFE_FREE(fn); + } + output.material_functions = extract_c_str(eval_ss); + } + + LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) { + BLI_hash_mm2a_add(&hm2a_, (uchar *)attr->name, strlen(attr->name)); + } + + hash_ = BLI_hash_mm2a_end(&hm2a_); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name GPUPass + * \{ */ + +GPUPass *GPU_generate_pass(GPUMaterial *material, + GPUNodeGraph *graph, + GPUCodegenCallbackFn finalize_source_cb, + void *thunk) +{ + /* Prune the unused nodes and extract attributes before compiling so the + * generated VBOs are ready to accept the future shader. */ + gpu_node_graph_prune_unused(graph); + gpu_node_graph_finalize_uniform_attrs(graph); + + GPUCodegen codegen(material, graph); + codegen.generate_graphs(); + codegen.generate_uniform_buffer(); + + /* Cache lookup: Reuse shaders already compiled. */ + GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get()); + + /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source + * there is no way to have a collision currently. Some advocated to only use a bigger hash. */ + if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { + if (!gpu_pass_is_valid(pass_hash)) { + /* Shader has already been created but failed to compile. */ + return nullptr; + } + /* No collision, just return the pass. */ + pass_hash->refcount += 1; + return pass_hash; + } + + /* Either the shader is not compiled or there is a hash collision... + * continue generating the shader strings. */ + codegen.generate_attribs(); + codegen.generate_resources(); + codegen.generate_library(); + + /* Make engine add its own code and implement the generated functions. */ + finalize_source_cb(thunk, material, &codegen.output); + + GPUPass *pass = nullptr; + if (pass_hash) { + /* Cache lookup: Reuse shaders already compiled. */ + pass = gpu_pass_cache_resolve_collision( + pass_hash, codegen.output.create_info, codegen.hash_get()); + } + + if (pass) { + /* Cache hit. Reuse the same GPUPass and GPUShader. */ + if (!gpu_pass_is_valid(pass)) { + /* Shader has already been created but failed to compile. */ + return nullptr; + } + pass->refcount += 1; + } + else { + /* We still create a pass even if shader compilation + * fails to avoid trying to compile again and again. */ + pass = (GPUPass *)MEM_callocN(sizeof(GPUPass), "GPUPass"); + pass->shader = nullptr; + pass->refcount = 1; + pass->create_info = codegen.create_info; + pass->hash = codegen.hash_get(); + pass->compiled = false; + + codegen.create_info = nullptr; + + gpu_pass_cache_insert_after(pass_hash, pass); + } + return pass; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compilation + * \{ */ + +static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader) +{ + int num_samplers = 0; + + for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) { + if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) { + if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) { + num_samplers += 1; + } + } + } + + return num_samplers; +} + +static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader) +{ + if (shader == nullptr) { + return false; + } + + /* NOTE: The only drawback of this method is that it will count a sampler + * used in the fragment shader and only declared (but not used) in the vertex + * shader as used by both. But this corner case is not happening for now. */ + int active_samplers_len = count_active_texture_sampler(pass, shader); + + /* Validate against opengl limit. */ + if ((active_samplers_len > GPU_max_textures_frag()) || + (active_samplers_len > GPU_max_textures_vert())) { + return false; + } + + if (pass->create_info->geometry_source_.is_empty() == false) { + if (active_samplers_len > GPU_max_textures_geom()) { + return false; + } + } + + return (active_samplers_len * 3 <= GPU_max_textures()); +} + +bool GPU_pass_compile(GPUPass *pass, const char *shname) +{ + bool success = true; + if (!pass->compiled) { + GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>( + static_cast<ShaderCreateInfo *>(pass->create_info)); + + pass->create_info->name_ = shname; + + GPUShader *shader = GPU_shader_create_from_info(info); + + /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit. + * We need to make sure to count active samplers to avoid undefined behavior. */ + if (!gpu_pass_shader_validate(pass, shader)) { + success = false; + if (shader != nullptr) { + fprintf(stderr, "GPUShader: error: too many samplers in shader.\n"); + GPU_shader_free(shader); + shader = nullptr; + } + } + pass->shader = shader; + pass->compiled = true; + } + return success; +} + +GPUShader *GPU_pass_shader_get(GPUPass *pass) +{ + return pass->shader; +} + +void GPU_pass_release(GPUPass *pass) +{ + BLI_assert(pass->refcount > 0); + pass->refcount--; +} + +static void gpu_pass_free(GPUPass *pass) +{ + BLI_assert(pass->refcount == 0); + if (pass->shader) { + GPU_shader_free(pass->shader); + } + delete pass->create_info; + MEM_freeN(pass); +} + +void GPU_pass_cache_garbage_collect(void) +{ + static int lasttime = 0; + const int shadercollectrate = 60; /* hardcoded for now. */ + int ctime = (int)PIL_check_seconds_timer(); + + if (ctime < shadercollectrate + lasttime) { + return; + } + + lasttime = ctime; + + BLI_spin_lock(&pass_cache_spin); + GPUPass *next, **prev_pass = &pass_cache; + for (GPUPass *pass = pass_cache; pass; pass = next) { + next = pass->next; + if (pass->refcount == 0) { + /* Remove from list */ + *prev_pass = next; + gpu_pass_free(pass); + } + else { + prev_pass = &pass->next; + } + } + BLI_spin_unlock(&pass_cache_spin); +} + +void GPU_pass_cache_init(void) +{ + BLI_spin_init(&pass_cache_spin); +} + +void GPU_pass_cache_free(void) +{ + BLI_spin_lock(&pass_cache_spin); + while (pass_cache) { + GPUPass *next = pass_cache->next; + gpu_pass_free(pass_cache); + pass_cache = next; + } + BLI_spin_unlock(&pass_cache_spin); + + BLI_spin_end(&pass_cache_spin); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Module + * \{ */ + +void gpu_codegen_init(void) +{ +} + +void gpu_codegen_exit(void) +{ + // BKE_world_defaults_free_gpu(); + BKE_material_defaults_free_gpu(); + GPU_shader_free_builtin_shaders(); +} + +/** \} */
\ No newline at end of file |