diff options
Diffstat (limited to 'source/blender/compositor/realtime_compositor/intern')
18 files changed, 2735 insertions, 0 deletions
diff --git a/source/blender/compositor/realtime_compositor/intern/compile_state.cc b/source/blender/compositor/realtime_compositor/intern/compile_state.cc new file mode 100644 index 00000000000..5fa2fc9d544 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/compile_state.cc @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <limits> + +#include "BLI_math_vec_types.hh" + +#include "DNA_node_types.h" + +#include "NOD_derived_node_tree.hh" + +#include "COM_compile_state.hh" +#include "COM_domain.hh" +#include "COM_input_descriptor.hh" +#include "COM_node_operation.hh" +#include "COM_result.hh" +#include "COM_scheduler.hh" +#include "COM_shader_operation.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +CompileState::CompileState(const Schedule &schedule) : schedule_(schedule) +{ +} + +const Schedule &CompileState::get_schedule() +{ + return schedule_; +} + +void CompileState::map_node_to_node_operation(DNode node, NodeOperation *operations) +{ + return node_operations_.add_new(node, operations); +} + +void CompileState::map_node_to_shader_operation(DNode node, ShaderOperation *operations) +{ + return shader_operations_.add_new(node, operations); +} + +Result &CompileState::get_result_from_output_socket(DOutputSocket output) +{ + /* The output belongs to a node that was compiled into a standard node operation, so return a + * reference to the result from that operation using the output identifier. */ + if (node_operations_.contains(output.node())) { + NodeOperation *operation = node_operations_.lookup(output.node()); + return operation->get_result(output->identifier); + } + + /* Otherwise, the output belongs to a node that was compiled into a shader operation, so + * retrieve the internal identifier of that output and return a reference to the result from + * that operation using the retrieved identifier. 
*/ + ShaderOperation *operation = shader_operations_.lookup(output.node()); + return operation->get_result(operation->get_output_identifier_from_output_socket(output)); +} + +void CompileState::add_node_to_shader_compile_unit(DNode node) +{ + shader_compile_unit_.add_new(node); + + /* If the domain of the shader compile unit is not yet determined or was determined to be + * an identity domain, update it to be the computed domain of the node. */ + if (shader_compile_unit_domain_ == Domain::identity()) { + shader_compile_unit_domain_ = compute_shader_node_domain(node); + } +} + +ShaderCompileUnit &CompileState::get_shader_compile_unit() +{ + return shader_compile_unit_; +} + +void CompileState::reset_shader_compile_unit() +{ + /* Forget the domain together with the nodes. add_node_to_shader_compile_unit only assigns the + * domain while it is still the identity domain, so a domain left over from the unit that was + * just compiled would otherwise be kept for the next unit. */ + shader_compile_unit_.clear(); + shader_compile_unit_domain_ = Domain::identity(); +} + +bool CompileState::should_compile_shader_compile_unit(DNode node) +{ + /* If the shader compile unit is empty, then it can't be compiled yet. */ + if (shader_compile_unit_.is_empty()) { + return false; + } + + /* If the node is not a shader node, then it can't be added to the shader compile unit and the + * shader compile unit is considered complete and should be compiled. */ + if (!is_shader_node(node)) { + return true; + } + + /* If the computed domain of the node doesn't match the domain of the shader compile unit, then + * it can't be added to the shader compile unit and the shader compile unit is considered + * complete and should be compiled. Identity domains are an exception as they are always + * compatible because they represent single values. */ + if (shader_compile_unit_domain_ != Domain::identity() && + shader_compile_unit_domain_ != compute_shader_node_domain(node)) { + return true; + } + + /* Otherwise, the node is compatible and can be added to the compile unit and it shouldn't be + * compiled just yet. 
*/ + return false; +} + +Domain CompileState::compute_shader_node_domain(DNode node) +{ + /* Default to an identity domain in case no domain input was found, most likely because all + * inputs are single values. */ + Domain node_domain = Domain::identity(); + int current_domain_priority = std::numeric_limits<int>::max(); + + /* Go over the inputs and find the domain of the non single value input with the highest domain + * priority. */ + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + /* Get the output linked to the input. If it is null, that means the input is unlinked, so skip + * it. */ + const DOutputSocket output = get_output_linked_to_input(dinput); + if (!output) { + continue; + } + + const InputDescriptor input_descriptor = input_descriptor_from_input_socket(input); + + /* If the output belongs to a node that is part of the shader compile unit, then the domain of + * the input is the domain of the compile unit itself. */ + if (shader_compile_unit_.contains(output.node())) { + /* Single value inputs can't be domain inputs. */ + if (shader_compile_unit_domain_.size == int2(1)) { + continue; + } + + /* Notice that the lower the domain priority value is, the higher the priority is, hence the + * less than comparison. */ + if (input_descriptor.domain_priority < current_domain_priority) { + node_domain = shader_compile_unit_domain_; + current_domain_priority = input_descriptor.domain_priority; + } + continue; + } + + const Result &result = get_result_from_output_socket(output); + + /* A single value input can't be a domain input. */ + if (result.is_single_value() || input_descriptor.expects_single_value) { + continue; + } + + /* An input that skips realization can't be a domain input. */ + if (input_descriptor.skip_realization) { + continue; + } + + /* Notice that the lower the domain priority value is, the higher the priority is, hence the + * less than comparison. 
*/ + if (input_descriptor.domain_priority < current_domain_priority) { + node_domain = result.domain(); + current_domain_priority = input_descriptor.domain_priority; + } + } + + return node_domain; +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/context.cc b/source/blender/compositor/realtime_compositor/intern/context.cc new file mode 100644 index 00000000000..64ac29af3d1 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/context.cc @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "COM_context.hh" +#include "COM_static_shader_manager.hh" +#include "COM_texture_pool.hh" + +namespace blender::realtime_compositor { + +Context::Context(TexturePool &texture_pool) : texture_pool_(texture_pool) +{ +} + +int Context::get_frame_number() const +{ + return get_scene()->r.cfra; +} + +float Context::get_time() const +{ + const float frame_number = static_cast<float>(get_frame_number()); + const float frame_rate = static_cast<float>(get_scene()->r.frs_sec) / + static_cast<float>(get_scene()->r.frs_sec_base); + return frame_number / frame_rate; +} + +TexturePool &Context::texture_pool() +{ + return texture_pool_; +} + +StaticShaderManager &Context::shader_manager() +{ + return shader_manager_; +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/conversion_operation.cc b/source/blender/compositor/realtime_compositor/intern/conversion_operation.cc new file mode 100644 index 00000000000..3743b9bba87 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/conversion_operation.cc @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_math_vec_types.hh" + +#include "GPU_shader.h" + +#include "COM_context.hh" +#include "COM_conversion_operation.hh" +#include "COM_input_descriptor.hh" +#include "COM_result.hh" +#include "COM_utilities.hh" + +namespace 
blender::realtime_compositor { + +/* -------------------------------------------------------------------- */ +/** \name Conversion Operation + * \{ */ + +void ConversionOperation::execute() +{ + Result &result = get_result(); + const Result &input = get_input(); + + if (input.is_single_value()) { + result.allocate_single_value(); + execute_single(input, result); + return; + } + + result.allocate_texture(input.domain()); + + GPUShader *shader = get_conversion_shader(); + GPU_shader_bind(shader); + + input.bind_as_texture(shader, "input_tx"); + result.bind_as_image(shader, "output_img"); + + compute_dispatch_threads_at_least(shader, input.domain().size); + + input.unbind_as_texture(); + result.unbind_as_image(); + GPU_shader_unbind(); +} + +SimpleOperation *ConversionOperation::construct_if_needed(Context &context, + const Result &input_result, + const InputDescriptor &input_descriptor) +{ + ResultType result_type = input_result.type(); + ResultType expected_type = input_descriptor.type; + + /* If the result type differs from the expected type, return an instance of an appropriate + * conversion operation. Otherwise, return a null pointer. 
*/ + + if (result_type == ResultType::Float && expected_type == ResultType::Vector) { + return new ConvertFloatToVectorOperation(context); + } + + if (result_type == ResultType::Float && expected_type == ResultType::Color) { + return new ConvertFloatToColorOperation(context); + } + + if (result_type == ResultType::Color && expected_type == ResultType::Float) { + return new ConvertColorToFloatOperation(context); + } + + if (result_type == ResultType::Color && expected_type == ResultType::Vector) { + return new ConvertColorToVectorOperation(context); + } + + if (result_type == ResultType::Vector && expected_type == ResultType::Float) { + return new ConvertVectorToFloatOperation(context); + } + + if (result_type == ResultType::Vector && expected_type == ResultType::Color) { + return new ConvertVectorToColorOperation(context); + } + + return nullptr; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Float to Vector Operation + * \{ */ + +ConvertFloatToVectorOperation::ConvertFloatToVectorOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Float; + declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Vector, texture_pool())); +} + +void ConvertFloatToVectorOperation::execute_single(const Result &input, Result &output) +{ + output.set_vector_value(float3(input.get_float_value())); +} + +GPUShader *ConvertFloatToVectorOperation::get_conversion_shader() const +{ + return shader_manager().get("compositor_convert_float_to_vector"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Float to Color Operation + * \{ */ + +ConvertFloatToColorOperation::ConvertFloatToColorOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Float; + 
declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Color, texture_pool())); +} + +void ConvertFloatToColorOperation::execute_single(const Result &input, Result &output) +{ + float4 color = float4(input.get_float_value()); + color[3] = 1.0f; + output.set_color_value(color); +} + +GPUShader *ConvertFloatToColorOperation::get_conversion_shader() const +{ + return shader_manager().get("compositor_convert_float_to_color"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Color to Float Operation + * \{ */ + +ConvertColorToFloatOperation::ConvertColorToFloatOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Color; + declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Float, texture_pool())); +} + +void ConvertColorToFloatOperation::execute_single(const Result &input, Result &output) +{ + float4 color = input.get_color_value(); + output.set_float_value((color[0] + color[1] + color[2]) / 3.0f); +} + +GPUShader *ConvertColorToFloatOperation::get_conversion_shader() const +{ + return shader_manager().get("compositor_convert_color_to_float"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Color to Vector Operation + * \{ */ + +ConvertColorToVectorOperation::ConvertColorToVectorOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Color; + declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Vector, texture_pool())); +} + +void ConvertColorToVectorOperation::execute_single(const Result &input, Result &output) +{ + float4 color = input.get_color_value(); + output.set_vector_value(float3(color)); +} + +GPUShader *ConvertColorToVectorOperation::get_conversion_shader() const +{ + return 
shader_manager().get("compositor_convert_color_to_vector"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Vector to Float Operation + * \{ */ + +ConvertVectorToFloatOperation::ConvertVectorToFloatOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Vector; + declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Float, texture_pool())); +} + +void ConvertVectorToFloatOperation::execute_single(const Result &input, Result &output) +{ + float3 vector = input.get_vector_value(); + output.set_float_value((vector[0] + vector[1] + vector[2]) / 3.0f); +} + +GPUShader *ConvertVectorToFloatOperation::get_conversion_shader() const +{ + return shader_manager().get("compositor_convert_vector_to_float"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Convert Vector to Color Operation + * \{ */ + +ConvertVectorToColorOperation::ConvertVectorToColorOperation(Context &context) + : ConversionOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = ResultType::Vector; + declare_input_descriptor(input_descriptor); + populate_result(Result(ResultType::Color, texture_pool())); +} + +void ConvertVectorToColorOperation::execute_single(const Result &input, Result &output) +{ + output.set_color_value(float4(input.get_vector_value(), 1.0f)); +} + +GPUShader *ConvertVectorToColorOperation::get_conversion_shader() const +{ + return shader_manager().get("compositor_convert_vector_to_color"); +} + +/** \} */ + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/domain.cc b/source/blender/compositor/realtime_compositor/intern/domain.cc new file mode 100644 index 00000000000..31b297c212e --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/domain.cc @@ -0,0 
+1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_float3x3.hh" +#include "BLI_math_vec_types.hh" + +#include "COM_domain.hh" + +namespace blender::realtime_compositor { + +Domain::Domain(int2 size) : size(size), transformation(float3x3::identity()) +{ +} + +Domain::Domain(int2 size, float3x3 transformation) : size(size), transformation(transformation) +{ +} + +void Domain::transform(const float3x3 &input_transformation) +{ + transformation = input_transformation * transformation; +} + +Domain Domain::identity() +{ + return Domain(int2(1), float3x3::identity()); +} + +bool operator==(const Domain &a, const Domain &b) +{ + return a.size == b.size && a.transformation == b.transformation; +} + +bool operator!=(const Domain &a, const Domain &b) +{ + return !(a == b); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/evaluator.cc b/source/blender/compositor/realtime_compositor/intern/evaluator.cc new file mode 100644 index 00000000000..48457bec199 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/evaluator.cc @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <string> + +#include "DNA_node_types.h" + +#include "NOD_derived_node_tree.hh" + +#include "COM_compile_state.hh" +#include "COM_context.hh" +#include "COM_evaluator.hh" +#include "COM_input_single_value_operation.hh" +#include "COM_node_operation.hh" +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_scheduler.hh" +#include "COM_shader_operation.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +Evaluator::Evaluator(Context &context, bNodeTree &node_tree) + : context_(context), node_tree_(node_tree) +{ +} + +void Evaluator::evaluate() +{ + context_.texture_pool().reset(); + + if (!is_compiled_) { + compile_and_evaluate(); + is_compiled_ = true; + return; + } + + for (const 
std::unique_ptr<Operation> &operation : operations_stream_) { + operation->evaluate(); + } +} + +void Evaluator::reset() +{ + operations_stream_.clear(); + derived_node_tree_.reset(); + + is_compiled_ = false; +} + +bool Evaluator::validate_node_tree() +{ + if (derived_node_tree_->has_link_cycles()) { + context_.set_info_message("Compositor node tree has cyclic links!"); + return false; + } + + if (derived_node_tree_->has_undefined_nodes_or_sockets()) { + context_.set_info_message("Compositor node tree has undefined nodes or sockets!"); + return false; + } + + return true; +} + +void Evaluator::compile_and_evaluate() +{ + derived_node_tree_ = std::make_unique<DerivedNodeTree>(node_tree_); + + if (!validate_node_tree()) { + return; + } + + const Schedule schedule = compute_schedule(*derived_node_tree_); + + CompileState compile_state(schedule); + + for (const DNode &node : schedule) { + if (compile_state.should_compile_shader_compile_unit(node)) { + compile_and_evaluate_shader_compile_unit(compile_state); + } + + if (is_shader_node(node)) { + compile_state.add_node_to_shader_compile_unit(node); + } + else { + compile_and_evaluate_node(node, compile_state); + } + } +} + +void Evaluator::compile_and_evaluate_node(DNode node, CompileState &compile_state) +{ + NodeOperation *operation = node->typeinfo->get_compositor_operation(context_, node); + + compile_state.map_node_to_node_operation(node, operation); + + map_node_operation_inputs_to_their_results(node, operation, compile_state); + + /* This has to be done after input mapping because the method may add Input Single Value + * Operations to the operations stream, which needs to be evaluated before the operation itself + * is evaluated. 
*/ + operations_stream_.append(std::unique_ptr<Operation>(operation)); + + operation->compute_results_reference_counts(compile_state.get_schedule()); + + operation->evaluate(); +} + +void Evaluator::map_node_operation_inputs_to_their_results(DNode node, + NodeOperation *operation, + CompileState &compile_state) +{ + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + DSocket dorigin = get_input_origin_socket(dinput); + + /* The origin socket is an output, which means the input is linked. So map the input to the + * result we get from the output. */ + if (dorigin->is_output()) { + Result &result = compile_state.get_result_from_output_socket(DOutputSocket(dorigin)); + operation->map_input_to_result(input->identifier, &result); + continue; + } + + /* Otherwise, the origin socket is an input, which either means the input is unlinked and the + * origin is the input socket itself or the input is connected to an unlinked input of a group + * input node and the origin is the input of the group input node. So map the input to the + * result of a newly created Input Single Value Operation. 
*/ + auto *input_operation = new InputSingleValueOperation(context_, DInputSocket(dorigin)); + operation->map_input_to_result(input->identifier, &input_operation->get_result()); + + operations_stream_.append(std::unique_ptr<InputSingleValueOperation>(input_operation)); + + input_operation->evaluate(); + } +} + +void Evaluator::compile_and_evaluate_shader_compile_unit(CompileState &compile_state) +{ + ShaderCompileUnit &compile_unit = compile_state.get_shader_compile_unit(); + ShaderOperation *operation = new ShaderOperation(context_, compile_unit); + + for (DNode node : compile_unit) { + compile_state.map_node_to_shader_operation(node, operation); + } + + map_shader_operation_inputs_to_their_results(operation, compile_state); + + operations_stream_.append(std::unique_ptr<Operation>(operation)); + + operation->compute_results_reference_counts(compile_state.get_schedule()); + + operation->evaluate(); + + compile_state.reset_shader_compile_unit(); +} + +void Evaluator::map_shader_operation_inputs_to_their_results(ShaderOperation *operation, + CompileState &compile_state) +{ + for (const auto &item : operation->get_inputs_to_linked_outputs_map().items()) { + Result &result = compile_state.get_result_from_output_socket(item.value); + operation->map_input_to_result(item.key, &result); + } +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/input_single_value_operation.cc b/source/blender/compositor/realtime_compositor/intern/input_single_value_operation.cc new file mode 100644 index 00000000000..b3cc86b5f79 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/input_single_value_operation.cc @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_math_vec_types.hh" + +#include "COM_input_single_value_operation.hh" +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +const StringRef 
InputSingleValueOperation::output_identifier_ = StringRef("Output"); + +InputSingleValueOperation::InputSingleValueOperation(Context &context, DInputSocket input_socket) + : Operation(context), input_socket_(input_socket) +{ + const ResultType result_type = get_node_socket_result_type(input_socket_.bsocket()); + Result result = Result(result_type, texture_pool()); + + /* The result of an input single value operation is guaranteed to have a single user. */ + result.set_initial_reference_count(1); + + populate_result(result); +} + +void InputSingleValueOperation::execute() +{ + /* Allocate a single value for the result. */ + Result &result = get_result(); + result.allocate_single_value(); + + const bNodeSocket *bsocket = input_socket_.bsocket(); + + /* Set the value of the result to the default value of the input socket. */ + switch (result.type()) { + case ResultType::Float: + result.set_float_value(bsocket->default_value_typed<bNodeSocketValueFloat>()->value); + break; + case ResultType::Vector: + result.set_vector_value( + float3(bsocket->default_value_typed<bNodeSocketValueVector>()->value)); + break; + case ResultType::Color: + result.set_color_value(float4(bsocket->default_value_typed<bNodeSocketValueRGBA>()->value)); + break; + } +} + +Result &InputSingleValueOperation::get_result() +{ + return Operation::get_result(output_identifier_); +} + +void InputSingleValueOperation::populate_result(Result result) +{ + Operation::populate_result(output_identifier_, result); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/node_operation.cc b/source/blender/compositor/realtime_compositor/intern/node_operation.cc new file mode 100644 index 00000000000..1c20c967ddb --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/node_operation.cc @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <memory> + +#include "BLI_map.hh" +#include "BLI_string_ref.hh" +#include 
"BLI_vector.hh" + +#include "DNA_node_types.h" + +#include "NOD_derived_node_tree.hh" +#include "NOD_node_declaration.hh" + +#include "COM_context.hh" +#include "COM_input_descriptor.hh" +#include "COM_node_operation.hh" +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_scheduler.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +NodeOperation::NodeOperation(Context &context, DNode node) : Operation(context), node_(node) +{ + for (const bNodeSocket *output : node->output_sockets()) { + const ResultType result_type = get_node_socket_result_type(output); + const Result result = Result(result_type, texture_pool()); + populate_result(output->identifier, result); + } + + for (const bNodeSocket *input : node->input_sockets()) { + const InputDescriptor input_descriptor = input_descriptor_from_input_socket(input); + declare_input_descriptor(input->identifier, input_descriptor); + } +} + +void NodeOperation::compute_results_reference_counts(const Schedule &schedule) +{ + for (const bNodeSocket *output : this->node()->output_sockets()) { + const DOutputSocket doutput{node().context(), output}; + + const int reference_count = number_of_inputs_linked_to_output_conditioned( + doutput, [&](DInputSocket input) { return schedule.contains(input.node()); }); + + get_result(doutput->identifier).set_initial_reference_count(reference_count); + } +} + +const DNode &NodeOperation::node() const +{ + return node_; +} + +const bNode &NodeOperation::bnode() const +{ + return *node_; +} + +bool NodeOperation::should_compute_output(StringRef identifier) +{ + return get_result(identifier).should_compute(); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/operation.cc b/source/blender/compositor/realtime_compositor/intern/operation.cc new file mode 100644 index 00000000000..832196cc5ef --- /dev/null +++ 
b/source/blender/compositor/realtime_compositor/intern/operation.cc @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <limits> +#include <memory> + +#include "BLI_map.hh" +#include "BLI_string_ref.hh" +#include "BLI_vector.hh" + +#include "COM_context.hh" +#include "COM_conversion_operation.hh" +#include "COM_domain.hh" +#include "COM_input_descriptor.hh" +#include "COM_operation.hh" +#include "COM_realize_on_domain_operation.hh" +#include "COM_reduce_to_single_value_operation.hh" +#include "COM_result.hh" +#include "COM_simple_operation.hh" +#include "COM_static_shader_manager.hh" +#include "COM_texture_pool.hh" + +namespace blender::realtime_compositor { + +Operation::Operation(Context &context) : context_(context) +{ +} + +Operation::~Operation() = default; + +void Operation::evaluate() +{ + evaluate_input_processors(); + + reset_results(); + + execute(); + + release_inputs(); +} + +Result &Operation::get_result(StringRef identifier) +{ + return results_.lookup(identifier); +} + +void Operation::map_input_to_result(StringRef identifier, Result *result) +{ + results_mapped_to_inputs_.add_new(identifier, result); +} + +Domain Operation::compute_domain() +{ + /* Default to an identity domain in case no domain input was found, most likely because all + * inputs are single values. */ + Domain operation_domain = Domain::identity(); + int current_domain_priority = std::numeric_limits<int>::max(); + + /* Go over the inputs and find the domain of the non single value input with the highest domain + * priority. */ + for (StringRef identifier : input_descriptors_.keys()) { + const Result &result = get_input(identifier); + const InputDescriptor &descriptor = get_input_descriptor(identifier); + + /* A single value input can't be a domain input. */ + if (result.is_single_value() || descriptor.expects_single_value) { + continue; + } + + /* An input that skips realization can't be a domain input. 
*/ + if (descriptor.skip_realization) { + continue; + } + + /* Notice that the lower the domain priority value is, the higher the priority is, hence the + * less than comparison. */ + if (descriptor.domain_priority < current_domain_priority) { + operation_domain = result.domain(); + current_domain_priority = descriptor.domain_priority; + } + } + + return operation_domain; +} + +void Operation::add_and_evaluate_input_processors() +{ + /* Each input processor type is added to all inputs entirely before the next type. This is done + * because the construction of the input processors may depend on the result of previous input + * processors for all inputs. For instance, the realize on domain input processor considers the + * value of all inputs, so previous input processors for all inputs need to be added and + * evaluated first. The passes below run in this order: reduce to single value, then conversion, + * then realize on domain. Each construct_if_needed result is handed to + * add_and_evaluate_input_processor, which ignores null processors. */ + + for (const StringRef &identifier : results_mapped_to_inputs_.keys()) { + SimpleOperation *single_value = ReduceToSingleValueOperation::construct_if_needed( + context(), get_input(identifier)); + add_and_evaluate_input_processor(identifier, single_value); + } + + for (const StringRef &identifier : results_mapped_to_inputs_.keys()) { + SimpleOperation *conversion = ConversionOperation::construct_if_needed( + context(), get_input(identifier), get_input_descriptor(identifier)); + add_and_evaluate_input_processor(identifier, conversion); + } + + for (const StringRef &identifier : results_mapped_to_inputs_.keys()) { + SimpleOperation *realize_on_domain = RealizeOnDomainOperation::construct_if_needed( + context(), get_input(identifier), get_input_descriptor(identifier), compute_domain()); + add_and_evaluate_input_processor(identifier, realize_on_domain); + } +} + +void Operation::add_and_evaluate_input_processor(StringRef identifier, SimpleOperation *processor) +{ + /* Allow null inputs to facilitate construct_if_needed pattern of addition. For instance, see the + * implementation of the add_and_evaluate_input_processors method. 
*/ + if (!processor) { + return; + } + + ProcessorsVector &processors = input_processors_.lookup_or_add_default(identifier); + + /* Get the result that should serve as the input for the processor. This is either the result + * mapped to the input or the result of the last processor depending on whether this is the first + * processor or not. */ + Result &result = processors.is_empty() ? get_input(identifier) : processors.last()->get_result(); + + /* Map the input result of the processor and add it to the processors vector. */ + processor->map_input_to_result(&result); + processors.append(std::unique_ptr<SimpleOperation>(processor)); + + /* Switch the result mapped to the input to be the output result of the processor. */ + switch_result_mapped_to_input(identifier, &processor->get_result()); + + processor->evaluate(); +} + +Result &Operation::get_input(StringRef identifier) const +{ + return *results_mapped_to_inputs_.lookup(identifier); +} + +void Operation::switch_result_mapped_to_input(StringRef identifier, Result *result) +{ + results_mapped_to_inputs_.lookup(identifier) = result; +} + +void Operation::populate_result(StringRef identifier, Result result) +{ + results_.add_new(identifier, result); +} + +void Operation::declare_input_descriptor(StringRef identifier, InputDescriptor descriptor) +{ + input_descriptors_.add_new(identifier, descriptor); +} + +InputDescriptor &Operation::get_input_descriptor(StringRef identifier) +{ + return input_descriptors_.lookup(identifier); +} + +Context &Operation::context() +{ + return context_; +} + +TexturePool &Operation::texture_pool() const +{ + return context_.texture_pool(); +} + +StaticShaderManager &Operation::shader_manager() const +{ + return context_.shader_manager(); +} + +void Operation::evaluate_input_processors() +{ + if (!input_processors_added_) { + add_and_evaluate_input_processors(); + input_processors_added_ = true; + return; + } + + for (const ProcessorsVector &processors : input_processors_.values()) { + 
for (const std::unique_ptr<SimpleOperation> &processor : processors) { + processor->evaluate(); + } + } +} + +void Operation::reset_results() +{ + for (Result &result : results_.values()) { + result.reset(); + } +} + +void Operation::release_inputs() +{ + for (Result *result : results_mapped_to_inputs_.values()) { + result->release(); + } +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/realize_on_domain_operation.cc b/source/blender/compositor/realtime_compositor/intern/realize_on_domain_operation.cc new file mode 100644 index 00000000000..817293c0fa6 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/realize_on_domain_operation.cc @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_float3x3.hh" +#include "BLI_math_vec_types.hh" +#include "BLI_utildefines.h" + +#include "GPU_shader.h" +#include "GPU_texture.h" + +#include "COM_context.hh" +#include "COM_domain.hh" +#include "COM_input_descriptor.hh" +#include "COM_realize_on_domain_operation.hh" +#include "COM_result.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +RealizeOnDomainOperation::RealizeOnDomainOperation(Context &context, + Domain domain, + ResultType type) + : SimpleOperation(context), domain_(domain) +{ + InputDescriptor input_descriptor; + input_descriptor.type = type; + declare_input_descriptor(input_descriptor); + populate_result(Result(type, texture_pool())); +} + +void RealizeOnDomainOperation::execute() +{ + Result &input = get_input(); + Result &result = get_result(); + + result.allocate_texture(domain_); + + GPUShader *shader = get_realization_shader(); + GPU_shader_bind(shader); + + /* Transform the input space into the domain space. */ + const float3x3 local_transformation = input.domain().transformation * + domain_.transformation.inverted(); + + /* Set the origin of the transformation to be the center of the domain. 
*/ + const float3x3 transformation = float3x3::from_origin_transformation( + local_transformation, float2(domain_.size) / 2.0f); + + /* Invert the transformation because the shader transforms the domain coordinates instead of the + * input image itself and thus expect the inverse. */ + const float3x3 inverse_transformation = transformation.inverted(); + + GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr()); + + /* The texture sampler should use bilinear interpolation for both the bilinear and bicubic + * cases, as the logic used by the bicubic realization shader expects textures to use bilinear + * interpolation. */ + const bool use_bilinear = ELEM(input.get_realization_options().interpolation, + Interpolation::Bilinear, + Interpolation::Bicubic); + GPU_texture_filter_mode(input.texture(), use_bilinear); + + /* Make out-of-bound texture access return zero by clamping to border color. And make texture + * wrap appropriately if the input repeats. 
*/ + const bool repeats = input.get_realization_options().repeat_x || + input.get_realization_options().repeat_y; + GPU_texture_wrap_mode(input.texture(), repeats, false); + + input.bind_as_texture(shader, "input_tx"); + result.bind_as_image(shader, "domain_img"); + + compute_dispatch_threads_at_least(shader, domain_.size); + + input.unbind_as_texture(); + result.unbind_as_image(); + GPU_shader_unbind(); +} + +GPUShader *RealizeOnDomainOperation::get_realization_shader() +{ + switch (get_result().type()) { + case ResultType::Color: + return shader_manager().get("compositor_realize_on_domain_color"); + case ResultType::Vector: + return shader_manager().get("compositor_realize_on_domain_vector"); + case ResultType::Float: + return shader_manager().get("compositor_realize_on_domain_float"); + } + + BLI_assert_unreachable(); + return nullptr; +} + +Domain RealizeOnDomainOperation::compute_domain() +{ + return domain_; +} + +SimpleOperation *RealizeOnDomainOperation::construct_if_needed( + Context &context, + const Result &input_result, + const InputDescriptor &input_descriptor, + const Domain &operation_domain) +{ + /* This input wants to skip realization, the operation is not needed. */ + if (input_descriptor.skip_realization) { + return nullptr; + } + + /* The input expects a single value and if no single value is provided, it will be ignored and a + * default value will be used, so no need to realize it and the operation is not needed. */ + if (input_descriptor.expects_single_value) { + return nullptr; + } + + /* Input result is a single value and does not need realization, the operation is not needed. */ + if (input_result.is_single_value()) { + return nullptr; + } + + /* The input have an identical domain to the operation domain, so no need to realize it and the + * operation is not needed. */ + if (input_result.domain() == operation_domain) { + return nullptr; + } + + /* Otherwise, realization is needed. 
*/ + return new RealizeOnDomainOperation(context, operation_domain, input_descriptor.type); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/reduce_to_single_value_operation.cc b/source/blender/compositor/realtime_compositor/intern/reduce_to_single_value_operation.cc new file mode 100644 index 00000000000..acc9b4ab7d6 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/reduce_to_single_value_operation.cc @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "GPU_state.h" +#include "GPU_texture.h" + +#include "MEM_guardedalloc.h" + +#include "COM_context.hh" +#include "COM_input_descriptor.hh" +#include "COM_reduce_to_single_value_operation.hh" +#include "COM_result.hh" + +namespace blender::realtime_compositor { + +ReduceToSingleValueOperation::ReduceToSingleValueOperation(Context &context, ResultType type) + : SimpleOperation(context) +{ + InputDescriptor input_descriptor; + input_descriptor.type = type; + declare_input_descriptor(input_descriptor); + populate_result(Result(type, texture_pool())); +} + +void ReduceToSingleValueOperation::execute() +{ + /* Make sure any prior writes to the texture are reflected before downloading it. */ + GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE); + + const Result &input = get_input(); + float *pixel = static_cast<float *>(GPU_texture_read(input.texture(), GPU_DATA_FLOAT, 0)); + + Result &result = get_result(); + result.allocate_single_value(); + switch (result.type()) { + case ResultType::Color: + result.set_color_value(pixel); + break; + case ResultType::Vector: + result.set_vector_value(pixel); + break; + case ResultType::Float: + result.set_float_value(*pixel); + break; + } + + MEM_freeN(pixel); +} + +SimpleOperation *ReduceToSingleValueOperation::construct_if_needed(Context &context, + const Result &input_result) +{ + /* Input result is already a single value, the operation is not needed. 
*/ + if (input_result.is_single_value()) { + return nullptr; + } + + /* The input is a full sized texture and can't be reduced to a single value, the operation is not + * needed. */ + if (input_result.domain().size != int2(1)) { + return nullptr; + } + + /* The input is a texture of a single pixel and can be reduced to a single value. */ + return new ReduceToSingleValueOperation(context, input_result.type()); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/result.cc b/source/blender/compositor/realtime_compositor/intern/result.cc new file mode 100644 index 00000000000..8059367d211 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/result.cc @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_float3x3.hh" +#include "BLI_math_vec_types.hh" + +#include "GPU_shader.h" +#include "GPU_state.h" +#include "GPU_texture.h" + +#include "COM_domain.hh" +#include "COM_result.hh" +#include "COM_texture_pool.hh" + +namespace blender::realtime_compositor { + +Result::Result(ResultType type, TexturePool &texture_pool) + : type_(type), texture_pool_(&texture_pool) +{ +} + +void Result::allocate_texture(Domain domain) +{ + is_single_value_ = false; + switch (type_) { + case ResultType::Float: + texture_ = texture_pool_->acquire_float(domain.size); + break; + case ResultType::Vector: + texture_ = texture_pool_->acquire_vector(domain.size); + break; + case ResultType::Color: + texture_ = texture_pool_->acquire_color(domain.size); + break; + } + domain_ = domain; +} + +void Result::allocate_single_value() +{ + is_single_value_ = true; + /* Single values are stored in 1x1 textures as well as the single value members. 
*/ + const int2 texture_size{1, 1}; + switch (type_) { + case ResultType::Float: + texture_ = texture_pool_->acquire_float(texture_size); + break; + case ResultType::Vector: + texture_ = texture_pool_->acquire_vector(texture_size); + break; + case ResultType::Color: + texture_ = texture_pool_->acquire_color(texture_size); + break; + } + domain_ = Domain::identity(); +} + +void Result::allocate_invalid() +{ + allocate_single_value(); + switch (type_) { + case ResultType::Float: + set_float_value(0.0f); + break; + case ResultType::Vector: + set_vector_value(float3(0.0f)); + break; + case ResultType::Color: + set_color_value(float4(0.0f)); + break; + } +} + +void Result::bind_as_texture(GPUShader *shader, const char *texture_name) const +{ + /* Make sure any prior writes to the texture are reflected before reading from it. */ + GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH); + + const int texture_image_unit = GPU_shader_get_texture_binding(shader, texture_name); + GPU_texture_bind(texture_, texture_image_unit); +} + +void Result::bind_as_image(GPUShader *shader, const char *image_name) const +{ + const int image_unit = GPU_shader_get_texture_binding(shader, image_name); + GPU_texture_image_bind(texture_, image_unit); +} + +void Result::unbind_as_texture() const +{ + GPU_texture_unbind(texture_); +} + +void Result::unbind_as_image() const +{ + GPU_texture_image_unbind(texture_); +} + +void Result::pass_through(Result &target) +{ + /* Increment the reference count of the master by the original reference count of the target. */ + increment_reference_count(target.reference_count()); + + /* Make the target an exact copy of this result, but keep the initial reference count, as this is + * a property of the original result and is needed for correctly resetting the result before the + * next evaluation. 
*/ + const int initial_reference_count = target.initial_reference_count_; + target = *this; + target.initial_reference_count_ = initial_reference_count; + + target.master_ = this; +} + +void Result::transform(const float3x3 &transformation) +{ + domain_.transform(transformation); +} + +RealizationOptions &Result::get_realization_options() +{ + return domain_.realization_options; +} + +float Result::get_float_value() const +{ + return float_value_; +} + +float3 Result::get_vector_value() const +{ + return vector_value_; +} + +float4 Result::get_color_value() const +{ + return color_value_; +} + +float Result::get_float_value_default(float default_value) const +{ + if (is_single_value()) { + return get_float_value(); + } + return default_value; +} + +float3 Result::get_vector_value_default(const float3 &default_value) const +{ + if (is_single_value()) { + return get_vector_value(); + } + return default_value; +} + +float4 Result::get_color_value_default(const float4 &default_value) const +{ + if (is_single_value()) { + return get_color_value(); + } + return default_value; +} + +void Result::set_float_value(float value) +{ + float_value_ = value; + GPU_texture_update(texture_, GPU_DATA_FLOAT, &float_value_); +} + +void Result::set_vector_value(const float3 &value) +{ + vector_value_ = value; + GPU_texture_update(texture_, GPU_DATA_FLOAT, vector_value_); +} + +void Result::set_color_value(const float4 &value) +{ + color_value_ = value; + GPU_texture_update(texture_, GPU_DATA_FLOAT, color_value_); +} + +void Result::set_initial_reference_count(int count) +{ + initial_reference_count_ = count; +} + +void Result::reset() +{ + master_ = nullptr; + reference_count_ = initial_reference_count_; +} + +void Result::increment_reference_count(int count) +{ + /* If there is a master result, increment its reference count instead. 
*/ + if (master_) { + master_->increment_reference_count(count); + return; + } + + reference_count_ += count; +} + +void Result::release() +{ + /* If there is a master result, release it instead. */ + if (master_) { + master_->release(); + return; + } + + /* Decrement the reference count, and if it reaches zero, release the texture back into the + * texture pool. */ + reference_count_--; + if (reference_count_ == 0) { + texture_pool_->release(texture_); + } +} + +bool Result::should_compute() +{ + return initial_reference_count_ != 0; +} + +ResultType Result::type() const +{ + return type_; +} + +bool Result::is_texture() const +{ + return !is_single_value_; +} + +bool Result::is_single_value() const +{ + return is_single_value_; +} + +GPUTexture *Result::texture() const +{ + return texture_; +} + +int Result::reference_count() const +{ + /* If there is a master result, return its reference count instead. */ + if (master_) { + return master_->reference_count(); + } + return reference_count_; +} + +const Domain &Result::domain() const +{ + return domain_; +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/scheduler.cc b/source/blender/compositor/realtime_compositor/intern/scheduler.cc new file mode 100644 index 00000000000..ac5cc55a73f --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/scheduler.cc @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_map.hh" +#include "BLI_set.hh" +#include "BLI_stack.hh" +#include "BLI_vector.hh" +#include "BLI_vector_set.hh" + +#include "NOD_derived_node_tree.hh" + +#include "BKE_node_runtime.hh" + +#include "COM_scheduler.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +/* Compute the output node whose result should be computed. The output node is the node marked as + * NODE_DO_OUTPUT. 
If multiple types of output nodes are marked, then the preference will be
+ * CMP_NODE_COMPOSITE > CMP_NODE_VIEWER > CMP_NODE_SPLITVIEWER. If no output node exists, a null
+ * node will be returned. */
+static DNode compute_output_node(DerivedNodeTree &tree)
+{
+  const auto &root_context = tree.root_context();
+  const bNodeTree &root_tree = root_context.btree();
+
+  /* The identifiers of the output node types, ordered from the most to the least preferred. */
+  const char *const output_node_idnames[] = {
+      "CompositorNodeComposite",
+      "CompositorNodeViewer",
+      "CompositorNodeSplitViewer",
+  };
+
+  /* Return the first node marked as NODE_DO_OUTPUT, trying the types in order of preference. */
+  for (const char *idname : output_node_idnames) {
+    for (const bNode *node : root_tree.nodes_by_type(idname)) {
+      if (node->flag & NODE_DO_OUTPUT) {
+        return DNode(&root_context, node);
+      }
+    }
+  }
+
+  /* No node of any output type is marked as NODE_DO_OUTPUT, return a null node. */
+  return DNode();
+}
+
+/* A type representing a mapping that associates each node with a heuristic estimation of the
+ * number of intermediate buffers needed to compute it and all of its dependencies. See the
+ * compute_number_of_needed_buffers function for more information. */
+using NeededBuffers = Map<DNode, int>;
+
+/* Compute a heuristic estimation of the number of intermediate buffers needed to compute each node
+ * and all of its dependencies for all nodes that the given node depends on. The output is a map
+ * that maps each node with the number of intermediate buffers needed to compute it and all of its
+ * dependencies.
+ *
+ * Consider a node that takes n number of buffers as an input from a number of node dependencies,
+ * which we shall call the input nodes. The node also computes and outputs m number of buffers.
+ * In order for the node to compute its output, a number of intermediate buffers will be needed.
+ * Since the node takes n buffers and outputs m buffers, then the number of buffers directly
+ * needed by the node is (n + m).
But each of the input buffers are computed by a node that, in + * turn, needs a number of buffers to compute its output. So the total number of buffers needed + * to compute the output of the node is max(n + m, d) where d is the number of buffers needed by + * the input node that needs the largest number of buffers. We only consider the input node that + * needs the largest number of buffers, because those buffers can be reused by any input node + * that needs a lesser number of buffers. + * + * Shader nodes, however, are a special case because links between two shader nodes inside the same + * shader operation don't pass a buffer, but a single value in the compiled shader. So for shader + * nodes, only inputs and outputs linked to nodes that are not shader nodes should be considered. + * Note that this might not actually be true, because the compiler may decide to split a shader + * operation into multiples ones that will pass buffers, but this is not something that can be + * known at scheduling-time. See the discussion in COM_compile_state.hh, COM_evaluator.hh, and + * COM_shader_operation.hh for more information. In the node tree shown below, node 4 will have + * exactly the same number of needed buffers by node 3, because its inputs and outputs are all + * internally linked in the shader operation. + * + * Shader Operation + * +------------------------------------------------------+ + * .------------. | .------------. .------------. .------------. | .------------. + * | Node 1 | | | Node 3 | | Node 4 | | Node 5 | | | Node 6 | + * | |----|--| |--| |------| |--|--| | + * | | .-|--| | | | .---| | | | | + * '------------' | | '------------' '------------' | '------------' | '------------' + * | +----------------------------------|-------------------+ + * .------------. 
| | + * | Node 2 | | | + * | |--'------------------------------------' + * | | + * '------------' + * + * Note that the computed output is not guaranteed to be accurate, and will not be in most cases. + * The computation is merely a heuristic estimation that works well in most cases. This is due to a + * number of reasons: + * - The node tree is actually a graph that allows output sharing, which is not something that was + * taken into consideration in this implementation because it is difficult to correctly consider. + * - Each node may allocate any number of internal buffers, which is not taken into account in this + * implementation because it rarely affects the output and is done by very few nodes. + * - The compiler may decide to compiler the schedule differently depending on runtime information + * which we can merely speculate at scheduling-time as described above. */ +static NeededBuffers compute_number_of_needed_buffers(DNode output_node) +{ + NeededBuffers needed_buffers; + + /* A stack of nodes used to traverse the node tree starting from the output node. */ + Stack<DNode> node_stack = {output_node}; + + /* Traverse the node tree in a post order depth first manner and compute the number of needed + * buffers for each node. Post order traversal guarantee that all the node dependencies of each + * node are computed before it. This is done by pushing all the uncomputed node dependencies to + * the node stack first and only popping and computing the node when all its node dependencies + * were computed. */ + while (!node_stack.is_empty()) { + /* Do not pop the node immediately, as it may turn out that we can't compute its number of + * needed buffers just yet because its dependencies weren't computed, it will be popped later + * when needed. */ + DNode &node = node_stack.peek(); + + /* Go over the node dependencies connected to the inputs of the node and push them to the node + * stack if they were not computed already. 
*/ + Set<DNode> pushed_nodes; + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + /* Get the output linked to the input. If it is null, that means the input is unlinked and + * has no dependency node. */ + const DOutputSocket doutput = get_output_linked_to_input(dinput); + if (!doutput) { + continue; + } + + /* The node dependency was already computed or pushed before, so skip it. */ + if (needed_buffers.contains(doutput.node()) || pushed_nodes.contains(doutput.node())) { + continue; + } + + /* The output node needs to be computed, push the node dependency to the node stack and + * indicate that it was pushed. */ + node_stack.push(doutput.node()); + pushed_nodes.add_new(doutput.node()); + } + + /* If any of the node dependencies were pushed, that means that not all of them were computed + * and consequently we can't compute the number of needed buffers for this node just yet. */ + if (!pushed_nodes.is_empty()) { + continue; + } + + /* We don't need to store the result of the pop because we already peeked at it before. */ + node_stack.pop(); + + /* Compute the number of buffers that the node takes as an input as well as the number of + * buffers needed to compute the most demanding of the node dependencies. */ + int number_of_input_buffers = 0; + int buffers_needed_by_dependencies = 0; + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + /* Get the output linked to the input. If it is null, that means the input is unlinked. + * Unlinked inputs do not take a buffer, so skip those inputs. */ + const DOutputSocket doutput = get_output_linked_to_input(dinput); + if (!doutput) { + continue; + } + + /* Since this input is linked, if the link is not between two shader nodes, it means that the + * node takes a buffer through this input and so we increment the number of input buffers. 
*/ + if (!is_shader_node(node) || !is_shader_node(doutput.node())) { + number_of_input_buffers++; + } + + /* If the number of buffers needed by the node dependency is more than the total number of + * buffers needed by the dependencies, then update the latter to be the former. This is + * computing the "d" in the aforementioned equation "max(n + m, d)". */ + const int buffers_needed_by_dependency = needed_buffers.lookup(doutput.node()); + if (buffers_needed_by_dependency > buffers_needed_by_dependencies) { + buffers_needed_by_dependencies = buffers_needed_by_dependency; + } + } + + /* Compute the number of buffers that will be computed/output by this node. */ + int number_of_output_buffers = 0; + for (const bNodeSocket *output : node->output_sockets()) { + const DOutputSocket doutput{node.context(), output}; + + /* The output is not linked, it outputs no buffer. */ + if (!output->is_logically_linked()) { + continue; + } + + /* If any of the links is not between two shader nodes, it means that the node outputs + * a buffer through this output and so we increment the number of output buffers. */ + if (!is_output_linked_to_node_conditioned(doutput, is_shader_node) || + !is_shader_node(node)) { + number_of_output_buffers++; + } + } + + /* Compute the heuristic estimation of the number of needed intermediate buffers to compute + * this node and all of its dependencies. This is computing the aforementioned equation + * "max(n + m, d)". */ + const int total_buffers = MAX2(number_of_input_buffers + number_of_output_buffers, + buffers_needed_by_dependencies); + needed_buffers.add(node, total_buffers); + } + + return needed_buffers; +} + +/* There are multiple different possible orders of evaluating a node graph, each of which needs + * to allocate a number of intermediate buffers to store its intermediate results. It follows + * that we need to find the evaluation order which uses the least amount of intermediate buffers. 
+ * For instance, consider a node that takes two input buffers A and B. Each of those buffers is + * computed through a number of nodes constituting a sub-graph whose root is the node that + * outputs that buffer. Suppose the number of intermediate buffers needed to compute A and B are + * N(A) and N(B) respectively and N(A) > N(B). Then evaluating the sub-graph computing A would be + * a better option than that of B, because had B was computed first, its outputs will need to be + * stored in extra buffers in addition to the buffers needed by A. The number of buffers needed by + * each node is estimated as described in the compute_number_of_needed_buffers function. + * + * This is a heuristic generalization of the Sethi–Ullman algorithm, a generalization that + * doesn't always guarantee an optimal evaluation order, as the optimal evaluation order is very + * difficult to compute, however, this method works well in most cases. Moreover it assumes that + * all buffers will have roughly the same size, which may not always be the case. */ +Schedule compute_schedule(DerivedNodeTree &tree) +{ + Schedule schedule; + + /* Compute the output node whose result should be computed. */ + const DNode output_node = compute_output_node(tree); + + /* No output node, the node tree has no effect, return an empty schedule. */ + if (!output_node) { + return schedule; + } + + /* Compute the number of buffers needed by each node connected to the output. */ + const NeededBuffers needed_buffers = compute_number_of_needed_buffers(output_node); + + /* A stack of nodes used to traverse the node tree starting from the output node. */ + Stack<DNode> node_stack = {output_node}; + + /* Traverse the node tree in a post order depth first manner, scheduling the nodes in an order + * informed by the number of buffers needed by each node. Post order traversal guarantee that all + * the node dependencies of each node are scheduled before it. 
This is done by pushing all the + * unscheduled node dependencies to the node stack first and only popping and scheduling the node + * when all its node dependencies were scheduled. */ + while (!node_stack.is_empty()) { + /* Do not pop the node immediately, as it may turn out that we can't schedule it just yet + * because its dependencies weren't scheduled, it will be popped later when needed. */ + DNode &node = node_stack.peek(); + + /* Compute the nodes directly connected to the node inputs sorted by their needed buffers such + * that the node with the lowest number of needed buffers comes first. Note that we actually + * want the node with the highest number of needed buffers to be schedule first, but since + * those are pushed to the traversal stack, we need to push them in reverse order. */ + Vector<DNode> sorted_dependency_nodes; + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + /* Get the output linked to the input. If it is null, that means the input is unlinked and + * has no dependency node, so skip it. */ + const DOutputSocket doutput = get_output_linked_to_input(dinput); + if (!doutput) { + continue; + } + + /* The dependency node was added before, so skip it. The number of dependency nodes is very + * small, typically less than 3, so a linear search is okay. */ + if (sorted_dependency_nodes.contains(doutput.node())) { + continue; + } + + /* The dependency node was already schedule, so skip it. */ + if (schedule.contains(doutput.node())) { + continue; + } + + /* Sort in ascending order on insertion, the number of dependency nodes is very small, + * typically less than 3, so insertion sort is okay. 
*/ + int insertion_position = 0; + for (int i = 0; i < sorted_dependency_nodes.size(); i++) { + if (needed_buffers.lookup(doutput.node()) > + needed_buffers.lookup(sorted_dependency_nodes[i])) { + insertion_position++; + } + else { + break; + } + } + sorted_dependency_nodes.insert(insertion_position, doutput.node()); + } + + /* Push the sorted dependency nodes to the node stack in order. */ + for (const DNode &dependency_node : sorted_dependency_nodes) { + node_stack.push(dependency_node); + } + + /* If there are no sorted dependency nodes, that means they were all already scheduled or that + * none exists in the first place, so we can pop and schedule the node now. */ + if (sorted_dependency_nodes.is_empty()) { + /* The node might have already been scheduled, so we don't use add_new here and simply don't + * add it if it was already scheduled. */ + schedule.add(node_stack.pop()); + } + } + + return schedule; +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/shader_node.cc b/source/blender/compositor/realtime_compositor/intern/shader_node.cc new file mode 100644 index 00000000000..96dd50790c3 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/shader_node.cc @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_assert.h" +#include "BLI_math_vector.h" +#include "BLI_string_ref.hh" + +#include "DNA_node_types.h" + +#include "NOD_derived_node_tree.hh" + +#include "GPU_material.h" + +#include "COM_shader_node.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +ShaderNode::ShaderNode(DNode node) : node_(node) +{ + populate_inputs(); + populate_outputs(); +} + +GPUNodeStack *ShaderNode::get_inputs_array() +{ + return inputs_.data(); +} + +GPUNodeStack *ShaderNode::get_outputs_array() +{ + return outputs_.data(); +} + +GPUNodeStack &ShaderNode::get_input(StringRef identifier) +{ 
+ return inputs_[node_.input_by_identifier(identifier)->index()]; +} + +GPUNodeStack &ShaderNode::get_output(StringRef identifier) +{ + return outputs_[node_.output_by_identifier(identifier)->index()]; +} + +GPUNodeLink *ShaderNode::get_input_link(StringRef identifier) +{ + GPUNodeStack &input = get_input(identifier); + if (input.link) { + return input.link; + } + return GPU_uniform(input.vec); +} + +const DNode &ShaderNode::node() const +{ + return node_; +} + +const bNode &ShaderNode::bnode() const +{ + return *node_; +} + +static eGPUType gpu_type_from_socket_type(eNodeSocketDatatype type) +{ + switch (type) { + case SOCK_FLOAT: + return GPU_FLOAT; + case SOCK_VECTOR: + return GPU_VEC3; + case SOCK_RGBA: + return GPU_VEC4; + default: + BLI_assert_unreachable(); + return GPU_NONE; + } +} + +static void gpu_stack_vector_from_socket(float *vector, const bNodeSocket *socket) +{ + switch (socket->type) { + case SOCK_FLOAT: + vector[0] = socket->default_value_typed<bNodeSocketValueFloat>()->value; + return; + case SOCK_VECTOR: + copy_v3_v3(vector, socket->default_value_typed<bNodeSocketValueVector>()->value); + return; + case SOCK_RGBA: + copy_v4_v4(vector, socket->default_value_typed<bNodeSocketValueRGBA>()->value); + return; + default: + BLI_assert_unreachable(); + } +} + +static void populate_gpu_node_stack(DSocket socket, GPUNodeStack &stack) +{ + /* Make sure this stack is not marked as the end of the stack array. */ + stack.end = false; + /* This will be initialized later by the GPU material compiler or the compile method. */ + stack.link = nullptr; + + stack.sockettype = socket->type; + stack.type = gpu_type_from_socket_type((eNodeSocketDatatype)socket->type); + + if (socket->is_input()) { + const DInputSocket input(socket); + + DSocket origin = get_input_origin_socket(input); + + /* The input is linked if the origin socket is an output socket. Had it been an input socket, + * then it is an unlinked input of a group input node. 
*/ + stack.hasinput = origin->is_output(); + + /* Get the socket value from the origin if it is an input, because then it would either be an + * unlinked input or an unlinked input of a group input node that the socket is linked to, + * otherwise, get the value from the socket itself. */ + if (origin->is_input()) { + gpu_stack_vector_from_socket(stack.vec, origin.bsocket()); + } + else { + gpu_stack_vector_from_socket(stack.vec, socket.bsocket()); + } + } + else { + stack.hasoutput = socket->is_logically_linked(); + } +} + +void ShaderNode::populate_inputs() +{ + /* Reserve a stack for each input in addition to an extra stack at the end to mark the end of the + * array, as this is what the GPU module functions expect. */ + const int num_input_sockets = node_->input_sockets().size(); + inputs_.resize(num_input_sockets + 1); + inputs_.last().end = true; + + for (int i = 0; i < num_input_sockets; i++) { + populate_gpu_node_stack(node_.input(i), inputs_[i]); + } +} + +void ShaderNode::populate_outputs() +{ + /* Reserve a stack for each output in addition to an extra stack at the end to mark the end of + * the array, as this is what the GPU module functions expect. 
*/ + const int num_output_sockets = node_->output_sockets().size(); + outputs_.resize(num_output_sockets + 1); + outputs_.last().end = true; + + for (int i = 0; i < num_output_sockets; i++) { + populate_gpu_node_stack(node_.output(i), outputs_[i]); + } +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/shader_operation.cc b/source/blender/compositor/realtime_compositor/intern/shader_operation.cc new file mode 100644 index 00000000000..8e52baf63ec --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/shader_operation.cc @@ -0,0 +1,526 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <memory> +#include <string> + +#include "BLI_listbase.h" +#include "BLI_map.hh" +#include "BLI_string_ref.hh" +#include "BLI_utildefines.h" + +#include "DNA_customdata_types.h" + +#include "GPU_material.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" + +#include "gpu_shader_create_info.hh" + +#include "NOD_derived_node_tree.hh" +#include "NOD_node_declaration.hh" + +#include "COM_context.hh" +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_scheduler.hh" +#include "COM_shader_node.hh" +#include "COM_shader_operation.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; + +ShaderOperation::ShaderOperation(Context &context, ShaderCompileUnit &compile_unit) + : Operation(context), compile_unit_(compile_unit) +{ + material_ = GPU_material_from_callbacks(&construct_material, &generate_code, this); + GPU_material_status_set(material_, GPU_MAT_QUEUED); + GPU_material_compile(material_); +} + +ShaderOperation::~ShaderOperation() +{ + GPU_material_free_single(material_); +} + +void ShaderOperation::execute() +{ + const Domain domain = compute_domain(); + for (StringRef identifier : output_sockets_to_output_identifiers_map_.values()) { + Result &result = 
get_result(identifier); + result.allocate_texture(domain); + } + + GPUShader *shader = GPU_material_get_shader(material_); + GPU_shader_bind(shader); + + bind_material_resources(shader); + bind_inputs(shader); + bind_outputs(shader); + + compute_dispatch_threads_at_least(shader, domain.size); + + GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_uniformbuf_unbind_all(); + GPU_shader_unbind(); +} +/* Get the identifier of the result that was populated for the given output socket. */ +StringRef ShaderOperation::get_output_identifier_from_output_socket(DOutputSocket output_socket) +{ + return output_sockets_to_output_identifiers_map_.lookup(output_socket); +} +/* Get the map from the identifiers of the operation inputs to the outputs they are linked to. */ +Map<std::string, DOutputSocket> &ShaderOperation::get_inputs_to_linked_outputs_map() +{ + return inputs_to_linked_outputs_map_; +} +/* Set the initial reference count of each result to the number of its scheduled linked inputs. */ +void ShaderOperation::compute_results_reference_counts(const Schedule &schedule) +{ + for (const auto &item : output_sockets_to_output_identifiers_map_.items()) { + const int reference_count = number_of_inputs_linked_to_output_conditioned( + item.key, [&](DInputSocket input) { return schedule.contains(input.node()); }); + + get_result(item.value).set_initial_reference_count(reference_count); + } +} +/* Bind the uniform buffer and the color band textures of the GPU material, if they exist. */ +void ShaderOperation::bind_material_resources(GPUShader *shader) +{ + /* Bind the uniform buffer of the material if it exists. It may not exist if the GPU material has + * no uniforms. */ + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material_); + if (ubo) { + GPU_uniformbuf_bind(ubo, GPU_shader_get_uniform_block_binding(shader, GPU_UBO_BLOCK_NAME)); + } + + /* Bind color band textures needed by curve and ramp nodes.
*/ + ListBase textures = GPU_material_textures(material_); + LISTBASE_FOREACH (GPUMaterialTexture *, texture, &textures) { + if (texture->colorband) { + const int texture_image_unit = GPU_shader_get_texture_binding(shader, texture->sampler_name); + GPU_texture_bind(*texture->colorband, texture_image_unit); + } + } +} +/* Bind each input of the operation as a texture named after its GPU material attribute. */ +void ShaderOperation::bind_inputs(GPUShader *shader) +{ + /* Attributes represent the inputs of the operation and their names match those of the inputs of + * the operation as well as the corresponding texture samplers in the shader. */ + ListBase attributes = GPU_material_attributes(material_); + LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) { + get_input(attribute->name).bind_as_texture(shader, attribute->name); + } +} +/* Bind each result of the operation as an image whose name is the identifier of the result. */ +void ShaderOperation::bind_outputs(GPUShader *shader) +{ + for (StringRefNull output_identifier : output_sockets_to_output_identifiers_map_.values()) { + get_result(output_identifier).bind_as_image(shader, output_identifier.c_str()); + } +} +/* Material construction callback. The thunk is the shader operation whose nodes are compiled. */ +void ShaderOperation::construct_material(void *thunk, GPUMaterial *material) +{ + ShaderOperation *operation = static_cast<ShaderOperation *>(thunk); + for (DNode node : operation->compile_unit_) { + ShaderNode *shader_node = node->typeinfo->get_compositor_shader_node(node); + operation->shader_nodes_.add_new(node, std::unique_ptr<ShaderNode>(shader_node)); + + operation->link_node_inputs(node, material); + + shader_node->compile(material); + + operation->populate_results_for_node(node, material); + } +} +/* Link each linked input of the node either internally or externally to the GPU material graph. */ +void ShaderOperation::link_node_inputs(DNode node, GPUMaterial *material) +{ + for (const bNodeSocket *input : node->input_sockets()) { + const DInputSocket dinput{node.context(), input}; + + /* Get the output linked to the input. If it is null, that means the input is unlinked. + * Unlinked inputs are linked by the node compile method, so skip this here.
*/ + const DOutputSocket doutput = get_output_linked_to_input(dinput); + if (!doutput) { + continue; + } + + /* If the origin node is part of the shader operation, then the link is internal to the GPU + * material graph and is linked appropriately. */ + if (compile_unit_.contains(doutput.node())) { + link_node_input_internal(dinput, doutput); + continue; + } + + /* Otherwise, the origin node is not part of the shader operation, so the link is external to + * the GPU material graph and an input to the shader operation must be declared and linked to + * the node input. */ + link_node_input_external(dinput, doutput, material); + } +} +/* Link the input stack to the link of the output stack of a node in the same GPU material. */ +void ShaderOperation::link_node_input_internal(DInputSocket input_socket, + DOutputSocket output_socket) +{ + ShaderNode &output_node = *shader_nodes_.lookup(output_socket.node()); + GPUNodeStack &output_stack = output_node.get_output(output_socket->identifier); + + ShaderNode &input_node = *shader_nodes_.lookup(input_socket.node()); + GPUNodeStack &input_stack = input_node.get_input(input_socket->identifier); + + input_stack.link = output_stack.link; +} +/* Link the input stack to the material attribute that represents the given external output. */ +void ShaderOperation::link_node_input_external(DInputSocket input_socket, + DOutputSocket output_socket, + GPUMaterial *material) +{ + + ShaderNode &node = *shader_nodes_.lookup(input_socket.node()); + GPUNodeStack &stack = node.get_input(input_socket->identifier); + + /* Declare an input for the output socket only if one was not already declared for it before. */ + if (!output_to_material_attribute_map_.contains(output_socket)) { + declare_operation_input(input_socket, output_socket, material); + } + + /* Link the attribute representing the shader operation input corresponding to the given output + * socket.
*/ + stack.link = output_to_material_attribute_map_.lookup(output_socket); +} +/* Get the name of the GPU material set function that corresponds to the given result type. */ +static const char *get_set_function_name(ResultType type) +{ + switch (type) { + case ResultType::Float: + return "set_value"; + case ResultType::Vector: + return "set_rgb"; + case ResultType::Color: + return "set_rgba"; + } + + BLI_assert_unreachable(); + return nullptr; +} +/* Declare a new operation input named "input<index>" that is linked to the given output socket. */ +void ShaderOperation::declare_operation_input(DInputSocket input_socket, + DOutputSocket output_socket, + GPUMaterial *material) +{ + const int input_index = output_to_material_attribute_map_.size(); + std::string input_identifier = "input" + std::to_string(input_index); + + /* Declare the input descriptor for this input and prefer to declare its type to be the same as + * the type of the output socket because doing type conversion in the shader is much cheaper. */ + InputDescriptor input_descriptor = input_descriptor_from_input_socket(input_socket.bsocket()); + input_descriptor.type = get_node_socket_result_type(output_socket.bsocket()); + declare_input_descriptor(input_identifier, input_descriptor); + + /* Add a new GPU attribute representing an input to the GPU material. Instead of using the + * attribute directly, we link it to an appropriate set function and use its output link instead. + * This is needed because the `gputype` member of the attribute is only initialized if it is + * linked to a GPU node. */ + GPUNodeLink *attribute_link; + GPU_link(material, + get_set_function_name(input_descriptor.type), + GPU_attribute(material, CD_AUTO_FROM_NAME, input_identifier.c_str()), + &attribute_link); + + /* Map the output socket to the attribute that was created for it. */ + output_to_material_attribute_map_.add(output_socket, attribute_link); + + /* Map the identifier of the operation input to the output socket it is linked to.
*/ + inputs_to_linked_outputs_map_.add_new(input_identifier, output_socket); +} +/* Populate a result for each output of the node that is linked to a node outside the operation. */ +void ShaderOperation::populate_results_for_node(DNode node, GPUMaterial *material) +{ + for (const bNodeSocket *output : node->output_sockets()) { + const DOutputSocket doutput{node.context(), output}; + + /* If any of the nodes linked to the output are not part of the shader operation, then an + * output result needs to be populated for it. */ + const bool need_to_populate_result = is_output_linked_to_node_conditioned( + doutput, [&](DNode node) { return !compile_unit_.contains(node); }); + + if (need_to_populate_result) { + populate_operation_result(doutput, material); + } + } +} +/* Get the name of the GPU function that stores an output of the given result type. */ +static const char *get_store_function_name(ResultType type) +{ + switch (type) { + case ResultType::Float: + return "node_compositor_store_output_float"; + case ResultType::Vector: + return "node_compositor_store_output_vector"; + case ResultType::Color: + return "node_compositor_store_output_color"; + } + + BLI_assert_unreachable(); + return nullptr; +} +/* Populate a new result named "output<index>" for the output socket and link it to a storer. */ +void ShaderOperation::populate_operation_result(DOutputSocket output_socket, GPUMaterial *material) +{ + const unsigned int output_id = output_sockets_to_output_identifiers_map_.size(); + std::string output_identifier = "output" + std::to_string(output_id); + + const ResultType result_type = get_node_socket_result_type(output_socket.bsocket()); + const Result result = Result(result_type, texture_pool()); + populate_result(output_identifier, result); + + /* Map the output socket to the identifier of the newly populated result. */ + output_sockets_to_output_identifiers_map_.add_new(output_socket, output_identifier); + + ShaderNode &node = *shader_nodes_.lookup(output_socket.node()); + GPUNodeLink *output_link = node.get_output(output_socket->identifier).link; + + /* Link the output node stack to an output storer storing in the appropriate result.
The result + * is identified by its index in the operation and the bits of that index are reinterpreted as a + * float to be passed to the GPU function. Additionally, create an output link from the storer + * node to declare as an output to the GPU material. This storer output link is a dummy link in + * the sense that its value is ignored since it is already written in the output, but it is used + * to track nodes that contribute to the output of the compositor node tree. */ + GPUNodeLink *storer_output_link; + GPUNodeLink *id_link = GPU_constant((float *)&output_id); + const char *store_function_name = get_store_function_name(result_type); + GPU_link(material, store_function_name, id_link, output_link, &storer_output_link); + + /* Declare the output link of the storer node as an output of the GPU material to help the GPU + * code generator to track the nodes that contribute to the output of the shader. */ + GPU_material_add_output_link_composite(material, storer_output_link); +} + +using namespace gpu::shader; +/* Code generation callback. The thunk is the shader operation whose shader is being generated. */ +void ShaderOperation::generate_code(void *thunk, + GPUMaterial *material, + GPUCodegenOutput *code_generator_output) +{ + ShaderOperation *operation = static_cast<ShaderOperation *>(thunk); + ShaderCreateInfo &shader_create_info = *reinterpret_cast<ShaderCreateInfo *>( + code_generator_output->create_info); + + shader_create_info.local_group_size(16, 16); + + /* The resources are added without explicit locations, so make sure it is done by the + * shader creator. */ + shader_create_info.auto_resource_location(true); + + /* Add implementation for implicit conversion operations inserted by the code generator. This + * file should include the functions [float|vec3|vec4]_from_[float|vec3|vec4]. */ + shader_create_info.typedef_source("gpu_shader_compositor_type_conversion.glsl"); + + /* The source shader is a compute shader with a main function that calls the dynamically + * generated evaluate function.
The evaluate function includes the serialized GPU material graph + * preceded by code that initializes the inputs of the operation. Additionally, the storer + * functions that write the outputs are defined outside the evaluate function. */ + shader_create_info.compute_source("gpu_shader_compositor_main.glsl"); + + /* The main function is emitted in the shader before the evaluate function, so the evaluate + * function needs to be forward declared here. */ + shader_create_info.typedef_source_generated += "void evaluate();\n"; + + operation->generate_code_for_outputs(shader_create_info); + + shader_create_info.compute_source_generated += "void evaluate()\n{\n"; + + operation->generate_code_for_inputs(material, shader_create_info); + + shader_create_info.compute_source_generated += code_generator_output->composite; + + shader_create_info.compute_source_generated += "}\n"; +} +/* Get the format of the image that stores results of the given type. Vectors are stored as RGBA. */ +static eGPUTextureFormat texture_format_from_result_type(ResultType type) +{ + switch (type) { + case ResultType::Float: + return GPU_R16F; + case ResultType::Vector: + return GPU_RGBA16F; + case ResultType::Color: + return GPU_RGBA16F; + } + + BLI_assert_unreachable(); + return GPU_RGBA16F; +} + +/* Texture storers in the shader always take a vec4 as an argument, so encode each type in a vec4 + * appropriately.
*/ +static const char *glsl_store_expression_from_result_type(ResultType type) +{ + switch (type) { + case ResultType::Float: + return "vec4(value)"; + case ResultType::Vector: + return "vec4(vector, 0.0)"; + case ResultType::Color: + return "color"; + } + + BLI_assert_unreachable(); + return nullptr; +} +/* Declare an image for each result and generate the store functions that write to those images. */ +void ShaderOperation::generate_code_for_outputs(ShaderCreateInfo &shader_create_info) +{ + const std::string store_float_function_header = "void store_float(const uint id, float value)"; + const std::string store_vector_function_header = "void store_vector(const uint id, vec3 vector)"; + const std::string store_color_function_header = "void store_color(const uint id, vec4 color)"; + + /* The store functions are used by the node_compositor_store_output_[float|vector|color] + * functions but are only defined later as part of the compute source, so they need to be forward + * declared. */ + shader_create_info.typedef_source_generated += store_float_function_header + ";\n"; + shader_create_info.typedef_source_generated += store_vector_function_header + ";\n"; + shader_create_info.typedef_source_generated += store_color_function_header + ";\n"; + + /* Each of the store functions is essentially a single switch statement on the given ID, so start + * by opening the function with a curly bracket followed by opening a switch statement in each of + * the functions.
*/ + std::stringstream store_float_function; + std::stringstream store_vector_function; + std::stringstream store_color_function; + const std::string store_function_start = "\n{\n switch (id) {\n"; + store_float_function << store_float_function_header << store_function_start; + store_vector_function << store_vector_function_header << store_function_start; + store_color_function << store_color_function_header << store_function_start; + + for (StringRefNull output_identifier : output_sockets_to_output_identifiers_map_.values()) { + const Result &result = get_result(output_identifier); + + /* Add a write-only image for this output where its values will be written. */ + shader_create_info.image(0, + texture_format_from_result_type(result.type()), + Qualifier::WRITE, + ImageType::FLOAT_2D, + output_identifier, + Frequency::BATCH); + + /* Add a case for this output, labeled by the suffix of its identifier, followed by a break. */ + std::stringstream case_code; + const std::string store_expression = glsl_store_expression_from_result_type(result.type()); + const std::string texel = ", ivec2(gl_GlobalInvocationID.xy), "; + case_code << " case " << StringRef(output_identifier).drop_known_prefix("output") << ":\n" + << " imageStore(" << output_identifier << texel << store_expression << ");\n" + << " break;\n"; + + /* Only add the case to the function with the matching type. */ + switch (result.type()) { + case ResultType::Float: + store_float_function << case_code.str(); + break; + case ResultType::Vector: + store_vector_function << case_code.str(); + break; + case ResultType::Color: + store_color_function << case_code.str(); + break; + } + } + + /* Close the previously opened switch statement as well as the function itself.
*/ + const std::string store_function_end = " }\n}\n\n"; + store_float_function << store_function_end; + store_vector_function << store_function_end; + store_color_function << store_function_end; + + shader_create_info.compute_source_generated += store_float_function.str() + + store_vector_function.str() + + store_color_function.str(); +} +/* Get the name of the GLSL type that is used for values of the given result type. */ +static const char *glsl_type_from_result_type(ResultType type) +{ + switch (type) { + case ResultType::Float: + return "float"; + case ResultType::Vector: + return "vec3"; + case ResultType::Color: + return "vec4"; + } + + BLI_assert_unreachable(); + return nullptr; +} + +/* Texture loaders in the shader always return a vec4, so a swizzle is needed to retrieve the + * actual value for each type. */ +static const char *glsl_swizzle_from_result_type(ResultType type) +{ + switch (type) { + case ResultType::Float: + return "x"; + case ResultType::Vector: + return "xyz"; + case ResultType::Color: + return "rgba"; + } + + BLI_assert_unreachable(); + return nullptr; +} +/* Declare a sampler for each input and generate the code that loads the inputs into var_attrs. */ +void ShaderOperation::generate_code_for_inputs(GPUMaterial *material, + ShaderCreateInfo &shader_create_info) +{ + /* The attributes of the GPU material represent the inputs of the operation. */ + ListBase attributes = GPU_material_attributes(material); + + if (BLI_listbase_is_empty(&attributes)) { + return; + } + + /* Add a texture sampler for each of the inputs with the same name as the attribute. */ + LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) { + shader_create_info.sampler(0, ImageType::FLOAT_2D, attribute->name, Frequency::BATCH); + } + + /* Declare a struct called var_attrs that includes an appropriately typed member for each of the + * inputs. The names of the members should be the letter v followed by the ID of the attribute + * corresponding to the input. Such names are expected by the code generator.
*/ + std::stringstream declare_attributes; + declare_attributes << "struct {\n"; + LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) { + const InputDescriptor &input_descriptor = get_input_descriptor(attribute->name); + const std::string type = glsl_type_from_result_type(input_descriptor.type); + declare_attributes << " " << type << " v" << attribute->id << ";\n"; + } + declare_attributes << "} var_attrs;\n\n"; + + shader_create_info.compute_source_generated += declare_attributes.str(); + + /* The texture loader utilities are needed to sample the input textures and initialize the + * attributes. */ + shader_create_info.typedef_source("gpu_shader_compositor_texture_utilities.glsl"); + + /* Initialize each member of the previously declared struct by loading its corresponding texture + * at the global invocation ID of the shader with an appropriate swizzle for its type. */ + std::stringstream initialize_attributes; + LISTBASE_FOREACH (GPUMaterialAttribute *, attribute, &attributes) { + const InputDescriptor &input_descriptor = get_input_descriptor(attribute->name); + const std::string swizzle = glsl_swizzle_from_result_type(input_descriptor.type); + initialize_attributes << "var_attrs.v" << attribute->id << " = " + << "texture_load(" << attribute->name + << ", ivec2(gl_GlobalInvocationID.xy))."
<< swizzle << ";\n"; + } + initialize_attributes << "\n"; + + shader_create_info.compute_source_generated += initialize_attributes.str(); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/simple_operation.cc b/source/blender/compositor/realtime_compositor/intern/simple_operation.cc new file mode 100644 index 00000000000..d55a20e5c54 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/simple_operation.cc @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "COM_input_descriptor.hh" +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_simple_operation.hh" + +namespace blender::realtime_compositor { +/* The identifiers used by all the methods below for the single input and the single output. */ +const StringRef SimpleOperation::input_identifier_ = StringRef("Input"); +const StringRef SimpleOperation::output_identifier_ = StringRef("Output"); +/* Get the result of the operation that is identified by the output identifier. */ +Result &SimpleOperation::get_result() +{ + return Operation::get_result(output_identifier_); +} +/* Map the input that is identified by the input identifier to the given result. */ +void SimpleOperation::map_input_to_result(Result *result) +{ + Operation::map_input_to_result(input_identifier_, result); +} +/* NOTE(review): empty body, presumably simple operations add no input processors - confirm. */ +void SimpleOperation::add_and_evaluate_input_processors() +{ +} +/* Get the result mapped to the input that is identified by the input identifier. */ +Result &SimpleOperation::get_input() +{ + return Operation::get_input(input_identifier_); +} +/* Switch the result mapped to the input that is identified by the input identifier. */ +void SimpleOperation::switch_result_mapped_to_input(Result *result) +{ + Operation::switch_result_mapped_to_input(input_identifier_, result); +} +/* Populate the result that is identified by the output identifier and initialize its users. */ +void SimpleOperation::populate_result(Result result) +{ + Operation::populate_result(output_identifier_, result); + + /* The result of a simple operation is guaranteed to have a single user.
*/ + get_result().set_initial_reference_count(1); +} +/* Declare the descriptor of the input that is identified by the input identifier. */ +void SimpleOperation::declare_input_descriptor(InputDescriptor descriptor) +{ + Operation::declare_input_descriptor(input_identifier_, descriptor); +} +/* Get the descriptor of the input that is identified by the input identifier. */ +InputDescriptor &SimpleOperation::get_input_descriptor() +{ + return Operation::get_input_descriptor(input_identifier_); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/static_shader_manager.cc b/source/blender/compositor/realtime_compositor/intern/static_shader_manager.cc new file mode 100644 index 00000000000..c9c8a056f87 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/static_shader_manager.cc @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "GPU_shader.h" + +#include "COM_static_shader_manager.hh" + +namespace blender::realtime_compositor { +/* Free all the shaders that are stored in the manager. */ +StaticShaderManager::~StaticShaderManager() +{ + for (GPUShader *shader : shaders_.values()) { + GPU_shader_free(shader); + } +} +/* Get the shader that is created from the create info with the given name. */ +GPUShader *StaticShaderManager::get(const char *info_name) +{ + /* If a shader with the same info name already exists in the manager, return it, otherwise, + * create a new shader from the info name and return it.
*/ + return shaders_.lookup_or_add_cb( + info_name, [info_name]() { return GPU_shader_create_from_info_name(info_name); }); +} + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/texture_pool.cc b/source/blender/compositor/realtime_compositor/intern/texture_pool.cc new file mode 100644 index 00000000000..6bf2041e6ba --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/texture_pool.cc @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <cstdint> + +#include "BLI_hash.hh" +#include "BLI_map.hh" +#include "BLI_math_vec_types.hh" +#include "BLI_vector.hh" + +#include "GPU_texture.h" + +#include "COM_texture_pool.hh" + +namespace blender::realtime_compositor { + +/* -------------------------------------------------------------------- */ +/** \name Texture Pool Key + * \{ */ +/* Construct a key from the given size and format. */ +TexturePoolKey::TexturePoolKey(int2 size, eGPUTextureFormat format) : size(size), format(format) +{ +} +/* Construct a key from the width, height, and format of the given texture. */ +TexturePoolKey::TexturePoolKey(const GPUTexture *texture) +{ + size = int2(GPU_texture_width(texture), GPU_texture_height(texture)); + format = GPU_texture_format(texture); +} +/* Compute a hash from the width, height, and format of the key. */ +uint64_t TexturePoolKey::hash() const +{ + return get_default_hash_3(size.x, size.y, format); +} +/* Two keys are equal if both their sizes and their formats are equal. */ +bool operator==(const TexturePoolKey &a, const TexturePoolKey &b) +{ + return a.size == b.size && a.format == b.format; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Texture Pool + * \{ */ +/* Acquire a texture of the given size and format, reusing an available one if it exists. */ +GPUTexture *TexturePool::acquire(int2 size, eGPUTextureFormat format) +{ + /* Check if there is an available texture with the required specification, and if one exists, + * return it. */ + const TexturePoolKey key = TexturePoolKey(size, format); + Vector<GPUTexture *> &available_textures = textures_.lookup_or_add_default(key); + if (!available_textures.is_empty()) { + return available_textures.pop_last(); + } + + /* Otherwise, allocate a new texture.
*/ + return allocate_texture(size, format); +} +/* Acquire a texture of the given size in the GPU_RGBA16F format. */ +GPUTexture *TexturePool::acquire_color(int2 size) +{ + return acquire(size, GPU_RGBA16F); +} +/* Acquire a texture of the given size in the GPU_RGBA16F format. */ +GPUTexture *TexturePool::acquire_vector(int2 size) +{ + /* Vectors are stored in RGBA textures because RGB textures have limited support. */ + return acquire(size, GPU_RGBA16F); +} +/* Acquire a texture of the given size in the GPU_R16F format. */ +GPUTexture *TexturePool::acquire_float(int2 size) +{ + return acquire(size, GPU_R16F); +} +/* Append the texture to the available textures that share its size and format. */ +void TexturePool::release(GPUTexture *texture) +{ + textures_.lookup(TexturePoolKey(texture)).append(texture); +} +/* Clear the available textures. NOTE(review): nothing is freed here, confirm who frees them. */ +void TexturePool::reset() +{ + textures_.clear(); +} + +/** \} */ + +} // namespace blender::realtime_compositor diff --git a/source/blender/compositor/realtime_compositor/intern/utilities.cc b/source/blender/compositor/realtime_compositor/intern/utilities.cc new file mode 100644 index 00000000000..1a5823b8441 --- /dev/null +++ b/source/blender/compositor/realtime_compositor/intern/utilities.cc @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_assert.h" +#include "BLI_function_ref.hh" +#include "BLI_math_vec_types.hh" +#include "BLI_math_vector.hh" +#include "BLI_utildefines.h" + +#include "DNA_node_types.h" + +#include "NOD_derived_node_tree.hh" +#include "NOD_node_declaration.hh" + +#include "GPU_compute.h" +#include "GPU_shader.h" + +#include "COM_operation.hh" +#include "COM_result.hh" +#include "COM_utilities.hh" + +namespace blender::realtime_compositor { + +using namespace nodes::derived_node_tree_types; +using TargetSocketPathInfo = DOutputSocket::TargetSocketPathInfo; +/* Get the origin socket of the input, which is the input itself if it is unlinked. */ +DSocket get_input_origin_socket(DInputSocket input) +{ + /* The input is unlinked. Return the socket itself. */ + if (!input->is_logically_linked()) { + return input; + } + + /* Only a single origin socket is guaranteed to exist.
*/ + DSocket socket; + input.foreach_origin_socket([&](const DSocket origin) { socket = origin; }); + return socket; +} +/* Get the output socket linked to the input, or a null output socket if the input is unlinked. */ +DOutputSocket get_output_linked_to_input(DInputSocket input) +{ + /* Get the origin socket of this input, which will be an output socket if the input is linked + * to an output. */ + const DSocket origin = get_input_origin_socket(input); + + /* If the origin socket is an input, that means the input is unlinked, so return a null output + * socket. */ + if (origin->is_input()) { + return DOutputSocket(); + } + + /* Now that we know the origin is an output, return a derived output from it. */ + return DOutputSocket(origin); +} +/* Get the result type that corresponds to the type of the given socket. */ +ResultType get_node_socket_result_type(const bNodeSocket *socket) +{ + switch (socket->type) { + case SOCK_FLOAT: + return ResultType::Float; + case SOCK_VECTOR: + return ResultType::Vector; + case SOCK_RGBA: + return ResultType::Color; + default: + BLI_assert_unreachable(); + return ResultType::Float; + } +} +/* Returns true if the condition is satisfied by the node of any input linked to the output. */ +bool is_output_linked_to_node_conditioned(DOutputSocket output, FunctionRef<bool(DNode)> condition) +{ + bool condition_satisfied = false; + output.foreach_target_socket( + [&](DInputSocket target, const TargetSocketPathInfo &UNUSED(path_info)) { + if (condition(target.node())) { + condition_satisfied = true; + return; /* Only exits the callback for the current target socket. */ + } + }); + return condition_satisfied; +} +/* Count the inputs linked to the output that satisfy the given condition. */ +int number_of_inputs_linked_to_output_conditioned(DOutputSocket output, + FunctionRef<bool(DInputSocket)> condition) +{ + int count = 0; + output.foreach_target_socket( + [&](DInputSocket target, const TargetSocketPathInfo &UNUSED(path_info)) { + if (condition(target)) { + count++; + } + }); + return count; +} +/* A node is a shader node if its type defines the get_compositor_shader_node callback. */ +bool is_shader_node(DNode node) +{ + return node->typeinfo->get_compositor_shader_node; +} +/* A node is supported if its type defines either of the compositor callbacks checked below. */ +bool is_node_supported(DNode node) +{ + return node->typeinfo->get_compositor_operation || node->typeinfo->get_compositor_shader_node; +} +/* Construct an input descriptor from the type and the declaration of the given input socket. */ +InputDescriptor input_descriptor_from_input_socket(const bNodeSocket *socket) +{ + using namespace nodes; +
InputDescriptor input_descriptor; + input_descriptor.type = get_node_socket_result_type(socket); + const NodeDeclaration *node_declaration = socket->owner_node().declaration(); + /* Not every node has a declaration, in which case, we assume the default values for the rest of + * the properties. */ + if (!node_declaration) { + return input_descriptor; + } + const SocketDeclarationPtr &socket_declaration = node_declaration->inputs()[socket->index()]; + input_descriptor.domain_priority = socket_declaration->compositor_domain_priority(); + input_descriptor.skip_realization = socket_declaration->compositor_skip_realization(); + input_descriptor.expects_single_value = socket_declaration->compositor_expects_single_value(); + return input_descriptor; +} +/* Dispatch enough work groups of the given local size to cover at least the given threads range. */ +void compute_dispatch_threads_at_least(GPUShader *shader, int2 threads_range, int2 local_size) +{ + /* If the threads range is divisible by the local size, dispatch the number of needed groups, + * which is their division. If it is not divisible, then dispatch an extra group to cover the + * remaining invocations, which means the actual threads range of the dispatch will be a bit + * larger than the given one. */ + const int2 groups_to_dispatch = math::divide_ceil(threads_range, local_size); + GPU_compute_dispatch(shader, groups_to_dispatch.x, groups_to_dispatch.y, 1); +} + +} // namespace blender::realtime_compositor |