/* SPDX-License-Identifier: GPL-2.0-or-later */

#include "BLI_map.hh"
#include "BLI_set.hh"
#include "BLI_stack.hh"
#include "BLI_vector.hh"
#include "BLI_vector_set.hh"

#include "NOD_derived_node_tree.hh"

#include "BKE_node.h"
#include "BKE_node_runtime.hh"

#include "COM_scheduler.hh"
#include "COM_utilities.hh"

namespace blender::realtime_compositor {

using namespace nodes::derived_node_tree_types;

/* Find the active context from the given context and its descendant contexts. The active context
 * is the one whose node instance key matches the active_viewer_key stored in the root node tree.
 * The instance key of each context is computed by calling BKE_node_instance_key given the key of
 * the parent as well as the group node making the context. */
static const DTreeContext *find_active_context_recursive(const DTreeContext *context,
                                                         bNodeInstanceKey key)
{
  /* The instance key of the given context matches the active viewer instance key, so this is the
   * active context, return it. */
  if (key.value == context->derived_tree().root_context().btree().active_viewer_key.value) {
    return context;
  }

  /* For each of the group nodes, compute its instance key and context and call this function
   * recursively. */
  for (const bNode *group_node : context->btree().group_nodes()) {
    const bNodeInstanceKey child_key = BKE_node_instance_key(key, &context->btree(), group_node);
    const DTreeContext *child_context = context->child_context(*group_node);
    const DTreeContext *found_context = find_active_context_recursive(child_context, child_key);

    /* If the found context is null, that means neither the child context nor any of its
     * descendant contexts is active. */
    if (!found_context) {
      continue;
    }

    /* Otherwise, we have found our active context, return it. */
    return found_context;
  }

  /* Neither the given context nor any of its descendant contexts is active, so return null. */
  return nullptr;
}
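
/* As an illustration of the recursion above: if the root tree contains a group node G1 whose node
 * group in turn contains a group node G2, then the context of G2's node group is identified by
 * the key:
 *
 *   BKE_node_instance_key(BKE_node_instance_key(NODE_INSTANCE_KEY_BASE, &root_tree, G1),
 *                         &g1_tree, G2)
 *
 * where root_tree is the root node tree and g1_tree is the node tree of G1's group. The recursion
 * computes exactly this nesting one level at a time. The names G1, G2, root_tree and g1_tree are
 * merely illustrative and do not appear in the code. */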

/* Find the active context for the given node tree. The active context represents the node tree
 * currently being edited. In most cases, that would be the top level node tree itself, but in the
 * case where the user is editing the node tree of a node group, the active context would be a
 * representation of the node tree of that node group. Note that the context also stores the group
 * node that the user selected to edit the node tree, so the context fully represents a particular
 * instance of the node group. */
static const DTreeContext *find_active_context(const DerivedNodeTree &tree)
{
  /* The root context has an instance key of NODE_INSTANCE_KEY_BASE by definition. */
  return find_active_context_recursive(&tree.root_context(), NODE_INSTANCE_KEY_BASE);
}

/* Return the output node which is marked as NODE_DO_OUTPUT. If multiple types of output nodes are
 * marked, then the preference will be CMP_NODE_COMPOSITE > CMP_NODE_VIEWER >
 * CMP_NODE_SPLITVIEWER. If no output node exists, a null node will be returned. */
static DNode find_output_in_context(const DTreeContext *context)
{
  const bNodeTree &tree = context->btree();

  for (const bNode *node : tree.nodes_by_type("CompositorNodeComposite")) {
    if (node->flag & NODE_DO_OUTPUT) {
      return DNode(context, node);
    }
  }

  for (const bNode *node : tree.nodes_by_type("CompositorNodeViewer")) {
    if (node->flag & NODE_DO_OUTPUT) {
      return DNode(context, node);
    }
  }

  for (const bNode *node : tree.nodes_by_type("CompositorNodeSplitViewer")) {
    if (node->flag & NODE_DO_OUTPUT) {
      return DNode(context, node);
    }
  }

  return DNode();
}

/* Compute the output node whose result should be computed. This node is the output node that
 * satisfies the requirements in the find_output_in_context function. First, the active context is
 * searched for an output node, and if none was found, the root context is searched instead. For
 * more information on what contexts mean here, see the find_active_context function. */
static DNode compute_output_node(const DerivedNodeTree &tree)
{
  const DTreeContext *active_context = find_active_context(tree);

  const DNode node = find_output_in_context(active_context);
  if (node) {
    return node;
  }

  /* If the active context is the root one and no output node was found, we consider this node
   * tree to have no output node, even if one of the non-active descendant contexts has an output
   * node. */
  if (active_context->is_root()) {
    return DNode();
  }

  /* The active context doesn't have an output node, search in the root context as a fallback. */
  return find_output_in_context(&tree.root_context());
}
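
/* Note that the fallback above means that if the user is, for example, editing the node tree of a
 * node group that contains no composite or viewer node, the output node marked in the top level
 * node tree is used instead, so the compositor still has a result to compute. */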

/* A type representing a mapping that associates each node with a heuristic estimation of the
 * number of intermediate buffers needed to compute it and all of its dependencies. See the
 * compute_number_of_needed_buffers function for more information. */
using NeededBuffers = Map<DNode, int>;

/* Compute a heuristic estimation of the number of intermediate buffers needed to compute each
 * node and all of its dependencies for all nodes that the given node depends on. The output is a
 * map that associates each node with the number of intermediate buffers needed to compute it and
 * all of its dependencies.
 *
 * Consider a node that takes n buffers as inputs from a number of node dependencies, which we
 * shall call the input nodes. The node also computes and outputs m buffers. In order for the node
 * to compute its output, a number of intermediate buffers will be needed. Since the node takes n
 * buffers and outputs m buffers, the number of buffers directly needed by the node is (n + m).
 * But each of the input buffers is computed by a node that, in turn, needs a number of buffers to
 * compute its output. So the total number of buffers needed to compute the output of the node is
 * max(n + m, d) where d is the number of buffers needed by the input node that needs the largest
 * number of buffers. We only consider the input node that needs the largest number of buffers,
 * because those buffers can be reused by any input node that needs a lesser number of buffers.
 *
 * Shader nodes, however, are a special case because links between two shader nodes inside the
 * same shader operation don't pass a buffer, but a single value in the compiled shader. So for
 * shader nodes, only inputs and outputs linked to nodes that are not shader nodes should be
 * considered. Note that this might not actually be true, because the compiler may decide to split
 * a shader operation into multiple ones that will pass buffers, but this is not something that
 * can be known at scheduling-time. See the discussion in COM_compile_state.hh, COM_evaluator.hh,
 * and COM_shader_operation.hh for more information. In the node tree shown below, node 4 will
 * have exactly the same number of needed buffers as node 3, because its inputs and outputs are
 * all internally linked in the shader operation.
 *
 *                                        Shader Operation
 *                   +------------------------------------------------------+
 * .------------.    |  .------------.  .------------.      .------------.  |  .------------.
 * |   Node 1   |    |  |   Node 3   |  |   Node 4   |      |   Node 5   |  |  |   Node 6   |
 * |            |----|--|            |--|            |------|            |--|--|            |
 * |            |  .-|--|            |  |            |  .---|            |  |  |            |
 * '------------'  | |  '------------'  '------------'  |   '------------'  |  '------------'
 *                 | +----------------------------------|-------------------+
 * .------------.  |                                     |
 * |   Node 2   |  |                                     |
 * |            |--'------------------------------------'
 * |            |
 * '------------'
 *
 * Note that the computed output is not guaranteed to be accurate, and will not be in most cases.
 * The computation is merely a heuristic estimation that works well in most cases. This is due to
 * a number of reasons:
 * - The node tree is actually a graph that allows output sharing, which is not something that was
 *   taken into consideration in this implementation because it is difficult to correctly
 *   consider.
 * - Each node may allocate any number of internal buffers, which is not taken into account in
 *   this implementation because it rarely affects the output and is done by very few nodes.
 * - The compiler may decide to compile the schedule differently depending on runtime information
 *   which we can only speculate about at scheduling-time as described above. */
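
/* As a worked example of the above heuristic, and assuming the output of node 6 in the tree
 * above is unlinked: nodes 1 and 2 have no linked inputs and a single output buffer each, so each
 * needs max(0 + 1, 0) = 1 buffer. Node 3 takes two buffers but outputs none, since its only
 * output is internal to the shader operation, so it needs max(2 + 0, 1) = 2 buffers. Node 4 is
 * entirely internal to the shader operation, so it needs max(0 + 0, 2) = 2 buffers. Node 5 takes
 * one buffer from node 2 and outputs one buffer to node 6, so it needs max(1 + 1, 2) = 2 buffers,
 * and node 6 needs max(1 + 0, 2) = 2 buffers. */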
static NeededBuffers compute_number_of_needed_buffers(DNode output_node)
{
  NeededBuffers needed_buffers;

  /* A stack of nodes used to traverse the node tree starting from the output node. */
  Stack<DNode> node_stack = {output_node};

  /* Traverse the node tree in a post order depth first manner and compute the number of needed
   * buffers for each node. Post order traversal guarantees that all the node dependencies of each
   * node are computed before it. This is done by pushing all the uncomputed node dependencies to
   * the node stack first and only popping and computing the node when all its node dependencies
   * were computed. */
  while (!node_stack.is_empty()) {
    /* Do not pop the node immediately, as it may turn out that we can't compute its number of
     * needed buffers just yet because its dependencies weren't computed. It will be popped later
     * when needed. */
    DNode &node = node_stack.peek();

    /* Go over the node dependencies connected to the inputs of the node and push them to the
     * node stack if they were not computed already. */
    Set<DNode> pushed_nodes;
    for (const bNodeSocket *input : node->input_sockets()) {
      const DInputSocket dinput{node.context(), input};

      /* Get the output linked to the input. If it is null, that means the input is unlinked and
       * has no dependency node. */
      const DOutputSocket doutput = get_output_linked_to_input(dinput);
      if (!doutput) {
        continue;
      }

      /* The node dependency was already computed or pushed before, so skip it. */
      if (needed_buffers.contains(doutput.node()) || pushed_nodes.contains(doutput.node())) {
        continue;
      }

      /* The node dependency needs to be computed, so push it to the node stack and indicate that
       * it was pushed. */
      node_stack.push(doutput.node());
      pushed_nodes.add_new(doutput.node());
    }

    /* If any of the node dependencies were pushed, that means that not all of them were computed
     * and consequently we can't compute the number of needed buffers for this node just yet. */
    if (!pushed_nodes.is_empty()) {
      continue;
    }

    /* We don't need to store the result of the pop because we already peeked at it before. */
    node_stack.pop();

    /* Compute the number of buffers that the node takes as an input as well as the number of
     * buffers needed to compute the most demanding of the node dependencies. */
    int number_of_input_buffers = 0;
    int buffers_needed_by_dependencies = 0;
    for (const bNodeSocket *input : node->input_sockets()) {
      const DInputSocket dinput{node.context(), input};

      /* Get the output linked to the input. If it is null, that means the input is unlinked.
       * Unlinked inputs do not take a buffer, so skip those inputs. */
      const DOutputSocket doutput = get_output_linked_to_input(dinput);
      if (!doutput) {
        continue;
      }

      /* Since this input is linked, if the link is not between two shader nodes, it means that
       * the node takes a buffer through this input and so we increment the number of input
       * buffers. */
      if (!is_shader_node(node) || !is_shader_node(doutput.node())) {
        number_of_input_buffers++;
      }

      /* If the number of buffers needed by the node dependency is more than the total number of
       * buffers needed by the dependencies, then update the latter to be the former. This is
       * computing the "d" in the aforementioned equation "max(n + m, d)". */
      const int buffers_needed_by_dependency = needed_buffers.lookup(doutput.node());
      if (buffers_needed_by_dependency > buffers_needed_by_dependencies) {
        buffers_needed_by_dependencies = buffers_needed_by_dependency;
      }
    }

    /* Compute the number of buffers that will be computed/output by this node. */
    int number_of_output_buffers = 0;
    for (const bNodeSocket *output : node->output_sockets()) {
      const DOutputSocket doutput{node.context(), output};

      /* The output is not linked, so it outputs no buffer. */
      if (!output->is_logically_linked()) {
        continue;
      }

      /* If any of the links is not between two shader nodes, it means that the node outputs a
       * buffer through this output and so we increment the number of output buffers. */
      if (!is_output_linked_to_node_conditioned(doutput, is_shader_node) ||
          !is_shader_node(node)) {
        number_of_output_buffers++;
      }
    }

    /* Compute the heuristic estimation of the number of needed intermediate buffers to compute
     * this node and all of its dependencies. This is computing the aforementioned equation
     * "max(n + m, d)". */
    const int total_buffers = MAX2(number_of_input_buffers + number_of_output_buffers,
                                   buffers_needed_by_dependencies);
    needed_buffers.add(node, total_buffers);
  }

  return needed_buffers;
}

/* There are multiple different possible orders of evaluating a node graph, each of which needs
 * to allocate a number of intermediate buffers to store its intermediate results. It follows
 * that we need to find the evaluation order which uses the least amount of intermediate buffers.
 * For instance, consider a node that takes two input buffers A and B. Each of those buffers is
 * computed through a number of nodes constituting a sub-graph whose root is the node that
 * outputs that buffer. Suppose the number of intermediate buffers needed to compute A and B are
 * N(A) and N(B) respectively and N(A) > N(B). Then evaluating the sub-graph computing A would be
 * a better option than that of B, because had B been computed first, its outputs would need to be
 * stored in extra buffers in addition to the buffers needed by A. The number of buffers needed by
 * each node is estimated as described in the compute_number_of_needed_buffers function.
 *
 * This is a heuristic generalization of the Sethi–Ullman algorithm, a generalization that
 * doesn't always guarantee an optimal evaluation order, as the optimal evaluation order is very
 * difficult to compute. However, this method works well in most cases. Moreover, it assumes that
 * all buffers will have roughly the same size, which may not always be the case. */
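
/* As a simplified illustration of the above, assuming each sub-graph outputs a single buffer and
 * ignoring the buffers of the consuming node itself: if N(A) = 3 and N(B) = 1, evaluating A first
 * peaks at 3 buffers while computing A, and at 1 + 1 = 2 buffers while computing B with A's
 * result kept around. Evaluating B first instead peaks at 1 + 3 = 4 buffers, because B's result
 * has to be kept around while A's more demanding sub-graph is computed. */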
Schedule compute_schedule(const DerivedNodeTree &tree)
{
  Schedule schedule;

  /* Compute the output node whose result should be computed. */
  const DNode output_node = compute_output_node(tree);

  /* No output node, the node tree has no effect, return an empty schedule. */
  if (!output_node) {
    return schedule;
  }

  /* Compute the number of buffers needed by each node connected to the output. */
  const NeededBuffers needed_buffers = compute_number_of_needed_buffers(output_node);

  /* A stack of nodes used to traverse the node tree starting from the output node. */
  Stack<DNode> node_stack = {output_node};

  /* Traverse the node tree in a post order depth first manner, scheduling the nodes in an order
   * informed by the number of buffers needed by each node. Post order traversal guarantees that
   * all the node dependencies of each node are scheduled before it. This is done by pushing all
   * the unscheduled node dependencies to the node stack first and only popping and scheduling
   * the node when all its node dependencies were scheduled. */
  while (!node_stack.is_empty()) {
    /* Do not pop the node immediately, as it may turn out that we can't schedule it just yet
     * because its dependencies weren't scheduled. It will be popped later when needed. */
    DNode &node = node_stack.peek();

    /* Compute the nodes directly connected to the node inputs sorted by their needed buffers
     * such that the node with the lowest number of needed buffers comes first. Note that we
     * actually want the node with the highest number of needed buffers to be scheduled first,
     * but since those are pushed to the traversal stack, we need to push them in reverse
     * order. */
    Vector<DNode> sorted_dependency_nodes;
    for (const bNodeSocket *input : node->input_sockets()) {
      const DInputSocket dinput{node.context(), input};

      /* Get the output linked to the input. If it is null, that means the input is unlinked and
       * has no dependency node, so skip it. */
      const DOutputSocket doutput = get_output_linked_to_input(dinput);
      if (!doutput) {
        continue;
      }

      /* The dependency node was added before, so skip it. The number of dependency nodes is
       * very small, typically less than 3, so a linear search is okay. */
      if (sorted_dependency_nodes.contains(doutput.node())) {
        continue;
      }

      /* The dependency node was already scheduled, so skip it. */
      if (schedule.contains(doutput.node())) {
        continue;
      }

      /* Sort in ascending order on insertion, the number of dependency nodes is very small,
       * typically less than 3, so insertion sort is okay. */
      int insertion_position = 0;
      for (int i = 0; i < sorted_dependency_nodes.size(); i++) {
        if (needed_buffers.lookup(doutput.node()) >
            needed_buffers.lookup(sorted_dependency_nodes[i])) {
          insertion_position++;
        }
        else {
          break;
        }
      }
      sorted_dependency_nodes.insert(insertion_position, doutput.node());
    }

    /* Push the sorted dependency nodes to the node stack in order. */
    for (const DNode &dependency_node : sorted_dependency_nodes) {
      node_stack.push(dependency_node);
    }

    /* If there are no sorted dependency nodes, that means they were all already scheduled or
     * that none exist in the first place, so we can pop and schedule the node now. */
    if (sorted_dependency_nodes.is_empty()) {
      /* The node might have already been scheduled, so we don't use add_new here and simply
       * don't add it if it was already scheduled. */
      schedule.add(node_stack.pop());
    }
  }

  return schedule;
}

}  // namespace blender::realtime_compositor