Geometry Nodes: new geometry nodes evaluator

The old geometry nodes evaluator was quite basic and missed many features. It was useful to get the geometry nodes project started. However, nowadays we run into its limitations from time to time. The new evaluator is more complex, but comes with new capabilities. The two most important capabilities are that it can now execute nodes in parallel and it supports lazy evaluation. The performance improvement by multi-threading depends a lot on the specific node tree. In our demo files, the speedup is measurable but not huge. This is mainly because they are bottlenecked by one or two nodes that have to be executed one after the other (often the Boolean or Attribute Proximity nodes) or because the bottleneck is multi-threaded already (often openvdb nodes). Lazy evaluation of inputs is only supported by the Switch node for now. Previously, geometry nodes would always compute both inputs and then just discard the one that is not used. Now, only the input that is required is computed. For some more details read D11191, T87620 and the in-code documentation. Differential Revision: https://developer.blender.org/D11191
author: Jacques Lucke <jacques@blender.org> 2021-05-20 12:34:47 +0300
committer: Jacques Lucke <jacques@blender.org> 2021-05-20 12:35:13 +0300
commit: b084b57fbf6d2b6c2a1adb2192f5d82581cede5c (patch)
tree: 14c11a07c6d0dbb58f6eb71804d28b741125b8d4 /source/blender/modifiers
parent: 44e7192745f0b53e29d20bb88d4df0945a808758 (diff)
1 files changed, 1373 insertions, 240 deletions
diff --git a/source/blender/modifiers/intern/MOD_nodes_evaluator.cc b/source/blender/modifiers/intern/MOD_nodes_evaluator.cc
index 2637c7db0fc..ac5249b40a2 100644
--- a/source/blender/modifiers/intern/MOD_nodes_evaluator.cc
+++ b/source/blender/modifiers/intern/MOD_nodes_evaluator.cc
@@ -24,6 +24,12 @@
 #include "FN_generic_value_map.hh"
 #include "FN_multi_function.hh"
 
+#include "BLI_enumerable_thread_specific.hh"
+#include "BLI_stack.hh"
+#include "BLI_task.h"
+#include "BLI_task.hh"
+#include "BLI_vector_set.hh"
+
 namespace blender::modifiers::geometry_nodes {
 
 using fn::CPPType;
@@ -31,404 +37,1531 @@ using fn::GValueMap;
 using nodes::GeoNodeExecParams;
 using namespace fn::multi_function_types;
 
-class NodeParamsProvider : public nodes::GeoNodeExecParamsProvider {
- public:
-  LinearAllocator<> *allocator;
-  GValueMap<StringRef> *input_values;
-  GValueMap<StringRef> *output_values;
+enum class ValueUsage : uint8_t {
+  /* The value is definitely used. */
+  Required,
+  /* The value may be used. */
+  Maybe,
+  /* The value will definitely not be used. */
+  Unused,
+};
+
+struct SingleInputValue {
+  /**
+   * Points either to null or to a value of the type of input.
+   */
+  void *value = nullptr;
+};
+
+struct MultiInputValueItem {
+  /**
+   * The socket where this value is coming from. This is required to sort the inputs correctly
+   * based on the link order later on.
+   */
+  DSocket origin;
+  /**
+   * Should only be null directly after construction. After that it should always point to a value
+   * of the correct type.
+   */
+  void *value = nullptr;
+};
+
+struct MultiInputValue {
+  /**
+   * Collection of all the inputs that have been provided already. Note, the same origin can occur
+   * multiple times. However, it is guaranteed that if two items have the same origin, they will
+   * also have the same value (the pointer is different, but they point to values that would
+   * compare equal).
+   */
+  Vector<MultiInputValueItem> items;
+  /**
+   * Number of items that need to be added until all inputs have been provided.
+   */
+  int expected_size = 0;
+};
+
+struct InputState {
+
+  /**
+   * Type of the socket. If this is null, the socket should just be ignored.
+   */
+  const CPPType *type = nullptr;
+
+  /**
+   * Value of this input socket. By default, the value is empty. When other nodes are done
+   * computing their outputs, the computed values will be forwarded to linked input sockets.
+   * The value will then live here until it is consumed by the node or it was found that the value
+   * is not needed anymore.
+   * Whether the `single` or `multi` value is used depends on the socket.
+   */
+  union {
+    SingleInputValue *single;
+    MultiInputValue *multi;
+  } value;
+
+  /**
+   * How the node intends to use this input. By default all inputs may be used. Based on which
+   * outputs are used, a node can tell the evaluator that an input will definitely be used or is
+   * never used. This allows the evaluator to free values early, avoid copies and other unnecessary
+   * computations.
+   */
+  ValueUsage usage = ValueUsage::Maybe;
+
+  /**
+   * True when this input is/was used for an execution. While a node is running, only the inputs
+   * that have this set to true are allowed to be used. This makes sure that inputs created while
+   * the node is running correctly trigger the node to run again. Furthermore, it gives the node a
+   * consistent view of which inputs are available that does not change unexpectedly.
+   *
+   * While the node is running, this can be checked without a lock, because no one is writing to
+   * it. If this is true, the value can be read without a lock as well, because the value is not
+   * changed by others anymore.
+   */
+  bool was_ready_for_execution = false;
+};
+
+struct OutputState {
+  /**
+   * If this output has been computed and forwarded already. If this is true, the value is not
+   * computed/forwarded again.
+   */
+  bool has_been_computed = false;
+
+  /**
+   * Keeps track of how the output value is used. If a connected input becomes required, this
+   * output has to become required as well. The output becomes ignored when it has zero potential
+   * users that are counted below.
+   */
+  ValueUsage output_usage = ValueUsage::Maybe;
+
+  /**
+   * This is a copy of `output_usage` that is done right before node execution starts. This is
+   * done so that the node gets a consistent view of what outputs are used, even when this changes
+   * while the node is running (the node might be reevaluated in that case).
+   *
+   * While the node is running, this can be checked without a lock, because no one is writing to
+   * it.
+   */
+  ValueUsage output_usage_for_execution = ValueUsage::Maybe;
+
+  /**
+   * Counts how many times the value from this output might be used. If this number reaches zero,
+   * the output is not needed anymore.
+   */
+  int potential_users = 0;
+};
+
+enum class NodeScheduleState {
+  /**
+   * Default state of every node.
+   */
+  NotScheduled,
+  /**
+   * The node has been added to the task group and will be executed by it in the future.
+   */
+  Scheduled,
+  /**
+   * The node is currently running.
+   */
+  Running,
+  /**
+   * The node is running and has been rescheduled while running. In this case the node will run
+   * again. However, we don't add it to the task group immediately, because then the node might run
+   * twice at the same time, which is not allowed. Instead, once the node is done running, it will
+   * reschedule itself.
+   */
+  RunningAndRescheduled,
+};
+
+struct NodeState {
+  /**
+   * Needs to be locked when any data in this state is accessed that is not explicitely marked as
+   * otherwise.
+   */
+  std::mutex mutex;
+
+  /**
+   * States of the individual input and output sockets. One can index into these arrays without
+   * locking. However, to access the data inside a lock is generally necessary.
+   *
+   * These spans have to be indexed with the socket index. Unavailable sockets have a state as
+   * well. Maybe we can handle unavailable sockets differently in Blender in general, so I did not
+   * want to add complexity around it here.
+   */
+  MutableSpan<InputState> inputs;
+  MutableSpan<OutputState> outputs;
 
-  bool can_get_input(StringRef identifier) const override
+  /**
+   * Nodes that don't support lazyness have some special handling the first time they are executed.
+   */
+  bool non_lazy_node_is_initialized = false;
+
+  /**
+   * Used to check that nodes that don't support lazyness do not run more than once.
+   */
+  bool has_been_executed = false;
+
+  /**
+   * Becomes true when the node will never be executed again and its inputs are destructed.
+   * Generally, a node has finished once all of its outputs with (potential) users have been
+   * computed.
+   */
+  bool node_has_finished = false;
+
+  /**
+   * Counts the number of values that still have to be forwarded to this node until it should run
+   * again. It counts values from a multi input socket separately.
+   * This is used as an optimization so that nodes are not scheduled unnecessarily in many cases.
+   */
+  int missing_required_inputs = 0;
+
+  /**
+   * A node is always in one specific schedule state. This helps to ensure that the same node does
+   * not run twice at the same time accidentally.
+   */
+  NodeScheduleState schedule_state = NodeScheduleState::NotScheduled;
+};
+
+/**
+ * Container for a node and its state. Packing them into a single struct allows the use of
+ * `VectorSet` instead of a `Map` for `node_states_` which simplifies parallel loops over all
+ * states.
+ *
+ * Equality operators and a hash function for `DNode` are provided so that one can lookup this type
+ * in `node_states_` just with a `DNode`.
+ */
+struct NodeWithState {
+  DNode node;
+  /* Store a pointer instead of `NodeState` directly to keep it small and movable. */
+  NodeState *state = nullptr;
+
+  friend bool operator==(const NodeWithState &a, const NodeWithState &b)
   {
-    return input_values->contains(identifier);
+    return a.node == b.node;
   }
 
-  bool can_set_output(StringRef identifier) const override
+  friend bool operator==(const NodeWithState &a, const DNode &b)
   {
-    return !output_values->contains(identifier);
+    return a.node == b;
   }
 
-  GMutablePointer extract_input(StringRef identifier) override
+  friend bool operator==(const DNode &a, const NodeWithState &b)
   {
-    return this->input_values->extract(identifier);
+    return a == b.node;
   }
 
-  Vector<GMutablePointer> extract_multi_input(StringRef identifier) override
+  uint64_t hash() const
   {
-    Vector<GMutablePointer> values;
-    int index = 0;
-    while (true) {
-      std::string sub_identifier = identifier;
-      if (index > 0) {
-        sub_identifier += "[" + std::to_string(index) + "]";
-      }
-      if (!this->input_values->contains(sub_identifier)) {
-        break;
-      }
-      values.append(input_values->extract(sub_identifier));
-      index++;
-    }
-    return values;
+    return node.hash();
   }
 
-  GPointer get_input(StringRef identifier) const override
+  static uint64_t hash_as(const DNode &node)
   {
-    return this->input_values->lookup(identifier);
+    return node.hash();
   }
+};
+
+class GeometryNodesEvaluator;
+
+/**
+ * Utility class that locks the state of a node. Having this is a separate class is useful because
+ * it allows methods to communicate that they expect the node to be locked.
+ */
+class LockedNode : NonCopyable, NonMovable {
+ private:
+  GeometryNodesEvaluator &evaluator_;
 
-  GMutablePointer alloc_output_value(StringRef identifier, const CPPType &type) override
+ public:
+  /**
+   * This is the node that is currently locked.
+   */
+  const DNode node;
+  NodeState &node_state;
+
+  /**
+   * Used to delay notifying (and therefore locking) other nodes until the current node is not
+   * locked anymore. This might not be strictly necessary to avoid deadlocks in the current code,
+   * but it is a good measure to avoid accidentally adding a deadlock later on. By not locking
+   * more than one node per thread at a time, deadlocks are avoided.
+   *
+   * The notifications will be send right after the node is not locked anymore.
+   */
+  Vector<DOutputSocket> delayed_required_outputs;
+  Vector<DOutputSocket> delayed_unused_outputs;
+  Vector<DNode> delayed_scheduled_nodes;
+
+  LockedNode(GeometryNodesEvaluator &evaluator, const DNode node, NodeState &node_state)
+      : evaluator_(evaluator), node(node), node_state(node_state)
   {
-    void *buffer = this->allocator->allocate(type.size(), type.alignment());
-    GMutablePointer ptr{&type, buffer};
-    this->output_values->add_new_direct(identifier, ptr);
-    return ptr;
+    node_state.mutex.lock();
   }
+
+  ~LockedNode();
 };
 
-class GeometryNodesEvaluator {
+static const CPPType *get_socket_cpp_type(const DSocket socket)
+{
+  return nodes::socket_cpp_type_get(*socket->typeinfo());
+}
+
+static const CPPType *get_socket_cpp_type(const SocketRef &socket)
+{
+  return nodes::socket_cpp_type_get(*socket.typeinfo());
+}
+
+static bool node_supports_lazyness(const DNode node)
+{
+  return node->typeinfo()->geometry_node_execute_supports_lazyness;
+}
+
+/** Implements the callbacks that might be called when a node is executed. */
+class NodeParamsProvider : public nodes::GeoNodeExecParamsProvider {
+ private:
+  GeometryNodesEvaluator &evaluator_;
+  NodeState &node_state_;
+
  public:
-  using LogSocketValueFn = std::function<void(DSocket, Span<GPointer>)>;
+  NodeParamsProvider(GeometryNodesEvaluator &evaluator, DNode dnode, NodeState &node_state);
+
+  bool can_get_input(StringRef identifier) const override;
+  bool can_set_output(StringRef identifier) const override;
+  GMutablePointer extract_input(StringRef identifier) override;
+  Vector<GMutablePointer> extract_multi_input(StringRef identifier) override;
+  GPointer get_input(StringRef identifier) const override;
+  GMutablePointer alloc_output_value(const CPPType &type) override;
+  void set_output(StringRef identifier, GMutablePointer value) override;
+  void set_input_unused(StringRef identifier) override;
+  bool output_is_required(StringRef identifier) const override;
 
+  bool lazy_require_input(StringRef identifier) override;
+  bool lazy_output_is_required(StringRef identifier) const override;
+};
+
+class GeometryNodesEvaluator {
  private:
-  blender::LinearAllocator<> &allocator_;
-  Map<std::pair<DInputSocket, DOutputSocket>, GMutablePointer> value_by_input_;
-  Vector<DInputSocket> group_outputs_;
-  blender::nodes::MultiFunctionByNode &mf_by_node_;
+  /**
+   * This allocator lives on after the evaluator has been destructed. Therefore outputs of the
+   * entire evaluator should be allocated here.
+   */
+  LinearAllocator<> &outer_allocator_;
+  /**
+   * A local linear allocator for each thread. Only use this for values that do not need to live
+   * longer than the lifetime of the evaluator itself. Considerations for the future:
+   * - We could use an allocator that can free here, some temporary values don't live long.
+   * - If we ever run into false sharing bottlenecks, we could use local allocators that allocate
+   *   on cache line boundaries. Note, just because a value is allocated in one specific thread,
+   *   does not mean that it will only be used by that thread.
+   */
+  EnumerableThreadSpecific<LinearAllocator<>> local_allocators_;
+
+  /**
+   * Every node that is reachable from the output gets its own state. Once all states have been
+   * constructed, this map can be used for lookups from multiple threads.
+   */
+  VectorSet<NodeWithState> node_states_;
+
+  /**
+   * Contains all the tasks for the nodes that are currently scheduled.
+   */
+  TaskPool *task_pool_ = nullptr;
+
+  GeometryNodesEvaluationParams &params_;
   const blender::nodes::DataTypeConversions &conversions_;
-  const Object *self_object_;
-  const ModifierData *modifier_;
-  Depsgraph *depsgraph_;
-  LogSocketValueFn log_socket_value_fn_;
+
+  friend NodeParamsProvider;
 
  public:
   GeometryNodesEvaluator(GeometryNodesEvaluationParams &params)
-      : allocator_(params.allocator),
-        group_outputs_(std::move(params.output_sockets)),
-        mf_by_node_(*params.mf_by_node),
-        conversions_(blender::nodes::get_implicit_type_conversions()),
-        self_object_(params.self_object),
-        modifier_(&params.modifier_->modifier),
-        depsgraph_(params.depsgraph),
-        log_socket_value_fn_(std::move(params.log_socket_value_fn))
+      : outer_allocator_(params.allocator),
+        params_(params),
+        conversions_(blender::nodes::get_implicit_type_conversions())
   {
-    for (auto item : params.input_values.items()) {
-      this->log_socket_value(item.key, item.value);
-      this->forward_to_inputs(item.key, item.value);
-    }
   }
 
-  Vector<GMutablePointer> execute()
+  void execute()
+  {
+    task_pool_ = BLI_task_pool_create(this, TASK_PRIORITY_HIGH);
+
+    this->create_states_for_reachable_nodes();
+    this->forward_group_inputs();
+    this->schedule_initial_nodes();
+
+    /* This runs until all initially requested inputs have been computed. */
+    BLI_task_pool_work_and_wait(task_pool_);
+    BLI_task_pool_free(task_pool_);
+
+    this->extract_group_outputs();
+    this->destruct_node_states();
+  }
+
+  void create_states_for_reachable_nodes()
   {
-    Vector<GMutablePointer> results;
-    for (const DInputSocket &group_output : group_outputs_) {
-      Vector<GMutablePointer> result = this->get_input_values(group_output);
-      this->log_socket_value(group_output, result);
-      results.append(result[0]);
+    /* This does a depth first search for all the nodes that are reachable from the group
+     * outputs. This finds all nodes that are relevant. */
+    Stack<DNode> nodes_to_check;
+    /* Start at the output sockets. */
+    for (const DInputSocket &socket : params_.output_sockets) {
+      nodes_to_check.push(socket.node());
     }
-    for (GMutablePointer value : value_by_input_.values()) {
-      value.destruct();
+    /* Use the local allocator because the states do not need to outlive the evaluator. */
+    LinearAllocator<> &allocator = local_allocators_.local();
+    while (!nodes_to_check.is_empty()) {
+      const DNode node = nodes_to_check.pop();
+      if (node_states_.contains_as(node)) {
+        /* This node has been handled already. */
+        continue;
+      }
+      /* Create a new state for the node. */
+      NodeState &node_state = *allocator.construct<NodeState>().release();
+      node_states_.add_new({node, &node_state});
+
+      /* Push all linked origins on the stack. */
+      for (const InputSocketRef *input_ref : node->inputs()) {
+        const DInputSocket input{node.context(), input_ref};
+        input.foreach_origin_socket(
+            [&](const DSocket origin) { nodes_to_check.push(origin.node()); });
+      }
     }
-    return results;
+
+    /* Initialize the more complex parts of the node states in parallel. At this point no new
+     * node states are added anymore, so it is safe to lookup states from `node_states_` from
+     * multiple threads. */
+    parallel_for(IndexRange(node_states_.size()), 50, [&, this](const IndexRange range) {
+      LinearAllocator<> &allocator = this->local_allocators_.local();
+      for (const NodeWithState &item : node_states_.as_span().slice(range)) {
+        this->initialize_node_state(item.node, *item.state, allocator);
+      }
+    });
   }
 
- private:
-  Vector<GMutablePointer> get_input_values(const DInputSocket socket_to_compute)
+  void initialize_node_state(const DNode node, NodeState &node_state, LinearAllocator<> &allocator)
   {
-    Vector<DSocket> from_sockets;
-    socket_to_compute.foreach_origin_socket([&](DSocket socket) { from_sockets.append(socket); });
+    /* Construct arrays of the correct size. */
+    node_state.inputs = allocator.construct_array<InputState>(node->inputs().size());
+    node_state.outputs = allocator.construct_array<OutputState>(node->outputs().size());
 
-    if (from_sockets.is_empty()) {
-      /* The input is not connected, use the value from the socket itself. */
-      const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket_to_compute->typeinfo());
-      return {get_unlinked_input_value(socket_to_compute, type)};
+    /* Initialize input states. */
+    for (const int i : node->inputs().index_range()) {
+      InputState &input_state = node_state.inputs[i];
+      const DInputSocket socket = node.input(i);
+      if (!socket->is_available()) {
+        /* Unavailable sockets should never be used. */
+        input_state.type = nullptr;
+        input_state.usage = ValueUsage::Unused;
+        continue;
+      }
+      const CPPType *type = get_socket_cpp_type(socket);
+      input_state.type = type;
+      if (type == nullptr) {
+        /* This is not a known data socket, it shouldn't be used. */
+        input_state.usage = ValueUsage::Unused;
+        continue;
+      }
+      /* Construct the correct struct that can hold the input(s). */
+      if (socket->is_multi_input_socket()) {
+        input_state.value.multi = allocator.construct<MultiInputValue>().release();
+        /* Count how many values should be added until the socket is complete. */
+        socket.foreach_origin_socket(
+            [&](DSocket UNUSED(origin)) { input_state.value.multi->expected_size++; });
+        /* If no links are connected, we do read the value from socket itself. */
+        if (input_state.value.multi->expected_size == 0) {
+          input_state.value.multi->expected_size = 1;
+        }
+      }
+      else {
+        input_state.value.single = allocator.construct<SingleInputValue>().release();
+      }
     }
-
-    /* Multi-input sockets contain a vector of inputs. */
-    if (socket_to_compute->is_multi_input_socket()) {
-      return this->get_inputs_from_incoming_links(socket_to_compute, from_sockets);
+    /* Initialize output states. */
+    for (const int i : node->outputs().index_range()) {
+      OutputState &output_state = node_state.outputs[i];
+      const DOutputSocket socket = node.output(i);
+      if (!socket->is_available()) {
+        /* Unavailable outputs should never be used. */
+        output_state.output_usage = ValueUsage::Unused;
+        continue;
+      }
+      const CPPType *type = get_socket_cpp_type(socket);
+      if (type == nullptr) {
+        /* Non data sockets should never be used. */
+        output_state.output_usage = ValueUsage::Unused;
+        continue;
+      }
+      /* Count the number of potential users for this socket. */
+      socket.foreach_target_socket(
+          [&, this](const DInputSocket target_socket) {
+            const DNode target_node = target_socket.node();
+            if (!this->node_states_.contains_as(target_node)) {
+              /* The target node is not computed because it is not computed to the output. */
+              return;
+            }
+            output_state.potential_users += 1;
+          },
+          {});
+      if (output_state.potential_users == 0) {
+        /* If it does not have any potential users, it is unused. */
+        output_state.output_usage = ValueUsage::Unused;
+      }
     }
+  }
 
-    const DSocket from_socket = from_sockets[0];
-    GMutablePointer value = this->get_input_from_incoming_link(socket_to_compute, from_socket);
-    return {value};
+  void destruct_node_states()
+  {
+    parallel_for(IndexRange(node_states_.size()), 50, [&, this](const IndexRange range) {
+      for (const NodeWithState &item : node_states_.as_span().slice(range)) {
+        this->destruct_node_state(item.node, *item.state);
+      }
+    });
   }
 
-  Vector<GMutablePointer> get_inputs_from_incoming_links(const DInputSocket socket_to_compute,
-                                                         const Span<DSocket> from_sockets)
+  void destruct_node_state(const DNode node, NodeState &node_state)
   {
-    Vector<GMutablePointer> values;
-    for (const int i : from_sockets.index_range()) {
-      const DSocket from_socket = from_sockets[i];
-      const int first_occurence = from_sockets.take_front(i).first_index_try(from_socket);
-      if (first_occurence == -1) {
-        values.append(this->get_input_from_incoming_link(socket_to_compute, from_socket));
+    /* Need to destruct stuff manually, because it's allocated by a custom allocator. */
+    for (const int i : node->inputs().index_range()) {
+      InputState &input_state = node_state.inputs[i];
+      if (input_state.type == nullptr) {
+        continue;
+      }
+      const InputSocketRef &socket_ref = node->input(i);
+      if (socket_ref.is_multi_input_socket()) {
+        MultiInputValue &multi_value = *input_state.value.multi;
+        for (MultiInputValueItem &item : multi_value.items) {
+          input_state.type->destruct(item.value);
+        }
+        multi_value.~MultiInputValue();
       }
       else {
-        /* If the same from-socket occurs more than once, we make a copy of the first value. This
-         * can happen when a node linked to a multi-input-socket is muted. */
-        GMutablePointer value = values[first_occurence];
-        const CPPType *type = value.type();
-        void *copy_buffer = allocator_.allocate(type->size(), type->alignment());
-        type->copy_to_uninitialized(value.get(), copy_buffer);
-        values.append({type, copy_buffer});
+        SingleInputValue &single_value = *input_state.value.single;
+        void *value = single_value.value;
+        if (value != nullptr) {
+          input_state.type->destruct(value);
+        }
+        single_value.~SingleInputValue();
       }
     }
-    return values;
+
+    destruct_n(node_state.inputs.data(), node_state.inputs.size());
+    destruct_n(node_state.outputs.data(), node_state.outputs.size());
+
+    node_state.~NodeState();
   }
 
-  GMutablePointer get_input_from_incoming_link(const DInputSocket socket_to_compute,
-                                               const DSocket from_socket)
+  void forward_group_inputs()
   {
-    if (from_socket->is_output()) {
-      const DOutputSocket from_output_socket{from_socket};
-      const std::pair<DInputSocket, DOutputSocket> key = std::make_pair(socket_to_compute,
-                                                                        from_output_socket);
-      std::optional<GMutablePointer> value = value_by_input_.pop_try(key);
-      if (value.has_value()) {
-        /* This input has been computed before, return it directly. */
-        return {*value};
+    for (auto &&item : params_.input_values.items()) {
+      const DOutputSocket socket = item.key;
+      GMutablePointer value = item.value;
+      this->log_socket_value(socket, value);
+
+      const DNode node = socket.node();
+      if (!node_states_.contains_as(node)) {
+        /* The socket is not connected to any output. */
+        value.destruct();
+        continue;
       }
+      this->forward_output(socket, value);
+    }
+  }
 
-      /* Compute the socket now. */
-      this->compute_output_and_forward(from_output_socket);
-      return {value_by_input_.pop(key)};
+  void schedule_initial_nodes()
+  {
+    for (const DInputSocket &socket : params_.output_sockets) {
+      const DNode node = socket.node();
+      NodeState &node_state = this->get_node_state(node);
+      LockedNode locked_node{*this, node, node_state};
+      /* Setting an input as required will schedule any linked node. */
+      this->set_input_required(locked_node, socket);
     }
+  }
 
-    /* Get value from an unlinked input socket. */
-    const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket_to_compute->typeinfo());
-    const DInputSocket from_input_socket{from_socket};
-    return {get_unlinked_input_value(from_input_socket, type)};
+  void schedule_node(LockedNode &locked_node)
+  {
+    switch (locked_node.node_state.schedule_state) {
+      case NodeScheduleState::NotScheduled: {
+        /* The node will be scheduled once it is not locked anymore. We could schedule the node
+         * right here, but that would result in a deadlock if the task pool decides to run the task
+         * immediately (this only happens when Blender is started with a single thread). */
+        locked_node.node_state.schedule_state = NodeScheduleState::Scheduled;
+        locked_node.delayed_scheduled_nodes.append(locked_node.node);
+        break;
+      }
+      case NodeScheduleState::Scheduled: {
+        /* Scheduled already, nothing to do. */
+        break;
+      }
+      case NodeScheduleState::Running: {
+        /* Reschedule node while it is running.
+         * The node will reschedule itself when it is done. */
+        locked_node.node_state.schedule_state = NodeScheduleState::RunningAndRescheduled;
+        break;
+      }
+      case NodeScheduleState::RunningAndRescheduled: {
+        /* Scheduled already, nothing to do. */
+        break;
+      }
+    }
   }
 
-  void compute_output_and_forward(const DOutputSocket socket_to_compute)
+  static void run_node_from_task_pool(TaskPool *task_pool, void *task_data)
   {
-    const DNode node{socket_to_compute.context(), &socket_to_compute->node()};
+    void *user_data = BLI_task_pool_user_data(task_pool);
+    GeometryNodesEvaluator &evaluator = *(GeometryNodesEvaluator *)user_data;
+    const NodeWithState *node_with_state = (const NodeWithState *)task_data;
+
+    evaluator.node_task_run(node_with_state->node, *node_with_state->state);
+  }
 
-    if (!socket_to_compute->is_available()) {
-      /* If the output is not available, use a default value. */
-      const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket_to_compute->typeinfo());
-      void *buffer = allocator_.allocate(type.size(), type.alignment());
-      type.copy_to_uninitialized(type.default_value(), buffer);
-      this->forward_to_inputs(socket_to_compute, {type, buffer});
+  void node_task_run(const DNode node, NodeState &node_state)
+  {
+    /* These nodes are sometimes scheduled. We could also check for them in other places, but
+     * it's the easiest to do it here. */
+    if (node->is_group_input_node() || node->is_group_output_node()) {
       return;
     }
 
-    /* Prepare inputs required to execute the node. */
-    GValueMap<StringRef> node_inputs_map{allocator_};
-    for (const InputSocketRef *input_socket : node->inputs()) {
-      if (input_socket->is_available()) {
-        DInputSocket dsocket{node.context(), input_socket};
-        Vector<GMutablePointer> values = this->get_input_values(dsocket);
-        this->log_socket_value(dsocket, values);
-        for (int i = 0; i < values.size(); ++i) {
-          /* Values from Multi Input Sockets are stored in input map with the format
-           * <identifier>[<index>]. */
-          blender::StringRefNull key = allocator_.copy_string(
-              input_socket->identifier() + (i > 0 ? ("[" + std::to_string(i)) + "]" : ""));
-          node_inputs_map.add_new_direct(key, std::move(values[i]));
-        }
-      }
+    const bool do_execute_node = this->node_task_preprocessing(node, node_state);
+
+    /* Only execute the node if all prerequisites are met. There has to be an output that is
+     * required and all required inputs have to be provided already. */
+    if (do_execute_node) {
+      this->execute_node(node, node_state);
     }
 
-    /* Execute the node. */
-    GValueMap<StringRef> node_outputs_map{allocator_};
-    NodeParamsProvider params_provider;
-    params_provider.dnode = node;
-    params_provider.self_object = self_object_;
-    params_provider.depsgraph = depsgraph_;
-    params_provider.allocator = &allocator_;
-    params_provider.input_values = &node_inputs_map;
-    params_provider.output_values = &node_outputs_map;
-    params_provider.modifier = modifier_;
-    this->execute_node(node, params_provider);
+    this->node_task_postprocessing(node, node_state);
+  }
 
-    /* Forward computed outputs to linked input sockets. */
-    for (const OutputSocketRef *output_socket : node->outputs()) {
-      if (output_socket->is_available()) {
-        const DOutputSocket dsocket{node.context(), output_socket};
-        GMutablePointer value = node_outputs_map.extract(output_socket->identifier());
-        this->log_socket_value(dsocket, value);
-        this->forward_to_inputs(dsocket, value);
+  bool node_task_preprocessing(const DNode node, NodeState &node_state)
+  {
+    LockedNode locked_node{*this, node, node_state};
+    BLI_assert(node_state.schedule_state == NodeScheduleState::Scheduled);
+    node_state.schedule_state = NodeScheduleState::Running;
+
+    /* Early return if the node has finished already. */
+    if (locked_node.node_state.node_has_finished) {
+      return false;
+    }
+    /* Prepare outputs and check if actually any new outputs have to be computed. */
+    if (!this->prepare_node_outputs_for_execution(locked_node)) {
+      return false;
+    }
+    /* Initialize nodes that don't support lazyness. This is done after at least one output is
+     * required and before we check that all required inputs are provided. This reduces the
+     * number of "round-trips" through the task pool by one for most nodes. */
+    if (!node_state.non_lazy_node_is_initialized && !node_supports_lazyness(node)) {
+      this->initialize_non_lazy_node(locked_node);
+      node_state.non_lazy_node_is_initialized = true;
+    }
+    /* Prepare inputs and check if all required inputs are provided. */
+    if (!this->prepare_node_inputs_for_execution(locked_node)) {
+      return false;
+    }
+    return true;
+  }
+
+  /* A node is finished when it has computed all outputs that may be used. */
+  bool finish_node_if_possible(LockedNode &locked_node)
+  {
+    if (locked_node.node_state.node_has_finished) {
+      /* Early return in case this node is known to have finished already. */
+      return true;
+    }
+
+    /* Check if there is any output that might be used but has not been computed yet. */
+    bool has_remaining_output = false;
+    for (OutputState &output_state : locked_node.node_state.outputs) {
+      if (output_state.has_been_computed) {
+        continue;
+      }
+      if (output_state.output_usage != ValueUsage::Unused) {
+        has_remaining_output = true;
+        break;
       }
     }
+    if (!has_remaining_output) {
+      /* If there are no remaining outputs, all the inputs can be destructed and/or can become
+       * unused. This can also trigger a chain reaction where nodes to the left become finished
+       * too. */
+      for (const int i : locked_node.node->inputs().index_range()) {
+        const DInputSocket socket = locked_node.node.input(i);
+        InputState &input_state = locked_node.node_state.inputs[i];
+        if (input_state.usage == ValueUsage::Maybe) {
+          this->set_input_unused(locked_node, socket);
+        }
+        else if (input_state.usage == ValueUsage::Required) {
+          /* The value was required, so it cannot become unused. However, we can destruct the
+           * value. */
+          this->destruct_input_value_if_exists(locked_node, socket);
+        }
+      }
+      locked_node.node_state.node_has_finished = true;
+    }
+    return locked_node.node_state.node_has_finished;
   }
 
-  void log_socket_value(const DSocket socket, Span<GPointer> values)
+  bool prepare_node_outputs_for_execution(LockedNode &locked_node)
   {
-    if (log_socket_value_fn_) {
-      log_socket_value_fn_(socket, values);
+    bool execution_is_necessary = false;
+    for (OutputState &output_state : locked_node.node_state.outputs) {
+      /* Update the output usage for execution to the latest value. */
+      output_state.output_usage_for_execution = output_state.output_usage;
+      if (!output_state.has_been_computed) {
+        if (output_state.output_usage == ValueUsage::Required) {
+          /* Only evaluate when there is an output that is required but has not been computed. */
+          execution_is_necessary = true;
+        }
+      }
     }
+    return execution_is_necessary;
   }
 
-  void log_socket_value(const DSocket socket, Span<GMutablePointer> values)
+  void initialize_non_lazy_node(LockedNode &locked_node)
   {
-    this->log_socket_value(socket, values.cast<GPointer>());
+    for (const int i : locked_node.node->inputs().index_range()) {
+      InputState &input_state = locked_node.node_state.inputs[i];
+      if (input_state.type == nullptr) {
+        /* Ignore unavailable/non-data sockets. */
+        continue;
+      }
+      /* Nodes that don't support lazyness require all inputs. */
+      const DInputSocket input_socket = locked_node.node.input(i);
+      this->set_input_required(locked_node, input_socket);
+    }
   }
 
-  void log_socket_value(const DSocket socket, GPointer value)
+  /**
+   * Checks if requested inputs are available and "marks" all the inputs that are available
+   * during the node execution. Inputs that are provided after this function ends but before the
+   * node is executed, cannot be read by the node in the execution (note that this only affects
+   * nodes that support lazy inputs).
+   */
+  bool prepare_node_inputs_for_execution(LockedNode &locked_node)
   {
-    this->log_socket_value(socket, Span<GPointer>(&value, 1));
+    for (const int i : locked_node.node_state.inputs.index_range()) {
+      InputState &input_state = locked_node.node_state.inputs[i];
+      if (input_state.type == nullptr) {
+        /* Ignore unavailable and non-data sockets. */
+        continue;
+      }
+      const DInputSocket socket = locked_node.node.input(i);
+      const bool is_required = input_state.usage == ValueUsage::Required;
+
+      /* No need to check this socket again. */
+      if (input_state.was_ready_for_execution) {
+        continue;
+      }
+
+      if (socket->is_multi_input_socket()) {
+        MultiInputValue &multi_value = *input_state.value.multi;
+        /* Checks if all the linked sockets have been provided already. */
+        if (multi_value.items.size() == multi_value.expected_size) {
+          input_state.was_ready_for_execution = true;
+          this->log_socket_value(socket, input_state, multi_value.items);
+        }
+        else if (is_required) {
+          /* The input is required but is not fully provided yet. Therefore the node cannot be
+           * executed yet. */
+          return false;
+        }
+      }
+      else {
+        SingleInputValue &single_value = *input_state.value.single;
+        if (single_value.value != nullptr) {
+          input_state.was_ready_for_execution = true;
+          this->log_socket_value(socket, GPointer{input_state.type, single_value.value});
+        }
+        else if (is_required) {
+          /* The input is required but has not been provided yet. Therefore the node cannot be
+           * executed yet. */
+          return false;
+        }
+      }
+    }
+    /* All required inputs have been provided. */
+    return true;
   }
 
-  void execute_node(const DNode node, NodeParamsProvider &params_provider)
+  /**
+   * Actually execute the node. All the required inputs are available and at least one output is
+   * required.
+   */
+  void execute_node(const DNode node, NodeState &node_state)
   {
-    const bNode &bnode = *params_provider.dnode->bnode();
+    const bNode &bnode = *node->bnode();
+
+    if (node_state.has_been_executed) {
+      if (!node_supports_lazyness(node)) {
+        /* Nodes that don't support lazyness must not be executed more than once. */
+        BLI_assert_unreachable();
+      }
+    }
+    node_state.has_been_executed = true;
 
-    /* Use the geometry-node-execute callback if it exists. */
+    /* Use the geometry node execute callback if it exists. */
     if (bnode.typeinfo->geometry_node_execute != nullptr) {
-      GeoNodeExecParams params{params_provider};
-      bnode.typeinfo->geometry_node_execute(params);
+      this->execute_geometry_node(node, node_state);
       return;
     }
 
     /* Use the multi-function implementation if it exists. */
-    const MultiFunction *multi_function = mf_by_node_.lookup_default(node, nullptr);
+    const MultiFunction *multi_function = params_.mf_by_node->lookup_default(node, nullptr);
     if (multi_function != nullptr) {
-      this->execute_multi_function_node(node, params_provider, *multi_function);
+      this->execute_multi_function_node(node, *multi_function, node_state);
       return;
     }
 
-    /* Just output default values if no implementation exists. */
-    this->execute_unknown_node(node, params_provider);
+    this->execute_unknown_node(node, node_state);
+  }
+
+  void execute_geometry_node(const DNode node, NodeState &node_state)
+  {
+    const bNode &bnode = *node->bnode();
+
+    NodeParamsProvider params_provider{*this, node, node_state};
+    GeoNodeExecParams params{params_provider};
+    bnode.typeinfo->geometry_node_execute(params);
   }
 
   void execute_multi_function_node(const DNode node,
-                                   NodeParamsProvider &params_provider,
-                                   const MultiFunction &fn)
+                                   const MultiFunction &fn,
+                                   NodeState &node_state)
   {
     MFContextBuilder fn_context;
     MFParamsBuilder fn_params{fn, 1};
-    Vector<GMutablePointer> input_data;
-    for (const InputSocketRef *socket_ref : node->inputs()) {
-      if (socket_ref->is_available()) {
-        GMutablePointer data = params_provider.extract_input(socket_ref->identifier());
-        fn_params.add_readonly_single_input(GSpan(*data.type(), data.get(), 1));
-        input_data.append(data);
+    LinearAllocator<> &allocator = local_allocators_.local();
+
+    /* Prepare the inputs for the multi function. */
+    for (const int i : node->inputs().index_range()) {
+      const InputSocketRef &socket_ref = node->input(i);
+      if (!socket_ref.is_available()) {
+        continue;
       }
+      BLI_assert(!socket_ref.is_multi_input_socket());
+      InputState &input_state = node_state.inputs[i];
+      BLI_assert(input_state.was_ready_for_execution);
+      SingleInputValue &single_value = *input_state.value.single;
+      BLI_assert(single_value.value != nullptr);
+      fn_params.add_readonly_single_input(GPointer{*input_state.type, single_value.value});
     }
-    Vector<GMutablePointer> output_data;
-    for (const OutputSocketRef *socket_ref : node->outputs()) {
-      if (socket_ref->is_available()) {
-        const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket_ref->typeinfo());
-        GMutablePointer output_value = params_provider.alloc_output_value(socket_ref->identifier(),
-                                                                          type);
-        fn_params.add_uninitialized_single_output(GMutableSpan{type, output_value.get(), 1});
-        output_data.append(output_value);
+    /* Prepare the outputs for the multi function. */
+    Vector<GMutablePointer> outputs;
+    for (const int i : node->outputs().index_range()) {
+      const OutputSocketRef &socket_ref = node->output(i);
+      if (!socket_ref.is_available()) {
+        continue;
       }
+      const CPPType &type = *get_socket_cpp_type(socket_ref);
+      void *buffer = allocator.allocate(type.size(), type.alignment());
+      fn_params.add_uninitialized_single_output(GMutableSpan{type, buffer, 1});
+      outputs.append({type, buffer});
     }
+
     fn.call(IndexRange(1), fn_params, fn_context);
-    for (GMutablePointer value : input_data) {
-      value.destruct();
+
+    /* Forward the computed outputs. */
+    int output_index = 0;
+    for (const int i : node->outputs().index_range()) {
+      const OutputSocketRef &socket_ref = node->output(i);
+      if (!socket_ref.is_available()) {
+        continue;
+      }
+      OutputState &output_state = node_state.outputs[i];
+      const DOutputSocket socket{node.context(), &socket_ref};
+      GMutablePointer value = outputs[output_index];
+      this->forward_output(socket, value);
+      output_state.has_been_computed = true;
+      output_index++;
     }
   }
 
-  void execute_unknown_node(const DNode node, NodeParamsProvider &params_provider)
+  void execute_unknown_node(const DNode node, NodeState &node_state)
   {
+    LinearAllocator<> &allocator = local_allocators_.local();
     for (const OutputSocketRef *socket : node->outputs()) {
-      if (socket->is_available()) {
-        const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket->typeinfo());
-        params_provider.output_values->add_new_by_copy(socket->identifier(),
-                                                       {type, type.default_value()});
+      if (!socket->is_available()) {
+        continue;
+      }
+      const CPPType *type = get_socket_cpp_type(*socket);
+      if (type == nullptr) {
+        continue;
+      }
+      /* Just forward the default value of the type as a fallback. That's typically better than
+       * crashing or doing nothing. */
+      OutputState &output_state = node_state.outputs[socket->index()];
+      output_state.has_been_computed = true;
+      void *buffer = allocator.allocate(type->size(), type->alignment());
+      type->copy_to_uninitialized(type->default_value(), buffer);
+      this->forward_output({node.context(), socket}, {*type, buffer});
+    }
+  }
+
+  void node_task_postprocessing(const DNode node, NodeState &node_state)
+  {
+    LockedNode locked_node{*this, node, node_state};
+
+    const bool node_has_finished = this->finish_node_if_possible(locked_node);
+    const bool reschedule_requested = node_state.schedule_state ==
+                                      NodeScheduleState::RunningAndRescheduled;
+    node_state.schedule_state = NodeScheduleState::NotScheduled;
+    if (reschedule_requested && !node_has_finished) {
+      /* Either the node rescheduled itself or another node tried to schedule it while it ran. */
+      this->schedule_node(locked_node);
+    }
+
+    this->assert_expected_outputs_have_been_computed(locked_node);
+  }
+
+  void assert_expected_outputs_have_been_computed(LockedNode &locked_node)
+  {
+#ifdef DEBUG
+    /* Outputs can only be computed when all required inputs have been provided. */
+    if (locked_node.node_state.missing_required_inputs > 0) {
+      return;
+    }
+    /* If the node is still scheduled, it is not necessary that all its expected outputs are
+     * computed yet. */
+    if (locked_node.node_state.schedule_state == NodeScheduleState::Scheduled) {
+      return;
+    }
+
+    const bool supports_lazyness = node_supports_lazyness(locked_node.node);
+    /* Iterating over sockets instead of the states directly, because that makes it easier to
+     * figure out which socket is missing when one of the asserts is hit. */
+    for (const OutputSocketRef *socket_ref : locked_node.node->outputs()) {
+      OutputState &output_state = locked_node.node_state.outputs[socket_ref->index()];
+      if (supports_lazyness) {
+        /* Expected that at least all required sockets have been computed. If more outputs become
+         * required later, the node will be executed again. */
+        if (output_state.output_usage_for_execution == ValueUsage::Required) {
+          BLI_assert(output_state.has_been_computed);
+        }
+      }
+      else {
+        /* Expect that all outputs that may be used have been computed, because the node cannot
+         * be executed again. */
+        if (output_state.output_usage_for_execution != ValueUsage::Unused) {
+          BLI_assert(output_state.has_been_computed);
+        }
+      }
+    }
+#else
+    UNUSED_VARS(locked_node);
+#endif
+  }
+
+  void extract_group_outputs()
+  {
+    for (const DInputSocket &socket : params_.output_sockets) {
+      BLI_assert(socket->is_available());
+      BLI_assert(!socket->is_multi_input_socket());
+
+      const DNode node = socket.node();
+      NodeState &node_state = this->get_node_state(node);
+      InputState &input_state = node_state.inputs[socket->index()];
+
+      SingleInputValue &single_value = *input_state.value.single;
+      void *value = single_value.value;
+
+      /* The value should have been computed by now. If this assert is hit, it means that there
+       * was some scheduling issue before. */
+      BLI_assert(value != nullptr);
+
+      /* Move value into memory owned by the outer allocator. */
+      const CPPType &type = *input_state.type;
+      void *buffer = outer_allocator_.allocate(type.size(), type.alignment());
+      type.move_to_uninitialized(value, buffer);
+
+      params_.r_output_values.append({type, buffer});
+    }
+  }
+
+  /**
+   * Load the required input from the socket or trigger nodes to the left to compute the value.
+   * When this function is called, the node will always be executed again eventually (either
+   * immediately, or when all required inputs have been computed by other nodes).
+   */
+  void set_input_required(LockedNode &locked_node, const DInputSocket input_socket)
+  {
+    BLI_assert(locked_node.node == input_socket.node());
+    InputState &input_state = locked_node.node_state.inputs[input_socket->index()];
+
+    /* Value set as unused cannot become used again. */
+    BLI_assert(input_state.usage != ValueUsage::Unused);
+
+    if (input_state.usage == ValueUsage::Required) {
+      /* The value is already required, but the node might expect to be evaluated again. */
+      this->schedule_node(locked_node);
+      /* Returning here also ensure that the code below is executed at most once per input. */
+      return;
+    }
+    input_state.usage = ValueUsage::Required;
+
+    if (input_state.was_ready_for_execution) {
+      /* The value was already ready, but the node might expect to be evaluated again. */
+      this->schedule_node(locked_node);
+      return;
+    }
+
+    /* Count how many values still have to be added to this input until it is "complete". */
+    int missing_values = 0;
+    if (input_socket->is_multi_input_socket()) {
+      MultiInputValue &multi_value = *input_state.value.multi;
+      missing_values = multi_value.expected_size - multi_value.items.size();
+    }
+    else {
+      SingleInputValue &single_value = *input_state.value.single;
+      if (single_value.value == nullptr) {
+        missing_values = 1;
       }
     }
+    if (missing_values == 0) {
+      /* The input is fully available already, but the node might expect to be evaluated again. */
+      this->schedule_node(locked_node);
+      return;
+    }
+    /* Increase the total number of missing required inputs. This ensures that the node will be
+     * scheduled correctly when all inputs have been provided. */
+    locked_node.node_state.missing_required_inputs += missing_values;
+
+    /* Get all origin sockets, because we have to tag those as required as well. */
+    Vector<DSocket> origin_sockets;
+    input_socket.foreach_origin_socket(
+        [&, this](const DSocket origin_socket) { origin_sockets.append(origin_socket); });
+
+    if (origin_sockets.is_empty()) {
+      /* If there are no origin sockets, just load the value from the socket directly. */
+      this->load_unlinked_input_value(locked_node, input_socket, input_state, input_socket);
+      locked_node.node_state.missing_required_inputs -= 1;
+      this->schedule_node(locked_node);
+      return;
+    }
+    bool will_be_triggered_by_other_node = false;
+    for (const DSocket origin_socket : origin_sockets) {
+      if (origin_socket->is_input()) {
+        /* Load the value directly from the origin socket. In most cases this is an unlinked
+         * group input. */
+        this->load_unlinked_input_value(locked_node, input_socket, input_state, origin_socket);
+        locked_node.node_state.missing_required_inputs -= 1;
+        this->schedule_node(locked_node);
+        return;
+      }
+      /* The value has not been computed yet, so when it will be forwarded by another node, this
+       * node will be triggered. */
+      will_be_triggered_by_other_node = true;
+
+      locked_node.delayed_required_outputs.append(DOutputSocket(origin_socket));
+    }
+    /* If this node will be triggered by another node, we don't have to schedule it now. */
+    if (!will_be_triggered_by_other_node) {
+      this->schedule_node(locked_node);
+    }
+  }
+
+  void set_input_unused(LockedNode &locked_node, const DInputSocket socket)
+  {
+    InputState &input_state = locked_node.node_state.inputs[socket->index()];
+
+    /* A required socket cannot become unused. */
+    BLI_assert(input_state.usage != ValueUsage::Required);
+
+    if (input_state.usage == ValueUsage::Unused) {
+      /* Nothing to do in this case. */
+      return;
+    }
+    input_state.usage = ValueUsage::Unused;
+
+    /* If the input is unused, it's value can be destructed now. */
+    this->destruct_input_value_if_exists(locked_node, socket);
+
+    if (input_state.was_ready_for_execution) {
+      /* If the value was already computed, we don't need to notify origin nodes. */
+      return;
+    }
+
+    /* Notify origin nodes that might want to set its inputs as unused as well. */
+    socket.foreach_origin_socket([&, this](const DSocket origin_socket) {
+      if (origin_socket->is_input()) {
+        /* Values from these sockets are loaded directly from the sockets, so there is no node to
+         * notify. */
+        return;
+      }
+      /* Delay notification of the other node until this node is not locked anymore. */
+      locked_node.delayed_unused_outputs.append(DOutputSocket(origin_socket));
+    });
+  }
+
+  void send_output_required_notification(const DOutputSocket socket)
+  {
+    const DNode node = socket.node();
+    NodeState &node_state = this->get_node_state(node);
+    OutputState &output_state = node_state.outputs[socket->index()];
+
+    LockedNode locked_node{*this, node, node_state};
+    if (output_state.output_usage == ValueUsage::Required) {
+      /* Output is marked as required already. So the node is scheduled already. */
+      return;
+    }
+    /* The origin node needs to be scheduled so that it provides the requested input
+     * eventually. */
+    output_state.output_usage = ValueUsage::Required;
+    this->schedule_node(locked_node);
   }
 
-  void forward_to_inputs(const DOutputSocket from_socket, GMutablePointer value_to_forward)
+  void send_output_unused_notification(const DOutputSocket socket)
   {
-    /* For all sockets that are linked with the from_socket push the value to their node. */
-    Vector<DInputSocket> to_sockets_all;
+    const DNode node = socket.node();
+    NodeState &node_state = this->get_node_state(node);
+    OutputState &output_state = node_state.outputs[socket->index()];
 
-    auto handle_target_socket_fn = [&](DInputSocket to_socket) {
-      to_sockets_all.append_non_duplicates(to_socket);
+    LockedNode locked_node{*this, node, node_state};
+    output_state.potential_users -= 1;
+    if (output_state.potential_users == 0) {
+      /* The output socket has no users anymore. */
+      output_state.output_usage = ValueUsage::Unused;
+      /* Schedule the origin node in case it wants to set its inputs as unused as well. */
+      this->schedule_node(locked_node);
+    }
+  }
+
+  void add_node_to_task_pool(const DNode node)
+  {
+    /* Push the task to the pool while it is not locked to avoid a deadlock in case when the task
+     * is executed immediately. */
+    const NodeWithState *node_with_state = node_states_.lookup_key_ptr_as(node);
+    BLI_task_pool_push(
+        task_pool_, run_node_from_task_pool, (void *)node_with_state, false, nullptr);
+  }
+
+  /**
+   * Moves a newly computed value from an output socket to all the inputs that might need it.
+   */
+  void forward_output(const DOutputSocket from_socket, GMutablePointer value_to_forward)
+  {
+    BLI_assert(value_to_forward.get() != nullptr);
+
+    Vector<DInputSocket> to_sockets;
+    auto handle_target_socket_fn = [&, this](const DInputSocket to_socket) {
+      if (this->should_forward_to_socket(to_socket)) {
+        to_sockets.append(to_socket);
+      }
     };
-    auto handle_skipped_socket_fn = [&, this](DSocket socket) {
+    auto handle_skipped_socket_fn = [&, this](const DSocket socket) {
+      /* Log socket value on intermediate sockets to support e.g. attribute search or spreadsheet
+       * breadcrumbs on group nodes. */
       this->log_socket_value(socket, value_to_forward);
     };
-
     from_socket.foreach_target_socket(handle_target_socket_fn, handle_skipped_socket_fn);
 
+    LinearAllocator<> &allocator = local_allocators_.local();
+
     const CPPType &from_type = *value_to_forward.type();
     Vector<DInputSocket> to_sockets_same_type;
-    for (const DInputSocket &to_socket : to_sockets_all) {
-      const CPPType &to_type = *blender::nodes::socket_cpp_type_get(*to_socket->typeinfo());
-      const std::pair<DInputSocket, DOutputSocket> key = std::make_pair(to_socket, from_socket);
+    for (const DInputSocket &to_socket : to_sockets) {
+      const CPPType &to_type = *get_socket_cpp_type(to_socket);
       if (from_type == to_type) {
+        /* All target sockets that do not need a conversion will be handled afterwards. */
         to_sockets_same_type.append(to_socket);
+        continue;
       }
-      else {
-        void *buffer = allocator_.allocate(to_type.size(), to_type.alignment());
-        if (conversions_.is_convertible(from_type, to_type)) {
-          conversions_.convert_to_uninitialized(
-              from_type, to_type, value_to_forward.get(), buffer);
-        }
-        else {
-          to_type.copy_to_uninitialized(to_type.default_value(), buffer);
-        }
-        add_value_to_input_socket(key, GMutablePointer{to_type, buffer});
-      }
+      this->forward_to_socket_with_different_type(
+          allocator, value_to_forward, from_socket, to_socket, to_type);
     }
+    this->forward_to_sockets_with_same_type(
+        allocator, to_sockets_same_type, value_to_forward, from_socket);
+  }
 
-    if (to_sockets_same_type.size() == 0) {
-      /* This value is not further used, so destruct it. */
-      value_to_forward.destruct();
+  bool should_forward_to_socket(const DInputSocket socket)
+  {
+    const DNode to_node = socket.node();
+    const NodeWithState *target_node_with_state = node_states_.lookup_key_ptr_as(to_node);
+    if (target_node_with_state == nullptr) {
+      /* If the socket belongs to a node that has no state, the entire node is not used. */
+      return false;
+    }
+    NodeState &target_node_state = *target_node_with_state->state;
+    InputState &target_input_state = target_node_state.inputs[socket->index()];
+
+    std::lock_guard lock{target_node_state.mutex};
+    /* Do not forward to an input socket whose value won't be used. */
+    return target_input_state.usage != ValueUsage::Unused;
+  }
+
+  void forward_to_socket_with_different_type(LinearAllocator<> &allocator,
+                                             const GPointer value_to_forward,
+                                             const DOutputSocket from_socket,
+                                             const DInputSocket to_socket,
+                                             const CPPType &to_type)
+  {
+    const CPPType &from_type = *value_to_forward.type();
+
+    /* Allocate a buffer for the converted value. */
+    void *buffer = allocator.allocate(to_type.size(), to_type.alignment());
+
+    if (conversions_.is_convertible(from_type, to_type)) {
+      /* Do the conversion if possible. */
+      conversions_.convert_to_uninitialized(from_type, to_type, value_to_forward.get(), buffer);
+    }
+    else {
+      /* Cannot convert, use default value instead. */
+      to_type.copy_to_uninitialized(to_type.default_value(), buffer);
     }
-    else if (to_sockets_same_type.size() == 1) {
-      /* This value is only used on one input socket, no need to copy it. */
-      const DInputSocket to_socket = to_sockets_same_type[0];
-      const std::pair<DInputSocket, DOutputSocket> key = std::make_pair(to_socket, from_socket);
+    this->add_value_to_input_socket(to_socket, from_socket, {to_type, buffer});
+  }
 
-      add_value_to_input_socket(key, value_to_forward);
+  void forward_to_sockets_with_same_type(LinearAllocator<> &allocator,
+                                         Span<DInputSocket> to_sockets,
+                                         GMutablePointer value_to_forward,
+                                         const DOutputSocket from_socket)
+  {
+    if (to_sockets.is_empty()) {
+      /* Value is not used anymore, so it can be destructed. */
+      value_to_forward.destruct();
+    }
+    else if (to_sockets.size() == 1) {
+      /* Value is only used by one input socket, no need to copy it. */
+      const DInputSocket to_socket = to_sockets[0];
+      this->add_value_to_input_socket(to_socket, from_socket, value_to_forward);
     }
     else {
       /* Multiple inputs use the value, make a copy for every input except for one. */
-      const DInputSocket first_to_socket = to_sockets_same_type[0];
-      Span<DInputSocket> other_to_sockets = to_sockets_same_type.as_span().drop_front(1);
+      /* First make the copies, so that the next node does not start modifying the value while we
+       * are still making copies. */
       const CPPType &type = *value_to_forward.type();
-      const std::pair<DInputSocket, DOutputSocket> first_key = std::make_pair(first_to_socket,
-                                                                              from_socket);
-      add_value_to_input_socket(first_key, value_to_forward);
-      for (const DInputSocket &to_socket : other_to_sockets) {
-        const std::pair<DInputSocket, DOutputSocket> key = std::make_pair(to_socket, from_socket);
-        void *buffer = allocator_.allocate(type.size(), type.alignment());
+      for (const DInputSocket &to_socket : to_sockets.drop_front(1)) {
+        void *buffer = allocator.allocate(type.size(), type.alignment());
         type.copy_to_uninitialized(value_to_forward.get(), buffer);
-        add_value_to_input_socket(key, GMutablePointer{type, buffer});
+        this->add_value_to_input_socket(to_socket, from_socket, {type, buffer});
       }
+      /* Forward the original value to one of the targets. */
+      const DInputSocket to_socket = to_sockets[0];
+      this->add_value_to_input_socket(to_socket, from_socket, value_to_forward);
     }
   }
 
-  void add_value_to_input_socket(const std::pair<DInputSocket, DOutputSocket> key,
+  void add_value_to_input_socket(const DInputSocket socket,
+                                 const DOutputSocket origin,
                                  GMutablePointer value)
   {
-    value_by_input_.add_new(key, value);
+    BLI_assert(socket->is_available());
+
+    const DNode node = socket.node();
+    NodeState &node_state = this->get_node_state(node);
+    InputState &input_state = node_state.inputs[socket->index()];
+
+    /* Lock the node because we want to change its state. */
+    LockedNode locked_node{*this, node, node_state};
+
+    if (socket->is_multi_input_socket()) {
+      /* Add a new value to the multi-input. */
+      MultiInputValue &multi_value = *input_state.value.multi;
+      multi_value.items.append({origin, value.get()});
+    }
+    else {
+      /* Assign the value to the input. */
+      SingleInputValue &single_value = *input_state.value.single;
+      BLI_assert(single_value.value == nullptr);
+      single_value.value = value.get();
+    }
+
+    if (input_state.usage == ValueUsage::Required) {
+      node_state.missing_required_inputs--;
+      if (node_state.missing_required_inputs == 0) {
+        /* Schedule node if all the required inputs have been provided. */
+        this->schedule_node(locked_node);
+      }
+    }
+  }
+
+  void load_unlinked_input_value(LockedNode &locked_node,
+                                 const DInputSocket input_socket,
+                                 InputState &input_state,
+                                 const DSocket origin_socket)
+  {
+    /* Only takes locked node as parameter, because the node needs to be locked. */
+    UNUSED_VARS(locked_node);
+
+    GMutablePointer value = this->get_value_from_socket(origin_socket, *input_state.type);
+    if (input_socket->is_multi_input_socket()) {
+      MultiInputValue &multi_value = *input_state.value.multi;
+      multi_value.items.append({input_socket, value.get()});
+    }
+    else {
+      SingleInputValue &single_value = *input_state.value.single;
+      single_value.value = value.get();
+    }
+  }
+
+  void destruct_input_value_if_exists(LockedNode &locked_node, const DInputSocket socket)
+  {
+    InputState &input_state = locked_node.node_state.inputs[socket->index()];
+    if (socket->is_multi_input_socket()) {
+      MultiInputValue &multi_value = *input_state.value.multi;
+      for (MultiInputValueItem &item : multi_value.items) {
+        input_state.type->destruct(item.value);
+      }
+      multi_value.items.clear();
+    }
+    else {
+      SingleInputValue &single_value = *input_state.value.single;
+      if (single_value.value != nullptr) {
+        input_state.type->destruct(single_value.value);
+        single_value.value = nullptr;
+      }
+    }
   }
 
-  GMutablePointer get_unlinked_input_value(const DInputSocket &socket,
-                                           const CPPType &required_type)
+  GMutablePointer get_value_from_socket(const DSocket socket, const CPPType &required_type)
   {
+    LinearAllocator<> &allocator = local_allocators_.local();
+
     bNodeSocket *bsocket = socket->bsocket();
-    const CPPType &type = *blender::nodes::socket_cpp_type_get(*socket->typeinfo());
-    void *buffer = allocator_.allocate(type.size(), type.alignment());
+    const CPPType &type = *get_socket_cpp_type(socket);
+    void *buffer = allocator.allocate(type.size(), type.alignment());
     blender::nodes::socket_cpp_value_get(*bsocket, buffer);
 
     if (type == required_type) {
       return {type, buffer};
     }
     if (conversions_.is_convertible(type, required_type)) {
-      void *converted_buffer = allocator_.allocate(required_type.size(),
-                                                   required_type.alignment());
+      /* Convert the loaded value to the required type if possible. */
+      void *converted_buffer = allocator.allocate(required_type.size(), required_type.alignment());
       conversions_.convert_to_uninitialized(type, required_type, buffer, converted_buffer);
       type.destruct(buffer);
       return {required_type, converted_buffer};
     }
-    void *default_buffer = allocator_.allocate(required_type.size(), required_type.alignment());
+    /* Use a default fallback value when the loaded type is not compatible. */
+    void *default_buffer = allocator.allocate(required_type.size(), required_type.alignment());
     required_type.copy_to_uninitialized(required_type.default_value(), default_buffer);
     return {required_type, default_buffer};
   }
+
+  NodeState &get_node_state(const DNode node)
+  {
+    return *node_states_.lookup_key_as(node).state;
+  }
+
+  void log_socket_value(const DSocket socket, Span<GPointer> values)
+  {
+    if (params_.log_socket_value_fn) {
+      params_.log_socket_value_fn(socket, values);
+    }
+  }
+
+  void log_socket_value(const DSocket socket,
+                        InputState &input_state,
+                        Span<MultiInputValueItem> values)
+  {
+    Vector<GPointer, 16> value_pointers;
+    value_pointers.reserve(values.size());
+    const CPPType &type = *input_state.type;
+    for (const MultiInputValueItem &item : values) {
+      value_pointers.append({type, item.value});
+    }
+    this->log_socket_value(socket, value_pointers);
+  }
+
+  void log_socket_value(const DSocket socket, GPointer value)
+  {
+    this->log_socket_value(socket, Span<GPointer>(&value, 1));
+  }
 };
 
+LockedNode::~LockedNode()
+{
+  /* First unlock the current node. */
+  node_state.mutex.unlock();
+  /* Then send notifications to the other nodes. */
+  for (const DOutputSocket &socket : delayed_required_outputs) {
+    evaluator_.send_output_required_notification(socket);
+  }
+  for (const DOutputSocket &socket : delayed_unused_outputs) {
+    evaluator_.send_output_unused_notification(socket);
+  }
+  for (const DNode &node : delayed_scheduled_nodes) {
+    evaluator_.add_node_to_task_pool(node);
+  }
+}
+
+/* TODO: Use a map data structure or so to make this faster. */
+static DInputSocket get_input_by_identifier(const DNode node, const StringRef identifier)
+{
+  for (const InputSocketRef *socket : node->inputs()) {
+    if (socket->identifier() == identifier) {
+      return {node.context(), socket};
+    }
+  }
+  return {};
+}
+
+static DOutputSocket get_output_by_identifier(const DNode node, const StringRef identifier)
+{
+  for (const OutputSocketRef *socket : node->outputs()) {
+    if (socket->identifier() == identifier) {
+      return {node.context(), socket};
+    }
+  }
+  return {};
+}
+
+NodeParamsProvider::NodeParamsProvider(GeometryNodesEvaluator &evaluator,
+                                       DNode dnode,
+                                       NodeState &node_state)
+    : evaluator_(evaluator), node_state_(node_state)
+{
+  this->dnode = dnode;
+  this->self_object = evaluator.params_.self_object;
+  this->modifier = &evaluator.params_.modifier_->modifier;
+  this->depsgraph = evaluator.params_.depsgraph;
+}
+
+bool NodeParamsProvider::can_get_input(StringRef identifier) const
+{
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  InputState &input_state = node_state_.inputs[socket->index()];
+  if (!input_state.was_ready_for_execution) {
+    return false;
+  }
+
+  if (socket->is_multi_input_socket()) {
+    MultiInputValue &multi_value = *input_state.value.multi;
+    return multi_value.items.size() == multi_value.expected_size;
+  }
+  SingleInputValue &single_value = *input_state.value.single;
+  return single_value.value != nullptr;
+}
+
+bool NodeParamsProvider::can_set_output(StringRef identifier) const
+{
+  const DOutputSocket socket = get_output_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  OutputState &output_state = node_state_.outputs[socket->index()];
+  return !output_state.has_been_computed;
+}
+
+GMutablePointer NodeParamsProvider::extract_input(StringRef identifier)
+{
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+  BLI_assert(!socket->is_multi_input_socket());
+  BLI_assert(this->can_get_input(identifier));
+
+  InputState &input_state = node_state_.inputs[socket->index()];
+  SingleInputValue &single_value = *input_state.value.single;
+  void *value = single_value.value;
+  single_value.value = nullptr;
+  return {*input_state.type, value};
+}
+
+Vector<GMutablePointer> NodeParamsProvider::extract_multi_input(StringRef identifier)
+{
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+  BLI_assert(socket->is_multi_input_socket());
+  BLI_assert(this->can_get_input(identifier));
+
+  InputState &input_state = node_state_.inputs[socket->index()];
+  MultiInputValue &multi_value = *input_state.value.multi;
+
+  Vector<GMutablePointer> ret_values;
+  socket.foreach_origin_socket([&](DSocket origin) {
+    for (const MultiInputValueItem &item : multi_value.items) {
+      if (item.origin == origin) {
+        ret_values.append({*input_state.type, item.value});
+        return;
+      }
+    }
+    BLI_assert_unreachable();
+  });
+  multi_value.items.clear();
+  return ret_values;
+}
+
+GPointer NodeParamsProvider::get_input(StringRef identifier) const
+{
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+  BLI_assert(!socket->is_multi_input_socket());
+  BLI_assert(this->can_get_input(identifier));
+
+  InputState &input_state = node_state_.inputs[socket->index()];
+  SingleInputValue &single_value = *input_state.value.single;
+  return {*input_state.type, single_value.value};
+}
+
+GMutablePointer NodeParamsProvider::alloc_output_value(const CPPType &type)
+{
+  LinearAllocator<> &allocator = evaluator_.local_allocators_.local();
+  return {type, allocator.allocate(type.size(), type.alignment())};
+}
+
+void NodeParamsProvider::set_output(StringRef identifier, GMutablePointer value)
+{
+  const DOutputSocket socket = get_output_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  evaluator_.log_socket_value(socket, value);
+
+  OutputState &output_state = node_state_.outputs[socket->index()];
+  BLI_assert(!output_state.has_been_computed);
+  evaluator_.forward_output(socket, value);
+  output_state.has_been_computed = true;
+}
+
+bool NodeParamsProvider::lazy_require_input(StringRef identifier)
+{
+  BLI_assert(node_supports_lazyness(this->dnode));
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  InputState &input_state = node_state_.inputs[socket->index()];
+  if (input_state.was_ready_for_execution) {
+    return false;
+  }
+  LockedNode locked_node{evaluator_, this->dnode, node_state_};
+  evaluator_.set_input_required(locked_node, socket);
+  return true;
+}
+
+void NodeParamsProvider::set_input_unused(StringRef identifier)
+{
+  const DInputSocket socket = get_input_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  LockedNode locked_node{evaluator_, this->dnode, node_state_};
+  evaluator_.set_input_unused(locked_node, socket);
+}
+
+bool NodeParamsProvider::output_is_required(StringRef identifier) const
+{
+  const DOutputSocket socket = get_output_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  OutputState &output_state = node_state_.outputs[socket->index()];
+  if (output_state.has_been_computed) {
+    return false;
+  }
+  return output_state.output_usage_for_execution != ValueUsage::Unused;
+}
+
+bool NodeParamsProvider::lazy_output_is_required(StringRef identifier) const
+{
+  BLI_assert(node_supports_lazyness(this->dnode));
+  const DOutputSocket socket = get_output_by_identifier(this->dnode, identifier);
+  BLI_assert(socket);
+
+  OutputState &output_state = node_state_.outputs[socket->index()];
+  if (output_state.has_been_computed) {
+    return false;
+  }
+  return output_state.output_usage_for_execution == ValueUsage::Required;
+}
+
 void evaluate_geometry_nodes(GeometryNodesEvaluationParams &params)
 {
   GeometryNodesEvaluator evaluator{params};
-  params.r_output_values = evaluator.execute();
+  evaluator.execute();
 }
 
 }  // namespace blender::modifiers::geometry_nodes
author	Jacques Lucke <jacques@blender.org>	2021-05-20 12:34:47 +0300
committer	Jacques Lucke <jacques@blender.org>	2021-05-20 12:35:13 +0300
commit	b084b57fbf6d2b6c2a1adb2192f5d82581cede5c (patch)
tree	14c11a07c6d0dbb58f6eb71804d28b741125b8d4 /source/blender/modifiers
parent	44e7192745f0b53e29d20bb88d4df0945a808758 (diff)