25 files changed, 1028 insertions, 498 deletions
diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt
index 54670c0d1b3..9cfaf3eabea 100644
--- a/source/blender/functions/CMakeLists.txt
+++ b/source/blender/functions/CMakeLists.txt
@@ -34,10 +34,11 @@ set(SRC
   intern/generic_virtual_vector_array.cc
   intern/multi_function.cc
   intern/multi_function_builder.cc
-  intern/multi_function_parallel.cc
+  intern/multi_function_params.cc
   intern/multi_function_procedure.cc
   intern/multi_function_procedure_builder.cc
   intern/multi_function_procedure_executor.cc
+  intern/multi_function_procedure_optimization.cc
 
   FN_cpp_type.hh
   FN_cpp_type_make.hh
@@ -54,12 +55,12 @@ set(SRC
   FN_multi_function_builder.hh
   FN_multi_function_context.hh
   FN_multi_function_data_type.hh
-  FN_multi_function_parallel.hh
   FN_multi_function_param_type.hh
   FN_multi_function_params.hh
   FN_multi_function_procedure.hh
   FN_multi_function_procedure_builder.hh
   FN_multi_function_procedure_executor.hh
+  FN_multi_function_procedure_optimization.hh
   FN_multi_function_signature.hh
 )
 
diff --git a/source/blender/functions/FN_cpp_type.hh b/source/blender/functions/FN_cpp_type.hh
index 643b2fc1f28..7ddb5bf1f46 100644
--- a/source/blender/functions/FN_cpp_type.hh
+++ b/source/blender/functions/FN_cpp_type.hh
@@ -207,6 +207,18 @@ class CPPType : NonCopyable, NonMovable {
     return is_trivially_destructible_;
   }
 
+  /**
+   * When true, the value is like a normal C type, it can be copied around with #memcpy and does
+   * not have to be destructed.
+   *
+   * C++ equivalent:
+   *   std::is_trivial_v<T>;
+   */
+  bool is_trivial() const
+  {
+    return is_trivial_;
+  }
+
   bool is_default_constructible() const
   {
     return default_construct_ != nullptr;
diff --git a/source/blender/functions/FN_field.hh b/source/blender/functions/FN_field.hh
index fb488fdbfa9..a591aaed34a 100644
--- a/source/blender/functions/FN_field.hh
+++ b/source/blender/functions/FN_field.hh
@@ -49,16 +49,15 @@
 #include "BLI_function_ref.hh"
 #include "BLI_string_ref.hh"
 #include "BLI_vector.hh"
+#include "BLI_vector_set.hh"
 
 #include "FN_generic_virtual_array.hh"
 #include "FN_multi_function_builder.hh"
-#include "FN_multi_function_procedure.hh"
-#include "FN_multi_function_procedure_builder.hh"
-#include "FN_multi_function_procedure_executor.hh"
 
 namespace blender::fn {
 
 class FieldInput;
+struct FieldInputs;
 
 /**
  * A node in a field-tree. It has at least one output that can be referenced by fields.
@@ -66,18 +65,18 @@ class FieldInput;
 class FieldNode {
  private:
   bool is_input_;
+
+ protected:
   /**
-   * True when this node is a #FieldInput or (potentially indirectly) depends on one. This could
-   * always be derived again later by traversing the field-tree, but keeping track of it while the
-   * field is built is cheaper.
-   *
-   * If this is false, the field is constant. Note that even when this is true, the field may be
-   * constant when all inputs are constant.
+   * Keeps track of the inputs that this node depends on. This avoids recomputing it every time the
+   * data is required. It is a shared pointer, because very often multiple nodes depend on the same
+   * inputs.
+   * Might contain null.
    */
-  bool depends_on_input_;
+  std::shared_ptr<const FieldInputs> field_inputs_;
 
  public:
-  FieldNode(bool is_input, bool depends_on_input);
+  FieldNode(bool is_input);
 
   virtual ~FieldNode() = default;
 
@@ -87,11 +86,7 @@ class FieldNode {
   bool is_operation() const;
   bool depends_on_input() const;
 
-  /**
-   * Invoke callback for every field input. It might be called multiple times for the same input.
-   * The caller is responsible for deduplication if required.
-   */
-  virtual void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const = 0;
+  const std::shared_ptr<const FieldInputs> &field_inputs() const;
 
   virtual uint64_t hash() const;
   virtual bool is_equal_to(const FieldNode &other) const;
@@ -178,11 +173,19 @@ class GFieldRef : public GFieldBase<const FieldNode *> {
   }
 };
 
+namespace detail {
+/* Utility class to make #is_field_v work. */
+struct TypedFieldBase {
+};
+}  // namespace detail
+
 /**
  * A typed version of #GField. It has the same memory layout as #GField.
  */
-template<typename T> class Field : public GField {
+template<typename T> class Field : public GField, detail::TypedFieldBase {
  public:
+  using base_type = T;
+
   Field() = default;
 
   Field(GField field) : GField(std::move(field))
@@ -196,6 +199,11 @@ template<typename T> class Field : public GField {
   }
 };
 
+/** True when T is any Field<...> type. */
+template<typename T>
+static constexpr bool is_field_v = std::is_base_of_v<detail::TypedFieldBase, T> &&
+                                   !std::is_same_v<detail::TypedFieldBase, T>;
+
 /**
  * A #FieldNode that allows composing existing fields into new fields.
  */
@@ -218,7 +226,6 @@ class FieldOperation : public FieldNode {
   const MultiFunction &multi_function() const;
 
   const CPPType &output_cpp_type(int output_index) const override;
-  void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const override;
 };
 
 class FieldContext;
@@ -258,7 +265,19 @@ class FieldInput : public FieldNode {
   Category category() const;
 
   const CPPType &output_cpp_type(int output_index) const override;
-  void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const override;
+};
+
+/**
+ * Keeps track of the inputs of a field.
+ */
+struct FieldInputs {
+  /** All #FieldInput nodes that a field (possibly indirectly) depends on. */
+  VectorSet<const FieldInput *> nodes;
+  /**
+   * Same as above but the inputs are deduplicated. For example, when there are two separate index
+   * input nodes, only one will show up in this list.
+   */
+  VectorSet<std::reference_wrapper<const FieldInput>> deduplicated_nodes;
 };
 
 /**
@@ -294,6 +313,9 @@ class FieldEvaluator : NonMovable, NonCopyable {
   Vector<OutputPointerInfo> output_pointer_infos_;
   bool is_evaluated_ = false;
 
+  Field<bool> selection_field_;
+  IndexMask selection_mask_;
+
  public:
   /** Takes #mask by pointer because the mask has to live longer than the evaluator. */
   FieldEvaluator(const FieldContext &context, const IndexMask *mask)
@@ -314,6 +336,18 @@ class FieldEvaluator : NonMovable, NonCopyable {
   }
 
   /**
+   * The selection field is evaluated first to determine which indices of the other fields should
+   * be evaluated. Calling this method multiple times will just replace the previously set
+   * selection field. Only the elements selected by both this selection and the selection provided
+   * in the constructor are calculated. If no selection field is set, it is assumed that all
+   * indices passed to the constructor are selected.
+   */
+  void set_selection(Field<bool> selection)
+  {
+    selection_field_ = std::move(selection);
+  }
+
+  /**
    * \param field: Field to add to the evaluator.
    * \param dst: Mutable virtual array that the evaluated result for this field is be written into.
    */
@@ -384,6 +418,8 @@ class FieldEvaluator : NonMovable, NonCopyable {
     return this->get_evaluated(field_index).typed<T>();
   }
 
+  IndexMask get_evaluated_selection_as_mask();
+
   /**
    * Retrieve the output of an evaluated boolean field and convert it to a mask, which can be used
    * to avoid calculations for unnecessary elements later on. The evaluator will own the indices in
@@ -392,6 +428,24 @@ class FieldEvaluator : NonMovable, NonCopyable {
   IndexMask get_evaluated_as_mask(const int field_index);
 };
 
+/**
+ * Evaluate fields in the given context. If possible, multiple fields should be evaluated together,
+ * because that can be more efficient when they share common sub-fields.
+ *
+ * \param scope: The resource scope that owns data that makes up the output virtual arrays. Make
+ *   sure the scope is not destructed when the output virtual arrays are still used.
+ * \param fields_to_evaluate: The fields that should be evaluated together.
+ * \param mask: Determines which indices are computed. The mask may be referenced by the returned
+ *   virtual arrays. So the underlying indices (if applicable) should live longer then #scope.
+ * \param context: The context that the field is evaluated in. Used to retrieve data from each
+ *   #FieldInput in the field network.
+ * \param dst_varrays: If provided, the computed data will be written into those virtual arrays
+ *   instead of into newly created ones. That allows making the computed data live longer than
+ *   #scope and is more efficient when the data will be written into those virtual arrays
+ *   later anyway.
+ * \return The computed virtual arrays for each provided field. If #dst_varrays is passed, the
+ *   provided virtual arrays are returned.
+ */
 Vector<GVArray> evaluate_fields(ResourceScope &scope,
                                 Span<GFieldRef> fields_to_evaluate,
                                 IndexMask mask,
@@ -419,13 +473,24 @@ template<typename T> Field<T> make_constant_field(T value)
   return Field<T>{GField{std::move(operation), 0}};
 }
 
+GField make_constant_field(const CPPType &type, const void *value);
+
+/**
+ * If the field depends on some input, the same field is returned.
+ * Otherwise the field is evaluated and a new field is created that just computes this constant.
+ *
+ * Making the field constant has two benefits:
+ * - The field-tree becomes a single node, which is more efficient when the field is evaluated many
+ *   times.
+ * - Memory of the input fields may be freed.
+ */
 GField make_field_constant_if_possible(GField field);
 
 class IndexFieldInput final : public FieldInput {
  public:
   IndexFieldInput();
 
-  static GVArray get_index_varray(IndexMask mask, ResourceScope &scope);
+  static GVArray get_index_varray(IndexMask mask);
 
   GVArray get_varray_for_context(const FieldContext &context,
                                  IndexMask mask,
@@ -438,11 +503,56 @@ class IndexFieldInput final : public FieldInput {
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Value or Field Class
+ *
+ * Utility class that wraps a single value and a field, to simplify accessing both of the types.
+ * \{ */
+
+template<typename T> struct ValueOrField {
+  /** Value that is used when the field is empty. */
+  T value{};
+  Field<T> field;
+
+  ValueOrField() = default;
+
+  ValueOrField(T value) : value(std::move(value))
+  {
+  }
+
+  ValueOrField(Field<T> field) : field(std::move(field))
+  {
+  }
+
+  bool is_field() const
+  {
+    return (bool)this->field;
+  }
+
+  Field<T> as_field() const
+  {
+    if (this->field) {
+      return this->field;
+    }
+    return make_constant_field(this->value);
+  }
+
+  T as_value() const
+  {
+    if (this->field) {
+      /* This returns a default value when the field is not constant. */
+      return evaluate_constant_field(this->field);
+    }
+    return this->value;
+  }
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name #FieldNode Inline Methods
  * \{ */
 
-inline FieldNode::FieldNode(bool is_input, bool depends_on_input)
-    : is_input_(is_input), depends_on_input_(depends_on_input)
+inline FieldNode::FieldNode(bool is_input) : is_input_(is_input)
 {
 }
 
@@ -458,7 +568,12 @@ inline bool FieldNode::is_operation() const
 
 inline bool FieldNode::depends_on_input() const
 {
-  return depends_on_input_;
+  return field_inputs_ && !field_inputs_->nodes.is_empty();
+}
+
+inline const std::shared_ptr<const FieldInputs> &FieldNode::field_inputs() const
+{
+  return field_inputs_;
 }
 
 inline uint64_t FieldNode::hash() const
diff --git a/source/blender/functions/FN_field_cpp_type.hh b/source/blender/functions/FN_field_cpp_type.hh
index 5e6f1b5a585..940faba6d70 100644
--- a/source/blender/functions/FN_field_cpp_type.hh
+++ b/source/blender/functions/FN_field_cpp_type.hh
@@ -30,19 +30,19 @@ template<typename T> struct FieldCPPTypeParam {
 
 class FieldCPPType : public CPPType {
  private:
-  const CPPType &field_type_;
+  const CPPType &base_type_;
 
  public:
   template<typename T>
   FieldCPPType(FieldCPPTypeParam<Field<T>> /* unused */, StringRef debug_name)
       : CPPType(CPPTypeParam<Field<T>, CPPTypeFlags::None>(), debug_name),
-        field_type_(CPPType::get<T>())
+        base_type_(CPPType::get<T>())
   {
   }
 
-  const CPPType &field_type() const
+  const CPPType &base_type() const
   {
-    return field_type_;
+    return base_type_;
   }
 
   /* Ensure that #GField and #Field<T> have the same layout, to enable casting between the two. */
@@ -60,6 +60,84 @@ class FieldCPPType : public CPPType {
   }
 };
 
+class ValueOrFieldCPPType : public CPPType {
+ private:
+  const CPPType &base_type_;
+  void (*construct_from_value_)(void *dst, const void *value);
+  void (*construct_from_field_)(void *dst, GField field);
+  const void *(*get_value_ptr_)(const void *value_or_field);
+  const GField *(*get_field_ptr_)(const void *value_or_field);
+  bool (*is_field_)(const void *value_or_field);
+  GField (*as_field_)(const void *value_or_field);
+
+ public:
+  template<typename T>
+  ValueOrFieldCPPType(FieldCPPTypeParam<ValueOrField<T>> /* unused */, StringRef debug_name)
+      : CPPType(CPPTypeParam<ValueOrField<T>, CPPTypeFlags::None>(), debug_name),
+        base_type_(CPPType::get<T>())
+  {
+    construct_from_value_ = [](void *dst, const void *value_or_field) {
+      new (dst) ValueOrField<T>(*(const T *)value_or_field);
+    };
+    construct_from_field_ = [](void *dst, GField field) {
+      new (dst) ValueOrField<T>(Field<T>(std::move(field)));
+    };
+    get_value_ptr_ = [](const void *value_or_field) {
+      return (const void *)&((ValueOrField<T> *)value_or_field)->value;
+    };
+    get_field_ptr_ = [](const void *value_or_field) -> const GField * {
+      return &((ValueOrField<T> *)value_or_field)->field;
+    };
+    is_field_ = [](const void *value_or_field) {
+      return ((ValueOrField<T> *)value_or_field)->is_field();
+    };
+    as_field_ = [](const void *value_or_field) -> GField {
+      return ((ValueOrField<T> *)value_or_field)->as_field();
+    };
+  }
+
+  const CPPType &base_type() const
+  {
+    return base_type_;
+  }
+
+  void construct_from_value(void *dst, const void *value) const
+  {
+    construct_from_value_(dst, value);
+  }
+
+  void construct_from_field(void *dst, GField field) const
+  {
+    construct_from_field_(dst, field);
+  }
+
+  const void *get_value_ptr(const void *value_or_field) const
+  {
+    return get_value_ptr_(value_or_field);
+  }
+
+  void *get_value_ptr(void *value_or_field) const
+  {
+    /* Use `const_cast` to avoid duplicating the callback for the non-const case. */
+    return const_cast<void *>(get_value_ptr_(value_or_field));
+  }
+
+  const GField *get_field_ptr(const void *value_or_field) const
+  {
+    return get_field_ptr_(value_or_field);
+  }
+
+  bool is_field(const void *value_or_field) const
+  {
+    return is_field_(value_or_field);
+  }
+
+  GField as_field(const void *value_or_field) const
+  {
+    return as_field_(value_or_field);
+  }
+};
+
 }  // namespace blender::fn
 
 #define MAKE_FIELD_CPP_TYPE(DEBUG_NAME, FIELD_TYPE) \
@@ -69,4 +147,13 @@ class FieldCPPType : public CPPType {
     static blender::fn::FieldCPPType cpp_type{ \
         blender::fn::FieldCPPTypeParam<blender::fn::Field<FIELD_TYPE>>(), STRINGIFY(DEBUG_NAME)}; \
     return cpp_type; \
+  } \
+  template<> \
+  const blender::fn::CPPType & \
+  blender::fn::CPPType::get_impl<blender::fn::ValueOrField<FIELD_TYPE>>() \
+  { \
+    static blender::fn::ValueOrFieldCPPType cpp_type{ \
+        blender::fn::FieldCPPTypeParam<blender::fn::ValueOrField<FIELD_TYPE>>(), \
+        STRINGIFY(DEBUG_NAME##OrValue)}; \
+    return cpp_type; \
   }
diff --git a/source/blender/functions/FN_generic_span.hh b/source/blender/functions/FN_generic_span.hh
index e2c49697ba9..6f0147b7fb3 100644
--- a/source/blender/functions/FN_generic_span.hh
+++ b/source/blender/functions/FN_generic_span.hh
@@ -93,6 +93,11 @@ class GSpan {
     const int64_t new_size = std::max<int64_t>(0, std::min(size, size_ - start));
     return GSpan(*type_, POINTER_OFFSET(data_, type_->size() * start), new_size);
   }
+
+  GSpan slice(const IndexRange range) const
+  {
+    return this->slice(range.start(), range.size());
+  }
 };
 
 /**
@@ -169,6 +174,11 @@ class GMutableSpan {
     const int64_t new_size = std::max<int64_t>(0, std::min(size, size_ - start));
     return GMutableSpan(*type_, POINTER_OFFSET(data_, type_->size() * start), new_size);
   }
+
+  GMutableSpan slice(IndexRange range) const
+  {
+    return this->slice(range.start(), range.size());
+  }
 };
 
 }  // namespace blender::fn
diff --git a/source/blender/functions/FN_generic_virtual_array.hh b/source/blender/functions/FN_generic_virtual_array.hh
index b822f3a7c33..fc8612d6f87 100644
--- a/source/blender/functions/FN_generic_virtual_array.hh
+++ b/source/blender/functions/FN_generic_virtual_array.hh
@@ -148,14 +148,37 @@ class GVArrayCommon {
   void materialize_to_uninitialized(void *dst) const;
   void materialize_to_uninitialized(const IndexMask mask, void *dst) const;
 
+  /**
+   * Returns true when the virtual array is stored as a span internally.
+   */
   bool is_span() const;
+  /**
+   * Returns the internally used span of the virtual array. This invokes undefined behavior is the
+   * virtual array is not stored as a span internally.
+   */
   GSpan get_internal_span() const;
 
+  /**
+   * Returns true when the virtual array returns the same value for every index.
+   */
   bool is_single() const;
+  /**
+   * Copies the value that is used for every element into `r_value`, which is expected to point to
+   * initialized memory. This invokes undefined behavior if the virtual array would not return the
+   * same value for every index.
+   */
   void get_internal_single(void *r_value) const;
+  /**
+   * Same as `get_internal_single`, but `r_value` points to initialized memory.
+   */
   void get_internal_single_to_uninitialized(void *r_value) const;
 
   void get(const int64_t index, void *r_value) const;
+  /**
+   * Returns a copy of the value at the given index. Usually a typed virtual array should
+   * be used instead, but sometimes this is simpler when only a few indices are needed.
+   */
+  template<typename T> T get(const int64_t index) const;
   void get_to_uninitialized(const int64_t index, void *r_value) const;
 };
 
@@ -226,6 +249,9 @@ class GVMutableArray : public GVArrayCommon {
   void set_by_relocate(const int64_t index, void *value);
 
   void fill(const void *value);
+  /**
+   * Copy the values from the source buffer to all elements in the virtual array.
+   */
   void set_all(const void *src);
 
   GVMutableArrayImpl *get_implementation() const;
@@ -555,37 +581,19 @@ template<typename T> class VMutableArrayImpl_For_GVMutableArray : public VMutabl
 /** \} */
 
 /* -------------------------------------------------------------------- */
-/** \name #GVArrayImpl_For_GSpan and #GVMutableArrayImpl_For_GMutableSpan.
+/** \name #GVArrayImpl_For_GSpan.
  * \{ */
 
-class GVArrayImpl_For_GSpan : public GVArrayImpl {
- protected:
-  const void *data_ = nullptr;
-  const int64_t element_size_;
-
- public:
-  GVArrayImpl_For_GSpan(const GSpan span);
-
- protected:
-  GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size);
-
-  void get(const int64_t index, void *r_value) const override;
-  void get_to_uninitialized(const int64_t index, void *r_value) const override;
-
-  bool is_span() const override;
-  GSpan get_internal_span() const override;
-};
-
-class GVMutableArrayImpl_For_GMutableSpan : public GVMutableArrayImpl {
+class GVArrayImpl_For_GSpan : public GVMutableArrayImpl {
  protected:
   void *data_ = nullptr;
   const int64_t element_size_;
 
  public:
-  GVMutableArrayImpl_For_GMutableSpan(const GMutableSpan span);
+  GVArrayImpl_For_GSpan(const GMutableSpan span);
 
  protected:
-  GVMutableArrayImpl_For_GMutableSpan(const CPPType &type, const int64_t size);
+  GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size);
 
  public:
   void get(const int64_t index, void *r_value) const override;
@@ -688,6 +696,16 @@ inline void GVArrayCommon::get(const int64_t index, void *r_value) const
   impl_->get(index, r_value);
 }
 
+template<typename T> inline T GVArrayCommon::get(const int64_t index) const
+{
+  BLI_assert(index >= 0);
+  BLI_assert(index < this->size());
+  BLI_assert(this->type().is<T>());
+  T value{};
+  impl_->get(index, &value);
+  return value;
+}
+
 /* Same as `get`, but `r_value` is expected to point to uninitialized memory. */
 inline void GVArrayCommon::get_to_uninitialized(const int64_t index, void *r_value) const
 {
diff --git a/source/blender/functions/FN_multi_function.hh b/source/blender/functions/FN_multi_function.hh
index c57f6cf574e..1e36d87668a 100644
--- a/source/blender/functions/FN_multi_function.hh
+++ b/source/blender/functions/FN_multi_function.hh
@@ -60,6 +60,13 @@ class MultiFunction {
   {
   }
 
+  /**
+   * The result is the same as using #call directly but this method has some additional features.
+   * - Automatic multi-threading when possible and appropriate.
+   * - Automatic index mask offsetting to avoid large temporary intermediate arrays that are mostly
+   *   unused.
+   */
+  void call_auto(IndexMask mask, MFParams params, MFContext context) const;
   virtual void call(IndexMask mask, MFParams params, MFContext context) const = 0;
 
   virtual uint64_t hash() const
@@ -97,6 +104,8 @@ class MultiFunction {
     return signature_ref_->function_name;
   }
 
+  virtual std::string debug_name() const;
+
   bool depends_on_context() const
   {
     return signature_ref_->depends_on_context;
@@ -108,6 +117,31 @@ class MultiFunction {
     return *signature_ref_;
   }
 
+  /**
+   * Information about how the multi-function behaves that help a caller to execute it efficiently.
+   */
+  struct ExecutionHints {
+    /**
+     * Suggested minimum workload under which multi-threading does not really help.
+     * This should be lowered when the multi-function is doing something computationally expensive.
+     */
+    int64_t min_grain_size = 10000;
+    /**
+     * Indicates that the multi-function will allocate an array large enough to hold all indices
+     * passed in as mask. This tells the caller that it would be preferable to pass in smaller
+     * indices. Also maybe the full mask should be split up into smaller segments to decrease peak
+     * memory usage.
+     */
+    bool allocates_array = false;
+    /**
+     * Tells the caller that every execution takes about the same time. This helps making a more
+     * educated guess about a good grain size.
+     */
+    bool uniform_execution_time = true;
+  };
+
+  ExecutionHints execution_hints() const;
+
  protected:
   /* Make the function use the given signature. This should be called once in the constructor of
    * child classes. No copy of the signature is made, so the caller has to make sure that the
@@ -119,6 +153,8 @@ class MultiFunction {
     BLI_assert(signature != nullptr);
     signature_ref_ = signature;
   }
+
+  virtual ExecutionHints get_execution_hints() const;
 };
 
 inline MFParamsBuilder::MFParamsBuilder(const MultiFunction &fn, int64_t mask_size)
diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh
index 0ce05cbca30..eaf9e5ce70f 100644
--- a/source/blender/functions/FN_multi_function_builder.hh
+++ b/source/blender/functions/FN_multi_function_builder.hh
@@ -43,7 +43,7 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
   MFSignature signature_;
 
  public:
-  CustomMF_SI_SO(StringRef name, FunctionT function) : function_(std::move(function))
+  CustomMF_SI_SO(const char *name, FunctionT function) : function_(std::move(function))
   {
     MFSignatureBuilder signature{name};
     signature.single_input<In1>("In1");
@@ -53,7 +53,7 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
   }
 
   template<typename ElementFuncT>
-  CustomMF_SI_SO(StringRef name, ElementFuncT element_fn)
+  CustomMF_SI_SO(const char *name, ElementFuncT element_fn)
       : CustomMF_SI_SO(name, CustomMF_SI_SO::create_function(element_fn))
   {
   }
@@ -92,7 +92,7 @@ class CustomMF_SI_SI_SO : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function))
+  CustomMF_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function))
   {
     MFSignatureBuilder signature{name};
     signature.single_input<In1>("In1");
@@ -103,7 +103,7 @@ class CustomMF_SI_SI_SO : public MultiFunction {
   }
 
   template<typename ElementFuncT>
-  CustomMF_SI_SI_SO(StringRef name, ElementFuncT element_fn)
+  CustomMF_SI_SI_SO(const char *name, ElementFuncT element_fn)
       : CustomMF_SI_SI_SO(name, CustomMF_SI_SI_SO::create_function(element_fn))
   {
   }
@@ -150,7 +150,7 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_SI_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function))
+  CustomMF_SI_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function))
   {
     MFSignatureBuilder signature{name};
     signature.single_input<In1>("In1");
@@ -162,7 +162,7 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
   }
 
   template<typename ElementFuncT>
-  CustomMF_SI_SI_SI_SO(StringRef name, ElementFuncT element_fn)
+  CustomMF_SI_SI_SI_SO(const char *name, ElementFuncT element_fn)
       : CustomMF_SI_SI_SI_SO(name, CustomMF_SI_SI_SI_SO::create_function(element_fn))
   {
   }
@@ -211,7 +211,7 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_SI_SI_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function))
+  CustomMF_SI_SI_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function))
   {
     MFSignatureBuilder signature{name};
     signature.single_input<In1>("In1");
@@ -224,7 +224,7 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction {
   }
 
   template<typename ElementFuncT>
-  CustomMF_SI_SI_SI_SI_SO(StringRef name, ElementFuncT element_fn)
+  CustomMF_SI_SI_SI_SI_SO(const char *name, ElementFuncT element_fn)
       : CustomMF_SI_SI_SI_SI_SO(name, CustomMF_SI_SI_SI_SI_SO::create_function(element_fn))
   {
   }
@@ -265,7 +265,7 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_SM(StringRef name, FunctionT function) : function_(std::move(function))
+  CustomMF_SM(const char *name, FunctionT function) : function_(std::move(function))
   {
     MFSignatureBuilder signature{name};
     signature.single_mutable<Mut1>("Mut1");
@@ -274,7 +274,7 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction {
   }
 
   template<typename ElementFuncT>
-  CustomMF_SM(StringRef name, ElementFuncT element_fn)
+  CustomMF_SM(const char *name, ElementFuncT element_fn)
       : CustomMF_SM(name, CustomMF_SM::create_function(element_fn))
   {
   }
@@ -306,8 +306,8 @@ template<typename From, typename To> class CustomMF_Convert : public MultiFuncti
 
   static MFSignature create_signature()
   {
-    std::string name = CPPType::get<From>().name() + " to " + CPPType::get<To>().name();
-    MFSignatureBuilder signature{std::move(name)};
+    static std::string name = CPPType::get<From>().name() + " to " + CPPType::get<To>().name();
+    MFSignatureBuilder signature{name.c_str()};
     signature.single_input<From>("Input");
     signature.single_output<To>("Output");
     return signature.build();
@@ -372,9 +372,7 @@ template<typename T> class CustomMF_Constant : public MultiFunction {
   template<typename U> CustomMF_Constant(U &&value) : value_(std::forward<U>(value))
   {
     MFSignatureBuilder signature{"Constant"};
-    std::stringstream ss;
-    ss << value_;
-    signature.single_output<T>(ss.str());
+    signature.single_output<T>("Value");
     signature_ = signature.build();
     this->set_signature(&signature_);
   }
@@ -414,9 +412,7 @@ class CustomMF_DefaultOutput : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_DefaultOutput(StringRef name,
-                         Span<MFDataType> input_types,
-                         Span<MFDataType> output_types);
+  CustomMF_DefaultOutput(Span<MFDataType> input_types, Span<MFDataType> output_types);
   void call(IndexMask mask, MFParams params, MFContext context) const override;
 };
 
@@ -425,7 +421,7 @@ class CustomMF_GenericCopy : public MultiFunction {
   MFSignature signature_;
 
  public:
-  CustomMF_GenericCopy(StringRef name, MFDataType data_type);
+  CustomMF_GenericCopy(MFDataType data_type);
   void call(IndexMask mask, MFParams params, MFContext context) const override;
 };
 
diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh
deleted file mode 100644
index 84c57efd434..00000000000
--- a/source/blender/functions/FN_multi_function_parallel.hh
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#pragma once
-
-/** \file
- * \ingroup fn
- */
-
-#include "FN_multi_function.hh"
-
-namespace blender::fn {
-
-class ParallelMultiFunction : public MultiFunction {
- private:
-  const MultiFunction &fn_;
-  const int64_t grain_size_;
-  bool threading_supported_;
-
- public:
-  ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size);
-
-  void call(IndexMask mask, MFParams params, MFContext context) const override;
-};
-
-}  // namespace blender::fn
diff --git a/source/blender/functions/FN_multi_function_params.hh b/source/blender/functions/FN_multi_function_params.hh
index 5c7e75230f3..f4ddc4f2881 100644
--- a/source/blender/functions/FN_multi_function_params.hh
+++ b/source/blender/functions/FN_multi_function_params.hh
@@ -25,6 +25,8 @@
  * the function. `MFParams` is then used inside the called function to access the parameters.
  */
 
+#include <mutex>
+
 #include "BLI_resource_scope.hh"
 
 #include "FN_generic_pointer.hh"
@@ -45,6 +47,9 @@ class MFParamsBuilder {
   Vector<const GVVectorArray *> virtual_vector_arrays_;
   Vector<GVectorArray *> vector_arrays_;
 
+  std::mutex mutex_;
+  Vector<std::pair<int, GMutableSpan>> dummy_output_spans_;
+
   friend class MFParams;
 
   MFParamsBuilder(const MFSignature &signature, const IndexMask mask)
@@ -62,8 +67,8 @@ class MFParamsBuilder {
 
   template<typename T> void add_readonly_single_input_value(T value, StringRef expected_name = "")
   {
-    T *value_ptr = &scope_.add_value<T>(std::move(value));
-    this->add_readonly_single_input(value_ptr, expected_name);
+    this->add_readonly_single_input(VArray<T>::ForSingle(std::move(value), min_array_size_),
+                                    expected_name);
   }
   template<typename T> void add_readonly_single_input(const T *value, StringRef expected_name = "")
   {
@@ -254,20 +259,12 @@ class MFParams {
     this->assert_correct_param(param_index, name, MFParamType::SingleOutput);
     int data_index = builder_->signature_->data_index(param_index);
     GMutableSpan span = builder_->mutable_spans_[data_index];
-    if (span.is_empty()) {
-      /* The output is ignored by the caller, but the multi-function does not handle this case. So
-       * create a temporary buffer that the multi-function can write to. */
-      const CPPType &type = span.type();
-      void *buffer = builder_->scope_.linear_allocator().allocate(
-          builder_->min_array_size_ * type.size(), type.alignment());
-      if (!type.is_trivially_destructible()) {
-        /* Make sure the temporary elements will be destructed in the end. */
-        builder_->scope_.add_destruct_call(
-            [&type, buffer, mask = builder_->mask_]() { type.destruct_indices(buffer, mask); });
-      }
-      span = GMutableSpan{type, buffer, builder_->min_array_size_};
+    if (!span.is_empty()) {
+      return span;
     }
-    return span;
+    /* The output is ignored by the caller, but the multi-function does not handle this case. So
+     * create a temporary buffer that the multi-function can write to. */
+    return this->ensure_dummy_single_output(data_index);
   }
 
   /**
@@ -356,6 +353,8 @@ class MFParams {
     }
 #endif
   }
+
+  GMutableSpan ensure_dummy_single_output(int data_index);
 };
 
 }  // namespace blender::fn
diff --git a/source/blender/functions/FN_multi_function_procedure_executor.hh b/source/blender/functions/FN_multi_function_procedure_executor.hh
index 9c8b59739b8..409a031e7ed 100644
--- a/source/blender/functions/FN_multi_function_procedure_executor.hh
+++ b/source/blender/functions/FN_multi_function_procedure_executor.hh
@@ -31,9 +31,12 @@ class MFProcedureExecutor : public MultiFunction {
   const MFProcedure &procedure_;
 
  public:
-  MFProcedureExecutor(std::string name, const MFProcedure &procedure);
+  MFProcedureExecutor(const MFProcedure &procedure);
 
   void call(IndexMask mask, MFParams params, MFContext context) const override;
+
+ private:
+  ExecutionHints get_execution_hints() const override;
 };
 
 }  // namespace blender::fn
diff --git a/source/blender/functions/FN_multi_function_procedure_optimization.hh b/source/blender/functions/FN_multi_function_procedure_optimization.hh
new file mode 100644
index 00000000000..e5ffc12b241
--- /dev/null
+++ b/source/blender/functions/FN_multi_function_procedure_optimization.hh
@@ -0,0 +1,61 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+/** \file
+ * \ingroup fn
+ *
+ * A #MFProcedure optimization pass takes an existing procedure and changes it in a way that
+ * improves its performance when executed.
+ *
+ * Oftentimes it would also be possible to implement a specific optimization directly during
+ * construction of the initial #MFProcedure. There is a trade-off between doing that or just
+ * building a "simple" procedure and then optimizing it uses separate optimization passes.
+ * - Doing optimizations directly during construction is typically faster than doing it as a
+ *   separate pass. However, it would be much harder to turn the optimization off when it is not
+ *   necessary, making the construction potentially slower in those cases.
+ * - Doing optimizations directly would also make code more complex, because it mixes the logic
+ *   that generates the procedure from some other data with optimization decisions.
+ * - Having a separate pass allows us to use it in different places when necessary.
+ * - Having a separate pass allows us to enable and disable it easily to better understand its
+ *   impact on performance.
+ */
+
+#include "FN_multi_function_procedure.hh"
+
+namespace blender::fn::procedure_optimization {
+
+/**
+ * When generating a procedure, destruct instructions (#MFDestructInstruction) have to be inserted
+ * for all variables that are not outputs. Often the simplest approach is to add these instructions
+ * at the very end. However, when the procedure is executed this is not optimal, because many more
+ * variables are initialized at the same time than necessary. This inhibits the reuse of memory
+ * buffers which decreases performance and increases memory use.
+ *
+ * This optimization pass moves destruct instructions up in the procedure. The goal is to destruct
+ * each variable right after its last use.
+ *
+ * For simplicity, and because this is the most common use case, this optimization currently only
+ * works on a single chain of instructions. Destruct instructions are not moved across branches.
+ *
+ * \param procedure The procedure that should be optimized.
+ * \param block_end_instr The instruction that points to the last instruction within a linear chain
+ *   of instructions. The algorithm moves instructions backward starting at this instruction.
+ */
+void move_destructs_up(MFProcedure &procedure, MFInstruction &block_end_instr);
+
+}  // namespace blender::fn::procedure_optimization
diff --git a/source/blender/functions/FN_multi_function_signature.hh b/source/blender/functions/FN_multi_function_signature.hh
index d05948cc645..3c991bc9c56 100644
--- a/source/blender/functions/FN_multi_function_signature.hh
+++ b/source/blender/functions/FN_multi_function_signature.hh
@@ -30,8 +30,15 @@
 namespace blender::fn {
 
 struct MFSignature {
-  std::string function_name;
-  Vector<std::string> param_names;
+  /**
+   * The name should be statically allocated so that it lives longer than this signature. This is
+   * used instead of an #std::string because of the overhead when many functions are created.
+   * If the name of the function has to be more dynamic for debugging purposes, override
+   * #MultiFunction::debug_name() instead. Then the dynamic name will only be computed when it is
+   * actually needed.
+   */
+  const char *function_name;
+  Vector<const char *> param_names;
   Vector<MFParamType> param_types;
   Vector<int> param_data_indices;
   bool depends_on_context = false;
@@ -51,9 +58,9 @@ class MFSignatureBuilder {
   int vector_array_count_ = 0;
 
  public:
-  MFSignatureBuilder(std::string function_name)
+  MFSignatureBuilder(const char *function_name)
   {
-    signature_.function_name = std::move(function_name);
+    signature_.function_name = function_name;
   }
 
   MFSignature build() const
@@ -63,23 +70,23 @@ class MFSignatureBuilder {
 
   /* Input Parameter Types */
 
-  template<typename T> void single_input(StringRef name)
+  template<typename T> void single_input(const char *name)
   {
     this->single_input(name, CPPType::get<T>());
   }
-  void single_input(StringRef name, const CPPType &type)
+  void single_input(const char *name, const CPPType &type)
   {
     this->input(name, MFDataType::ForSingle(type));
   }
-  template<typename T> void vector_input(StringRef name)
+  template<typename T> void vector_input(const char *name)
   {
     this->vector_input(name, CPPType::get<T>());
   }
-  void vector_input(StringRef name, const CPPType &base_type)
+  void vector_input(const char *name, const CPPType &base_type)
   {
     this->input(name, MFDataType::ForVector(base_type));
   }
-  void input(StringRef name, MFDataType data_type)
+  void input(const char *name, MFDataType data_type)
   {
     signature_.param_names.append(name);
     signature_.param_types.append(MFParamType(MFParamType::Input, data_type));
@@ -96,23 +103,23 @@ class MFSignatureBuilder {
 
   /* Output Parameter Types */
 
-  template<typename T> void single_output(StringRef name)
+  template<typename T> void single_output(const char *name)
   {
     this->single_output(name, CPPType::get<T>());
   }
-  void single_output(StringRef name, const CPPType &type)
+  void single_output(const char *name, const CPPType &type)
   {
     this->output(name, MFDataType::ForSingle(type));
   }
-  template<typename T> void vector_output(StringRef name)
+  template<typename T> void vector_output(const char *name)
   {
     this->vector_output(name, CPPType::get<T>());
   }
-  void vector_output(StringRef name, const CPPType &base_type)
+  void vector_output(const char *name, const CPPType &base_type)
   {
     this->output(name, MFDataType::ForVector(base_type));
   }
-  void output(StringRef name, MFDataType data_type)
+  void output(const char *name, MFDataType data_type)
   {
     signature_.param_names.append(name);
     signature_.param_types.append(MFParamType(MFParamType::Output, data_type));
@@ -129,23 +136,23 @@ class MFSignatureBuilder {
 
   /* Mutable Parameter Types */
 
-  template<typename T> void single_mutable(StringRef name)
+  template<typename T> void single_mutable(const char *name)
   {
     this->single_mutable(name, CPPType::get<T>());
   }
-  void single_mutable(StringRef name, const CPPType &type)
+  void single_mutable(const char *name, const CPPType &type)
   {
     this->mutable_(name, MFDataType::ForSingle(type));
   }
-  template<typename T> void vector_mutable(StringRef name)
+  template<typename T> void vector_mutable(const char *name)
   {
     this->vector_mutable(name, CPPType::get<T>());
   }
-  void vector_mutable(StringRef name, const CPPType &base_type)
+  void vector_mutable(const char *name, const CPPType &base_type)
   {
     this->mutable_(name, MFDataType::ForVector(base_type));
   }
-  void mutable_(StringRef name, MFDataType data_type)
+  void mutable_(const char *name, MFDataType data_type)
   {
     signature_.param_names.append(name);
     signature_.param_types.append(MFParamType(MFParamType::Mutable, data_type));
@@ -160,7 +167,7 @@ class MFSignatureBuilder {
     }
   }
 
-  void add(StringRef name, const MFParamType &param_type)
+  void add(const char *name, const MFParamType &param_type)
   {
     switch (param_type.interface_type()) {
       case MFParamType::Input:
diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc
index 68a8446e6ae..604e5c6d13f 100644
--- a/source/blender/functions/intern/field.cc
+++ b/source/blender/functions/intern/field.cc
@@ -21,7 +21,10 @@
 #include "BLI_vector_set.hh"
 
 #include "FN_field.hh"
-#include "FN_multi_function_parallel.hh"
+#include "FN_multi_function_procedure.hh"
+#include "FN_multi_function_procedure_builder.hh"
+#include "FN_multi_function_procedure_executor.hh"
+#include "FN_multi_function_procedure_optimization.hh"
 
 namespace blender::fn {
 
@@ -237,8 +240,7 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure,
     if (!already_output_variables.add(variable)) {
       /* One variable can be output at most once. To output the same value twice, we have to make
        * a copy first. */
-      const MultiFunction &copy_fn = scope.construct<CustomMF_GenericCopy>("copy",
-                                                                           variable->data_type());
+      const MultiFunction &copy_fn = scope.construct<CustomMF_GenericCopy>(variable->data_type());
       variable = builder.add_call<1>(copy_fn, {variable})[0];
     }
     builder.add_output_parameter(*variable);
@@ -253,30 +255,14 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure,
     builder.add_destruct(*variable);
   }
 
-  builder.add_return();
+  MFReturnInstruction &return_instr = builder.add_return();
+
+  procedure_optimization::move_destructs_up(procedure, return_instr);
 
   // std::cout << procedure.to_dot() << "\n";
   BLI_assert(procedure.validate());
 }
 
-/**
- * Evaluate fields in the given context. If possible, multiple fields should be evaluated together,
- * because that can be more efficient when they share common sub-fields.
- *
- * \param scope: The resource scope that owns data that makes up the output virtual arrays. Make
- *   sure the scope is not destructed when the output virtual arrays are still used.
- * \param fields_to_evaluate: The fields that should be evaluated together.
- * \param mask: Determines which indices are computed. The mask may be referenced by the returned
- *   virtual arrays. So the underlying indices (if applicable) should live longer then #scope.
- * \param context: The context that the field is evaluated in. Used to retrieve data from each
- *   #FieldInput in the field network.
- * \param dst_varrays: If provided, the computed data will be written into those virtual arrays
- *   instead of into newly created ones. That allows making the computed data live longer than
- *   #scope and is more efficient when the data will be written into those virtual arrays
- *   later anyway.
- * \return The computed virtual arrays for each provided field. If #dst_varrays is passed, the
- *   provided virtual arrays are returned.
- */
 Vector<GVArray> evaluate_fields(ResourceScope &scope,
                                 Span<GFieldRef> fields_to_evaluate,
                                 IndexMask mask,
@@ -358,14 +344,9 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope,
     MFProcedure procedure;
     build_multi_function_procedure_for_fields(
         procedure, scope, field_tree_info, varying_fields_to_evaluate);
-    MFProcedureExecutor procedure_executor{"Procedure", procedure};
-    /* Add multi threading capabilities to the field evaluation. */
-    const int grain_size = 10000;
-    fn::ParallelMultiFunction parallel_procedure_executor{procedure_executor, grain_size};
-    /* Utility variable to make easy to switch the executor. */
-    const MultiFunction &executor_fn = parallel_procedure_executor;
-
-    MFParamsBuilder mf_params{executor_fn, &mask};
+    MFProcedureExecutor procedure_executor{procedure};
+
+    MFParamsBuilder mf_params{procedure_executor, &mask};
     MFContextBuilder mf_context;
 
     /* Provide inputs to the procedure executor. */
@@ -406,7 +387,7 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope,
       mf_params.add_uninitialized_single_output(span);
     }
 
-    executor_fn.call(mask, mf_params, mf_context);
+    procedure_executor.call_auto(mask, mf_params, mf_context);
   }
 
   /* Evaluate constant fields if necessary. */
@@ -415,7 +396,7 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope,
     MFProcedure procedure;
     build_multi_function_procedure_for_fields(
         procedure, scope, field_tree_info, constant_fields_to_evaluate);
-    MFProcedureExecutor procedure_executor{"Procedure", procedure};
+    MFProcedureExecutor procedure_executor{procedure};
     MFParamsBuilder mf_params{procedure_executor, 1};
     MFContextBuilder mf_context;
 
@@ -495,15 +476,6 @@ void evaluate_constant_field(const GField &field, void *r_value)
   varrays[0].get_to_uninitialized(0, r_value);
 }
 
-/**
- * If the field depends on some input, the same field is returned.
- * Otherwise the field is evaluated and a new field is created that just computes this constant.
- *
- * Making the field constant has two benefits:
- * - The field-tree becomes a single node, which is more efficient when the field is evaluated many
- *   times.
- * - Memory of the input fields may be freed.
- */
 GField make_field_constant_if_possible(GField field)
 {
   if (field.node().depends_on_input()) {
@@ -512,10 +484,16 @@ GField make_field_constant_if_possible(GField field)
   const CPPType &type = field.cpp_type();
   BUFFER_FOR_CPP_TYPE_VALUE(type, buffer);
   evaluate_constant_field(field, buffer);
-  auto constant_fn = std::make_unique<CustomMF_GenericConstant>(type, buffer, true);
+  GField new_field = make_constant_field(type, buffer);
   type.destruct(buffer);
+  return new_field;
+}
+
+GField make_constant_field(const CPPType &type, const void *value)
+{
+  auto constant_fn = std::make_unique<CustomMF_GenericConstant>(type, value, true);
   auto operation = std::make_shared<FieldOperation>(std::move(constant_fn));
-  return GField{operation, 0};
+  return GField{std::move(operation), 0};
 }
 
 GVArray FieldContext::get_varray_for_input(const FieldInput &field_input,
@@ -532,7 +510,7 @@ IndexFieldInput::IndexFieldInput() : FieldInput(CPPType::get<int>(), "Index")
   category_ = Category::Generated;
 }
 
-GVArray IndexFieldInput::get_index_varray(IndexMask mask, ResourceScope &UNUSED(scope))
+GVArray IndexFieldInput::get_index_varray(IndexMask mask)
 {
   auto index_func = [](int i) { return i; };
   return VArray<int>::ForFunc(mask.min_array_size(), index_func);
@@ -540,10 +518,10 @@ GVArray IndexFieldInput::get_index_varray(IndexMask mask, ResourceScope &UNUSED(
 
 GVArray IndexFieldInput::get_varray_for_context(const fn::FieldContext &UNUSED(context),
                                                 IndexMask mask,
-                                                ResourceScope &scope) const
+                                                ResourceScope &UNUSED(scope)) const
 {
   /* TODO: Investigate a similar method to IndexRange::as_span() */
-  return get_index_varray(mask, scope);
+  return get_index_varray(mask);
 }
 
 uint64_t IndexFieldInput::hash() const
@@ -568,28 +546,65 @@ FieldOperation::FieldOperation(std::shared_ptr<const MultiFunction> function,
   owned_function_ = std::move(function);
 }
 
-static bool any_field_depends_on_input(Span<GField> fields)
+/**
+ * Returns the field inputs used by all the provided fields.
+ * This tries to reuse an existing #FieldInputs whenever possible to avoid copying it.
+ */
+static std::shared_ptr<const FieldInputs> combine_field_inputs(Span<GField> fields)
 {
+  /* The #FieldInputs that we try to reuse if possible. */
+  const std::shared_ptr<const FieldInputs> *field_inputs_candidate = nullptr;
   for (const GField &field : fields) {
-    if (field.node().depends_on_input()) {
-      return true;
+    const std::shared_ptr<const FieldInputs> &field_inputs = field.node().field_inputs();
+    /* Only try to reuse non-empty #FieldInputs. */
+    if (field_inputs && !field_inputs->nodes.is_empty()) {
+      if (field_inputs_candidate == nullptr) {
+        field_inputs_candidate = &field_inputs;
+      }
+      else if ((*field_inputs_candidate)->nodes.size() < field_inputs->nodes.size()) {
+        /* Always try to reuse the #FieldInputs that has the most nodes already. */
+        field_inputs_candidate = &field_inputs;
+      }
+    }
+  }
+  if (field_inputs_candidate == nullptr) {
+    /* None of the field depends on an input. */
+    return {};
+  }
+  /* Check if all inputs are in the candidate. */
+  Vector<const FieldInput *> inputs_not_in_candidate;
+  for (const GField &field : fields) {
+    const std::shared_ptr<const FieldInputs> &field_inputs = field.node().field_inputs();
+    if (!field_inputs) {
+      continue;
+    }
+    if (&field_inputs == field_inputs_candidate) {
+      continue;
     }
+    for (const FieldInput *field_input : field_inputs->nodes) {
+      if (!(*field_inputs_candidate)->nodes.contains(field_input)) {
+        inputs_not_in_candidate.append(field_input);
+      }
+    }
+  }
+  if (inputs_not_in_candidate.is_empty()) {
+    /* The existing #FieldInputs can be reused, because no other field has additional inputs. */
+    return *field_inputs_candidate;
   }
-  return false;
+  /* Create new #FieldInputs that contains all of the inputs that the fields depend on. */
+  std::shared_ptr<FieldInputs> new_field_inputs = std::make_shared<FieldInputs>(
+      **field_inputs_candidate);
+  for (const FieldInput *field_input : inputs_not_in_candidate) {
+    new_field_inputs->nodes.add(field_input);
+    new_field_inputs->deduplicated_nodes.add(*field_input);
+  }
+  return new_field_inputs;
 }
 
 FieldOperation::FieldOperation(const MultiFunction &function, Vector<GField> inputs)
-    : FieldNode(false, any_field_depends_on_input(inputs)),
-      function_(&function),
-      inputs_(std::move(inputs))
+    : FieldNode(false), function_(&function), inputs_(std::move(inputs))
 {
-}
-
-void FieldOperation::foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const
-{
-  for (const GField &field : inputs_) {
-    field.node().foreach_field_input(foreach_fn);
-  }
+  field_inputs_ = combine_field_inputs(inputs_);
 }
 
 /* --------------------------------------------------------------------
@@ -597,20 +612,19 @@ void FieldOperation::foreach_field_input(FunctionRef<void(const FieldInput &)> f
  */
 
 FieldInput::FieldInput(const CPPType &type, std::string debug_name)
-    : FieldNode(true, true), type_(&type), debug_name_(std::move(debug_name))
-{
-}
-
-void FieldInput::foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const
+    : FieldNode(true), type_(&type), debug_name_(std::move(debug_name))
 {
-  foreach_fn(*this);
+  std::shared_ptr<FieldInputs> field_inputs = std::make_shared<FieldInputs>();
+  field_inputs->nodes.add_new(this);
+  field_inputs->deduplicated_nodes.add_new(*this);
+  field_inputs_ = std::move(field_inputs);
 }
 
 /* --------------------------------------------------------------------
  * FieldEvaluator.
  */
 
-static Vector<int64_t> indices_from_selection(const VArray<bool> &selection)
+static Vector<int64_t> indices_from_selection(IndexMask mask, const VArray<bool> &selection)
 {
   /* If the selection is just a single value, it's best to avoid calling this
    * function when constructing an IndexMask and use an IndexRange instead. */
@@ -619,14 +633,14 @@ static Vector<int64_t> indices_from_selection(const VArray<bool> &selection)
   Vector<int64_t> indices;
   if (selection.is_span()) {
     Span<bool> span = selection.get_internal_span();
-    for (const int64_t i : span.index_range()) {
+    for (const int64_t i : mask) {
       if (span[i]) {
         indices.append(i);
       }
     }
   }
   else {
-    for (const int i : selection.index_range()) {
+    for (const int i : mask) {
       if (selection[i]) {
         indices.append(i);
       }
@@ -667,14 +681,36 @@ int FieldEvaluator::add(GField field)
   return field_index;
 }
 
+static IndexMask evaluate_selection(const Field<bool> &selection_field,
+                                    const FieldContext &context,
+                                    IndexMask full_mask,
+                                    ResourceScope &scope)
+{
+  if (selection_field) {
+    VArray<bool> selection =
+        evaluate_fields(scope, {selection_field}, full_mask, context)[0].typed<bool>();
+    if (selection.is_single()) {
+      if (selection.get_internal_single()) {
+        return full_mask;
+      }
+      return IndexRange(0);
+    }
+    return scope.add_value(indices_from_selection(full_mask, selection)).as_span();
+  }
+  return full_mask;
+}
+
 void FieldEvaluator::evaluate()
 {
   BLI_assert_msg(!is_evaluated_, "Cannot evaluate fields twice.");
+
+  selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_);
+
   Array<GFieldRef> fields(fields_to_evaluate_.size());
   for (const int i : fields_to_evaluate_.index_range()) {
     fields[i] = fields_to_evaluate_[i];
   }
-  evaluated_varrays_ = evaluate_fields(scope_, fields, mask_, context_, dst_varrays_);
+  evaluated_varrays_ = evaluate_fields(scope_, fields, selection_mask_, context_, dst_varrays_);
   BLI_assert(fields_to_evaluate_.size() == evaluated_varrays_.size());
   for (const int i : fields_to_evaluate_.index_range()) {
     OutputPointerInfo &info = output_pointer_infos_[i];
@@ -696,7 +732,13 @@ IndexMask FieldEvaluator::get_evaluated_as_mask(const int field_index)
     return IndexRange(0);
   }
 
-  return scope_.add_value(indices_from_selection(varray)).as_span();
+  return scope_.add_value(indices_from_selection(mask_, varray)).as_span();
+}
+
+IndexMask FieldEvaluator::get_evaluated_selection_as_mask()
+{
+  BLI_assert(is_evaluated_);
+  return selection_mask_;
 }
 
 }  // namespace blender::fn
diff --git a/source/blender/functions/intern/generic_virtual_array.cc b/source/blender/functions/intern/generic_virtual_array.cc
index 1fe1c2fc229..b4180a885e7 100644
--- a/source/blender/functions/intern/generic_virtual_array.cc
+++ b/source/blender/functions/intern/generic_virtual_array.cc
@@ -139,107 +139,56 @@ bool GVMutableArrayImpl::try_assign_VMutableArray(void *UNUSED(varray)) const
 /** \name #GVArrayImpl_For_GSpan
  * \{ */
 
-GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const GSpan span)
-    : GVArrayImpl(span.type(), span.size()), data_(span.data()), element_size_(span.type().size())
-{
-}
-
-GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size)
-    : GVArrayImpl(type, size), element_size_(type.size())
-{
-}
-
-void GVArrayImpl_For_GSpan::get(const int64_t index, void *r_value) const
-{
-  type_->copy_assign(POINTER_OFFSET(data_, element_size_ * index), r_value);
-}
-
-void GVArrayImpl_For_GSpan::get_to_uninitialized(const int64_t index, void *r_value) const
-{
-  type_->copy_construct(POINTER_OFFSET(data_, element_size_ * index), r_value);
-}
-
-bool GVArrayImpl_For_GSpan::is_span() const
-{
-  return true;
-}
-
-GSpan GVArrayImpl_For_GSpan::get_internal_span() const
-{
-  return GSpan(*type_, data_, size_);
-}
-
-/** See #VArrayImpl_For_Span_final. */
-class GVArrayImpl_For_GSpan_final final : public GVArrayImpl_For_GSpan {
- public:
-  using GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan;
-
- private:
-  bool may_have_ownership() const override
-  {
-    return false;
-  }
-};
-
-/** \} */
-
-/* -------------------------------------------------------------------- */
-/** \name #GVMutableArrayImpl_For_GMutableSpan
- * \{ */
-
-GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan(const GMutableSpan span)
+GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const GMutableSpan span)
     : GVMutableArrayImpl(span.type(), span.size()),
       data_(span.data()),
       element_size_(span.type().size())
 {
 }
 
-GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan(const CPPType &type,
-                                                                         const int64_t size)
+GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size)
     : GVMutableArrayImpl(type, size), element_size_(type.size())
 {
 }
 
-void GVMutableArrayImpl_For_GMutableSpan::get(const int64_t index, void *r_value) const
+void GVArrayImpl_For_GSpan::get(const int64_t index, void *r_value) const
 {
   type_->copy_assign(POINTER_OFFSET(data_, element_size_ * index), r_value);
 }
 
-void GVMutableArrayImpl_For_GMutableSpan::get_to_uninitialized(const int64_t index,
-                                                               void *r_value) const
+void GVArrayImpl_For_GSpan::get_to_uninitialized(const int64_t index, void *r_value) const
 {
   type_->copy_construct(POINTER_OFFSET(data_, element_size_ * index), r_value);
 }
 
-void GVMutableArrayImpl_For_GMutableSpan::set_by_copy(const int64_t index, const void *value)
+void GVArrayImpl_For_GSpan::set_by_copy(const int64_t index, const void *value)
 {
   type_->copy_assign(value, POINTER_OFFSET(data_, element_size_ * index));
 }
 
-void GVMutableArrayImpl_For_GMutableSpan::set_by_move(const int64_t index, void *value)
+void GVArrayImpl_For_GSpan::set_by_move(const int64_t index, void *value)
 {
   type_->move_construct(value, POINTER_OFFSET(data_, element_size_ * index));
 }
 
-void GVMutableArrayImpl_For_GMutableSpan::set_by_relocate(const int64_t index, void *value)
+void GVArrayImpl_For_GSpan::set_by_relocate(const int64_t index, void *value)
 {
   type_->relocate_assign(value, POINTER_OFFSET(data_, element_size_ * index));
 }
 
-bool GVMutableArrayImpl_For_GMutableSpan::is_span() const
+bool GVArrayImpl_For_GSpan::is_span() const
 {
   return true;
 }
 
-GSpan GVMutableArrayImpl_For_GMutableSpan::get_internal_span() const
+GSpan GVArrayImpl_For_GSpan::get_internal_span() const
 {
   return GSpan(*type_, data_, size_);
 }
 
-class GVMutableArrayImpl_For_GMutableSpan_final final
-    : public GVMutableArrayImpl_For_GMutableSpan {
+class GVArrayImpl_For_GSpan_final final : public GVArrayImpl_For_GSpan {
  public:
-  using GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan;
+  using GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan;
 
  private:
   bool may_have_ownership() const override
@@ -337,6 +286,57 @@ class GVArrayImpl_For_SingleValue : public GVArrayImpl_For_SingleValueRef,
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name #GVArrayImpl_For_SmallTrivialSingleValue
+ * \{ */
+
+/**
+ * Contains an inline buffer that can store a single value of a trivial type.
+ * This avoids the allocation that would be done by #GVArrayImpl_For_SingleValue.
+ */
+template<int BufferSize> class GVArrayImpl_For_SmallTrivialSingleValue : public GVArrayImpl {
+ private:
+  AlignedBuffer<BufferSize, 8> buffer_;
+
+ public:
+  GVArrayImpl_For_SmallTrivialSingleValue(const CPPType &type,
+                                          const int64_t size,
+                                          const void *value)
+      : GVArrayImpl(type, size)
+  {
+    BLI_assert(type.is_trivial());
+    BLI_assert(type.alignment() <= 8);
+    BLI_assert(type.size() <= BufferSize);
+    type.copy_construct(value, &buffer_);
+  }
+
+ private:
+  void get(const int64_t UNUSED(index), void *r_value) const override
+  {
+    this->copy_value_to(r_value);
+  }
+  void get_to_uninitialized(const int64_t UNUSED(index), void *r_value) const override
+  {
+    this->copy_value_to(r_value);
+  }
+
+  bool is_single() const override
+  {
+    return true;
+  }
+  void get_internal_single(void *r_value) const override
+  {
+    this->copy_value_to(r_value);
+  }
+
+  void copy_value_to(void *dst) const
+  {
+    memcpy(dst, &buffer_, type_->size());
+  }
+};
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name #GVArray_GSpan
  * \{ */
 
@@ -426,12 +426,14 @@ class GVArrayImpl_For_SlicedGVArray : public GVArrayImpl {
  protected:
   GVArray varray_;
   int64_t offset_;
+  IndexRange slice_;
 
  public:
   GVArrayImpl_For_SlicedGVArray(GVArray varray, const IndexRange slice)
       : GVArrayImpl(varray.type(), slice.size()),
         varray_(std::move(varray)),
-        offset_(slice.start())
+        offset_(slice.start()),
+        slice_(slice)
   {
     BLI_assert(slice.one_after_last() <= varray_.size());
   }
@@ -445,6 +447,24 @@ class GVArrayImpl_For_SlicedGVArray : public GVArrayImpl {
   {
     varray_.get_to_uninitialized(index + offset_, r_value);
   }
+
+  bool is_span() const override
+  {
+    return varray_.is_span();
+  }
+  GSpan get_internal_span() const override
+  {
+    return varray_.get_internal_span().slice(slice_);
+  }
+
+  bool is_single() const override
+  {
+    return varray_.is_single();
+  }
+  void get_internal_single(void *r_value) const override
+  {
+    varray_.get_internal_single(r_value);
+  }
 };
 
 /** \} */
@@ -527,49 +547,28 @@ void GVArrayCommon::move_from(GVArrayCommon &&other) noexcept
   other.impl_ = nullptr;
 }
 
-/* Returns true when the virtual array is stored as a span internally. */
 bool GVArrayCommon::is_span() const
 {
-  if (this->is_empty()) {
-    return true;
-  }
   return impl_->is_span();
 }
 
-/* Returns the internally used span of the virtual array. This invokes undefined behavior is the
- * virtual array is not stored as a span internally. */
 GSpan GVArrayCommon::get_internal_span() const
 {
   BLI_assert(this->is_span());
-  if (this->is_empty()) {
-    return GSpan(impl_->type());
-  }
   return impl_->get_internal_span();
 }
 
-/* Returns true when the virtual array returns the same value for every index. */
 bool GVArrayCommon::is_single() const
 {
-  if (impl_->size() == 1) {
-    return true;
-  }
   return impl_->is_single();
 }
 
-/* Copies the value that is used for every element into `r_value`, which is expected to point to
- * initialized memory. This invokes undefined behavior if the virtual array would not return the
- * same value for every index. */
 void GVArrayCommon::get_internal_single(void *r_value) const
 {
   BLI_assert(this->is_single());
-  if (impl_->size() == 1) {
-    impl_->get(0, r_value);
-    return;
-  }
   impl_->get_internal_single(r_value);
 }
 
-/* Same as `get_internal_single`, but `r_value` points to initialized memory. */
 void GVArrayCommon::get_internal_single_to_uninitialized(void *r_value) const
 {
   impl_->type().default_construct(r_value);
@@ -606,6 +605,9 @@ GVArray::GVArray(std::shared_ptr<const GVArrayImpl> impl) : GVArrayCommon(std::m
 
 GVArray GVArray::ForSingle(const CPPType &type, const int64_t size, const void *value)
 {
+  if (type.is_trivial() && type.size() <= 16 && type.alignment() <= 8) {
+    return GVArray::For<GVArrayImpl_For_SmallTrivialSingleValue<16>>(type, size, value);
+  }
   return GVArray::For<GVArrayImpl_For_SingleValue>(type, size, value);
 }
 
@@ -621,7 +623,10 @@ GVArray GVArray::ForSingleDefault(const CPPType &type, const int64_t size)
 
 GVArray GVArray::ForSpan(GSpan span)
 {
-  return GVArray::For<GVArrayImpl_For_GSpan_final>(span);
+  /* Use const-cast because the underlying virtual array implementation is shared between const
+   * and non const data. */
+  GMutableSpan mutable_span{span.type(), const_cast<void *>(span.data()), span.size()};
+  return GVArray::For<GVArrayImpl_For_GSpan_final>(mutable_span);
 }
 
 class GVArrayImpl_For_GArray : public GVArrayImpl_For_GSpan {
@@ -630,7 +635,7 @@ class GVArrayImpl_For_GArray : public GVArrayImpl_For_GSpan {
 
  public:
   GVArrayImpl_For_GArray(GArray<> array)
-      : GVArrayImpl_For_GSpan(array.as_span()), array_(std::move(array))
+      : GVArrayImpl_For_GSpan(array.as_mutable_span()), array_(std::move(array))
   {
   }
 };
@@ -682,7 +687,7 @@ GVMutableArray::GVMutableArray(std::shared_ptr<GVMutableArrayImpl> impl)
 
 GVMutableArray GVMutableArray::ForSpan(GMutableSpan span)
 {
-  return GVMutableArray::For<GVMutableArrayImpl_For_GMutableSpan_final>(span);
+  return GVMutableArray::For<GVArrayImpl_For_GSpan_final>(span);
 }
 
 GVMutableArray::operator GVArray() const &
@@ -716,7 +721,6 @@ GVMutableArrayImpl *GVMutableArray::get_implementation() const
   return this->get_impl();
 }
 
-/* Copy the values from the source buffer to all elements in the virtual array. */
 void GVMutableArray::set_all(const void *src)
 {
   this->get_impl()->set_all(src);
diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc
index 43eacdcd2a1..837ccc4f4fb 100644
--- a/source/blender/functions/intern/multi_function.cc
+++ b/source/blender/functions/intern/multi_function.cc
@@ -16,6 +16,138 @@
 
 #include "FN_multi_function.hh"
 
+#include "BLI_task.hh"
+#include "BLI_threads.h"
+
 namespace blender::fn {
 
+using ExecutionHints = MultiFunction::ExecutionHints;
+
+ExecutionHints MultiFunction::execution_hints() const
+{
+  return this->get_execution_hints();
+}
+
+ExecutionHints MultiFunction::get_execution_hints() const
+{
+  return ExecutionHints{};
+}
+
+static bool supports_threading_by_slicing_params(const MultiFunction &fn)
+{
+  for (const int i : fn.param_indices()) {
+    const MFParamType param_type = fn.param_type(i);
+    if (ELEM(param_type.interface_type(),
+             MFParamType::InterfaceType::Mutable,
+             MFParamType::InterfaceType::Output)) {
+      if (param_type.data_type().is_vector()) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+static int64_t compute_grain_size(const ExecutionHints &hints, const IndexMask mask)
+{
+  int64_t grain_size = hints.min_grain_size;
+  if (hints.uniform_execution_time) {
+    const int thread_count = BLI_system_thread_count();
+    /* Avoid using a small grain size even if it is not necessary. */
+    const int64_t thread_based_grain_size = mask.size() / thread_count / 4;
+    grain_size = std::max(grain_size, thread_based_grain_size);
+  }
+  if (hints.allocates_array) {
+    const int64_t max_grain_size = 10000;
+    /* Avoid allocating many large intermediate arrays. Better process data in smaller chunks to
+     * keep peak memory usage lower. */
+    grain_size = std::min(grain_size, max_grain_size);
+  }
+  return grain_size;
+}
+
+void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context) const
+{
+  if (mask.is_empty()) {
+    return;
+  }
+  const ExecutionHints hints = this->execution_hints();
+  const int64_t grain_size = compute_grain_size(hints, mask);
+
+  if (mask.size() <= grain_size) {
+    this->call(mask, params, context);
+    return;
+  }
+
+  const bool supports_threading = supports_threading_by_slicing_params(*this);
+  if (!supports_threading) {
+    this->call(mask, params, context);
+    return;
+  }
+
+  threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) {
+    const IndexMask sliced_mask = mask.slice(sub_range);
+    if (!hints.allocates_array) {
+      /* There is no benefit to changing indices in this case. */
+      this->call(sliced_mask, params, context);
+      return;
+    }
+    if (sliced_mask[0] < grain_size) {
+      /* The indices are low, no need to offset them. */
+      this->call(sliced_mask, params, context);
+      return;
+    }
+    const int64_t input_slice_start = sliced_mask[0];
+    const int64_t input_slice_size = sliced_mask.last() - input_slice_start + 1;
+    const IndexRange input_slice_range{input_slice_start, input_slice_size};
+
+    Vector<int64_t> offset_mask_indices;
+    const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset_mask_indices);
+
+    MFParamsBuilder offset_params{*this, offset_mask.min_array_size()};
+
+    /* Slice all parameters so that for the actual function call. */
+    for (const int param_index : this->param_indices()) {
+      const MFParamType param_type = this->param_type(param_index);
+      switch (param_type.category()) {
+        case MFParamType::SingleInput: {
+          const GVArray &varray = params.readonly_single_input(param_index);
+          offset_params.add_readonly_single_input(varray.slice(input_slice_range));
+          break;
+        }
+        case MFParamType::SingleMutable: {
+          const GMutableSpan span = params.single_mutable(param_index);
+          const GMutableSpan sliced_span = span.slice(input_slice_range);
+          offset_params.add_single_mutable(sliced_span);
+          break;
+        }
+        case MFParamType::SingleOutput: {
+          const GMutableSpan span = params.uninitialized_single_output_if_required(param_index);
+          if (span.is_empty()) {
+            offset_params.add_ignored_single_output();
+          }
+          else {
+            const GMutableSpan sliced_span = span.slice(input_slice_range);
+            offset_params.add_uninitialized_single_output(sliced_span);
+          }
+          break;
+        }
+        case MFParamType::VectorInput:
+        case MFParamType::VectorMutable:
+        case MFParamType::VectorOutput: {
+          BLI_assert_unreachable();
+          break;
+        }
+      }
+    }
+
+    this->call(offset_mask, offset_params, context);
+  });
+}
+
+std::string MultiFunction::debug_name() const
+{
+  return signature_ref_->function_name;
+}
+
 }  // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_builder.cc b/source/blender/functions/intern/multi_function_builder.cc
index f891f162820..24f9bbe0179 100644
--- a/source/blender/functions/intern/multi_function_builder.cc
+++ b/source/blender/functions/intern/multi_function_builder.cc
@@ -32,10 +32,8 @@ CustomMF_GenericConstant::CustomMF_GenericConstant(const CPPType &type,
   }
   value_ = value;
 
-  MFSignatureBuilder signature{"Constant " + type.name()};
-  std::stringstream ss;
-  type.print_or_default(value, ss, type.name());
-  signature.single_output(ss.str(), type);
+  MFSignatureBuilder signature{"Constant"};
+  signature.single_output("Value", type);
   signature_ = signature.build();
   this->set_signature(&signature_);
 }
@@ -73,28 +71,11 @@ bool CustomMF_GenericConstant::equals(const MultiFunction &other) const
   return type_.is_equal(value_, _other->value_);
 }
 
-static std::string gspan_to_string(GSpan array)
-{
-  const CPPType &type = array.type();
-  std::stringstream ss;
-  ss << "[";
-  const int64_t max_amount = 5;
-  for (int64_t i : IndexRange(std::min(max_amount, array.size()))) {
-    type.print_or_default(array[i], ss, type.name());
-    ss << ", ";
-  }
-  if (max_amount < array.size()) {
-    ss << "...";
-  }
-  ss << "]";
-  return ss.str();
-}
-
 CustomMF_GenericConstantArray::CustomMF_GenericConstantArray(GSpan array) : array_(array)
 {
   const CPPType &type = array.type();
-  MFSignatureBuilder signature{"Constant " + type.name() + " Vector"};
-  signature.vector_output(gspan_to_string(array), type);
+  MFSignatureBuilder signature{"Constant Vector"};
+  signature.vector_output("Value", type);
   signature_ = signature.build();
   this->set_signature(&signature_);
 }
@@ -109,12 +90,11 @@ void CustomMF_GenericConstantArray::call(IndexMask mask,
   }
 }
 
-CustomMF_DefaultOutput::CustomMF_DefaultOutput(StringRef name,
-                                               Span<MFDataType> input_types,
+CustomMF_DefaultOutput::CustomMF_DefaultOutput(Span<MFDataType> input_types,
                                                Span<MFDataType> output_types)
     : output_amount_(output_types.size())
 {
-  MFSignatureBuilder signature{name};
+  MFSignatureBuilder signature{"Default Output"};
   for (MFDataType data_type : input_types) {
     signature.input("Input", data_type);
   }
@@ -140,9 +120,9 @@ void CustomMF_DefaultOutput::call(IndexMask mask, MFParams params, MFContext UNU
   }
 }
 
-CustomMF_GenericCopy::CustomMF_GenericCopy(StringRef name, MFDataType data_type)
+CustomMF_GenericCopy::CustomMF_GenericCopy(MFDataType data_type)
 {
-  MFSignatureBuilder signature{name};
+  MFSignatureBuilder signature{"Copy"};
   signature.input("Input", data_type);
   signature.output("Output", data_type);
   signature_ = signature.build();
diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc
deleted file mode 100644
index eefe647644d..00000000000
--- a/source/blender/functions/intern/multi_function_parallel.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "FN_multi_function_parallel.hh"
-
-#include "BLI_task.hh"
-
-namespace blender::fn {
-
-ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
-    : fn_(fn), grain_size_(grain_size)
-{
-  this->set_signature(&fn.signature());
-
-  threading_supported_ = true;
-  for (const int param_index : fn.param_indices()) {
-    const MFParamType param_type = fn.param_type(param_index);
-    if (param_type.data_type().category() == MFDataType::Vector) {
-      /* Vector parameters do not support threading yet. */
-      threading_supported_ = false;
-      break;
-    }
-  }
-}
-
-void ParallelMultiFunction::call(IndexMask full_mask, MFParams params, MFContext context) const
-{
-  if (full_mask.size() <= grain_size_ || !threading_supported_) {
-    fn_.call(full_mask, params, context);
-    return;
-  }
-
-  threading::parallel_for(full_mask.index_range(), grain_size_, [&](const IndexRange mask_slice) {
-    Vector<int64_t> sub_mask_indices;
-    const IndexMask sub_mask = full_mask.slice_and_offset(mask_slice, sub_mask_indices);
-    if (sub_mask.is_empty()) {
-      return;
-    }
-    const int64_t input_slice_start = full_mask[mask_slice.first()];
-    const int64_t input_slice_size = full_mask[mask_slice.last()] - input_slice_start + 1;
-    const IndexRange input_slice_range{input_slice_start, input_slice_size};
-
-    MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()};
-
-    /* All parameters are sliced so that the wrapped multi-function does not have to take care of
-     * the index offset. */
-    for (const int param_index : fn_.param_indices()) {
-      const MFParamType param_type = fn_.param_type(param_index);
-      switch (param_type.category()) {
-        case MFParamType::SingleInput: {
-          const GVArray &varray = params.readonly_single_input(param_index);
-          sub_params.add_readonly_single_input(varray.slice(input_slice_range));
-          break;
-        }
-        case MFParamType::SingleMutable: {
-          const GMutableSpan span = params.single_mutable(param_index);
-          const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
-          sub_params.add_single_mutable(sliced_span);
-          break;
-        }
-        case MFParamType::SingleOutput: {
-          const GMutableSpan span = params.uninitialized_single_output(param_index);
-          const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
-          sub_params.add_uninitialized_single_output(sliced_span);
-          break;
-        }
-        case MFParamType::VectorInput:
-        case MFParamType::VectorMutable:
-        case MFParamType::VectorOutput: {
-          BLI_assert_unreachable();
-          break;
-        }
-      }
-    }
-
-    fn_.call(sub_mask, sub_params, context);
-  });
-}
-
-}  // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_params.cc b/source/blender/functions/intern/multi_function_params.cc
new file mode 100644
index 00000000000..376c5b2deb7
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_params.cc
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_params.hh"
+
+namespace blender::fn {
+
+GMutableSpan MFParams::ensure_dummy_single_output(int data_index)
+{
+  /* Lock because we are actually modifying #builder_ and it may be used by multiple threads. */
+  std::lock_guard lock{builder_->mutex_};
+
+  for (const std::pair<int, GMutableSpan> &items : builder_->dummy_output_spans_) {
+    if (items.first == data_index) {
+      return items.second;
+    }
+  }
+
+  const CPPType &type = builder_->mutable_spans_[data_index].type();
+  void *buffer = builder_->scope_.linear_allocator().allocate(
+      builder_->min_array_size_ * type.size(), type.alignment());
+  if (!type.is_trivially_destructible()) {
+    builder_->scope_.add_destruct_call(
+        [&type, buffer, mask = builder_->mask_]() { type.destruct_indices(buffer, mask); });
+  }
+  const GMutableSpan span{type, buffer, builder_->min_array_size_};
+  builder_->dummy_output_spans_.append({data_index, span});
+  return span;
+}
+
+}  // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_procedure.cc b/source/blender/functions/intern/multi_function_procedure.cc
index 986c5dff0c4..804beb7d66f 100644
--- a/source/blender/functions/intern/multi_function_procedure.cc
+++ b/source/blender/functions/intern/multi_function_procedure.cc
@@ -782,7 +782,7 @@ class MFProcedureDotExport {
   void instruction_to_string(const MFCallInstruction &instruction, std::stringstream &ss)
   {
     const MultiFunction &fn = instruction.fn();
-    this->instruction_name_format(fn.name() + ": ", ss);
+    this->instruction_name_format(fn.debug_name() + ": ", ss);
     for (const int param_index : fn.param_indices()) {
       const MFParamType param_type = fn.param_type(param_index);
       const MFVariable *variable = instruction.params()[param_index];
diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc
index 85d0cf4909f..47643ed22ee 100644
--- a/source/blender/functions/intern/multi_function_procedure_executor.cc
+++ b/source/blender/functions/intern/multi_function_procedure_executor.cc
@@ -20,13 +20,12 @@
 
 namespace blender::fn {
 
-MFProcedureExecutor::MFProcedureExecutor(std::string name, const MFProcedure &procedure)
-    : procedure_(procedure)
+MFProcedureExecutor::MFProcedureExecutor(const MFProcedure &procedure) : procedure_(procedure)
 {
-  MFSignatureBuilder signature(std::move(name));
+  MFSignatureBuilder signature("Procedure Executor");
 
   for (const ConstMFParameter &param : procedure.params()) {
-    signature.add(param.variable->name(), MFParamType(param.type, param.variable->data_type()));
+    signature.add("Parameter", MFParamType(param.type, param.variable->data_type()));
   }
 
   signature_ = signature.build();
@@ -140,32 +139,36 @@ class VariableState;
  */
 class ValueAllocator : NonCopyable, NonMovable {
  private:
-  /* Allocate with 64 byte alignment for better reusability of buffers and improved cache
-   * performance. */
+  /**
+   * Allocate with 64 byte alignment for better reusability of buffers and improved cache
+   * performance.
+   */
   static constexpr inline int min_alignment = 64;
 
-  /* Use stacks so that the most recently used buffers are reused first. This improves cache
-   * efficiency. */
-  std::array<Stack<VariableValue *>, tot_variable_value_types> values_free_lists_;
-  /* The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are
-   * aligned to #min_alignment bytes. */
+  /** All buffers in the free-lists below have been allocated with this allocator. */
+  LinearAllocator<> &linear_allocator_;
+
+  /**
+   * Use stacks so that the most recently used buffers are reused first. This improves cache
+   * efficiency.
+   */
+  std::array<Stack<VariableValue *>, tot_variable_value_types> variable_value_free_lists_;
+
+  /**
+   * The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are
+   * aligned to #min_alignment bytes.
+   */
   Map<int, Stack<void *>> span_buffers_free_list_;
 
- public:
-  ValueAllocator() = default;
+  /** Cache buffers for single values of different types. */
+  Map<const CPPType *, Stack<void *>> single_value_free_lists_;
 
-  ~ValueAllocator()
+  /** The cached memory buffers can hold #VariableState values. */
+  Stack<void *> variable_state_free_list_;
+
+ public:
+  ValueAllocator(LinearAllocator<> &linear_allocator) : linear_allocator_(linear_allocator)
   {
-    for (Stack<VariableValue *> &stack : values_free_lists_) {
-      while (!stack.is_empty()) {
-        MEM_freeN(stack.pop());
-      }
-    }
-    for (Stack<void *> &stack : span_buffers_free_list_.values()) {
-      while (!stack.is_empty()) {
-        MEM_freeN(stack.pop());
-      }
-    }
   }
 
   template<typename... Args> VariableState *obtain_variable_state(Args &&...args);
@@ -191,17 +194,17 @@ class ValueAllocator : NonCopyable, NonMovable {
   {
     void *buffer = nullptr;
 
-    const int element_size = type.size();
-    const int alignment = type.alignment();
+    const int64_t element_size = type.size();
+    const int64_t alignment = type.alignment();
 
     if (alignment > min_alignment) {
       /* In this rare case we fallback to not reusing existing buffers. */
-      buffer = MEM_mallocN_aligned(element_size * size, alignment, __func__);
+      buffer = linear_allocator_.allocate(element_size * size, alignment);
     }
     else {
       Stack<void *> *stack = span_buffers_free_list_.lookup_ptr(element_size);
       if (stack == nullptr || stack->is_empty()) {
-        buffer = MEM_mallocN_aligned(element_size * size, min_alignment, __func__);
+        buffer = linear_allocator_.allocate(element_size * size, min_alignment);
       }
       else {
         /* Reuse existing buffer. */
@@ -225,7 +228,14 @@ class ValueAllocator : NonCopyable, NonMovable {
 
   VariableValue_OneSingle *obtain_OneSingle(const CPPType &type)
   {
-    void *buffer = MEM_mallocN_aligned(type.size(), type.alignment(), __func__);
+    Stack<void *> &stack = single_value_free_lists_.lookup_or_add_default(&type);
+    void *buffer;
+    if (stack.is_empty()) {
+      buffer = linear_allocator_.allocate(type.size(), type.alignment());
+    }
+    else {
+      buffer = stack.pop();
+    }
     return this->obtain<VariableValue_OneSingle>(buffer);
   }
 
@@ -263,11 +273,11 @@ class ValueAllocator : NonCopyable, NonMovable {
       }
       case ValueType::OneSingle: {
         auto *value_typed = static_cast<VariableValue_OneSingle *>(value);
+        const CPPType &type = data_type.single_type();
         if (value_typed->is_initialized) {
-          const CPPType &type = data_type.single_type();
           type.destruct(value_typed->data);
         }
-        MEM_freeN(value_typed->data);
+        single_value_free_lists_.lookup_or_add_default(&type).push(value_typed->data);
         break;
       }
       case ValueType::OneVector: {
@@ -277,7 +287,7 @@ class ValueAllocator : NonCopyable, NonMovable {
       }
     }
 
-    Stack<VariableValue *> &stack = values_free_lists_[(int)value->type];
+    Stack<VariableValue *> &stack = variable_value_free_lists_[(int)value->type];
     stack.push(value);
   }
 
@@ -285,9 +295,9 @@ class ValueAllocator : NonCopyable, NonMovable {
   template<typename T, typename... Args> T *obtain(Args &&...args)
   {
     static_assert(std::is_base_of_v<VariableValue, T>);
-    Stack<VariableValue *> &stack = values_free_lists_[(int)T::static_type];
+    Stack<VariableValue *> &stack = variable_value_free_lists_[(int)T::static_type];
     if (stack.is_empty()) {
-      void *buffer = MEM_mallocN(sizeof(T), __func__);
+      void *buffer = linear_allocator_.allocate(sizeof(T), alignof(T));
       return new (buffer) T(std::forward<Args>(args)...);
     }
     return new (stack.pop()) T(std::forward<Args>(args)...);
@@ -670,7 +680,12 @@ class VariableState : NonCopyable, NonMovable {
     tot_initialized_ += mask.size();
   }
 
-  void destruct(IndexMask mask,
+  /**
+   * Destruct the masked elements in this variable.
+   * \return True when all elements of this variable are initialized and the variable state can be
+   *  released.
+   */
+  bool destruct(IndexMask mask,
                 IndexMask full_mask,
                 const MFDataType &data_type,
                 ValueAllocator &value_allocator)
@@ -680,13 +695,12 @@ class VariableState : NonCopyable, NonMovable {
     /* Sanity check to make sure that enough indices can be destructed. */
     BLI_assert(new_tot_initialized >= 0);
 
+    bool do_destruct_self = false;
+
     switch (value_->type) {
       case ValueType::GVArray: {
         if (mask.size() == full_mask.size()) {
-          /* All elements are destructed. The elements are owned by the caller, so we don't
-           * actually destruct them. */
-          value_allocator.release_value(value_, data_type);
-          value_ = value_allocator.obtain_OneSingle(data_type.single_type());
+          do_destruct_self = true;
         }
         else {
           /* Not all elements are destructed. Since we can't work on the original array, we have to
@@ -702,18 +716,13 @@ class VariableState : NonCopyable, NonMovable {
         const CPPType &type = data_type.single_type();
         type.destruct_indices(this->value_as<VariableValue_Span>()->data, mask);
         if (new_tot_initialized == 0) {
-          /* Release span when all values are initialized. */
-          value_allocator.release_value(value_, data_type);
-          value_ = value_allocator.obtain_OneSingle(data_type.single_type());
+          do_destruct_self = true;
         }
         break;
       }
       case ValueType::GVVectorArray: {
         if (mask.size() == full_mask.size()) {
-          /* All elements are cleared. The elements are owned by the caller, so don't actually
-           * destruct them. */
-          value_allocator.release_value(value_, data_type);
-          value_ = value_allocator.obtain_OneVector(data_type.vector_base_type());
+          do_destruct_self = true;
         }
         else {
           /* Not all elements are cleared. Since we can't work on the original vector array, we
@@ -732,23 +741,24 @@ class VariableState : NonCopyable, NonMovable {
       case ValueType::OneSingle: {
         auto *value_typed = this->value_as<VariableValue_OneSingle>();
         BLI_assert(value_typed->is_initialized);
+        UNUSED_VARS_NDEBUG(value_typed);
         if (mask.size() == tot_initialized_) {
-          const CPPType &type = data_type.single_type();
-          type.destruct(value_typed->data);
-          value_typed->is_initialized = false;
+          do_destruct_self = true;
         }
         break;
       }
       case ValueType::OneVector: {
         auto *value_typed = this->value_as<VariableValue_OneVector>();
+        UNUSED_VARS(value_typed);
         if (mask.size() == tot_initialized_) {
-          value_typed->data.clear({0});
+          do_destruct_self = true;
         }
         break;
       }
     }
 
     tot_initialized_ = new_tot_initialized;
+    return do_destruct_self;
   }
 
   void indices_split(IndexMask mask, IndicesSplitVectors &r_indices)
@@ -802,12 +812,17 @@ class VariableState : NonCopyable, NonMovable {
 
 template<typename... Args> VariableState *ValueAllocator::obtain_variable_state(Args &&...args)
 {
-  return new VariableState(std::forward<Args>(args)...);
+  if (variable_state_free_list_.is_empty()) {
+    void *buffer = linear_allocator_.allocate(sizeof(VariableState), alignof(VariableState));
+    return new (buffer) VariableState(std::forward<Args>(args)...);
+  }
+  return new (variable_state_free_list_.pop()) VariableState(std::forward<Args>(args)...);
 }
 
 void ValueAllocator::release_variable_state(VariableState *state)
 {
-  delete state;
+  state->~VariableState();
+  variable_state_free_list_.push(state);
 }
 
 /** Keeps track of the states of all variables during evaluation. */
@@ -818,7 +833,8 @@ class VariableStates {
   IndexMask full_mask_;
 
  public:
-  VariableStates(IndexMask full_mask) : full_mask_(full_mask)
+  VariableStates(LinearAllocator<> &linear_allocator, IndexMask full_mask)
+      : value_allocator_(linear_allocator), full_mask_(full_mask)
   {
   }
 
@@ -940,7 +956,10 @@ class VariableStates {
   void destruct(const MFVariable &variable, const IndexMask &mask)
   {
     VariableState &variable_state = this->get_variable_state(variable);
-    variable_state.destruct(mask, full_mask_, variable.data_type(), value_allocator_);
+    if (variable_state.destruct(mask, full_mask_, variable.data_type(), value_allocator_)) {
+      variable_state.destruct_self(value_allocator_, variable.data_type());
+      variable_states_.remove_contained(&variable);
+    }
   }
 
   VariableState &get_variable_state(const MFVariable &variable)
@@ -1046,7 +1065,7 @@ static void execute_call_instruction(const MFCallInstruction &instruction,
     }
 
     try {
-      fn.call(mask, params, context);
+      fn.call_auto(mask, params, context);
     }
     catch (...) {
       /* Multi-functions must not throw exceptions. */
@@ -1162,9 +1181,9 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c
 {
   BLI_assert(procedure_.validate());
 
-  LinearAllocator<> allocator;
+  LinearAllocator<> linear_allocator;
 
-  VariableStates variable_states{full_mask};
+  VariableStates variable_states{linear_allocator, full_mask};
   variable_states.add_initial_variable_states(*this, procedure_, params);
 
   InstructionScheduler scheduler;
@@ -1237,4 +1256,12 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c
   }
 }
 
+MultiFunction::ExecutionHints MFProcedureExecutor::get_execution_hints() const
+{
+  ExecutionHints hints;
+  hints.allocates_array = true;
+  hints.min_grain_size = 10000;
+  return hints;
+}
+
 }  // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_procedure_optimization.cc b/source/blender/functions/intern/multi_function_procedure_optimization.cc
new file mode 100644
index 00000000000..f220c85e535
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_procedure_optimization.cc
@@ -0,0 +1,90 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_procedure_optimization.hh"
+
+namespace blender::fn::procedure_optimization {
+
+void move_destructs_up(MFProcedure &procedure, MFInstruction &block_end_instr)
+{
+  /* A mapping from a variable to its destruct instruction. */
+  Map<MFVariable *, MFDestructInstruction *> destruct_instructions;
+  MFInstruction *current_instr = &block_end_instr;
+  while (true) {
+    MFInstructionType instr_type = current_instr->type();
+    switch (instr_type) {
+      case MFInstructionType::Destruct: {
+        MFDestructInstruction &destruct_instr = static_cast<MFDestructInstruction &>(
+            *current_instr);
+        MFVariable *variable = destruct_instr.variable();
+        if (variable == nullptr) {
+          continue;
+        }
+        /* Remember this destruct instruction so that it can be moved up later on when the last use
+         * of the variable is found. */
+        destruct_instructions.add(variable, &destruct_instr);
+        break;
+      }
+      case MFInstructionType::Call: {
+        MFCallInstruction &call_instr = static_cast<MFCallInstruction &>(*current_instr);
+        /* For each variable, place the corresponding remembered destruct instruction right after
+         * this call instruction. */
+        for (MFVariable *variable : call_instr.params()) {
+          if (variable == nullptr) {
+            continue;
+          }
+          MFDestructInstruction *destruct_instr = destruct_instructions.pop_default(variable,
+                                                                                    nullptr);
+          if (destruct_instr == nullptr) {
+            continue;
+          }
+
+          /* Unlink destruct instruction from previous position. */
+          MFInstruction *after_destruct_instr = destruct_instr->next();
+          while (!destruct_instr->prev().is_empty()) {
+            /* Do a copy of the cursor here, because `destruct_instr->prev()` changes when
+             * #set_next is called below. */
+            const MFInstructionCursor cursor = destruct_instr->prev()[0];
+            cursor.set_next(procedure, after_destruct_instr);
+          }
+
+          /* Insert destruct instruction in new position. */
+          MFInstruction *next_instr = call_instr.next();
+          call_instr.set_next(destruct_instr);
+          destruct_instr->set_next(next_instr);
+        }
+        break;
+      }
+      default: {
+        break;
+      }
+    }
+
+    const Span<MFInstructionCursor> prev_cursors = current_instr->prev();
+    if (prev_cursors.size() != 1) {
+      /* Stop when there is some branching before this instruction. */
+      break;
+    }
+    const MFInstructionCursor &prev_cursor = prev_cursors[0];
+    current_instr = prev_cursor.instruction();
+    if (current_instr == nullptr) {
+      /* Stop when there is no previous instruction. E.g. when this is the first instruction.  */
+      break;
+    }
+  }
+}
+
+}  // namespace blender::fn::procedure_optimization
diff --git a/source/blender/functions/tests/FN_field_test.cc b/source/blender/functions/tests/FN_field_test.cc
index 16a6c73183d..4614f9eee58 100644
--- a/source/blender/functions/tests/FN_field_test.cc
+++ b/source/blender/functions/tests/FN_field_test.cc
@@ -160,9 +160,9 @@ class TwoOutputFunction : public MultiFunction {
   MFSignature signature_;
 
  public:
-  TwoOutputFunction(StringRef name)
+  TwoOutputFunction()
   {
-    MFSignatureBuilder signature{name};
+    MFSignatureBuilder signature{"Two Outputs"};
     signature.single_input<int>("In1");
     signature.single_input<int>("In2");
     signature.single_output<int>("Add");
@@ -190,8 +190,8 @@ TEST(field, FunctionTwoOutputs)
   GField index_field_1{std::make_shared<IndexFieldInput>()};
   GField index_field_2{std::make_shared<IndexFieldInput>()};
 
-  std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(FieldOperation(
-      std::make_unique<TwoOutputFunction>("SI_SI_SO_SO"), {index_field_1, index_field_2}));
+  std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(
+      FieldOperation(std::make_unique<TwoOutputFunction>(), {index_field_1, index_field_2}));
 
   GField result_field_1{fn, 0};
   GField result_field_2{fn, 1};
@@ -221,8 +221,8 @@ TEST(field, TwoFunctionsTwoOutputs)
 {
   GField index_field{std::make_shared<IndexFieldInput>()};
 
-  std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(FieldOperation(
-      std::make_unique<TwoOutputFunction>("SI_SI_SO_SO"), {index_field, index_field}));
+  std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(
+      FieldOperation(std::make_unique<TwoOutputFunction>(), {index_field, index_field}));
 
   Array<int64_t> mask_indices = {2, 4, 6, 8};
   IndexMask mask = mask_indices.as_span();
diff --git a/source/blender/functions/tests/FN_multi_function_procedure_test.cc b/source/blender/functions/tests/FN_multi_function_procedure_test.cc
index a0919d7926e..e3de23550c5 100644
--- a/source/blender/functions/tests/FN_multi_function_procedure_test.cc
+++ b/source/blender/functions/tests/FN_multi_function_procedure_test.cc
@@ -32,7 +32,7 @@ TEST(multi_function_procedure, ConstantOutput)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor executor{"My Procedure", procedure};
+  MFProcedureExecutor executor{procedure};
 
   MFParamsBuilder params{executor, 2};
   MFContextBuilder context;
@@ -73,7 +73,7 @@ TEST(multi_function_procedure, SimpleTest)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor executor{"My Procedure", procedure};
+  MFProcedureExecutor executor{procedure};
 
   MFParamsBuilder params{executor, 3};
   MFContextBuilder context;
@@ -125,7 +125,7 @@ TEST(multi_function_procedure, BranchTest)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor procedure_fn{"Condition Test", procedure};
+  MFProcedureExecutor procedure_fn{procedure};
   MFParamsBuilder params(procedure_fn, 5);
 
   Array<int> values_a = {1, 5, 3, 6, 2};
@@ -167,7 +167,7 @@ TEST(multi_function_procedure, EvaluateOne)
   builder.add_return();
   builder.add_output_parameter(*var2);
 
-  MFProcedureExecutor procedure_fn{"Evaluate One", procedure};
+  MFProcedureExecutor procedure_fn{procedure};
   MFParamsBuilder params{procedure_fn, 5};
 
   Array<int> values_out = {1, 2, 3, 4, 5};
@@ -239,7 +239,7 @@ TEST(multi_function_procedure, SimpleLoop)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor procedure_fn{"Simple Loop", procedure};
+  MFProcedureExecutor procedure_fn{procedure};
   MFParamsBuilder params{procedure_fn, 5};
 
   Array<int> counts = {4, 3, 7, 6, 4};
@@ -295,7 +295,7 @@ TEST(multi_function_procedure, Vectors)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor procedure_fn{"Vectors", procedure};
+  MFProcedureExecutor procedure_fn{procedure};
   MFParamsBuilder params{procedure_fn, 5};
 
   Array<int> v1 = {5, 2, 3};
@@ -359,7 +359,7 @@ TEST(multi_function_procedure, BufferReuse)
 
   EXPECT_TRUE(procedure.validate());
 
-  MFProcedureExecutor procedure_fn{"Buffer Reuse", procedure};
+  MFProcedureExecutor procedure_fn{procedure};
 
   Array<int> inputs = {4, 1, 6, 2, 3};
   Array<int> results(5, -1);
diff --git a/source/blender/functions/tests/FN_multi_function_test.cc b/source/blender/functions/tests/FN_multi_function_test.cc
index d99993b35ac..20ca00cf598 100644
--- a/source/blender/functions/tests/FN_multi_function_test.cc
+++ b/source/blender/functions/tests/FN_multi_function_test.cc
@@ -264,7 +264,6 @@ TEST(multi_function, CustomMF_GenericConstant)
 {
   int value = 42;
   CustomMF_GenericConstant fn{CPPType::get<int32_t>(), (const void *)&value, false};
-  EXPECT_EQ(fn.param_name(0), "42");
 
   Array<int> outputs(4, 0);
 
@@ -285,7 +284,6 @@ TEST(multi_function, CustomMF_GenericConstantArray)
 {
   std::array<int, 4> values = {3, 4, 5, 6};
   CustomMF_GenericConstantArray fn{GSpan(Span(values))};
-  EXPECT_EQ(fn.param_name(0), "[3, 4, 5, 6, ]");
 
   GVectorArray vector_array{CPPType::get<int32_t>(), 4};
   GVectorArray_TypedMutableRef<int> vector_array_ref{vector_array};