diff options
Diffstat (limited to 'source/blender/functions')
25 files changed, 1028 insertions, 498 deletions
diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt index 54670c0d1b3..9cfaf3eabea 100644 --- a/source/blender/functions/CMakeLists.txt +++ b/source/blender/functions/CMakeLists.txt @@ -34,10 +34,11 @@ set(SRC intern/generic_virtual_vector_array.cc intern/multi_function.cc intern/multi_function_builder.cc - intern/multi_function_parallel.cc + intern/multi_function_params.cc intern/multi_function_procedure.cc intern/multi_function_procedure_builder.cc intern/multi_function_procedure_executor.cc + intern/multi_function_procedure_optimization.cc FN_cpp_type.hh FN_cpp_type_make.hh @@ -54,12 +55,12 @@ set(SRC FN_multi_function_builder.hh FN_multi_function_context.hh FN_multi_function_data_type.hh - FN_multi_function_parallel.hh FN_multi_function_param_type.hh FN_multi_function_params.hh FN_multi_function_procedure.hh FN_multi_function_procedure_builder.hh FN_multi_function_procedure_executor.hh + FN_multi_function_procedure_optimization.hh FN_multi_function_signature.hh ) diff --git a/source/blender/functions/FN_cpp_type.hh b/source/blender/functions/FN_cpp_type.hh index 643b2fc1f28..7ddb5bf1f46 100644 --- a/source/blender/functions/FN_cpp_type.hh +++ b/source/blender/functions/FN_cpp_type.hh @@ -207,6 +207,18 @@ class CPPType : NonCopyable, NonMovable { return is_trivially_destructible_; } + /** + * When true, the value is like a normal C type, it can be copied around with #memcpy and does + * not have to be destructed. + * + * C++ equivalent: + * std::is_trivial_v<T>; + */ + bool is_trivial() const + { + return is_trivial_; + } + bool is_default_constructible() const { return default_construct_ != nullptr; diff --git a/source/blender/functions/FN_field.hh b/source/blender/functions/FN_field.hh index fb488fdbfa9..a591aaed34a 100644 --- a/source/blender/functions/FN_field.hh +++ b/source/blender/functions/FN_field.hh @@ -49,16 +49,15 @@ #include "BLI_function_ref.hh" #include "BLI_string_ref.hh" #include "BLI_vector.hh" +#include "BLI_vector_set.hh" #include "FN_generic_virtual_array.hh" #include "FN_multi_function_builder.hh" -#include "FN_multi_function_procedure.hh" -#include "FN_multi_function_procedure_builder.hh" -#include "FN_multi_function_procedure_executor.hh" namespace blender::fn { class FieldInput; +struct FieldInputs; /** * A node in a field-tree. It has at least one output that can be referenced by fields. @@ -66,18 +65,18 @@ class FieldInput; class FieldNode { private: bool is_input_; + + protected: /** - * True when this node is a #FieldInput or (potentially indirectly) depends on one. This could - * always be derived again later by traversing the field-tree, but keeping track of it while the - * field is built is cheaper. - * - * If this is false, the field is constant. Note that even when this is true, the field may be - * constant when all inputs are constant. + * Keeps track of the inputs that this node depends on. This avoids recomputing it every time the + * data is required. It is a shared pointer, because very often multiple nodes depend on the same + * inputs. + * Might contain null. */ - bool depends_on_input_; + std::shared_ptr<const FieldInputs> field_inputs_; public: - FieldNode(bool is_input, bool depends_on_input); + FieldNode(bool is_input); virtual ~FieldNode() = default; @@ -87,11 +86,7 @@ class FieldNode { bool is_operation() const; bool depends_on_input() const; - /** - * Invoke callback for every field input. It might be called multiple times for the same input. - * The caller is responsible for deduplication if required. - */ - virtual void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const = 0; + const std::shared_ptr<const FieldInputs> &field_inputs() const; virtual uint64_t hash() const; virtual bool is_equal_to(const FieldNode &other) const; @@ -178,11 +173,19 @@ class GFieldRef : public GFieldBase<const FieldNode *> { } }; +namespace detail { +/* Utility class to make #is_field_v work. */ +struct TypedFieldBase { +}; +} // namespace detail + /** * A typed version of #GField. It has the same memory layout as #GField. */ -template<typename T> class Field : public GField { +template<typename T> class Field : public GField, detail::TypedFieldBase { public: + using base_type = T; + Field() = default; Field(GField field) : GField(std::move(field)) @@ -196,6 +199,11 @@ template<typename T> class Field : public GField { } }; +/** True when T is any Field<...> type. */ +template<typename T> +static constexpr bool is_field_v = std::is_base_of_v<detail::TypedFieldBase, T> && + !std::is_same_v<detail::TypedFieldBase, T>; + /** * A #FieldNode that allows composing existing fields into new fields. */ @@ -218,7 +226,6 @@ class FieldOperation : public FieldNode { const MultiFunction &multi_function() const; const CPPType &output_cpp_type(int output_index) const override; - void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const override; }; class FieldContext; @@ -258,7 +265,19 @@ class FieldInput : public FieldNode { Category category() const; const CPPType &output_cpp_type(int output_index) const override; - void foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const override; +}; + +/** + * Keeps track of the inputs of a field. + */ +struct FieldInputs { + /** All #FieldInput nodes that a field (possibly indirectly) depends on. */ + VectorSet<const FieldInput *> nodes; + /** + * Same as above but the inputs are deduplicated. For example, when there are two separate index + * input nodes, only one will show up in this list. + */ + VectorSet<std::reference_wrapper<const FieldInput>> deduplicated_nodes; }; /** @@ -294,6 +313,9 @@ class FieldEvaluator : NonMovable, NonCopyable { Vector<OutputPointerInfo> output_pointer_infos_; bool is_evaluated_ = false; + Field<bool> selection_field_; + IndexMask selection_mask_; + public: /** Takes #mask by pointer because the mask has to live longer than the evaluator. */ FieldEvaluator(const FieldContext &context, const IndexMask *mask) @@ -314,6 +336,18 @@ class FieldEvaluator : NonMovable, NonCopyable { } /** + * The selection field is evaluated first to determine which indices of the other fields should + * be evaluated. Calling this method multiple times will just replace the previously set + * selection field. Only the elements selected by both this selection and the selection provided + * in the constructor are calculated. If no selection field is set, it is assumed that all + * indices passed to the constructor are selected. + */ + void set_selection(Field<bool> selection) + { + selection_field_ = std::move(selection); + } + + /** * \param field: Field to add to the evaluator. * \param dst: Mutable virtual array that the evaluated result for this field is be written into. */ @@ -384,6 +418,8 @@ class FieldEvaluator : NonMovable, NonCopyable { return this->get_evaluated(field_index).typed<T>(); } + IndexMask get_evaluated_selection_as_mask(); + /** * Retrieve the output of an evaluated boolean field and convert it to a mask, which can be used * to avoid calculations for unnecessary elements later on. The evaluator will own the indices in @@ -392,6 +428,24 @@ class FieldEvaluator : NonMovable, NonCopyable { IndexMask get_evaluated_as_mask(const int field_index); }; +/** + * Evaluate fields in the given context. If possible, multiple fields should be evaluated together, + * because that can be more efficient when they share common sub-fields. + * + * \param scope: The resource scope that owns data that makes up the output virtual arrays. Make + * sure the scope is not destructed when the output virtual arrays are still used. + * \param fields_to_evaluate: The fields that should be evaluated together. + * \param mask: Determines which indices are computed. The mask may be referenced by the returned + * virtual arrays. So the underlying indices (if applicable) should live longer then #scope. + * \param context: The context that the field is evaluated in. Used to retrieve data from each + * #FieldInput in the field network. + * \param dst_varrays: If provided, the computed data will be written into those virtual arrays + * instead of into newly created ones. That allows making the computed data live longer than + * #scope and is more efficient when the data will be written into those virtual arrays + * later anyway. + * \return The computed virtual arrays for each provided field. If #dst_varrays is passed, the + * provided virtual arrays are returned. + */ Vector<GVArray> evaluate_fields(ResourceScope &scope, Span<GFieldRef> fields_to_evaluate, IndexMask mask, @@ -419,13 +473,24 @@ template<typename T> Field<T> make_constant_field(T value) return Field<T>{GField{std::move(operation), 0}}; } +GField make_constant_field(const CPPType &type, const void *value); + +/** + * If the field depends on some input, the same field is returned. + * Otherwise the field is evaluated and a new field is created that just computes this constant. + * + * Making the field constant has two benefits: + * - The field-tree becomes a single node, which is more efficient when the field is evaluated many + * times. + * - Memory of the input fields may be freed. + */ GField make_field_constant_if_possible(GField field); class IndexFieldInput final : public FieldInput { public: IndexFieldInput(); - static GVArray get_index_varray(IndexMask mask, ResourceScope &scope); + static GVArray get_index_varray(IndexMask mask); GVArray get_varray_for_context(const FieldContext &context, IndexMask mask, @@ -438,11 +503,56 @@ class IndexFieldInput final : public FieldInput { /** \} */ /* -------------------------------------------------------------------- */ +/** \name Value or Field Class + * + * Utility class that wraps a single value and a field, to simplify accessing both of the types. + * \{ */ + +template<typename T> struct ValueOrField { + /** Value that is used when the field is empty. */ + T value{}; + Field<T> field; + + ValueOrField() = default; + + ValueOrField(T value) : value(std::move(value)) + { + } + + ValueOrField(Field<T> field) : field(std::move(field)) + { + } + + bool is_field() const + { + return (bool)this->field; + } + + Field<T> as_field() const + { + if (this->field) { + return this->field; + } + return make_constant_field(this->value); + } + + T as_value() const + { + if (this->field) { + /* This returns a default value when the field is not constant. */ + return evaluate_constant_field(this->field); + } + return this->value; + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name #FieldNode Inline Methods * \{ */ -inline FieldNode::FieldNode(bool is_input, bool depends_on_input) - : is_input_(is_input), depends_on_input_(depends_on_input) +inline FieldNode::FieldNode(bool is_input) : is_input_(is_input) { } @@ -458,7 +568,12 @@ inline bool FieldNode::is_operation() const inline bool FieldNode::depends_on_input() const { - return depends_on_input_; + return field_inputs_ && !field_inputs_->nodes.is_empty(); +} + +inline const std::shared_ptr<const FieldInputs> &FieldNode::field_inputs() const +{ + return field_inputs_; } inline uint64_t FieldNode::hash() const diff --git a/source/blender/functions/FN_field_cpp_type.hh b/source/blender/functions/FN_field_cpp_type.hh index 5e6f1b5a585..940faba6d70 100644 --- a/source/blender/functions/FN_field_cpp_type.hh +++ b/source/blender/functions/FN_field_cpp_type.hh @@ -30,19 +30,19 @@ template<typename T> struct FieldCPPTypeParam { class FieldCPPType : public CPPType { private: - const CPPType &field_type_; + const CPPType &base_type_; public: template<typename T> FieldCPPType(FieldCPPTypeParam<Field<T>> /* unused */, StringRef debug_name) : CPPType(CPPTypeParam<Field<T>, CPPTypeFlags::None>(), debug_name), - field_type_(CPPType::get<T>()) + base_type_(CPPType::get<T>()) { } - const CPPType &field_type() const + const CPPType &base_type() const { - return field_type_; + return base_type_; } /* Ensure that #GField and #Field<T> have the same layout, to enable casting between the two. */ @@ -60,6 +60,84 @@ class FieldCPPType : public CPPType { } }; +class ValueOrFieldCPPType : public CPPType { + private: + const CPPType &base_type_; + void (*construct_from_value_)(void *dst, const void *value); + void (*construct_from_field_)(void *dst, GField field); + const void *(*get_value_ptr_)(const void *value_or_field); + const GField *(*get_field_ptr_)(const void *value_or_field); + bool (*is_field_)(const void *value_or_field); + GField (*as_field_)(const void *value_or_field); + + public: + template<typename T> + ValueOrFieldCPPType(FieldCPPTypeParam<ValueOrField<T>> /* unused */, StringRef debug_name) + : CPPType(CPPTypeParam<ValueOrField<T>, CPPTypeFlags::None>(), debug_name), + base_type_(CPPType::get<T>()) + { + construct_from_value_ = [](void *dst, const void *value_or_field) { + new (dst) ValueOrField<T>(*(const T *)value_or_field); + }; + construct_from_field_ = [](void *dst, GField field) { + new (dst) ValueOrField<T>(Field<T>(std::move(field))); + }; + get_value_ptr_ = [](const void *value_or_field) { + return (const void *)&((ValueOrField<T> *)value_or_field)->value; + }; + get_field_ptr_ = [](const void *value_or_field) -> const GField * { + return &((ValueOrField<T> *)value_or_field)->field; + }; + is_field_ = [](const void *value_or_field) { + return ((ValueOrField<T> *)value_or_field)->is_field(); + }; + as_field_ = [](const void *value_or_field) -> GField { + return ((ValueOrField<T> *)value_or_field)->as_field(); + }; + } + + const CPPType &base_type() const + { + return base_type_; + } + + void construct_from_value(void *dst, const void *value) const + { + construct_from_value_(dst, value); + } + + void construct_from_field(void *dst, GField field) const + { + construct_from_field_(dst, field); + } + + const void *get_value_ptr(const void *value_or_field) const + { + return get_value_ptr_(value_or_field); + } + + void *get_value_ptr(void *value_or_field) const + { + /* Use `const_cast` to avoid duplicating the callback for the non-const case. */ + return const_cast<void *>(get_value_ptr_(value_or_field)); + } + + const GField *get_field_ptr(const void *value_or_field) const + { + return get_field_ptr_(value_or_field); + } + + bool is_field(const void *value_or_field) const + { + return is_field_(value_or_field); + } + + GField as_field(const void *value_or_field) const + { + return as_field_(value_or_field); + } +}; + } // namespace blender::fn #define MAKE_FIELD_CPP_TYPE(DEBUG_NAME, FIELD_TYPE) \ @@ -69,4 +147,13 @@ class FieldCPPType : public CPPType { static blender::fn::FieldCPPType cpp_type{ \ blender::fn::FieldCPPTypeParam<blender::fn::Field<FIELD_TYPE>>(), STRINGIFY(DEBUG_NAME)}; \ return cpp_type; \ + } \ + template<> \ + const blender::fn::CPPType & \ + blender::fn::CPPType::get_impl<blender::fn::ValueOrField<FIELD_TYPE>>() \ + { \ + static blender::fn::ValueOrFieldCPPType cpp_type{ \ + blender::fn::FieldCPPTypeParam<blender::fn::ValueOrField<FIELD_TYPE>>(), \ + STRINGIFY(DEBUG_NAME##OrValue)}; \ + return cpp_type; \ } diff --git a/source/blender/functions/FN_generic_span.hh b/source/blender/functions/FN_generic_span.hh index e2c49697ba9..6f0147b7fb3 100644 --- a/source/blender/functions/FN_generic_span.hh +++ b/source/blender/functions/FN_generic_span.hh @@ -93,6 +93,11 @@ class GSpan { const int64_t new_size = std::max<int64_t>(0, std::min(size, size_ - start)); return GSpan(*type_, POINTER_OFFSET(data_, type_->size() * start), new_size); } + + GSpan slice(const IndexRange range) const + { + return this->slice(range.start(), range.size()); + } }; /** @@ -169,6 +174,11 @@ class GMutableSpan { const int64_t new_size = std::max<int64_t>(0, std::min(size, size_ - start)); return GMutableSpan(*type_, POINTER_OFFSET(data_, type_->size() * start), new_size); } + + GMutableSpan slice(IndexRange range) const + { + return this->slice(range.start(), range.size()); + } }; } // namespace blender::fn diff --git a/source/blender/functions/FN_generic_virtual_array.hh b/source/blender/functions/FN_generic_virtual_array.hh index b822f3a7c33..fc8612d6f87 100644 --- a/source/blender/functions/FN_generic_virtual_array.hh +++ b/source/blender/functions/FN_generic_virtual_array.hh @@ -148,14 +148,37 @@ class GVArrayCommon { void materialize_to_uninitialized(void *dst) const; void materialize_to_uninitialized(const IndexMask mask, void *dst) const; + /** + * Returns true when the virtual array is stored as a span internally. + */ bool is_span() const; + /** + * Returns the internally used span of the virtual array. This invokes undefined behavior is the + * virtual array is not stored as a span internally. + */ GSpan get_internal_span() const; + /** + * Returns true when the virtual array returns the same value for every index. + */ bool is_single() const; + /** + * Copies the value that is used for every element into `r_value`, which is expected to point to + * initialized memory. This invokes undefined behavior if the virtual array would not return the + * same value for every index. + */ void get_internal_single(void *r_value) const; + /** + * Same as `get_internal_single`, but `r_value` points to initialized memory. + */ void get_internal_single_to_uninitialized(void *r_value) const; void get(const int64_t index, void *r_value) const; + /** + * Returns a copy of the value at the given index. Usually a typed virtual array should + * be used instead, but sometimes this is simpler when only a few indices are needed. + */ + template<typename T> T get(const int64_t index) const; void get_to_uninitialized(const int64_t index, void *r_value) const; }; @@ -226,6 +249,9 @@ class GVMutableArray : public GVArrayCommon { void set_by_relocate(const int64_t index, void *value); void fill(const void *value); + /** + * Copy the values from the source buffer to all elements in the virtual array. + */ void set_all(const void *src); GVMutableArrayImpl *get_implementation() const; @@ -555,37 +581,19 @@ template<typename T> class VMutableArrayImpl_For_GVMutableArray : public VMutabl /** \} */ /* -------------------------------------------------------------------- */ -/** \name #GVArrayImpl_For_GSpan and #GVMutableArrayImpl_For_GMutableSpan. +/** \name #GVArrayImpl_For_GSpan. * \{ */ -class GVArrayImpl_For_GSpan : public GVArrayImpl { - protected: - const void *data_ = nullptr; - const int64_t element_size_; - - public: - GVArrayImpl_For_GSpan(const GSpan span); - - protected: - GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size); - - void get(const int64_t index, void *r_value) const override; - void get_to_uninitialized(const int64_t index, void *r_value) const override; - - bool is_span() const override; - GSpan get_internal_span() const override; -}; - -class GVMutableArrayImpl_For_GMutableSpan : public GVMutableArrayImpl { +class GVArrayImpl_For_GSpan : public GVMutableArrayImpl { protected: void *data_ = nullptr; const int64_t element_size_; public: - GVMutableArrayImpl_For_GMutableSpan(const GMutableSpan span); + GVArrayImpl_For_GSpan(const GMutableSpan span); protected: - GVMutableArrayImpl_For_GMutableSpan(const CPPType &type, const int64_t size); + GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size); public: void get(const int64_t index, void *r_value) const override; @@ -688,6 +696,16 @@ inline void GVArrayCommon::get(const int64_t index, void *r_value) const impl_->get(index, r_value); } +template<typename T> inline T GVArrayCommon::get(const int64_t index) const +{ + BLI_assert(index >= 0); + BLI_assert(index < this->size()); + BLI_assert(this->type().is<T>()); + T value{}; + impl_->get(index, &value); + return value; +} + /* Same as `get`, but `r_value` is expected to point to uninitialized memory. */ inline void GVArrayCommon::get_to_uninitialized(const int64_t index, void *r_value) const { diff --git a/source/blender/functions/FN_multi_function.hh b/source/blender/functions/FN_multi_function.hh index c57f6cf574e..1e36d87668a 100644 --- a/source/blender/functions/FN_multi_function.hh +++ b/source/blender/functions/FN_multi_function.hh @@ -60,6 +60,13 @@ class MultiFunction { { } + /** + * The result is the same as using #call directly but this method has some additional features. + * - Automatic multi-threading when possible and appropriate. + * - Automatic index mask offsetting to avoid large temporary intermediate arrays that are mostly + * unused. + */ + void call_auto(IndexMask mask, MFParams params, MFContext context) const; virtual void call(IndexMask mask, MFParams params, MFContext context) const = 0; virtual uint64_t hash() const @@ -97,6 +104,8 @@ class MultiFunction { return signature_ref_->function_name; } + virtual std::string debug_name() const; + bool depends_on_context() const { return signature_ref_->depends_on_context; @@ -108,6 +117,31 @@ class MultiFunction { return *signature_ref_; } + /** + * Information about how the multi-function behaves that help a caller to execute it efficiently. + */ + struct ExecutionHints { + /** + * Suggested minimum workload under which multi-threading does not really help. + * This should be lowered when the multi-function is doing something computationally expensive. + */ + int64_t min_grain_size = 10000; + /** + * Indicates that the multi-function will allocate an array large enough to hold all indices + * passed in as mask. This tells the caller that it would be preferable to pass in smaller + * indices. Also maybe the full mask should be split up into smaller segments to decrease peak + * memory usage. + */ + bool allocates_array = false; + /** + * Tells the caller that every execution takes about the same time. This helps making a more + * educated guess about a good grain size. + */ + bool uniform_execution_time = true; + }; + + ExecutionHints execution_hints() const; + protected: /* Make the function use the given signature. This should be called once in the constructor of * child classes. No copy of the signature is made, so the caller has to make sure that the @@ -119,6 +153,8 @@ class MultiFunction { BLI_assert(signature != nullptr); signature_ref_ = signature; } + + virtual ExecutionHints get_execution_hints() const; }; inline MFParamsBuilder::MFParamsBuilder(const MultiFunction &fn, int64_t mask_size) diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh index 0ce05cbca30..eaf9e5ce70f 100644 --- a/source/blender/functions/FN_multi_function_builder.hh +++ b/source/blender/functions/FN_multi_function_builder.hh @@ -43,7 +43,7 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio MFSignature signature_; public: - CustomMF_SI_SO(StringRef name, FunctionT function) : function_(std::move(function)) + CustomMF_SI_SO(const char *name, FunctionT function) : function_(std::move(function)) { MFSignatureBuilder signature{name}; signature.single_input<In1>("In1"); @@ -53,7 +53,7 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio } template<typename ElementFuncT> - CustomMF_SI_SO(StringRef name, ElementFuncT element_fn) + CustomMF_SI_SO(const char *name, ElementFuncT element_fn) : CustomMF_SI_SO(name, CustomMF_SI_SO::create_function(element_fn)) { } @@ -92,7 +92,7 @@ class CustomMF_SI_SI_SO : public MultiFunction { MFSignature signature_; public: - CustomMF_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function)) + CustomMF_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function)) { MFSignatureBuilder signature{name}; signature.single_input<In1>("In1"); @@ -103,7 +103,7 @@ class CustomMF_SI_SI_SO : public MultiFunction { } template<typename ElementFuncT> - CustomMF_SI_SI_SO(StringRef name, ElementFuncT element_fn) + CustomMF_SI_SI_SO(const char *name, ElementFuncT element_fn) : CustomMF_SI_SI_SO(name, CustomMF_SI_SI_SO::create_function(element_fn)) { } @@ -150,7 +150,7 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction { MFSignature signature_; public: - CustomMF_SI_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function)) + CustomMF_SI_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function)) { MFSignatureBuilder signature{name}; signature.single_input<In1>("In1"); @@ -162,7 +162,7 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction { } template<typename ElementFuncT> - CustomMF_SI_SI_SI_SO(StringRef name, ElementFuncT element_fn) + CustomMF_SI_SI_SI_SO(const char *name, ElementFuncT element_fn) : CustomMF_SI_SI_SI_SO(name, CustomMF_SI_SI_SI_SO::create_function(element_fn)) { } @@ -211,7 +211,7 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction { MFSignature signature_; public: - CustomMF_SI_SI_SI_SI_SO(StringRef name, FunctionT function) : function_(std::move(function)) + CustomMF_SI_SI_SI_SI_SO(const char *name, FunctionT function) : function_(std::move(function)) { MFSignatureBuilder signature{name}; signature.single_input<In1>("In1"); @@ -224,7 +224,7 @@ class CustomMF_SI_SI_SI_SI_SO : public MultiFunction { } template<typename ElementFuncT> - CustomMF_SI_SI_SI_SI_SO(StringRef name, ElementFuncT element_fn) + CustomMF_SI_SI_SI_SI_SO(const char *name, ElementFuncT element_fn) : CustomMF_SI_SI_SI_SI_SO(name, CustomMF_SI_SI_SI_SI_SO::create_function(element_fn)) { } @@ -265,7 +265,7 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction { MFSignature signature_; public: - CustomMF_SM(StringRef name, FunctionT function) : function_(std::move(function)) + CustomMF_SM(const char *name, FunctionT function) : function_(std::move(function)) { MFSignatureBuilder signature{name}; signature.single_mutable<Mut1>("Mut1"); @@ -274,7 +274,7 @@ template<typename Mut1> class CustomMF_SM : public MultiFunction { } template<typename ElementFuncT> - CustomMF_SM(StringRef name, ElementFuncT element_fn) + CustomMF_SM(const char *name, ElementFuncT element_fn) : CustomMF_SM(name, CustomMF_SM::create_function(element_fn)) { } @@ -306,8 +306,8 @@ template<typename From, typename To> class CustomMF_Convert : public MultiFuncti static MFSignature create_signature() { - std::string name = CPPType::get<From>().name() + " to " + CPPType::get<To>().name(); - MFSignatureBuilder signature{std::move(name)}; + static std::string name = CPPType::get<From>().name() + " to " + CPPType::get<To>().name(); + MFSignatureBuilder signature{name.c_str()}; signature.single_input<From>("Input"); signature.single_output<To>("Output"); return signature.build(); @@ -372,9 +372,7 @@ template<typename T> class CustomMF_Constant : public MultiFunction { template<typename U> CustomMF_Constant(U &&value) : value_(std::forward<U>(value)) { MFSignatureBuilder signature{"Constant"}; - std::stringstream ss; - ss << value_; - signature.single_output<T>(ss.str()); + signature.single_output<T>("Value"); signature_ = signature.build(); this->set_signature(&signature_); } @@ -414,9 +412,7 @@ class CustomMF_DefaultOutput : public MultiFunction { MFSignature signature_; public: - CustomMF_DefaultOutput(StringRef name, - Span<MFDataType> input_types, - Span<MFDataType> output_types); + CustomMF_DefaultOutput(Span<MFDataType> input_types, Span<MFDataType> output_types); void call(IndexMask mask, MFParams params, MFContext context) const override; }; @@ -425,7 +421,7 @@ class CustomMF_GenericCopy : public MultiFunction { MFSignature signature_; public: - CustomMF_GenericCopy(StringRef name, MFDataType data_type); + CustomMF_GenericCopy(MFDataType data_type); void call(IndexMask mask, MFParams params, MFContext context) const override; }; diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh deleted file mode 100644 index 84c57efd434..00000000000 --- a/source/blender/functions/FN_multi_function_parallel.hh +++ /dev/null @@ -1,39 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#pragma once - -/** \file - * \ingroup fn - */ - -#include "FN_multi_function.hh" - -namespace blender::fn { - -class ParallelMultiFunction : public MultiFunction { - private: - const MultiFunction &fn_; - const int64_t grain_size_; - bool threading_supported_; - - public: - ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size); - - void call(IndexMask mask, MFParams params, MFContext context) const override; -}; - -} // namespace blender::fn diff --git a/source/blender/functions/FN_multi_function_params.hh b/source/blender/functions/FN_multi_function_params.hh index 5c7e75230f3..f4ddc4f2881 100644 --- a/source/blender/functions/FN_multi_function_params.hh +++ b/source/blender/functions/FN_multi_function_params.hh @@ -25,6 +25,8 @@ * the function. `MFParams` is then used inside the called function to access the parameters. */ +#include <mutex> + #include "BLI_resource_scope.hh" #include "FN_generic_pointer.hh" @@ -45,6 +47,9 @@ class MFParamsBuilder { Vector<const GVVectorArray *> virtual_vector_arrays_; Vector<GVectorArray *> vector_arrays_; + std::mutex mutex_; + Vector<std::pair<int, GMutableSpan>> dummy_output_spans_; + friend class MFParams; MFParamsBuilder(const MFSignature &signature, const IndexMask mask) @@ -62,8 +67,8 @@ class MFParamsBuilder { template<typename T> void add_readonly_single_input_value(T value, StringRef expected_name = "") { - T *value_ptr = &scope_.add_value<T>(std::move(value)); - this->add_readonly_single_input(value_ptr, expected_name); + this->add_readonly_single_input(VArray<T>::ForSingle(std::move(value), min_array_size_), + expected_name); } template<typename T> void add_readonly_single_input(const T *value, StringRef expected_name = "") { @@ -254,20 +259,12 @@ class MFParams { this->assert_correct_param(param_index, name, MFParamType::SingleOutput); int data_index = builder_->signature_->data_index(param_index); GMutableSpan span = builder_->mutable_spans_[data_index]; - if (span.is_empty()) { - /* The output is ignored by the caller, but the multi-function does not handle this case. So - * create a temporary buffer that the multi-function can write to. */ - const CPPType &type = span.type(); - void *buffer = builder_->scope_.linear_allocator().allocate( - builder_->min_array_size_ * type.size(), type.alignment()); - if (!type.is_trivially_destructible()) { - /* Make sure the temporary elements will be destructed in the end. */ - builder_->scope_.add_destruct_call( - [&type, buffer, mask = builder_->mask_]() { type.destruct_indices(buffer, mask); }); - } - span = GMutableSpan{type, buffer, builder_->min_array_size_}; + if (!span.is_empty()) { + return span; } - return span; + /* The output is ignored by the caller, but the multi-function does not handle this case. So + * create a temporary buffer that the multi-function can write to. */ + return this->ensure_dummy_single_output(data_index); } /** @@ -356,6 +353,8 @@ class MFParams { } #endif } + + GMutableSpan ensure_dummy_single_output(int data_index); }; } // namespace blender::fn diff --git a/source/blender/functions/FN_multi_function_procedure_executor.hh b/source/blender/functions/FN_multi_function_procedure_executor.hh index 9c8b59739b8..409a031e7ed 100644 --- a/source/blender/functions/FN_multi_function_procedure_executor.hh +++ b/source/blender/functions/FN_multi_function_procedure_executor.hh @@ -31,9 +31,12 @@ class MFProcedureExecutor : public MultiFunction { const MFProcedure &procedure_; public: - MFProcedureExecutor(std::string name, const MFProcedure &procedure); + MFProcedureExecutor(const MFProcedure &procedure); void call(IndexMask mask, MFParams params, MFContext context) const override; + + private: + ExecutionHints get_execution_hints() const override; }; } // namespace blender::fn diff --git a/source/blender/functions/FN_multi_function_procedure_optimization.hh b/source/blender/functions/FN_multi_function_procedure_optimization.hh new file mode 100644 index 00000000000..e5ffc12b241 --- /dev/null +++ b/source/blender/functions/FN_multi_function_procedure_optimization.hh @@ -0,0 +1,61 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#pragma once + +/** \file + * \ingroup fn + * + * A #MFProcedure optimization pass takes an existing procedure and changes it in a way that + * improves its performance when executed. + * + * Oftentimes it would also be possible to implement a specific optimization directly during + * construction of the initial #MFProcedure. There is a trade-off between doing that or just + * building a "simple" procedure and then optimizing it uses separate optimization passes. + * - Doing optimizations directly during construction is typically faster than doing it as a + * separate pass. However, it would be much harder to turn the optimization off when it is not + * necessary, making the construction potentially slower in those cases. + * - Doing optimizations directly would also make code more complex, because it mixes the logic + * that generates the procedure from some other data with optimization decisions. + * - Having a separate pass allows us to use it in different places when necessary. + * - Having a separate pass allows us to enable and disable it easily to better understand its + * impact on performance. + */ + +#include "FN_multi_function_procedure.hh" + +namespace blender::fn::procedure_optimization { + +/** + * When generating a procedure, destruct instructions (#MFDestructInstruction) have to be inserted + * for all variables that are not outputs. Often the simplest approach is to add these instructions + * at the very end. However, when the procedure is executed this is not optimal, because many more + * variables are initialized at the same time than necessary. This inhibits the reuse of memory + * buffers which decreases performance and increases memory use. + * + * This optimization pass moves destruct instructions up in the procedure. The goal is to destruct + * each variable right after its last use. + * + * For simplicity, and because this is the most common use case, this optimization currently only + * works on a single chain of instructions. Destruct instructions are not moved across branches. + * + * \param procedure The procedure that should be optimized. + * \param block_end_instr The instruction that points to the last instruction within a linear chain + * of instructions. The algorithm moves instructions backward starting at this instruction. + */ +void move_destructs_up(MFProcedure &procedure, MFInstruction &block_end_instr); + +} // namespace blender::fn::procedure_optimization diff --git a/source/blender/functions/FN_multi_function_signature.hh b/source/blender/functions/FN_multi_function_signature.hh index d05948cc645..3c991bc9c56 100644 --- a/source/blender/functions/FN_multi_function_signature.hh +++ b/source/blender/functions/FN_multi_function_signature.hh @@ -30,8 +30,15 @@ namespace blender::fn { struct MFSignature { - std::string function_name; - Vector<std::string> param_names; + /** + * The name should be statically allocated so that it lives longer than this signature. This is + * used instead of an #std::string because of the overhead when many functions are created. + * If the name of the function has to be more dynamic for debugging purposes, override + * #MultiFunction::debug_name() instead. Then the dynamic name will only be computed when it is + * actually needed. + */ + const char *function_name; + Vector<const char *> param_names; Vector<MFParamType> param_types; Vector<int> param_data_indices; bool depends_on_context = false; @@ -51,9 +58,9 @@ class MFSignatureBuilder { int vector_array_count_ = 0; public: - MFSignatureBuilder(std::string function_name) + MFSignatureBuilder(const char *function_name) { - signature_.function_name = std::move(function_name); + signature_.function_name = function_name; } MFSignature build() const @@ -63,23 +70,23 @@ class MFSignatureBuilder { /* Input Parameter Types */ - template<typename T> void single_input(StringRef name) + template<typename T> void single_input(const char *name) { this->single_input(name, CPPType::get<T>()); } - void single_input(StringRef name, const CPPType &type) + void single_input(const char *name, const CPPType &type) { this->input(name, MFDataType::ForSingle(type)); } - template<typename T> void vector_input(StringRef name) + template<typename T> void vector_input(const char *name) { this->vector_input(name, CPPType::get<T>()); } - void vector_input(StringRef name, const CPPType &base_type) + void vector_input(const char *name, const CPPType &base_type) { this->input(name, MFDataType::ForVector(base_type)); } - void input(StringRef name, MFDataType data_type) + void input(const char *name, MFDataType data_type) { signature_.param_names.append(name); signature_.param_types.append(MFParamType(MFParamType::Input, data_type)); @@ -96,23 +103,23 @@ class MFSignatureBuilder { /* Output Parameter Types */ - template<typename T> void single_output(StringRef name) + template<typename T> void single_output(const char *name) { this->single_output(name, CPPType::get<T>()); } - void single_output(StringRef name, const CPPType &type) + void single_output(const char *name, const CPPType &type) { this->output(name, MFDataType::ForSingle(type)); } - template<typename T> void vector_output(StringRef name) + template<typename T> void vector_output(const char *name) { this->vector_output(name, CPPType::get<T>()); } - void vector_output(StringRef name, const CPPType &base_type) + void vector_output(const char *name, const CPPType &base_type) { this->output(name, MFDataType::ForVector(base_type)); } - void output(StringRef name, MFDataType data_type) + void output(const char *name, MFDataType data_type) { signature_.param_names.append(name); signature_.param_types.append(MFParamType(MFParamType::Output, data_type)); @@ -129,23 +136,23 @@ class MFSignatureBuilder { /* Mutable Parameter Types */ - template<typename T> void single_mutable(StringRef name) + template<typename T> void single_mutable(const char *name) { this->single_mutable(name, CPPType::get<T>()); } - void single_mutable(StringRef name, const CPPType &type) + void single_mutable(const char *name, const CPPType &type) { this->mutable_(name, MFDataType::ForSingle(type)); } - template<typename T> void vector_mutable(StringRef name) + template<typename T> void vector_mutable(const char *name) { this->vector_mutable(name, CPPType::get<T>()); } - void vector_mutable(StringRef name, const CPPType &base_type) + void vector_mutable(const char *name, const CPPType &base_type) { this->mutable_(name, MFDataType::ForVector(base_type)); } - void mutable_(StringRef name, MFDataType data_type) + void mutable_(const char *name, MFDataType data_type) { signature_.param_names.append(name); signature_.param_types.append(MFParamType(MFParamType::Mutable, data_type)); @@ -160,7 +167,7 @@ class MFSignatureBuilder { } } - void add(StringRef name, const MFParamType ¶m_type) + void add(const char *name, const MFParamType ¶m_type) { switch (param_type.interface_type()) { case MFParamType::Input: diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc index 68a8446e6ae..604e5c6d13f 100644 --- a/source/blender/functions/intern/field.cc +++ b/source/blender/functions/intern/field.cc @@ -21,7 +21,10 @@ #include "BLI_vector_set.hh" #include "FN_field.hh" -#include "FN_multi_function_parallel.hh" +#include "FN_multi_function_procedure.hh" +#include "FN_multi_function_procedure_builder.hh" +#include "FN_multi_function_procedure_executor.hh" +#include "FN_multi_function_procedure_optimization.hh" namespace blender::fn { @@ -237,8 +240,7 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure, if (!already_output_variables.add(variable)) { /* One variable can be output at most once. To output the same value twice, we have to make * a copy first. */ - const MultiFunction ©_fn = scope.construct<CustomMF_GenericCopy>("copy", - variable->data_type()); + const MultiFunction ©_fn = scope.construct<CustomMF_GenericCopy>(variable->data_type()); variable = builder.add_call<1>(copy_fn, {variable})[0]; } builder.add_output_parameter(*variable); @@ -253,30 +255,14 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure, builder.add_destruct(*variable); } - builder.add_return(); + MFReturnInstruction &return_instr = builder.add_return(); + + procedure_optimization::move_destructs_up(procedure, return_instr); // std::cout << procedure.to_dot() << "\n"; BLI_assert(procedure.validate()); } -/** - * Evaluate fields in the given context. If possible, multiple fields should be evaluated together, - * because that can be more efficient when they share common sub-fields. - * - * \param scope: The resource scope that owns data that makes up the output virtual arrays. Make - * sure the scope is not destructed when the output virtual arrays are still used. - * \param fields_to_evaluate: The fields that should be evaluated together. - * \param mask: Determines which indices are computed. The mask may be referenced by the returned - * virtual arrays. So the underlying indices (if applicable) should live longer then #scope. - * \param context: The context that the field is evaluated in. Used to retrieve data from each - * #FieldInput in the field network. - * \param dst_varrays: If provided, the computed data will be written into those virtual arrays - * instead of into newly created ones. That allows making the computed data live longer than - * #scope and is more efficient when the data will be written into those virtual arrays - * later anyway. - * \return The computed virtual arrays for each provided field. If #dst_varrays is passed, the - * provided virtual arrays are returned. - */ Vector<GVArray> evaluate_fields(ResourceScope &scope, Span<GFieldRef> fields_to_evaluate, IndexMask mask, @@ -358,14 +344,9 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope, MFProcedure procedure; build_multi_function_procedure_for_fields( procedure, scope, field_tree_info, varying_fields_to_evaluate); - MFProcedureExecutor procedure_executor{"Procedure", procedure}; - /* Add multi threading capabilities to the field evaluation. */ - const int grain_size = 10000; - fn::ParallelMultiFunction parallel_procedure_executor{procedure_executor, grain_size}; - /* Utility variable to make easy to switch the executor. */ - const MultiFunction &executor_fn = parallel_procedure_executor; - - MFParamsBuilder mf_params{executor_fn, &mask}; + MFProcedureExecutor procedure_executor{procedure}; + + MFParamsBuilder mf_params{procedure_executor, &mask}; MFContextBuilder mf_context; /* Provide inputs to the procedure executor. */ @@ -406,7 +387,7 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope, mf_params.add_uninitialized_single_output(span); } - executor_fn.call(mask, mf_params, mf_context); + procedure_executor.call_auto(mask, mf_params, mf_context); } /* Evaluate constant fields if necessary. */ @@ -415,7 +396,7 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope, MFProcedure procedure; build_multi_function_procedure_for_fields( procedure, scope, field_tree_info, constant_fields_to_evaluate); - MFProcedureExecutor procedure_executor{"Procedure", procedure}; + MFProcedureExecutor procedure_executor{procedure}; MFParamsBuilder mf_params{procedure_executor, 1}; MFContextBuilder mf_context; @@ -495,15 +476,6 @@ void evaluate_constant_field(const GField &field, void *r_value) varrays[0].get_to_uninitialized(0, r_value); } -/** - * If the field depends on some input, the same field is returned. - * Otherwise the field is evaluated and a new field is created that just computes this constant. - * - * Making the field constant has two benefits: - * - The field-tree becomes a single node, which is more efficient when the field is evaluated many - * times. - * - Memory of the input fields may be freed. - */ GField make_field_constant_if_possible(GField field) { if (field.node().depends_on_input()) { @@ -512,10 +484,16 @@ GField make_field_constant_if_possible(GField field) const CPPType &type = field.cpp_type(); BUFFER_FOR_CPP_TYPE_VALUE(type, buffer); evaluate_constant_field(field, buffer); - auto constant_fn = std::make_unique<CustomMF_GenericConstant>(type, buffer, true); + GField new_field = make_constant_field(type, buffer); type.destruct(buffer); + return new_field; +} + +GField make_constant_field(const CPPType &type, const void *value) +{ + auto constant_fn = std::make_unique<CustomMF_GenericConstant>(type, value, true); auto operation = std::make_shared<FieldOperation>(std::move(constant_fn)); - return GField{operation, 0}; + return GField{std::move(operation), 0}; } GVArray FieldContext::get_varray_for_input(const FieldInput &field_input, @@ -532,7 +510,7 @@ IndexFieldInput::IndexFieldInput() : FieldInput(CPPType::get<int>(), "Index") category_ = Category::Generated; } -GVArray IndexFieldInput::get_index_varray(IndexMask mask, ResourceScope &UNUSED(scope)) +GVArray IndexFieldInput::get_index_varray(IndexMask mask) { auto index_func = [](int i) { return i; }; return VArray<int>::ForFunc(mask.min_array_size(), index_func); @@ -540,10 +518,10 @@ GVArray IndexFieldInput::get_index_varray(IndexMask mask, ResourceScope &UNUSED( GVArray IndexFieldInput::get_varray_for_context(const fn::FieldContext &UNUSED(context), IndexMask mask, - ResourceScope &scope) const + ResourceScope &UNUSED(scope)) const { /* TODO: Investigate a similar method to IndexRange::as_span() */ - return get_index_varray(mask, scope); + return get_index_varray(mask); } uint64_t IndexFieldInput::hash() const @@ -568,28 +546,65 @@ FieldOperation::FieldOperation(std::shared_ptr<const MultiFunction> function, owned_function_ = std::move(function); } -static bool any_field_depends_on_input(Span<GField> fields) +/** + * Returns the field inputs used by all the provided fields. + * This tries to reuse an existing #FieldInputs whenever possible to avoid copying it. + */ +static std::shared_ptr<const FieldInputs> combine_field_inputs(Span<GField> fields) { + /* The #FieldInputs that we try to reuse if possible. */ + const std::shared_ptr<const FieldInputs> *field_inputs_candidate = nullptr; for (const GField &field : fields) { - if (field.node().depends_on_input()) { - return true; + const std::shared_ptr<const FieldInputs> &field_inputs = field.node().field_inputs(); + /* Only try to reuse non-empty #FieldInputs. */ + if (field_inputs && !field_inputs->nodes.is_empty()) { + if (field_inputs_candidate == nullptr) { + field_inputs_candidate = &field_inputs; + } + else if ((*field_inputs_candidate)->nodes.size() < field_inputs->nodes.size()) { + /* Always try to reuse the #FieldInputs that has the most nodes already. */ + field_inputs_candidate = &field_inputs; + } + } + } + if (field_inputs_candidate == nullptr) { + /* None of the field depends on an input. */ + return {}; + } + /* Check if all inputs are in the candidate. */ + Vector<const FieldInput *> inputs_not_in_candidate; + for (const GField &field : fields) { + const std::shared_ptr<const FieldInputs> &field_inputs = field.node().field_inputs(); + if (!field_inputs) { + continue; + } + if (&field_inputs == field_inputs_candidate) { + continue; } + for (const FieldInput *field_input : field_inputs->nodes) { + if (!(*field_inputs_candidate)->nodes.contains(field_input)) { + inputs_not_in_candidate.append(field_input); + } + } + } + if (inputs_not_in_candidate.is_empty()) { + /* The existing #FieldInputs can be reused, because no other field has additional inputs. */ + return *field_inputs_candidate; } - return false; + /* Create new #FieldInputs that contains all of the inputs that the fields depend on. */ + std::shared_ptr<FieldInputs> new_field_inputs = std::make_shared<FieldInputs>( + **field_inputs_candidate); + for (const FieldInput *field_input : inputs_not_in_candidate) { + new_field_inputs->nodes.add(field_input); + new_field_inputs->deduplicated_nodes.add(*field_input); + } + return new_field_inputs; } FieldOperation::FieldOperation(const MultiFunction &function, Vector<GField> inputs) - : FieldNode(false, any_field_depends_on_input(inputs)), - function_(&function), - inputs_(std::move(inputs)) + : FieldNode(false), function_(&function), inputs_(std::move(inputs)) { -} - -void FieldOperation::foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const -{ - for (const GField &field : inputs_) { - field.node().foreach_field_input(foreach_fn); - } + field_inputs_ = combine_field_inputs(inputs_); } /* -------------------------------------------------------------------- @@ -597,20 +612,19 @@ void FieldOperation::foreach_field_input(FunctionRef<void(const FieldInput &)> f */ FieldInput::FieldInput(const CPPType &type, std::string debug_name) - : FieldNode(true, true), type_(&type), debug_name_(std::move(debug_name)) -{ -} - -void FieldInput::foreach_field_input(FunctionRef<void(const FieldInput &)> foreach_fn) const + : FieldNode(true), type_(&type), debug_name_(std::move(debug_name)) { - foreach_fn(*this); + std::shared_ptr<FieldInputs> field_inputs = std::make_shared<FieldInputs>(); + field_inputs->nodes.add_new(this); + field_inputs->deduplicated_nodes.add_new(*this); + field_inputs_ = std::move(field_inputs); } /* -------------------------------------------------------------------- * FieldEvaluator. */ -static Vector<int64_t> indices_from_selection(const VArray<bool> &selection) +static Vector<int64_t> indices_from_selection(IndexMask mask, const VArray<bool> &selection) { /* If the selection is just a single value, it's best to avoid calling this * function when constructing an IndexMask and use an IndexRange instead. */ @@ -619,14 +633,14 @@ static Vector<int64_t> indices_from_selection(const VArray<bool> &selection) Vector<int64_t> indices; if (selection.is_span()) { Span<bool> span = selection.get_internal_span(); - for (const int64_t i : span.index_range()) { + for (const int64_t i : mask) { if (span[i]) { indices.append(i); } } } else { - for (const int i : selection.index_range()) { + for (const int i : mask) { if (selection[i]) { indices.append(i); } @@ -667,14 +681,36 @@ int FieldEvaluator::add(GField field) return field_index; } +static IndexMask evaluate_selection(const Field<bool> &selection_field, + const FieldContext &context, + IndexMask full_mask, + ResourceScope &scope) +{ + if (selection_field) { + VArray<bool> selection = + evaluate_fields(scope, {selection_field}, full_mask, context)[0].typed<bool>(); + if (selection.is_single()) { + if (selection.get_internal_single()) { + return full_mask; + } + return IndexRange(0); + } + return scope.add_value(indices_from_selection(full_mask, selection)).as_span(); + } + return full_mask; +} + void FieldEvaluator::evaluate() { BLI_assert_msg(!is_evaluated_, "Cannot evaluate fields twice."); + + selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_); + Array<GFieldRef> fields(fields_to_evaluate_.size()); for (const int i : fields_to_evaluate_.index_range()) { fields[i] = fields_to_evaluate_[i]; } - evaluated_varrays_ = evaluate_fields(scope_, fields, mask_, context_, dst_varrays_); + evaluated_varrays_ = evaluate_fields(scope_, fields, selection_mask_, context_, dst_varrays_); BLI_assert(fields_to_evaluate_.size() == evaluated_varrays_.size()); for (const int i : fields_to_evaluate_.index_range()) { OutputPointerInfo &info = output_pointer_infos_[i]; @@ -696,7 +732,13 @@ IndexMask FieldEvaluator::get_evaluated_as_mask(const int field_index) return IndexRange(0); } - return scope_.add_value(indices_from_selection(varray)).as_span(); + return scope_.add_value(indices_from_selection(mask_, varray)).as_span(); +} + +IndexMask FieldEvaluator::get_evaluated_selection_as_mask() +{ + BLI_assert(is_evaluated_); + return selection_mask_; } } // namespace blender::fn diff --git a/source/blender/functions/intern/generic_virtual_array.cc b/source/blender/functions/intern/generic_virtual_array.cc index 1fe1c2fc229..b4180a885e7 100644 --- a/source/blender/functions/intern/generic_virtual_array.cc +++ b/source/blender/functions/intern/generic_virtual_array.cc @@ -139,107 +139,56 @@ bool GVMutableArrayImpl::try_assign_VMutableArray(void *UNUSED(varray)) const /** \name #GVArrayImpl_For_GSpan * \{ */ -GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const GSpan span) - : GVArrayImpl(span.type(), span.size()), data_(span.data()), element_size_(span.type().size()) -{ -} - -GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size) - : GVArrayImpl(type, size), element_size_(type.size()) -{ -} - -void GVArrayImpl_For_GSpan::get(const int64_t index, void *r_value) const -{ - type_->copy_assign(POINTER_OFFSET(data_, element_size_ * index), r_value); -} - -void GVArrayImpl_For_GSpan::get_to_uninitialized(const int64_t index, void *r_value) const -{ - type_->copy_construct(POINTER_OFFSET(data_, element_size_ * index), r_value); -} - -bool GVArrayImpl_For_GSpan::is_span() const -{ - return true; -} - -GSpan GVArrayImpl_For_GSpan::get_internal_span() const -{ - return GSpan(*type_, data_, size_); -} - -/** See #VArrayImpl_For_Span_final. */ -class GVArrayImpl_For_GSpan_final final : public GVArrayImpl_For_GSpan { - public: - using GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan; - - private: - bool may_have_ownership() const override - { - return false; - } -}; - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name #GVMutableArrayImpl_For_GMutableSpan - * \{ */ - -GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan(const GMutableSpan span) +GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const GMutableSpan span) : GVMutableArrayImpl(span.type(), span.size()), data_(span.data()), element_size_(span.type().size()) { } -GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan(const CPPType &type, - const int64_t size) +GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan(const CPPType &type, const int64_t size) : GVMutableArrayImpl(type, size), element_size_(type.size()) { } -void GVMutableArrayImpl_For_GMutableSpan::get(const int64_t index, void *r_value) const +void GVArrayImpl_For_GSpan::get(const int64_t index, void *r_value) const { type_->copy_assign(POINTER_OFFSET(data_, element_size_ * index), r_value); } -void GVMutableArrayImpl_For_GMutableSpan::get_to_uninitialized(const int64_t index, - void *r_value) const +void GVArrayImpl_For_GSpan::get_to_uninitialized(const int64_t index, void *r_value) const { type_->copy_construct(POINTER_OFFSET(data_, element_size_ * index), r_value); } -void GVMutableArrayImpl_For_GMutableSpan::set_by_copy(const int64_t index, const void *value) +void GVArrayImpl_For_GSpan::set_by_copy(const int64_t index, const void *value) { type_->copy_assign(value, POINTER_OFFSET(data_, element_size_ * index)); } -void GVMutableArrayImpl_For_GMutableSpan::set_by_move(const int64_t index, void *value) +void GVArrayImpl_For_GSpan::set_by_move(const int64_t index, void *value) { type_->move_construct(value, POINTER_OFFSET(data_, element_size_ * index)); } -void GVMutableArrayImpl_For_GMutableSpan::set_by_relocate(const int64_t index, void *value) +void GVArrayImpl_For_GSpan::set_by_relocate(const int64_t index, void *value) { type_->relocate_assign(value, POINTER_OFFSET(data_, element_size_ * index)); } -bool GVMutableArrayImpl_For_GMutableSpan::is_span() const +bool GVArrayImpl_For_GSpan::is_span() const { return true; } -GSpan GVMutableArrayImpl_For_GMutableSpan::get_internal_span() const +GSpan GVArrayImpl_For_GSpan::get_internal_span() const { return GSpan(*type_, data_, size_); } -class GVMutableArrayImpl_For_GMutableSpan_final final - : public GVMutableArrayImpl_For_GMutableSpan { +class GVArrayImpl_For_GSpan_final final : public GVArrayImpl_For_GSpan { public: - using GVMutableArrayImpl_For_GMutableSpan::GVMutableArrayImpl_For_GMutableSpan; + using GVArrayImpl_For_GSpan::GVArrayImpl_For_GSpan; private: bool may_have_ownership() const override @@ -337,6 +286,57 @@ class GVArrayImpl_For_SingleValue : public GVArrayImpl_For_SingleValueRef, /** \} */ /* -------------------------------------------------------------------- */ +/** \name #GVArrayImpl_For_SmallTrivialSingleValue + * \{ */ + +/** + * Contains an inline buffer that can store a single value of a trivial type. + * This avoids the allocation that would be done by #GVArrayImpl_For_SingleValue. + */ +template<int BufferSize> class GVArrayImpl_For_SmallTrivialSingleValue : public GVArrayImpl { + private: + AlignedBuffer<BufferSize, 8> buffer_; + + public: + GVArrayImpl_For_SmallTrivialSingleValue(const CPPType &type, + const int64_t size, + const void *value) + : GVArrayImpl(type, size) + { + BLI_assert(type.is_trivial()); + BLI_assert(type.alignment() <= 8); + BLI_assert(type.size() <= BufferSize); + type.copy_construct(value, &buffer_); + } + + private: + void get(const int64_t UNUSED(index), void *r_value) const override + { + this->copy_value_to(r_value); + } + void get_to_uninitialized(const int64_t UNUSED(index), void *r_value) const override + { + this->copy_value_to(r_value); + } + + bool is_single() const override + { + return true; + } + void get_internal_single(void *r_value) const override + { + this->copy_value_to(r_value); + } + + void copy_value_to(void *dst) const + { + memcpy(dst, &buffer_, type_->size()); + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name #GVArray_GSpan * \{ */ @@ -426,12 +426,14 @@ class GVArrayImpl_For_SlicedGVArray : public GVArrayImpl { protected: GVArray varray_; int64_t offset_; + IndexRange slice_; public: GVArrayImpl_For_SlicedGVArray(GVArray varray, const IndexRange slice) : GVArrayImpl(varray.type(), slice.size()), varray_(std::move(varray)), - offset_(slice.start()) + offset_(slice.start()), + slice_(slice) { BLI_assert(slice.one_after_last() <= varray_.size()); } @@ -445,6 +447,24 @@ class GVArrayImpl_For_SlicedGVArray : public GVArrayImpl { { varray_.get_to_uninitialized(index + offset_, r_value); } + + bool is_span() const override + { + return varray_.is_span(); + } + GSpan get_internal_span() const override + { + return varray_.get_internal_span().slice(slice_); + } + + bool is_single() const override + { + return varray_.is_single(); + } + void get_internal_single(void *r_value) const override + { + varray_.get_internal_single(r_value); + } }; /** \} */ @@ -527,49 +547,28 @@ void GVArrayCommon::move_from(GVArrayCommon &&other) noexcept other.impl_ = nullptr; } -/* Returns true when the virtual array is stored as a span internally. */ bool GVArrayCommon::is_span() const { - if (this->is_empty()) { - return true; - } return impl_->is_span(); } -/* Returns the internally used span of the virtual array. This invokes undefined behavior is the - * virtual array is not stored as a span internally. */ GSpan GVArrayCommon::get_internal_span() const { BLI_assert(this->is_span()); - if (this->is_empty()) { - return GSpan(impl_->type()); - } return impl_->get_internal_span(); } -/* Returns true when the virtual array returns the same value for every index. */ bool GVArrayCommon::is_single() const { - if (impl_->size() == 1) { - return true; - } return impl_->is_single(); } -/* Copies the value that is used for every element into `r_value`, which is expected to point to - * initialized memory. This invokes undefined behavior if the virtual array would not return the - * same value for every index. */ void GVArrayCommon::get_internal_single(void *r_value) const { BLI_assert(this->is_single()); - if (impl_->size() == 1) { - impl_->get(0, r_value); - return; - } impl_->get_internal_single(r_value); } -/* Same as `get_internal_single`, but `r_value` points to initialized memory. */ void GVArrayCommon::get_internal_single_to_uninitialized(void *r_value) const { impl_->type().default_construct(r_value); @@ -606,6 +605,9 @@ GVArray::GVArray(std::shared_ptr<const GVArrayImpl> impl) : GVArrayCommon(std::m GVArray GVArray::ForSingle(const CPPType &type, const int64_t size, const void *value) { + if (type.is_trivial() && type.size() <= 16 && type.alignment() <= 8) { + return GVArray::For<GVArrayImpl_For_SmallTrivialSingleValue<16>>(type, size, value); + } return GVArray::For<GVArrayImpl_For_SingleValue>(type, size, value); } @@ -621,7 +623,10 @@ GVArray GVArray::ForSingleDefault(const CPPType &type, const int64_t size) GVArray GVArray::ForSpan(GSpan span) { - return GVArray::For<GVArrayImpl_For_GSpan_final>(span); + /* Use const-cast because the underlying virtual array implementation is shared between const + * and non const data. */ + GMutableSpan mutable_span{span.type(), const_cast<void *>(span.data()), span.size()}; + return GVArray::For<GVArrayImpl_For_GSpan_final>(mutable_span); } class GVArrayImpl_For_GArray : public GVArrayImpl_For_GSpan { @@ -630,7 +635,7 @@ class GVArrayImpl_For_GArray : public GVArrayImpl_For_GSpan { public: GVArrayImpl_For_GArray(GArray<> array) - : GVArrayImpl_For_GSpan(array.as_span()), array_(std::move(array)) + : GVArrayImpl_For_GSpan(array.as_mutable_span()), array_(std::move(array)) { } }; @@ -682,7 +687,7 @@ GVMutableArray::GVMutableArray(std::shared_ptr<GVMutableArrayImpl> impl) GVMutableArray GVMutableArray::ForSpan(GMutableSpan span) { - return GVMutableArray::For<GVMutableArrayImpl_For_GMutableSpan_final>(span); + return GVMutableArray::For<GVArrayImpl_For_GSpan_final>(span); } GVMutableArray::operator GVArray() const & @@ -716,7 +721,6 @@ GVMutableArrayImpl *GVMutableArray::get_implementation() const return this->get_impl(); } -/* Copy the values from the source buffer to all elements in the virtual array. */ void GVMutableArray::set_all(const void *src) { this->get_impl()->set_all(src); diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc index 43eacdcd2a1..837ccc4f4fb 100644 --- a/source/blender/functions/intern/multi_function.cc +++ b/source/blender/functions/intern/multi_function.cc @@ -16,6 +16,138 @@ #include "FN_multi_function.hh" +#include "BLI_task.hh" +#include "BLI_threads.h" + namespace blender::fn { +using ExecutionHints = MultiFunction::ExecutionHints; + +ExecutionHints MultiFunction::execution_hints() const +{ + return this->get_execution_hints(); +} + +ExecutionHints MultiFunction::get_execution_hints() const +{ + return ExecutionHints{}; +} + +static bool supports_threading_by_slicing_params(const MultiFunction &fn) +{ + for (const int i : fn.param_indices()) { + const MFParamType param_type = fn.param_type(i); + if (ELEM(param_type.interface_type(), + MFParamType::InterfaceType::Mutable, + MFParamType::InterfaceType::Output)) { + if (param_type.data_type().is_vector()) { + return false; + } + } + } + return true; +} + +static int64_t compute_grain_size(const ExecutionHints &hints, const IndexMask mask) +{ + int64_t grain_size = hints.min_grain_size; + if (hints.uniform_execution_time) { + const int thread_count = BLI_system_thread_count(); + /* Avoid using a small grain size even if it is not necessary. */ + const int64_t thread_based_grain_size = mask.size() / thread_count / 4; + grain_size = std::max(grain_size, thread_based_grain_size); + } + if (hints.allocates_array) { + const int64_t max_grain_size = 10000; + /* Avoid allocating many large intermediate arrays. Better process data in smaller chunks to + * keep peak memory usage lower. */ + grain_size = std::min(grain_size, max_grain_size); + } + return grain_size; +} + +void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context) const +{ + if (mask.is_empty()) { + return; + } + const ExecutionHints hints = this->execution_hints(); + const int64_t grain_size = compute_grain_size(hints, mask); + + if (mask.size() <= grain_size) { + this->call(mask, params, context); + return; + } + + const bool supports_threading = supports_threading_by_slicing_params(*this); + if (!supports_threading) { + this->call(mask, params, context); + return; + } + + threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) { + const IndexMask sliced_mask = mask.slice(sub_range); + if (!hints.allocates_array) { + /* There is no benefit to changing indices in this case. */ + this->call(sliced_mask, params, context); + return; + } + if (sliced_mask[0] < grain_size) { + /* The indices are low, no need to offset them. */ + this->call(sliced_mask, params, context); + return; + } + const int64_t input_slice_start = sliced_mask[0]; + const int64_t input_slice_size = sliced_mask.last() - input_slice_start + 1; + const IndexRange input_slice_range{input_slice_start, input_slice_size}; + + Vector<int64_t> offset_mask_indices; + const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset_mask_indices); + + MFParamsBuilder offset_params{*this, offset_mask.min_array_size()}; + + /* Slice all parameters so that for the actual function call. */ + for (const int param_index : this->param_indices()) { + const MFParamType param_type = this->param_type(param_index); + switch (param_type.category()) { + case MFParamType::SingleInput: { + const GVArray &varray = params.readonly_single_input(param_index); + offset_params.add_readonly_single_input(varray.slice(input_slice_range)); + break; + } + case MFParamType::SingleMutable: { + const GMutableSpan span = params.single_mutable(param_index); + const GMutableSpan sliced_span = span.slice(input_slice_range); + offset_params.add_single_mutable(sliced_span); + break; + } + case MFParamType::SingleOutput: { + const GMutableSpan span = params.uninitialized_single_output_if_required(param_index); + if (span.is_empty()) { + offset_params.add_ignored_single_output(); + } + else { + const GMutableSpan sliced_span = span.slice(input_slice_range); + offset_params.add_uninitialized_single_output(sliced_span); + } + break; + } + case MFParamType::VectorInput: + case MFParamType::VectorMutable: + case MFParamType::VectorOutput: { + BLI_assert_unreachable(); + break; + } + } + } + + this->call(offset_mask, offset_params, context); + }); +} + +std::string MultiFunction::debug_name() const +{ + return signature_ref_->function_name; +} + } // namespace blender::fn diff --git a/source/blender/functions/intern/multi_function_builder.cc b/source/blender/functions/intern/multi_function_builder.cc index f891f162820..24f9bbe0179 100644 --- a/source/blender/functions/intern/multi_function_builder.cc +++ b/source/blender/functions/intern/multi_function_builder.cc @@ -32,10 +32,8 @@ CustomMF_GenericConstant::CustomMF_GenericConstant(const CPPType &type, } value_ = value; - MFSignatureBuilder signature{"Constant " + type.name()}; - std::stringstream ss; - type.print_or_default(value, ss, type.name()); - signature.single_output(ss.str(), type); + MFSignatureBuilder signature{"Constant"}; + signature.single_output("Value", type); signature_ = signature.build(); this->set_signature(&signature_); } @@ -73,28 +71,11 @@ bool CustomMF_GenericConstant::equals(const MultiFunction &other) const return type_.is_equal(value_, _other->value_); } -static std::string gspan_to_string(GSpan array) -{ - const CPPType &type = array.type(); - std::stringstream ss; - ss << "["; - const int64_t max_amount = 5; - for (int64_t i : IndexRange(std::min(max_amount, array.size()))) { - type.print_or_default(array[i], ss, type.name()); - ss << ", "; - } - if (max_amount < array.size()) { - ss << "..."; - } - ss << "]"; - return ss.str(); -} - CustomMF_GenericConstantArray::CustomMF_GenericConstantArray(GSpan array) : array_(array) { const CPPType &type = array.type(); - MFSignatureBuilder signature{"Constant " + type.name() + " Vector"}; - signature.vector_output(gspan_to_string(array), type); + MFSignatureBuilder signature{"Constant Vector"}; + signature.vector_output("Value", type); signature_ = signature.build(); this->set_signature(&signature_); } @@ -109,12 +90,11 @@ void CustomMF_GenericConstantArray::call(IndexMask mask, } } -CustomMF_DefaultOutput::CustomMF_DefaultOutput(StringRef name, - Span<MFDataType> input_types, +CustomMF_DefaultOutput::CustomMF_DefaultOutput(Span<MFDataType> input_types, Span<MFDataType> output_types) : output_amount_(output_types.size()) { - MFSignatureBuilder signature{name}; + MFSignatureBuilder signature{"Default Output"}; for (MFDataType data_type : input_types) { signature.input("Input", data_type); } @@ -140,9 +120,9 @@ void CustomMF_DefaultOutput::call(IndexMask mask, MFParams params, MFContext UNU } } -CustomMF_GenericCopy::CustomMF_GenericCopy(StringRef name, MFDataType data_type) +CustomMF_GenericCopy::CustomMF_GenericCopy(MFDataType data_type) { - MFSignatureBuilder signature{name}; + MFSignatureBuilder signature{"Copy"}; signature.input("Input", data_type); signature.output("Output", data_type); signature_ = signature.build(); diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc deleted file mode 100644 index eefe647644d..00000000000 --- a/source/blender/functions/intern/multi_function_parallel.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "FN_multi_function_parallel.hh" - -#include "BLI_task.hh" - -namespace blender::fn { - -ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size) - : fn_(fn), grain_size_(grain_size) -{ - this->set_signature(&fn.signature()); - - threading_supported_ = true; - for (const int param_index : fn.param_indices()) { - const MFParamType param_type = fn.param_type(param_index); - if (param_type.data_type().category() == MFDataType::Vector) { - /* Vector parameters do not support threading yet. */ - threading_supported_ = false; - break; - } - } -} - -void ParallelMultiFunction::call(IndexMask full_mask, MFParams params, MFContext context) const -{ - if (full_mask.size() <= grain_size_ || !threading_supported_) { - fn_.call(full_mask, params, context); - return; - } - - threading::parallel_for(full_mask.index_range(), grain_size_, [&](const IndexRange mask_slice) { - Vector<int64_t> sub_mask_indices; - const IndexMask sub_mask = full_mask.slice_and_offset(mask_slice, sub_mask_indices); - if (sub_mask.is_empty()) { - return; - } - const int64_t input_slice_start = full_mask[mask_slice.first()]; - const int64_t input_slice_size = full_mask[mask_slice.last()] - input_slice_start + 1; - const IndexRange input_slice_range{input_slice_start, input_slice_size}; - - MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()}; - - /* All parameters are sliced so that the wrapped multi-function does not have to take care of - * the index offset. */ - for (const int param_index : fn_.param_indices()) { - const MFParamType param_type = fn_.param_type(param_index); - switch (param_type.category()) { - case MFParamType::SingleInput: { - const GVArray &varray = params.readonly_single_input(param_index); - sub_params.add_readonly_single_input(varray.slice(input_slice_range)); - break; - } - case MFParamType::SingleMutable: { - const GMutableSpan span = params.single_mutable(param_index); - const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size); - sub_params.add_single_mutable(sliced_span); - break; - } - case MFParamType::SingleOutput: { - const GMutableSpan span = params.uninitialized_single_output(param_index); - const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size); - sub_params.add_uninitialized_single_output(sliced_span); - break; - } - case MFParamType::VectorInput: - case MFParamType::VectorMutable: - case MFParamType::VectorOutput: { - BLI_assert_unreachable(); - break; - } - } - } - - fn_.call(sub_mask, sub_params, context); - }); -} - -} // namespace blender::fn diff --git a/source/blender/functions/intern/multi_function_params.cc b/source/blender/functions/intern/multi_function_params.cc new file mode 100644 index 00000000000..376c5b2deb7 --- /dev/null +++ b/source/blender/functions/intern/multi_function_params.cc @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "FN_multi_function_params.hh" + +namespace blender::fn { + +GMutableSpan MFParams::ensure_dummy_single_output(int data_index) +{ + /* Lock because we are actually modifying #builder_ and it may be used by multiple threads. */ + std::lock_guard lock{builder_->mutex_}; + + for (const std::pair<int, GMutableSpan> &items : builder_->dummy_output_spans_) { + if (items.first == data_index) { + return items.second; + } + } + + const CPPType &type = builder_->mutable_spans_[data_index].type(); + void *buffer = builder_->scope_.linear_allocator().allocate( + builder_->min_array_size_ * type.size(), type.alignment()); + if (!type.is_trivially_destructible()) { + builder_->scope_.add_destruct_call( + [&type, buffer, mask = builder_->mask_]() { type.destruct_indices(buffer, mask); }); + } + const GMutableSpan span{type, buffer, builder_->min_array_size_}; + builder_->dummy_output_spans_.append({data_index, span}); + return span; +} + +} // namespace blender::fn diff --git a/source/blender/functions/intern/multi_function_procedure.cc b/source/blender/functions/intern/multi_function_procedure.cc index 986c5dff0c4..804beb7d66f 100644 --- a/source/blender/functions/intern/multi_function_procedure.cc +++ b/source/blender/functions/intern/multi_function_procedure.cc @@ -782,7 +782,7 @@ class MFProcedureDotExport { void instruction_to_string(const MFCallInstruction &instruction, std::stringstream &ss) { const MultiFunction &fn = instruction.fn(); - this->instruction_name_format(fn.name() + ": ", ss); + this->instruction_name_format(fn.debug_name() + ": ", ss); for (const int param_index : fn.param_indices()) { const MFParamType param_type = fn.param_type(param_index); const MFVariable *variable = instruction.params()[param_index]; diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index 85d0cf4909f..47643ed22ee 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -20,13 +20,12 @@ namespace blender::fn { -MFProcedureExecutor::MFProcedureExecutor(std::string name, const MFProcedure &procedure) - : procedure_(procedure) +MFProcedureExecutor::MFProcedureExecutor(const MFProcedure &procedure) : procedure_(procedure) { - MFSignatureBuilder signature(std::move(name)); + MFSignatureBuilder signature("Procedure Executor"); for (const ConstMFParameter ¶m : procedure.params()) { - signature.add(param.variable->name(), MFParamType(param.type, param.variable->data_type())); + signature.add("Parameter", MFParamType(param.type, param.variable->data_type())); } signature_ = signature.build(); @@ -140,32 +139,36 @@ class VariableState; */ class ValueAllocator : NonCopyable, NonMovable { private: - /* Allocate with 64 byte alignment for better reusability of buffers and improved cache - * performance. */ + /** + * Allocate with 64 byte alignment for better reusability of buffers and improved cache + * performance. + */ static constexpr inline int min_alignment = 64; - /* Use stacks so that the most recently used buffers are reused first. This improves cache - * efficiency. */ - std::array<Stack<VariableValue *>, tot_variable_value_types> values_free_lists_; - /* The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are - * aligned to #min_alignment bytes. */ + /** All buffers in the free-lists below have been allocated with this allocator. */ + LinearAllocator<> &linear_allocator_; + + /** + * Use stacks so that the most recently used buffers are reused first. This improves cache + * efficiency. + */ + std::array<Stack<VariableValue *>, tot_variable_value_types> variable_value_free_lists_; + + /** + * The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are + * aligned to #min_alignment bytes. + */ Map<int, Stack<void *>> span_buffers_free_list_; - public: - ValueAllocator() = default; + /** Cache buffers for single values of different types. */ + Map<const CPPType *, Stack<void *>> single_value_free_lists_; - ~ValueAllocator() + /** The cached memory buffers can hold #VariableState values. */ + Stack<void *> variable_state_free_list_; + + public: + ValueAllocator(LinearAllocator<> &linear_allocator) : linear_allocator_(linear_allocator) { - for (Stack<VariableValue *> &stack : values_free_lists_) { - while (!stack.is_empty()) { - MEM_freeN(stack.pop()); - } - } - for (Stack<void *> &stack : span_buffers_free_list_.values()) { - while (!stack.is_empty()) { - MEM_freeN(stack.pop()); - } - } } template<typename... Args> VariableState *obtain_variable_state(Args &&...args); @@ -191,17 +194,17 @@ class ValueAllocator : NonCopyable, NonMovable { { void *buffer = nullptr; - const int element_size = type.size(); - const int alignment = type.alignment(); + const int64_t element_size = type.size(); + const int64_t alignment = type.alignment(); if (alignment > min_alignment) { /* In this rare case we fallback to not reusing existing buffers. */ - buffer = MEM_mallocN_aligned(element_size * size, alignment, __func__); + buffer = linear_allocator_.allocate(element_size * size, alignment); } else { Stack<void *> *stack = span_buffers_free_list_.lookup_ptr(element_size); if (stack == nullptr || stack->is_empty()) { - buffer = MEM_mallocN_aligned(element_size * size, min_alignment, __func__); + buffer = linear_allocator_.allocate(element_size * size, min_alignment); } else { /* Reuse existing buffer. */ @@ -225,7 +228,14 @@ class ValueAllocator : NonCopyable, NonMovable { VariableValue_OneSingle *obtain_OneSingle(const CPPType &type) { - void *buffer = MEM_mallocN_aligned(type.size(), type.alignment(), __func__); + Stack<void *> &stack = single_value_free_lists_.lookup_or_add_default(&type); + void *buffer; + if (stack.is_empty()) { + buffer = linear_allocator_.allocate(type.size(), type.alignment()); + } + else { + buffer = stack.pop(); + } return this->obtain<VariableValue_OneSingle>(buffer); } @@ -263,11 +273,11 @@ class ValueAllocator : NonCopyable, NonMovable { } case ValueType::OneSingle: { auto *value_typed = static_cast<VariableValue_OneSingle *>(value); + const CPPType &type = data_type.single_type(); if (value_typed->is_initialized) { - const CPPType &type = data_type.single_type(); type.destruct(value_typed->data); } - MEM_freeN(value_typed->data); + single_value_free_lists_.lookup_or_add_default(&type).push(value_typed->data); break; } case ValueType::OneVector: { @@ -277,7 +287,7 @@ class ValueAllocator : NonCopyable, NonMovable { } } - Stack<VariableValue *> &stack = values_free_lists_[(int)value->type]; + Stack<VariableValue *> &stack = variable_value_free_lists_[(int)value->type]; stack.push(value); } @@ -285,9 +295,9 @@ class ValueAllocator : NonCopyable, NonMovable { template<typename T, typename... Args> T *obtain(Args &&...args) { static_assert(std::is_base_of_v<VariableValue, T>); - Stack<VariableValue *> &stack = values_free_lists_[(int)T::static_type]; + Stack<VariableValue *> &stack = variable_value_free_lists_[(int)T::static_type]; if (stack.is_empty()) { - void *buffer = MEM_mallocN(sizeof(T), __func__); + void *buffer = linear_allocator_.allocate(sizeof(T), alignof(T)); return new (buffer) T(std::forward<Args>(args)...); } return new (stack.pop()) T(std::forward<Args>(args)...); @@ -670,7 +680,12 @@ class VariableState : NonCopyable, NonMovable { tot_initialized_ += mask.size(); } - void destruct(IndexMask mask, + /** + * Destruct the masked elements in this variable. + * \return True when all elements of this variable are initialized and the variable state can be + * released. + */ + bool destruct(IndexMask mask, IndexMask full_mask, const MFDataType &data_type, ValueAllocator &value_allocator) @@ -680,13 +695,12 @@ class VariableState : NonCopyable, NonMovable { /* Sanity check to make sure that enough indices can be destructed. */ BLI_assert(new_tot_initialized >= 0); + bool do_destruct_self = false; + switch (value_->type) { case ValueType::GVArray: { if (mask.size() == full_mask.size()) { - /* All elements are destructed. The elements are owned by the caller, so we don't - * actually destruct them. */ - value_allocator.release_value(value_, data_type); - value_ = value_allocator.obtain_OneSingle(data_type.single_type()); + do_destruct_self = true; } else { /* Not all elements are destructed. Since we can't work on the original array, we have to @@ -702,18 +716,13 @@ class VariableState : NonCopyable, NonMovable { const CPPType &type = data_type.single_type(); type.destruct_indices(this->value_as<VariableValue_Span>()->data, mask); if (new_tot_initialized == 0) { - /* Release span when all values are initialized. */ - value_allocator.release_value(value_, data_type); - value_ = value_allocator.obtain_OneSingle(data_type.single_type()); + do_destruct_self = true; } break; } case ValueType::GVVectorArray: { if (mask.size() == full_mask.size()) { - /* All elements are cleared. The elements are owned by the caller, so don't actually - * destruct them. */ - value_allocator.release_value(value_, data_type); - value_ = value_allocator.obtain_OneVector(data_type.vector_base_type()); + do_destruct_self = true; } else { /* Not all elements are cleared. Since we can't work on the original vector array, we @@ -732,23 +741,24 @@ class VariableState : NonCopyable, NonMovable { case ValueType::OneSingle: { auto *value_typed = this->value_as<VariableValue_OneSingle>(); BLI_assert(value_typed->is_initialized); + UNUSED_VARS_NDEBUG(value_typed); if (mask.size() == tot_initialized_) { - const CPPType &type = data_type.single_type(); - type.destruct(value_typed->data); - value_typed->is_initialized = false; + do_destruct_self = true; } break; } case ValueType::OneVector: { auto *value_typed = this->value_as<VariableValue_OneVector>(); + UNUSED_VARS(value_typed); if (mask.size() == tot_initialized_) { - value_typed->data.clear({0}); + do_destruct_self = true; } break; } } tot_initialized_ = new_tot_initialized; + return do_destruct_self; } void indices_split(IndexMask mask, IndicesSplitVectors &r_indices) @@ -802,12 +812,17 @@ class VariableState : NonCopyable, NonMovable { template<typename... Args> VariableState *ValueAllocator::obtain_variable_state(Args &&...args) { - return new VariableState(std::forward<Args>(args)...); + if (variable_state_free_list_.is_empty()) { + void *buffer = linear_allocator_.allocate(sizeof(VariableState), alignof(VariableState)); + return new (buffer) VariableState(std::forward<Args>(args)...); + } + return new (variable_state_free_list_.pop()) VariableState(std::forward<Args>(args)...); } void ValueAllocator::release_variable_state(VariableState *state) { - delete state; + state->~VariableState(); + variable_state_free_list_.push(state); } /** Keeps track of the states of all variables during evaluation. */ @@ -818,7 +833,8 @@ class VariableStates { IndexMask full_mask_; public: - VariableStates(IndexMask full_mask) : full_mask_(full_mask) + VariableStates(LinearAllocator<> &linear_allocator, IndexMask full_mask) + : value_allocator_(linear_allocator), full_mask_(full_mask) { } @@ -940,7 +956,10 @@ class VariableStates { void destruct(const MFVariable &variable, const IndexMask &mask) { VariableState &variable_state = this->get_variable_state(variable); - variable_state.destruct(mask, full_mask_, variable.data_type(), value_allocator_); + if (variable_state.destruct(mask, full_mask_, variable.data_type(), value_allocator_)) { + variable_state.destruct_self(value_allocator_, variable.data_type()); + variable_states_.remove_contained(&variable); + } } VariableState &get_variable_state(const MFVariable &variable) @@ -1046,7 +1065,7 @@ static void execute_call_instruction(const MFCallInstruction &instruction, } try { - fn.call(mask, params, context); + fn.call_auto(mask, params, context); } catch (...) { /* Multi-functions must not throw exceptions. */ @@ -1162,9 +1181,9 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c { BLI_assert(procedure_.validate()); - LinearAllocator<> allocator; + LinearAllocator<> linear_allocator; - VariableStates variable_states{full_mask}; + VariableStates variable_states{linear_allocator, full_mask}; variable_states.add_initial_variable_states(*this, procedure_, params); InstructionScheduler scheduler; @@ -1237,4 +1256,12 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c } } +MultiFunction::ExecutionHints MFProcedureExecutor::get_execution_hints() const +{ + ExecutionHints hints; + hints.allocates_array = true; + hints.min_grain_size = 10000; + return hints; +} + } // namespace blender::fn diff --git a/source/blender/functions/intern/multi_function_procedure_optimization.cc b/source/blender/functions/intern/multi_function_procedure_optimization.cc new file mode 100644 index 00000000000..f220c85e535 --- /dev/null +++ b/source/blender/functions/intern/multi_function_procedure_optimization.cc @@ -0,0 +1,90 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "FN_multi_function_procedure_optimization.hh" + +namespace blender::fn::procedure_optimization { + +void move_destructs_up(MFProcedure &procedure, MFInstruction &block_end_instr) +{ + /* A mapping from a variable to its destruct instruction. */ + Map<MFVariable *, MFDestructInstruction *> destruct_instructions; + MFInstruction *current_instr = &block_end_instr; + while (true) { + MFInstructionType instr_type = current_instr->type(); + switch (instr_type) { + case MFInstructionType::Destruct: { + MFDestructInstruction &destruct_instr = static_cast<MFDestructInstruction &>( + *current_instr); + MFVariable *variable = destruct_instr.variable(); + if (variable == nullptr) { + continue; + } + /* Remember this destruct instruction so that it can be moved up later on when the last use + * of the variable is found. */ + destruct_instructions.add(variable, &destruct_instr); + break; + } + case MFInstructionType::Call: { + MFCallInstruction &call_instr = static_cast<MFCallInstruction &>(*current_instr); + /* For each variable, place the corresponding remembered destruct instruction right after + * this call instruction. */ + for (MFVariable *variable : call_instr.params()) { + if (variable == nullptr) { + continue; + } + MFDestructInstruction *destruct_instr = destruct_instructions.pop_default(variable, + nullptr); + if (destruct_instr == nullptr) { + continue; + } + + /* Unlink destruct instruction from previous position. */ + MFInstruction *after_destruct_instr = destruct_instr->next(); + while (!destruct_instr->prev().is_empty()) { + /* Do a copy of the cursor here, because `destruct_instr->prev()` changes when + * #set_next is called below. */ + const MFInstructionCursor cursor = destruct_instr->prev()[0]; + cursor.set_next(procedure, after_destruct_instr); + } + + /* Insert destruct instruction in new position. */ + MFInstruction *next_instr = call_instr.next(); + call_instr.set_next(destruct_instr); + destruct_instr->set_next(next_instr); + } + break; + } + default: { + break; + } + } + + const Span<MFInstructionCursor> prev_cursors = current_instr->prev(); + if (prev_cursors.size() != 1) { + /* Stop when there is some branching before this instruction. */ + break; + } + const MFInstructionCursor &prev_cursor = prev_cursors[0]; + current_instr = prev_cursor.instruction(); + if (current_instr == nullptr) { + /* Stop when there is no previous instruction. E.g. when this is the first instruction. */ + break; + } + } +} + +} // namespace blender::fn::procedure_optimization diff --git a/source/blender/functions/tests/FN_field_test.cc b/source/blender/functions/tests/FN_field_test.cc index 16a6c73183d..4614f9eee58 100644 --- a/source/blender/functions/tests/FN_field_test.cc +++ b/source/blender/functions/tests/FN_field_test.cc @@ -160,9 +160,9 @@ class TwoOutputFunction : public MultiFunction { MFSignature signature_; public: - TwoOutputFunction(StringRef name) + TwoOutputFunction() { - MFSignatureBuilder signature{name}; + MFSignatureBuilder signature{"Two Outputs"}; signature.single_input<int>("In1"); signature.single_input<int>("In2"); signature.single_output<int>("Add"); @@ -190,8 +190,8 @@ TEST(field, FunctionTwoOutputs) GField index_field_1{std::make_shared<IndexFieldInput>()}; GField index_field_2{std::make_shared<IndexFieldInput>()}; - std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(FieldOperation( - std::make_unique<TwoOutputFunction>("SI_SI_SO_SO"), {index_field_1, index_field_2})); + std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>( + FieldOperation(std::make_unique<TwoOutputFunction>(), {index_field_1, index_field_2})); GField result_field_1{fn, 0}; GField result_field_2{fn, 1}; @@ -221,8 +221,8 @@ TEST(field, TwoFunctionsTwoOutputs) { GField index_field{std::make_shared<IndexFieldInput>()}; - std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>(FieldOperation( - std::make_unique<TwoOutputFunction>("SI_SI_SO_SO"), {index_field, index_field})); + std::shared_ptr<FieldOperation> fn = std::make_shared<FieldOperation>( + FieldOperation(std::make_unique<TwoOutputFunction>(), {index_field, index_field})); Array<int64_t> mask_indices = {2, 4, 6, 8}; IndexMask mask = mask_indices.as_span(); diff --git a/source/blender/functions/tests/FN_multi_function_procedure_test.cc b/source/blender/functions/tests/FN_multi_function_procedure_test.cc index a0919d7926e..e3de23550c5 100644 --- a/source/blender/functions/tests/FN_multi_function_procedure_test.cc +++ b/source/blender/functions/tests/FN_multi_function_procedure_test.cc @@ -32,7 +32,7 @@ TEST(multi_function_procedure, ConstantOutput) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor executor{"My Procedure", procedure}; + MFProcedureExecutor executor{procedure}; MFParamsBuilder params{executor, 2}; MFContextBuilder context; @@ -73,7 +73,7 @@ TEST(multi_function_procedure, SimpleTest) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor executor{"My Procedure", procedure}; + MFProcedureExecutor executor{procedure}; MFParamsBuilder params{executor, 3}; MFContextBuilder context; @@ -125,7 +125,7 @@ TEST(multi_function_procedure, BranchTest) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor procedure_fn{"Condition Test", procedure}; + MFProcedureExecutor procedure_fn{procedure}; MFParamsBuilder params(procedure_fn, 5); Array<int> values_a = {1, 5, 3, 6, 2}; @@ -167,7 +167,7 @@ TEST(multi_function_procedure, EvaluateOne) builder.add_return(); builder.add_output_parameter(*var2); - MFProcedureExecutor procedure_fn{"Evaluate One", procedure}; + MFProcedureExecutor procedure_fn{procedure}; MFParamsBuilder params{procedure_fn, 5}; Array<int> values_out = {1, 2, 3, 4, 5}; @@ -239,7 +239,7 @@ TEST(multi_function_procedure, SimpleLoop) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor procedure_fn{"Simple Loop", procedure}; + MFProcedureExecutor procedure_fn{procedure}; MFParamsBuilder params{procedure_fn, 5}; Array<int> counts = {4, 3, 7, 6, 4}; @@ -295,7 +295,7 @@ TEST(multi_function_procedure, Vectors) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor procedure_fn{"Vectors", procedure}; + MFProcedureExecutor procedure_fn{procedure}; MFParamsBuilder params{procedure_fn, 5}; Array<int> v1 = {5, 2, 3}; @@ -359,7 +359,7 @@ TEST(multi_function_procedure, BufferReuse) EXPECT_TRUE(procedure.validate()); - MFProcedureExecutor procedure_fn{"Buffer Reuse", procedure}; + MFProcedureExecutor procedure_fn{procedure}; Array<int> inputs = {4, 1, 6, 2, 3}; Array<int> results(5, -1); diff --git a/source/blender/functions/tests/FN_multi_function_test.cc b/source/blender/functions/tests/FN_multi_function_test.cc index d99993b35ac..20ca00cf598 100644 --- a/source/blender/functions/tests/FN_multi_function_test.cc +++ b/source/blender/functions/tests/FN_multi_function_test.cc @@ -264,7 +264,6 @@ TEST(multi_function, CustomMF_GenericConstant) { int value = 42; CustomMF_GenericConstant fn{CPPType::get<int32_t>(), (const void *)&value, false}; - EXPECT_EQ(fn.param_name(0), "42"); Array<int> outputs(4, 0); @@ -285,7 +284,6 @@ TEST(multi_function, CustomMF_GenericConstantArray) { std::array<int, 4> values = {3, 4, 5, 6}; CustomMF_GenericConstantArray fn{GSpan(Span(values))}; - EXPECT_EQ(fn.param_name(0), "[3, 4, 5, 6, ]"); GVectorArray vector_array{CPPType::get<int32_t>(), 4}; GVectorArray_TypedMutableRef<int> vector_array_ref{vector_array}; |