From 6d77b87b1310eb73c86d4d117f2159937dc38218 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Sun, 22 Aug 2021 15:07:23 +0200 Subject: support span buffer reuse --- .../intern/multi_function_procedure_executor.cc | 49 ++++++++++++++++++++-- .../tests/FN_multi_function_procedure_test.cc | 49 ++++++++++++++++++++++ 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index 45d7e03a138..7337f53f293 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -132,7 +132,16 @@ class VariableState; class ValueAllocator : NonCopyable, NonMovable { private: + /* Allocate with 64 byte alignment for better reusability of buffers and improved cache + * performance. */ + static constexpr inline int min_alignment = 64; + + /* Use stacks so that the most recently used buffers are reused first. This improves cache + * efficiency. */ std::array, tot_variable_value_types> values_free_lists_; + /* The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are + * aligned to #min_alignment bytes. 
*/ + Map> span_buffers_free_list_; public: ValueAllocator() = default; @@ -144,6 +153,11 @@ class ValueAllocator : NonCopyable, NonMovable { MEM_freeN(stack.pop()); } } + for (Stack &stack : span_buffers_free_list_.values()) { + while (!stack.is_empty()) { + MEM_freeN(stack.pop()); + } + } } template VariableState *obtain_variable_state(Args &&...args); @@ -167,7 +181,25 @@ class ValueAllocator : NonCopyable, NonMovable { VariableValue_Span *obtain_Span(const CPPType &type, int size) { - void *buffer = MEM_mallocN_aligned(type.size() * size, type.alignment(), __func__); + void *buffer = nullptr; + + const int element_size = type.size(); + const int alignment = type.alignment(); + + if (alignment > min_alignment) { + /* In this rare case we fall back to not reusing existing buffers. */ + buffer = MEM_mallocN_aligned(element_size * size, alignment, __func__); + } + else { + Stack *stack = span_buffers_free_list_.lookup_ptr(element_size); + if (stack == nullptr || stack->is_empty()) { + buffer = MEM_mallocN_aligned(element_size * size, min_alignment, __func__); + } + else { + buffer = stack->pop(); + } + } + return this->obtain(buffer, true); } @@ -203,8 +235,10 @@ class ValueAllocator : NonCopyable, NonMovable { case ValueType::Span: { auto *value_typed = static_cast(value); if (value_typed->owned) { + const CPPType &type = data_type.single_type(); /* Assumes all values in the buffer are uninitialized already. */ - MEM_freeN(value_typed->data); + Stack &buffers = span_buffers_free_list_.lookup_or_add_default(type.size()); + buffers.push(value_typed->data); } break; } @@ -630,8 +664,10 @@ class VariableState : NonCopyable, NonMovable { const MFDataType &data_type, ValueAllocator &value_allocator) { + int new_tot_initialized = tot_initialized_ - mask.size(); + /* Sanity check to make sure that enough indices can be destructed. 
*/ - BLI_assert(tot_initialized_ >= mask.size()); + BLI_assert(new_tot_initialized >= 0); switch (value_->type) { case ValueType::GVArray: { @@ -654,6 +690,11 @@ class VariableState : NonCopyable, NonMovable { case ValueType::Span: { const CPPType &type = data_type.single_type(); type.destruct_indices(this->value_as()->data, mask); + if (new_tot_initialized == 0) { + /* Release span when all values are uninitialized. */ + value_allocator.release_value(value_, data_type); + value_ = value_allocator.obtain_OneSingle(data_type.single_type()); + } break; } case ValueType::GVVectorArray: { @@ -696,7 +737,7 @@ class VariableState : NonCopyable, NonMovable { } } - tot_initialized_ -= mask.size(); + tot_initialized_ = new_tot_initialized; } void indices_split(IndexMask mask, IndicesSplitVectors &r_indices) diff --git a/source/blender/functions/tests/FN_multi_function_procedure_test.cc b/source/blender/functions/tests/FN_multi_function_procedure_test.cc index de0d06b4af2..95267e3cf37 100644 --- a/source/blender/functions/tests/FN_multi_function_procedure_test.cc +++ b/source/blender/functions/tests/FN_multi_function_procedure_test.cc @@ -276,4 +276,53 @@ TEST(multi_function_procedure, Vectors) EXPECT_EQ(v3[4].size(), 35); } +TEST(multi_function_procedure, BufferReuse) +{ + /** + * procedure(int a, int *out) { + * int b = a + 10; + * int c = b + 10; + * int d = c + 10; + * int e = d + 10; + * out = e + 10; + * } + */ + + CustomMF_SI_SO add_10_fn{"add 10", [](int a) { return a + 10; }}; + + MFProcedure procedure; + MFProcedureBuilder builder{procedure}; + + MFVariable *var_a = &builder.add_single_input_parameter(); + auto [var_b] = builder.add_call<1>(add_10_fn, {var_a}); + builder.add_destruct(*var_a); + auto [var_c] = builder.add_call<1>(add_10_fn, {var_b}); + builder.add_destruct(*var_b); + auto [var_d] = builder.add_call<1>(add_10_fn, {var_c}); + builder.add_destruct(*var_c); + auto [var_e] = builder.add_call<1>(add_10_fn, {var_d}); + builder.add_destruct(*var_d); + auto 
[var_out] = builder.add_call<1>(add_10_fn, {var_e}); + builder.add_destruct(*var_e); + builder.add_output_parameter(*var_out); + + MFProcedureExecutor procedure_fn{"Buffer Reuse", procedure}; + + Array inputs = {4, 1, 6, 2, 3}; + Array results(5, -1); + + MFParamsBuilder params{procedure_fn, 5}; + params.add_readonly_single_input(inputs.as_span()); + params.add_uninitialized_single_output(results.as_mutable_span()); + + MFContextBuilder context; + procedure_fn.call({0, 2, 3, 4}, params, context); + + EXPECT_EQ(results[0], 54); + EXPECT_EQ(results[1], -1); + EXPECT_EQ(results[2], 56); + EXPECT_EQ(results[3], 52); + EXPECT_EQ(results[4], 53); +} + } // namespace blender::fn::tests -- cgit v1.2.3