diff options
author | Jacques Lucke <jacques@blender.org> | 2022-04-07 19:48:14 +0300 |
---|---|---|
committer | Jacques Lucke <jacques@blender.org> | 2022-04-07 19:48:29 +0300 |
commit | 67c42e7f034aad2564d8cde1a9901d9629527daa (patch) | |
tree | a300c9d16d0a2dd0160e31985c1d803cb53ca593 | |
parent | 8f344b530a6ed8530ceb780110006af68430c9d5 (diff) |
Functions: optimize simple generated multi-functions
This implements two optimizations:
* Reduce virtual function call overhead when a non-standard virtual
array is used as input.
* Use a lambda in `type_conversions.cc` so that the compiler can inline the conversion function.
In my test setup, which creates a float attribute filled with the index,
the running time drops from `4.0 ms` to `2.0 ms`.
Differential Revision: https://developer.blender.org/D14585
-rw-r--r-- | source/blender/blenkernel/intern/type_conversions.cc | 6 | ||||
-rw-r--r-- | source/blender/functions/FN_multi_function_builder.hh | 55 |
2 files changed, 56 insertions, 5 deletions
diff --git a/source/blender/blenkernel/intern/type_conversions.cc b/source/blender/blenkernel/intern/type_conversions.cc index d10979eeee9..aa79199d668 100644 --- a/source/blender/blenkernel/intern/type_conversions.cc +++ b/source/blender/blenkernel/intern/type_conversions.cc @@ -18,7 +18,11 @@ static void add_implicit_conversion(DataTypeConversions &conversions) static const CPPType &to_type = CPPType::get<To>(); static const std::string conversion_name = from_type.name() + " to " + to_type.name(); - static fn::CustomMF_SI_SO<From, To> multi_function{conversion_name.c_str(), ConversionF}; + static fn::CustomMF_SI_SO<From, To> multi_function{ + conversion_name.c_str(), + /* Use lambda instead of passing #ConversionF directly, because otherwise the compiler won't + inline the function. */ + [](const From &a) { return ConversionF(a); }}; static auto convert_single_to_initialized = [](const void *src, void *dst) { *(To *)dst = ConversionF(*(const From *)src); }; diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh index b041e67390c..ed587a87695 100644 --- a/source/blender/functions/FN_multi_function_builder.hh +++ b/source/blender/functions/FN_multi_function_builder.hh @@ -47,11 +47,46 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn) { return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) { - /* Devirtualization results in a 2-3x speedup for some simple functions. */ - devirtualize_varray(in1, [&](const auto &in1) { + if (in1.is_single()) { + /* Only evaluate the function once when the input is a single value. 
*/ + const In1 in1_single = in1.get_internal_single(); + const Out1 out1_single = element_fn(in1_single); + out1.fill_indices(mask, out1_single); + return; + } + + if (in1.is_span()) { + const Span<In1> in1_span = in1.get_internal_span(); mask.to_best_mask_type( - [&](const auto &mask) { execute_SI_SO(element_fn, mask, in1, out1.data()); }); - }); + [&](auto mask) { execute_SI_SO(element_fn, mask, in1_span, out1.data()); }); + return; + } + + /* The input is an unknown virtual array type. To avoid virtual function call overhead for + * every element, elements are retrieved and processed in chunks. */ + + static constexpr int64_t MaxChunkSize = 32; + TypedBuffer<In1, MaxChunkSize> in1_buffer_owner; + MutableSpan<In1> in1_buffer{in1_buffer_owner.ptr(), MaxChunkSize}; + + const int64_t mask_size = mask.size(); + for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) { + const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize); + const IndexMask sliced_mask = mask.slice(chunk_start, chunk_size); + + /* Load input from the virtual array. */ + MutableSpan<In1> in1_chunk = in1_buffer.take_front(chunk_size); + in1.materialize_compressed_to_uninitialized(sliced_mask, in1_chunk); + + if (sliced_mask.is_range()) { + execute_SI_SO( + element_fn, IndexRange(chunk_size), in1_chunk, out1.data() + sliced_mask[0]); + } + else { + execute_SI_SO_compressed(element_fn, sliced_mask, in1_chunk, out1.data()); + } + destruct_n(in1_chunk.data(), chunk_size); + } }; } @@ -66,6 +101,18 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio } } + /** Expects the input array to be "compressed", i.e. there are no gaps between the elements. 
*/ + template<typename ElementFuncT, typename MaskT, typename In1Array> + BLI_NOINLINE static void execute_SI_SO_compressed(const ElementFuncT &element_fn, + MaskT mask, + const In1Array &in1, + Out1 *__restrict r_out) + { + for (const int64_t i : IndexRange(mask.size())) { + new (r_out + mask[i]) Out1(element_fn(in1[i])); + } + } + void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override { const VArray<In1> &in1 = params.readonly_single_input<In1>(0); |