Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jacques Lucke <jacques@blender.org>, 2022-04-07 19:48:14 +0300
committer: Jacques Lucke <jacques@blender.org>, 2022-04-07 19:48:29 +0300
commit: 67c42e7f034aad2564d8cde1a9901d9629527daa (patch)
tree: a300c9d16d0a2dd0160e31985c1d803cb53ca593
parent: 8f344b530a6ed8530ceb780110006af68430c9d5 (diff)
Functions: optimize simple generated multi-functions
This implements two optimizations:
* Reduce virtual function call overhead when a non-standard virtual array is used as input.
* Use a lambda in `type_conversions.cc`.

In my test setup, which creates a float attribute filled with the index, the running time drops from `4.0 ms` to `2.0 ms`.

Differential Revision: https://developer.blender.org/D14585
-rw-r--r-- source/blender/blenkernel/intern/type_conversions.cc | 6
-rw-r--r-- source/blender/functions/FN_multi_function_builder.hh | 55
2 files changed, 56 insertions, 5 deletions
diff --git a/source/blender/blenkernel/intern/type_conversions.cc b/source/blender/blenkernel/intern/type_conversions.cc
index d10979eeee9..aa79199d668 100644
--- a/source/blender/blenkernel/intern/type_conversions.cc
+++ b/source/blender/blenkernel/intern/type_conversions.cc
@@ -18,7 +18,11 @@ static void add_implicit_conversion(DataTypeConversions &conversions)
static const CPPType &to_type = CPPType::get<To>();
static const std::string conversion_name = from_type.name() + " to " + to_type.name();
- static fn::CustomMF_SI_SO<From, To> multi_function{conversion_name.c_str(), ConversionF};
+ static fn::CustomMF_SI_SO<From, To> multi_function{
+ conversion_name.c_str(),
+ /* Use lambda instead of passing #ConversionF directly, because otherwise the compiler won't
+ inline the function. */
+ [](const From &a) { return ConversionF(a); }};
static auto convert_single_to_initialized = [](const void *src, void *dst) {
*(To *)dst = ConversionF(*(const From *)src);
};
diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh
index b041e67390c..ed587a87695 100644
--- a/source/blender/functions/FN_multi_function_builder.hh
+++ b/source/blender/functions/FN_multi_function_builder.hh
@@ -47,11 +47,46 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
{
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
- /* Devirtualization results in a 2-3x speedup for some simple functions. */
- devirtualize_varray(in1, [&](const auto &in1) {
+ if (in1.is_single()) {
+ /* Only evaluate the function once when the input is a single value. */
+ const In1 in1_single = in1.get_internal_single();
+ const Out1 out1_single = element_fn(in1_single);
+ out1.fill_indices(mask, out1_single);
+ return;
+ }
+
+ if (in1.is_span()) {
+ const Span<In1> in1_span = in1.get_internal_span();
mask.to_best_mask_type(
- [&](const auto &mask) { execute_SI_SO(element_fn, mask, in1, out1.data()); });
- });
+ [&](auto mask) { execute_SI_SO(element_fn, mask, in1_span, out1.data()); });
+ return;
+ }
+
+ /* The input is an unknown virtual array type. To avoid virtual function call overhead for
+ * every element, elements are retrieved and processed in chunks. */
+
+ static constexpr int64_t MaxChunkSize = 32;
+ TypedBuffer<In1, MaxChunkSize> in1_buffer_owner;
+ MutableSpan<In1> in1_buffer{in1_buffer_owner.ptr(), MaxChunkSize};
+
+ const int64_t mask_size = mask.size();
+ for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
+ const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize);
+ const IndexMask sliced_mask = mask.slice(chunk_start, chunk_size);
+
+ /* Load input from the virtual array. */
+ MutableSpan<In1> in1_chunk = in1_buffer.take_front(chunk_size);
+ in1.materialize_compressed_to_uninitialized(sliced_mask, in1_chunk);
+
+ if (sliced_mask.is_range()) {
+ execute_SI_SO(
+ element_fn, IndexRange(chunk_size), in1_chunk, out1.data() + sliced_mask[0]);
+ }
+ else {
+ execute_SI_SO_compressed(element_fn, sliced_mask, in1_chunk, out1.data());
+ }
+ destruct_n(in1_chunk.data(), chunk_size);
+ }
};
}
@@ -66,6 +101,18 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
}
}
+ /** Expects the input array to be "compressed", i.e. there are no gaps between the elements. */
+ template<typename ElementFuncT, typename MaskT, typename In1Array>
+ BLI_NOINLINE static void execute_SI_SO_compressed(const ElementFuncT &element_fn,
+ MaskT mask,
+ const In1Array &in1,
+ Out1 *__restrict r_out)
+ {
+ for (const int64_t i : IndexRange(mask.size())) {
+ new (r_out + mask[i]) Out1(element_fn(in1[i]));
+ }
+ }
+
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
{
const VArray<In1> &in1 = params.readonly_single_input<In1>(0);