/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

/** \file
 * \ingroup bli
 *
 * In geometry nodes, many functions accept fields as inputs. For the implementation that means
 * that the inputs are virtual arrays. Usually those are backed by actual arrays or single values
 * but sometimes virtual arrays are used to compute values on demand or convert between data
 * formats.
 *
 * Using virtual arrays has the downside that individual elements are accessed through a virtual
 * method call, which has some overhead compared to normal array access. Whether this overhead is
 * negligible depends on the context. For very small functions (e.g. a single addition), the
 * overhead can make the function many times slower. Furthermore, it prevents the compiler from
 * doing some optimizations (e.g. loop unrolling and inserting SIMD instructions).
 *
 * The solution is to "devirtualize" the virtual arrays in cases when the overhead cannot be
 * ignored. That means that the function is instantiated multiple times at compile time for the
 * different cases. For example, there can be an optimized function that adds a span and a single
 * value, and another function that adds a span and another span. At run-time there is a dynamic
 * dispatch that executes the best function given the specific virtual arrays.
 *
 * The problem with this devirtualization is that it can result in exponentially increasing compile
 * times and binary sizes, depending on the number of parameters that are devirtualized separately.
 * So there is always a trade-off between run-time performance and compile-time/binary-size.
 *
 * This file provides a utility to devirtualize array parameters to a function using a high level
 * API. This makes it easy to experiment with different extremes of the mentioned trade-off and
 * allows finding a good compromise for each function.
*/ #include "BLI_parameter_pack_utils.hh" #include "BLI_virtual_array.hh" namespace blender::devirtualize_parameters { /** * Bit flag that specifies how an individual parameter is or can be devirtualized. */ enum class DeviMode { /* This is used as zero-value to compare to, to avoid casting to int. */ None = 0, /* Don't use devirtualization for that parameter, just pass it along. */ Keep = (1 << 0), /* Devirtualize #Varray as #Span. */ Span = (1 << 1), /* Devirtualize #VArray as #SingleAsSpan. */ Single = (1 << 2), /* Devirtualize #IndexMask as #IndexRange. */ Range = (1 << 3), }; ENUM_OPERATORS(DeviMode, DeviMode::Range); /** Utility to encode multiple #DeviMode in a type. */ template using DeviModeSequence = ValueSequence; /** * Main class that performs the devirtualization. */ template class Devirtualizer { private: /** Utility to get the tag of the I-th source type. */ template using type_at_index = typename TypeSequence::template at_index; static constexpr size_t SourceTypesNum = sizeof...(SourceTypes); /** Function to devirtualize. */ Fn fn_; /** * Source values that will be devirtualized. Note that these are stored as pointers to avoid * unnecessary copies. The caller is responsible for keeping the memory alive. */ std::tuple sources_; /** Keeps track of whether #fn_ has been called already to avoid calling it twice. */ bool executed_ = false; public: Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...} { } /** * Return true when the function passed to the constructor has been called already. */ bool executed() const { return executed_; } /** * At compile time, generates multiple variants of the function, each optimized for a different * combination of devirtualized parameters. For every parameter, a bit flag is passed that * determines how it will be devirtualized. At run-time, if possible, one of the generated * functions is picked and executed. 
* * To check whether the function was called successfully, call #executed() afterwards. * * \note This generates an exponential amount of code in the final binary, depending on how many * to-be-virtualized parameters there are. */ template void try_execute_devirtualized(DeviModeSequence /* allowed_modes */) { BLI_assert(!executed_); static_assert(sizeof...(AllowedModes) == SourceTypesNum); this->try_execute_devirtualized_impl(DeviModeSequence<>(), DeviModeSequence()); } /** * Execute the function and pass in the original parameters without doing any devirtualization. */ void execute_without_devirtualization() { BLI_assert(!executed_); this->try_execute_devirtualized_impl_call( make_value_sequence(), std::make_index_sequence()); } private: /** * A recursive method that generates all the combinations of devirtualized parameters that the * caller requested. A recursive function is necessary to achieve generating an exponential * number of function calls (which has to be used with care, but is expected here). * * At every recursive step, the #DeviMode of one parameter is determined. This is achieved by * extending #DeviModeSequence by one element in each step. The recursion ends once all * parameters are handled. * * \return True when the function has been executed. */ template bool try_execute_devirtualized_impl( /* Initially empty, but then extended by one element in each recursive step. */ DeviModeSequence /* modes */, /* Bit flag for every parameter. */ DeviModeSequence /* allowed_modes */) { static_assert(SourceTypesNum == sizeof...(AllowedModes)); if constexpr (SourceTypesNum == sizeof...(Mode)) { /* End of recursion, now call the function with the determined #DeviModes. */ this->try_execute_devirtualized_impl_call(DeviModeSequence(), std::make_index_sequence()); return true; } else { /* Index of the parameter that is checked in the current recursive step. */ constexpr size_t I = sizeof...(Mode); /* Non-devirtualized parameter type. 
*/ using SourceType = type_at_index; /* A bit flag indicating what devirtualizations are allowed in this step. */ [[maybe_unused]] constexpr DeviMode allowed_modes = DeviModeSequence::template at_index(); /* Handle #VArray types. */ if constexpr (is_VArray_v) { /* The actual virtual array, used for dynamic dispatch at run-time. */ const SourceType &varray = *std::get(sources_); /* Check if the virtual array is a single value. */ if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) { if (varray.is_single()) { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } /* Check if the virtual array is a span. */ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) { if (varray.is_span()) { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } /* Check if it is ok if the virtual array is not devirtualized. */ if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } /* Handle #IndexMask. */ else if constexpr (std::is_same_v) { /* Check if the mask is actually a contiguous range. */ if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) { /* The actual mask used for dynamic dispatch at run-time. */ const IndexMask &mask = *std::get(sources_); if (mask.is_range()) { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } /* Check if mask is also allowed to stay a span. */ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } /* Handle unknown types. */ else { if (this->try_execute_devirtualized_impl(DeviModeSequence(), DeviModeSequence())) { return true; } } } return false; } /** * Actually call the function with devirtualized parameters. 
*/ template void try_execute_devirtualized_impl_call(DeviModeSequence /* modes */, std::index_sequence /* indices */) { BLI_assert(!executed_); fn_(this->get_devirtualized_parameter()...); executed_ = true; } /** * Return the I-th parameter devirtualized using the passed in #DeviMode. This has different * return types based on the template parameters. * * \note It is expected that the caller already knows that the parameter can be devirtualized * with the given mode. */ template decltype(auto) get_devirtualized_parameter() { using SourceType = type_at_index; static_assert(Mode != DeviMode::None); if constexpr (Mode == DeviMode::Keep) { /* Don't change the original parameter at all. */ return *std::get(sources_); } if constexpr (is_VArray_v) { const SourceType &varray = *std::get(sources_); if constexpr (Mode == DeviMode::Single) { /* Devirtualize virtual array as single value. */ return SingleAsSpan(varray); } else if constexpr (Mode == DeviMode::Span) { /* Devirtualize virtual array as span. */ return varray.get_internal_span(); } } else if constexpr (std::is_same_v) { const IndexMask &mask = *std::get(sources_); if constexpr (ELEM(Mode, DeviMode::Span)) { /* Don't devirtualize mask, it's still a span. */ return mask; } else if constexpr (Mode == DeviMode::Range) { /* Devirtualize the mask as range. */ return mask.as_range(); } } } }; } // namespace blender::devirtualize_parameters namespace blender { /** * Generate multiple versions of the given function optimized for different virtual arrays. * One has to be careful with nesting multiple devirtualizations, because that results in an * exponential number of function instantiations (increasing compile time and binary size). * * Generally, this function should only be used when the virtual method call overhead to get an * element from a virtual array is significant. 
*/ template inline void devirtualize_varray(const VArray &varray, const Func &func, bool enable = true) { using namespace devirtualize_parameters; if (enable) { Devirtualizer> devirtualizer(func, &varray); constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span; devirtualizer.try_execute_devirtualized(DeviModeSequence()); if (devirtualizer.executed()) { return; } } func(varray); } /** * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time. * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer * cases. */ template inline void devirtualize_varray2(const VArray &varray1, const VArray &varray2, const Func &func, bool enable = true) { using namespace devirtualize_parameters; if (enable) { Devirtualizer, VArray> devirtualizer(func, &varray1, &varray2); constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span; devirtualizer.try_execute_devirtualized(DeviModeSequence()); if (devirtualizer.executed()) { return; } } func(varray1, varray2); } } // namespace blender