Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/blender/blenlib/BLI_devirtualize_parameters.hh')
-rw-r--r--source/blender/blenlib/BLI_devirtualize_parameters.hh309
1 files changed, 309 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_devirtualize_parameters.hh b/source/blender/blenlib/BLI_devirtualize_parameters.hh
new file mode 100644
index 00000000000..bf4f6c47cfe
--- /dev/null
+++ b/source/blender/blenlib/BLI_devirtualize_parameters.hh
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * In geometry nodes, many functions accept fields as inputs. For the implementation that means
+ * that the inputs are virtual arrays. Usually those are backed by actual arrays or single values
+ * but sometimes virtual arrays are used to compute values on demand or convert between data
+ * formats.
+ *
+ * Using virtual arrays has the downside that individual elements are accessed through a virtual
+ * method call, which has some overhead compared to normal array access. Whether this overhead is
+ * negilible depends on the context. For very small functions (e.g. a single addition), the
+ * overhead can make the function many times slower. Furthermore, it prevents the compiler from
+ * doing some optimizations (e.g. loop unrolling and inserting SIMD instructions).
+ *
+ * The solution is to "devirtualize" the virtual arrays in cases when the overhead cannot be
+ * ignored. That means that the function is instantiated multiple times at compile time for the
+ * different cases. For example, there can be an optimized function that adds a span and a single
+ * value, and another function that adds a span and another span. At run-time there is a dynamic
+ * dispatch that executes the best function given the specific virtual arrays.
+ *
+ * The problem with this devirtualization is that it can result in exponentially increasing compile
+ * times and binary sizes, depending on the number of parameters that are devirtualized separately.
+ * So there is always a trade-off between run-time performance and compile-time/binary-size.
+ *
+ * This file provides a utility to devirtualize array parameters to a function using a high level
+ * API. This makes it easy to experiment with different extremes of the mentioned trade-off and
+ * allows finding a good compromise for each function.
+ */
+
+#include "BLI_parameter_pack_utils.hh"
+#include "BLI_virtual_array.hh"
+
+namespace blender::devirtualize_parameters {
+
+/**
+ * Bit flag that specifies how an individual parameter is or can be devirtualized.
+ */
+enum class DeviMode {
+ /* This is used as zero-value to compare to, to avoid casting to int. */
+ None = 0,
+ /* Don't use devirtualization for that parameter, just pass it along. */
+ Keep = (1 << 0),
+ /* Devirtualize #Varray as #Span. */
+ Span = (1 << 1),
+ /* Devirtualize #VArray as #SingleAsSpan. */
+ Single = (1 << 2),
+ /* Devirtualize #IndexMask as #IndexRange. */
+ Range = (1 << 3),
+};
+ENUM_OPERATORS(DeviMode, DeviMode::Range);
+
+/** Utility to encode multiple #DeviMode in a type. */
+template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;
+
+/**
+ * Main class that performs the devirtualization.
+ */
+template<typename Fn, typename... SourceTypes> class Devirtualizer {
+ private:
+ /** Utility to get the tag of the I-th source type. */
+ template<size_t I>
+ using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
+ static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);
+
+ /** Function to devirtualize. */
+ Fn fn_;
+
+ /**
+ * Source values that will be devirtualized. Note that these are stored as pointers to avoid
+ * unnecessary copies. The caller is responsible for keeping the memory alive.
+ */
+ std::tuple<const SourceTypes *...> sources_;
+
+ /** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
+ bool executed_ = false;
+
+ public:
+ Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
+ {
+ }
+
+ /**
+ * Return true when the function passed to the constructor has been called already.
+ */
+ bool executed() const
+ {
+ return executed_;
+ }
+
+ /**
+ * At compile time, generates multiple variants of the function, each optimized for a different
+ * combination of devirtualized parameters. For every parameter, a bit flag is passed that
+ * determines how it will be devirtualized. At run-time, if possible, one of the generated
+ * functions is picked and executed.
+ *
+ * To check whether the function was called successfully, call #executed() afterwards.
+ *
+ * \note This generates an exponential amount of code in the final binary, depending on how many
+ * to-be-virtualized parameters there are.
+ */
+ template<DeviMode... AllowedModes>
+ void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
+ {
+ BLI_assert(!executed_);
+ static_assert(sizeof...(AllowedModes) == SourceTypesNum);
+ return this->try_execute_devirtualized_impl(DeviModeSequence<>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+
+ /**
+ * Execute the function and pass in the original parameters without doing any devirtualization.
+ */
+ void execute_without_devirtualization()
+ {
+ BLI_assert(!executed_);
+ this->try_execute_devirtualized_impl_call(
+ make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
+ std::make_index_sequence<SourceTypesNum>());
+ }
+
+ private:
+ /**
+ * A recursive method that generates all the combinations of devirtualized parameters that the
+ * caller requested. A recursive function is necessary to achieve generating an exponential
+ * number of function calls (which has to be used with care, but is expected here).
+ *
+ * At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
+ * extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
+ * parameters are handled.
+ */
+ template<DeviMode... Mode, DeviMode... AllowedModes>
+ void try_execute_devirtualized_impl(
+ /* Initially empty, but then extended by one element in each recursive step. */
+ DeviModeSequence<Mode...> /* modes */,
+ /* Bit flag for every parameter. */
+ DeviModeSequence<AllowedModes...> /* allowed_modes */)
+ {
+ static_assert(SourceTypesNum == sizeof...(AllowedModes));
+ if constexpr (SourceTypesNum == sizeof...(Mode)) {
+ /* End of recursion, now call the function with the determined #DeviModes. */
+ this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
+ std::make_index_sequence<SourceTypesNum>());
+ }
+ else {
+ /* Index of the parameter that is checked in the current recursive step. */
+ constexpr size_t I = sizeof...(Mode);
+ /* Non-devirtualized parameter type. */
+ using SourceType = type_at_index<I>;
+ /* A bit flag indicating what devirtualizations are allowed in this step. */
+ constexpr DeviMode allowed_modes = DeviModeSequence<AllowedModes...>::template at_index<I>();
+
+ /* Handle #VArray types. */
+ if constexpr (is_VArray_v<SourceType>) {
+ /* The actual virtual array, used for dynamic dispatch at run-time. */
+ const SourceType &varray = *std::get<I>(sources_);
+ /* Check if the virtual array is a single value. */
+ if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
+ if (varray.is_single()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if the virtual array is a span. */
+ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+ if (varray.is_span()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if it is ok if the virtual array is not devirtualized. */
+ if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+
+ /* Handle #IndexMask. */
+ else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+ /* Check if the mask is actually a contiguous range. */
+ if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
+ /* The actual mask used for dynamic dispatch at run-time. */
+ const IndexMask &mask = *std::get<I>(sources_);
+ if (mask.is_range()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if mask is also allowed to stay a span. */
+ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+
+ /* Handle unknown types. */
+ else {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ }
+
+ /**
+ * Actually call the function with devirtualized parameters.
+ */
+ template<DeviMode... Mode, size_t... I>
+ void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
+ std::index_sequence<I...> /* indices */)
+ {
+
+ fn_(this->get_devirtualized_parameter<I, Mode>()...);
+ executed_ = true;
+ }
+
+ /**
+ * Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
+ * return types based on the template parameters.
+ *
+ * \note It is expected that the caller already knows that the parameter can be devirtualized
+ * with the given mode.
+ */
+ template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
+ {
+ using SourceType = type_at_index<I>;
+ static_assert(Mode != DeviMode::None);
+ if constexpr (Mode == DeviMode::Keep) {
+ /* Don't change the original parameter at all. */
+ return *std::get<I>(sources_);
+ }
+ if constexpr (is_VArray_v<SourceType>) {
+ const SourceType &varray = *std::get<I>(sources_);
+ if constexpr (Mode == DeviMode::Single) {
+ /* Devirtualize virtual array as single value. */
+ return SingleAsSpan(varray);
+ }
+ else if constexpr (Mode == DeviMode::Span) {
+ /* Devirtualize virtual array as span. */
+ return varray.get_internal_span();
+ }
+ }
+ else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+ const IndexMask &mask = *std::get<I>(sources_);
+ if constexpr (ELEM(Mode, DeviMode::Span)) {
+ /* Don't devirtualize mask, it's still a span. */
+ return mask;
+ }
+ else if constexpr (Mode == DeviMode::Range) {
+ /* Devirtualize the mask as range. */
+ return mask.as_range();
+ }
+ }
+ }
+};
+
+} // namespace blender::devirtualize_parameters
+
+namespace blender {
+
+/**
+ * Generate multiple versions of the given function optimized for different virtual arrays.
+ * One has to be careful with nesting multiple devirtualizations, because that results in an
+ * exponential number of function instantiations (increasing compile time and binary size).
+ *
+ * Generally, this function should only be used when the virtual method call overhead to get an
+ * element from a virtual array is significant.
+ */
+template<typename T, typename Func>
+inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
+{
+ using namespace devirtualize_parameters;
+ if (enable) {
+ Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
+ constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+ devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
+ if (devirtualizer.executed()) {
+ return;
+ }
+ }
+ func(varray);
+}
+
+/**
+ * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
+ * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
+ * cases.
+ */
+template<typename T1, typename T2, typename Func>
+inline void devirtualize_varray2(const VArray<T1> &varray1,
+ const VArray<T2> &varray2,
+ const Func &func,
+ bool enable = true)
+{
+ using namespace devirtualize_parameters;
+ if (enable) {
+ Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
+ constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+ devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
+ if (devirtualizer.executed()) {
+ return;
+ }
+ }
+ func(varray1, varray2);
+}
+
+} // namespace blender