Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r--source/blender/blenlib/BLI_devirtualize_parameters.hh309
-rw-r--r--source/blender/blenlib/BLI_parameter_pack_utils.hh122
-rw-r--r--source/blender/blenlib/BLI_virtual_array.hh71
-rw-r--r--source/blender/blenlib/CMakeLists.txt2
4 files changed, 439 insertions, 65 deletions
diff --git a/source/blender/blenlib/BLI_devirtualize_parameters.hh b/source/blender/blenlib/BLI_devirtualize_parameters.hh
new file mode 100644
index 00000000000..bf4f6c47cfe
--- /dev/null
+++ b/source/blender/blenlib/BLI_devirtualize_parameters.hh
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * In geometry nodes, many functions accept fields as inputs. For the implementation that means
+ * that the inputs are virtual arrays. Usually those are backed by actual arrays or single values
+ * but sometimes virtual arrays are used to compute values on demand or convert between data
+ * formats.
+ *
+ * Using virtual arrays has the downside that individual elements are accessed through a virtual
+ * method call, which has some overhead compared to normal array access. Whether this overhead is
+ * negilible depends on the context. For very small functions (e.g. a single addition), the
+ * overhead can make the function many times slower. Furthermore, it prevents the compiler from
+ * doing some optimizations (e.g. loop unrolling and inserting SIMD instructions).
+ *
+ * The solution is to "devirtualize" the virtual arrays in cases when the overhead cannot be
+ * ignored. That means that the function is instantiated multiple times at compile time for the
+ * different cases. For example, there can be an optimized function that adds a span and a single
+ * value, and another function that adds a span and another span. At run-time there is a dynamic
+ * dispatch that executes the best function given the specific virtual arrays.
+ *
+ * The problem with this devirtualization is that it can result in exponentially increasing compile
+ * times and binary sizes, depending on the number of parameters that are devirtualized separately.
+ * So there is always a trade-off between run-time performance and compile-time/binary-size.
+ *
+ * This file provides a utility to devirtualize array parameters to a function using a high level
+ * API. This makes it easy to experiment with different extremes of the mentioned trade-off and
+ * allows finding a good compromise for each function.
+ */
+
+#include "BLI_parameter_pack_utils.hh"
+#include "BLI_virtual_array.hh"
+
+namespace blender::devirtualize_parameters {
+
+/**
+ * Bit flag that specifies how an individual parameter is or can be devirtualized.
+ */
+enum class DeviMode {
+ /* This is used as zero-value to compare to, to avoid casting to int. */
+ None = 0,
+ /* Don't use devirtualization for that parameter, just pass it along. */
+ Keep = (1 << 0),
+ /* Devirtualize #Varray as #Span. */
+ Span = (1 << 1),
+ /* Devirtualize #VArray as #SingleAsSpan. */
+ Single = (1 << 2),
+ /* Devirtualize #IndexMask as #IndexRange. */
+ Range = (1 << 3),
+};
+ENUM_OPERATORS(DeviMode, DeviMode::Range);
+
+/** Utility to encode multiple #DeviMode in a type. */
+template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;
+
+/**
+ * Main class that performs the devirtualization.
+ */
+template<typename Fn, typename... SourceTypes> class Devirtualizer {
+ private:
+ /** Utility to get the tag of the I-th source type. */
+ template<size_t I>
+ using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
+ static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);
+
+ /** Function to devirtualize. */
+ Fn fn_;
+
+ /**
+ * Source values that will be devirtualized. Note that these are stored as pointers to avoid
+ * unnecessary copies. The caller is responsible for keeping the memory alive.
+ */
+ std::tuple<const SourceTypes *...> sources_;
+
+ /** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
+ bool executed_ = false;
+
+ public:
+ Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
+ {
+ }
+
+ /**
+ * Return true when the function passed to the constructor has been called already.
+ */
+ bool executed() const
+ {
+ return executed_;
+ }
+
+ /**
+ * At compile time, generates multiple variants of the function, each optimized for a different
+ * combination of devirtualized parameters. For every parameter, a bit flag is passed that
+ * determines how it will be devirtualized. At run-time, if possible, one of the generated
+ * functions is picked and executed.
+ *
+ * To check whether the function was called successfully, call #executed() afterwards.
+ *
+ * \note This generates an exponential amount of code in the final binary, depending on how many
+ * to-be-virtualized parameters there are.
+ */
+ template<DeviMode... AllowedModes>
+ void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
+ {
+ BLI_assert(!executed_);
+ static_assert(sizeof...(AllowedModes) == SourceTypesNum);
+ return this->try_execute_devirtualized_impl(DeviModeSequence<>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+
+ /**
+ * Execute the function and pass in the original parameters without doing any devirtualization.
+ */
+ void execute_without_devirtualization()
+ {
+ BLI_assert(!executed_);
+ this->try_execute_devirtualized_impl_call(
+ make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
+ std::make_index_sequence<SourceTypesNum>());
+ }
+
+ private:
+ /**
+ * A recursive method that generates all the combinations of devirtualized parameters that the
+ * caller requested. A recursive function is necessary to achieve generating an exponential
+ * number of function calls (which has to be used with care, but is expected here).
+ *
+ * At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
+ * extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
+ * parameters are handled.
+ */
+ template<DeviMode... Mode, DeviMode... AllowedModes>
+ void try_execute_devirtualized_impl(
+ /* Initially empty, but then extended by one element in each recursive step. */
+ DeviModeSequence<Mode...> /* modes */,
+ /* Bit flag for every parameter. */
+ DeviModeSequence<AllowedModes...> /* allowed_modes */)
+ {
+ static_assert(SourceTypesNum == sizeof...(AllowedModes));
+ if constexpr (SourceTypesNum == sizeof...(Mode)) {
+ /* End of recursion, now call the function with the determined #DeviModes. */
+ this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
+ std::make_index_sequence<SourceTypesNum>());
+ }
+ else {
+ /* Index of the parameter that is checked in the current recursive step. */
+ constexpr size_t I = sizeof...(Mode);
+ /* Non-devirtualized parameter type. */
+ using SourceType = type_at_index<I>;
+ /* A bit flag indicating what devirtualizations are allowed in this step. */
+ constexpr DeviMode allowed_modes = DeviModeSequence<AllowedModes...>::template at_index<I>();
+
+ /* Handle #VArray types. */
+ if constexpr (is_VArray_v<SourceType>) {
+ /* The actual virtual array, used for dynamic dispatch at run-time. */
+ const SourceType &varray = *std::get<I>(sources_);
+ /* Check if the virtual array is a single value. */
+ if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
+ if (varray.is_single()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if the virtual array is a span. */
+ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+ if (varray.is_span()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if it is ok if the virtual array is not devirtualized. */
+ if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+
+ /* Handle #IndexMask. */
+ else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+ /* Check if the mask is actually a contiguous range. */
+ if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
+ /* The actual mask used for dynamic dispatch at run-time. */
+ const IndexMask &mask = *std::get<I>(sources_);
+ if (mask.is_range()) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ /* Check if mask is also allowed to stay a span. */
+ if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+
+ /* Handle unknown types. */
+ else {
+ this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+ DeviModeSequence<AllowedModes...>());
+ }
+ }
+ }
+
+ /**
+ * Actually call the function with devirtualized parameters.
+ */
+ template<DeviMode... Mode, size_t... I>
+ void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
+ std::index_sequence<I...> /* indices */)
+ {
+
+ fn_(this->get_devirtualized_parameter<I, Mode>()...);
+ executed_ = true;
+ }
+
+ /**
+ * Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
+ * return types based on the template parameters.
+ *
+ * \note It is expected that the caller already knows that the parameter can be devirtualized
+ * with the given mode.
+ */
+ template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
+ {
+ using SourceType = type_at_index<I>;
+ static_assert(Mode != DeviMode::None);
+ if constexpr (Mode == DeviMode::Keep) {
+ /* Don't change the original parameter at all. */
+ return *std::get<I>(sources_);
+ }
+ if constexpr (is_VArray_v<SourceType>) {
+ const SourceType &varray = *std::get<I>(sources_);
+ if constexpr (Mode == DeviMode::Single) {
+ /* Devirtualize virtual array as single value. */
+ return SingleAsSpan(varray);
+ }
+ else if constexpr (Mode == DeviMode::Span) {
+ /* Devirtualize virtual array as span. */
+ return varray.get_internal_span();
+ }
+ }
+ else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+ const IndexMask &mask = *std::get<I>(sources_);
+ if constexpr (ELEM(Mode, DeviMode::Span)) {
+ /* Don't devirtualize mask, it's still a span. */
+ return mask;
+ }
+ else if constexpr (Mode == DeviMode::Range) {
+ /* Devirtualize the mask as range. */
+ return mask.as_range();
+ }
+ }
+ }
+};
+
+} // namespace blender::devirtualize_parameters
+
+namespace blender {
+
+/**
+ * Generate multiple versions of the given function optimized for different virtual arrays.
+ * One has to be careful with nesting multiple devirtualizations, because that results in an
+ * exponential number of function instantiations (increasing compile time and binary size).
+ *
+ * Generally, this function should only be used when the virtual method call overhead to get an
+ * element from a virtual array is significant.
+ */
+template<typename T, typename Func>
+inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
+{
+ using namespace devirtualize_parameters;
+ if (enable) {
+ Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
+ constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+ devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
+ if (devirtualizer.executed()) {
+ return;
+ }
+ }
+ func(varray);
+}
+
+/**
+ * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
+ * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
+ * cases.
+ */
+template<typename T1, typename T2, typename Func>
+inline void devirtualize_varray2(const VArray<T1> &varray1,
+ const VArray<T2> &varray2,
+ const Func &func,
+ bool enable = true)
+{
+ using namespace devirtualize_parameters;
+ if (enable) {
+ Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
+ constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+ devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
+ if (devirtualizer.executed()) {
+ return;
+ }
+ }
+ func(varray1, varray2);
+}
+
+} // namespace blender
diff --git a/source/blender/blenlib/BLI_parameter_pack_utils.hh b/source/blender/blenlib/BLI_parameter_pack_utils.hh
new file mode 100644
index 00000000000..d1ef7bcbc65
--- /dev/null
+++ b/source/blender/blenlib/BLI_parameter_pack_utils.hh
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * C++ has a feature called "parameter packs" which allow building variadic templates.
+ * This file has some utilities to work with such parameter packs.
+ */
+
+#include <tuple>
+#include <type_traits>
+
+#include "BLI_utildefines.h"
+
+namespace blender {
+
+/**
+ * A type that encodes a specific value.
+ */
+template<typename T, T Element> struct TypeForValue {
+ static constexpr T value = Element;
+};
+
+/**
+ * A type that encodes a list of values of the same type.
+ * This is similar to #std::integer_sequence, but a bit more general. It's main purpose it to also
+ * support enums instead of just ints.
+ */
+template<typename T, T... Elements> struct ValueSequence {
+ /**
+ * Get the number of elements in the sequence.
+ */
+ static constexpr size_t size() noexcept
+ {
+ return sizeof...(Elements);
+ }
+
+ /**
+ * Get the element at a specific index.
+ */
+ template<size_t I> static constexpr T at_index()
+ {
+ static_assert(I < sizeof...(Elements));
+ return std::tuple_element_t<I, std::tuple<TypeForValue<T, Elements>...>>::value;
+ }
+
+ /**
+ * Return true if the element is in the sequence.
+ */
+ template<T Element> static constexpr bool contains()
+ {
+ return ((Element == Elements) || ...);
+ }
+};
+
+/**
+ * A type that encodes a list of types.
+ * #std::tuple can also encode a list of types, but has a much more complex implementation.
+ */
+template<typename... T> struct TypeSequence {
+ /**
+ * Get the number of types in the sequence.
+ */
+ static constexpr size_t size() noexcept
+ {
+ return sizeof...(T);
+ }
+
+ /**
+ * Get the type at a specific index.
+ */
+ template<size_t I> using at_index = std::tuple_element_t<I, std::tuple<T...>>;
+};
+
+namespace detail {
+
+template<typename T, T Value, size_t... I>
+inline ValueSequence<T, ((I == 0) ? Value : Value)...> make_value_sequence_impl(
+ std::index_sequence<I...> /* indices */)
+{
+ return {};
+}
+
+template<typename T, T Value1, T Value2, size_t... Value1Indices, size_t... I>
+inline ValueSequence<T,
+ (ValueSequence<size_t, Value1Indices...>::template contains<I>() ? Value1 :
+ Value2)...>
+ make_two_value_sequence_impl(ValueSequence<size_t, Value1Indices...> /* value1_indices */,
+ std::index_sequence<I...> /* indices */)
+{
+ return {};
+};
+
+} // namespace detail
+
+/**
+ * Utility to create a #ValueSequence that has the same value at every index.
+ */
+template<typename T, T Value, size_t Size>
+using make_value_sequence = decltype(detail::make_value_sequence_impl<T, Value>(
+ std::make_index_sequence<Size>()));
+
+/**
+ * Utility to create a #ValueSequence that contains two different values. The indices of where the
+ * first value should be used are passed in.
+ */
+template<typename T, T Value1, T Value2, size_t Size, size_t... Value1Indices>
+using make_two_value_sequence = decltype(detail::make_two_value_sequence_impl<T, Value1, Value2>(
+ ValueSequence<size_t, Value1Indices...>(), std::make_index_sequence<Size>()));
+
+namespace parameter_pack_utils_static_tests {
+enum class MyEnum { A, B };
+static_assert(std::is_same_v<make_value_sequence<MyEnum, MyEnum::A, 3>,
+ ValueSequence<MyEnum, MyEnum::A, MyEnum::A, MyEnum::A>>);
+static_assert(
+ std::is_same_v<make_two_value_sequence<MyEnum, MyEnum::A, MyEnum::B, 5, 1, 2>,
+ ValueSequence<MyEnum, MyEnum::B, MyEnum::A, MyEnum::A, MyEnum::B, MyEnum::B>>);
+} // namespace parameter_pack_utils_static_tests
+
+} // namespace blender
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh
index 41a73b45853..7aa221f62ce 100644
--- a/source/blender/blenlib/BLI_virtual_array.hh
+++ b/source/blender/blenlib/BLI_virtual_array.hh
@@ -1089,6 +1089,12 @@ template<typename T> class VMutableArray : public VArrayCommon<T> {
}
};
+template<typename T> static constexpr bool is_VArray_v = false;
+template<typename T> static constexpr bool is_VArray_v<VArray<T>> = true;
+
+template<typename T> static constexpr bool is_VMutableArray_v = false;
+template<typename T> static constexpr bool is_VMutableArray_v<VMutableArray<T>> = true;
+
/**
* In many cases a virtual array is a span internally. In those cases, access to individual could
* be much more efficient than calling a virtual method. When the underlying virtual array is not a
@@ -1207,69 +1213,4 @@ template<typename T> class SingleAsSpan {
}
};
-/**
- * Generate multiple versions of the given function optimized for different virtual arrays.
- * One has to be careful with nesting multiple devirtualizations, because that results in an
- * exponential number of function instantiations (increasing compile time and binary size).
- *
- * Generally, this function should only be used when the virtual method call overhead to get an
- * element from a virtual array is significant.
- */
-template<typename T, typename Func>
-inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
-{
- /* Support disabling the devirtualization to simplify benchmarking. */
- if (enable) {
- if (varray.is_single()) {
- func(SingleAsSpan<T>(varray));
- return;
- }
- if (varray.is_span()) {
- func(varray.get_internal_span());
- return;
- }
- }
- func(varray);
-}
-
-/**
- * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
- * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
- * cases.
- */
-template<typename T1, typename T2, typename Func>
-inline void devirtualize_varray2(const VArray<T1> &varray1,
- const VArray<T2> &varray2,
- const Func &func,
- bool enable = true)
-{
- /* Support disabling the devirtualization to simplify benchmarking. */
- if (enable) {
- const bool is_span1 = varray1.is_span();
- const bool is_span2 = varray2.is_span();
- const bool is_single1 = varray1.is_single();
- const bool is_single2 = varray2.is_single();
- if (is_span1 && is_span2) {
- func(varray1.get_internal_span(), varray2.get_internal_span());
- return;
- }
- if (is_span1 && is_single2) {
- func(varray1.get_internal_span(), SingleAsSpan(varray2));
- return;
- }
- if (is_single1 && is_span2) {
- func(SingleAsSpan(varray1), varray2.get_internal_span());
- return;
- }
- if (is_single1 && is_single2) {
- func(SingleAsSpan(varray1), SingleAsSpan(varray2));
- return;
- }
- }
- /* This fallback is used even when one of the inputs could be optimized. It's probably not worth
- * it to optimize just one of the inputs, because then the compiler still has to call into
- * unknown code, which inhibits many compiler optimizations. */
- func(varray1, varray2);
-}
-
} // namespace blender
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 446a027b03b..e0f28522d6c 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -180,6 +180,7 @@ set(SRC
BLI_cpp_type.hh
BLI_cpp_type_make.hh
BLI_delaunay_2d.h
+ BLI_devirtualize_parameters.hh
BLI_dial_2d.h
BLI_disjoint_set.hh
BLI_dlrbTree.h
@@ -274,6 +275,7 @@ set(SRC
BLI_noise.h
BLI_noise.hh
BLI_path_util.h
+ BLI_parameter_pack_utils.hh
BLI_polyfill_2d.h
BLI_polyfill_2d_beautify.h
BLI_probing_strategies.hh