4 files changed, 439 insertions, 65 deletions
diff --git a/source/blender/blenlib/BLI_devirtualize_parameters.hh b/source/blender/blenlib/BLI_devirtualize_parameters.hh
new file mode 100644
index 00000000000..bf4f6c47cfe
--- /dev/null
+++ b/source/blender/blenlib/BLI_devirtualize_parameters.hh
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * In geometry nodes, many functions accept fields as inputs. For the implementation that means
+ * that the inputs are virtual arrays. Usually those are backed by actual arrays or single values
+ * but sometimes virtual arrays are used to compute values on demand or convert between data
+ * formats.
+ *
+ * Using virtual arrays has the downside that individual elements are accessed through a virtual
+ * method call, which has some overhead compared to normal array access. Whether this overhead is
+ * negilible depends on the context. For very small functions (e.g. a single addition), the
+ * overhead can make the function many times slower. Furthermore, it prevents the compiler from
+ * doing some optimizations (e.g. loop unrolling and inserting SIMD instructions).
+ *
+ * The solution is to "devirtualize" the virtual arrays in cases when the overhead cannot be
+ * ignored. That means that the function is instantiated multiple times at compile time for the
+ * different cases. For example, there can be an optimized function that adds a span and a single
+ * value, and another function that adds a span and another span. At run-time there is a dynamic
+ * dispatch that executes the best function given the specific virtual arrays.
+ *
+ * The problem with this devirtualization is that it can result in exponentially increasing compile
+ * times and binary sizes, depending on the number of parameters that are devirtualized separately.
+ * So there is always a trade-off between run-time performance and compile-time/binary-size.
+ *
+ * This file provides a utility to devirtualize array parameters to a function using a high level
+ * API. This makes it easy to experiment with different extremes of the mentioned trade-off and
+ * allows finding a good compromise for each function.
+ */
+
+#include "BLI_parameter_pack_utils.hh"
+#include "BLI_virtual_array.hh"
+
+namespace blender::devirtualize_parameters {
+
+/**
+ * Bit flag that specifies how an individual parameter is or can be devirtualized.
+ */
+enum class DeviMode {
+  /* This is used as zero-value to compare to, to avoid casting to int. */
+  None = 0,
+  /* Don't use devirtualization for that parameter, just pass it along. */
+  Keep = (1 << 0),
+  /* Devirtualize #Varray as #Span. */
+  Span = (1 << 1),
+  /* Devirtualize #VArray as #SingleAsSpan.  */
+  Single = (1 << 2),
+  /* Devirtualize #IndexMask as #IndexRange. */
+  Range = (1 << 3),
+};
+ENUM_OPERATORS(DeviMode, DeviMode::Range);
+
+/** Utility to encode multiple #DeviMode in a type. */
+template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;
+
+/**
+ * Main class that performs the devirtualization.
+ */
+template<typename Fn, typename... SourceTypes> class Devirtualizer {
+ private:
+  /** Utility to get the tag of the I-th source type. */
+  template<size_t I>
+  using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
+  static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);
+
+  /** Function to devirtualize. */
+  Fn fn_;
+
+  /**
+   * Source values that will be devirtualized. Note that these are stored as pointers to avoid
+   * unnecessary copies. The caller is responsible for keeping the memory alive.
+   */
+  std::tuple<const SourceTypes *...> sources_;
+
+  /** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
+  bool executed_ = false;
+
+ public:
+  Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
+  {
+  }
+
+  /**
+   * Return true when the function passed to the constructor has been called already.
+   */
+  bool executed() const
+  {
+    return executed_;
+  }
+
+  /**
+   * At compile time, generates multiple variants of the function, each optimized for a different
+   * combination of devirtualized parameters. For every parameter, a bit flag is passed that
+   * determines how it will be devirtualized. At run-time, if possible, one of the generated
+   * functions is picked and executed.
+   *
+   * To check whether the function was called successfully, call #executed() afterwards.
+   *
+   * \note This generates an exponential amount of code in the final binary, depending on how many
+   * to-be-virtualized parameters there are.
+   */
+  template<DeviMode... AllowedModes>
+  void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
+  {
+    BLI_assert(!executed_);
+    static_assert(sizeof...(AllowedModes) == SourceTypesNum);
+    return this->try_execute_devirtualized_impl(DeviModeSequence<>(),
+                                                DeviModeSequence<AllowedModes...>());
+  }
+
+  /**
+   * Execute the function and pass in the original parameters without doing any devirtualization.
+   */
+  void execute_without_devirtualization()
+  {
+    BLI_assert(!executed_);
+    this->try_execute_devirtualized_impl_call(
+        make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
+        std::make_index_sequence<SourceTypesNum>());
+  }
+
+ private:
+  /**
+   * A recursive method that generates all the combinations of devirtualized parameters that the
+   * caller requested. A recursive function is necessary to achieve generating an exponential
+   * number of function calls (which has to be used with care, but is expected here).
+   *
+   * At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
+   * extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
+   * parameters are handled.
+   */
+  template<DeviMode... Mode, DeviMode... AllowedModes>
+  void try_execute_devirtualized_impl(
+      /* Initially empty, but then extended by one element in each recursive step.  */
+      DeviModeSequence<Mode...> /* modes */,
+      /* Bit flag for every parameter. */
+      DeviModeSequence<AllowedModes...> /* allowed_modes */)
+  {
+    static_assert(SourceTypesNum == sizeof...(AllowedModes));
+    if constexpr (SourceTypesNum == sizeof...(Mode)) {
+      /* End of recursion, now call the function with the determined #DeviModes. */
+      this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
+                                                std::make_index_sequence<SourceTypesNum>());
+    }
+    else {
+      /* Index of the parameter that is checked in the current recursive step. */
+      constexpr size_t I = sizeof...(Mode);
+      /* Non-devirtualized parameter type. */
+      using SourceType = type_at_index<I>;
+      /* A bit flag indicating what devirtualizations are allowed in this step. */
+      constexpr DeviMode allowed_modes = DeviModeSequence<AllowedModes...>::template at_index<I>();
+
+      /* Handle #VArray types. */
+      if constexpr (is_VArray_v<SourceType>) {
+        /* The actual virtual array, used for dynamic dispatch at run-time. */
+        const SourceType &varray = *std::get<I>(sources_);
+        /* Check if the virtual array is a single value. */
+        if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
+          if (varray.is_single()) {
+            this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
+                                                 DeviModeSequence<AllowedModes...>());
+          }
+        }
+        /* Check if the virtual array is a span. */
+        if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+          if (varray.is_span()) {
+            this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+                                                 DeviModeSequence<AllowedModes...>());
+          }
+        }
+        /* Check if it is ok if the virtual array is not devirtualized. */
+        if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
+          this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+                                               DeviModeSequence<AllowedModes...>());
+        }
+      }
+
+      /* Handle #IndexMask. */
+      else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+        /* Check if the mask is actually a contiguous range. */
+        if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
+          /* The actual mask used for dynamic dispatch at run-time. */
+          const IndexMask &mask = *std::get<I>(sources_);
+          if (mask.is_range()) {
+            this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
+                                                 DeviModeSequence<AllowedModes...>());
+          }
+        }
+        /* Check if mask is also allowed to stay a span. */
+        if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
+          this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
+                                               DeviModeSequence<AllowedModes...>());
+        }
+      }
+
+      /* Handle unknown types. */
+      else {
+        this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
+                                             DeviModeSequence<AllowedModes...>());
+      }
+    }
+  }
+
+  /**
+   * Actually call the function with devirtualized parameters.
+   */
+  template<DeviMode... Mode, size_t... I>
+  void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
+                                           std::index_sequence<I...> /* indices */)
+  {
+
+    fn_(this->get_devirtualized_parameter<I, Mode>()...);
+    executed_ = true;
+  }
+
+  /**
+   * Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
+   * return types based on the template parameters.
+   *
+   * \note It is expected that the caller already knows that the parameter can be devirtualized
+   * with the given mode.
+   */
+  template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
+  {
+    using SourceType = type_at_index<I>;
+    static_assert(Mode != DeviMode::None);
+    if constexpr (Mode == DeviMode::Keep) {
+      /* Don't change the original parameter at all. */
+      return *std::get<I>(sources_);
+    }
+    if constexpr (is_VArray_v<SourceType>) {
+      const SourceType &varray = *std::get<I>(sources_);
+      if constexpr (Mode == DeviMode::Single) {
+        /* Devirtualize virtual array as single value. */
+        return SingleAsSpan(varray);
+      }
+      else if constexpr (Mode == DeviMode::Span) {
+        /* Devirtualize virtual array as span. */
+        return varray.get_internal_span();
+      }
+    }
+    else if constexpr (std::is_same_v<IndexMask, SourceType>) {
+      const IndexMask &mask = *std::get<I>(sources_);
+      if constexpr (ELEM(Mode, DeviMode::Span)) {
+        /* Don't devirtualize mask, it's still a span. */
+        return mask;
+      }
+      else if constexpr (Mode == DeviMode::Range) {
+        /* Devirtualize the mask as range. */
+        return mask.as_range();
+      }
+    }
+  }
+};
+
+}  // namespace blender::devirtualize_parameters
+
+namespace blender {
+
+/**
+ * Generate multiple versions of the given function optimized for different virtual arrays.
+ * One has to be careful with nesting multiple devirtualizations, because that results in an
+ * exponential number of function instantiations (increasing compile time and binary size).
+ *
+ * Generally, this function should only be used when the virtual method call overhead to get an
+ * element from a virtual array is significant.
+ */
+template<typename T, typename Func>
+inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
+{
+  using namespace devirtualize_parameters;
+  if (enable) {
+    Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
+    constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+    devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
+    if (devirtualizer.executed()) {
+      return;
+    }
+  }
+  func(varray);
+}
+
+/**
+ * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
+ * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
+ * cases.
+ */
+template<typename T1, typename T2, typename Func>
+inline void devirtualize_varray2(const VArray<T1> &varray1,
+                                 const VArray<T2> &varray2,
+                                 const Func &func,
+                                 bool enable = true)
+{
+  using namespace devirtualize_parameters;
+  if (enable) {
+    Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
+    constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
+    devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
+    if (devirtualizer.executed()) {
+      return;
+    }
+  }
+  func(varray1, varray2);
+}
+
+}  // namespace blender
diff --git a/source/blender/blenlib/BLI_parameter_pack_utils.hh b/source/blender/blenlib/BLI_parameter_pack_utils.hh
new file mode 100644
index 00000000000..d1ef7bcbc65
--- /dev/null
+++ b/source/blender/blenlib/BLI_parameter_pack_utils.hh
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * C++ has a feature called "parameter packs" which allow building variadic templates.
+ * This file has some utilities to work with such parameter packs.
+ */
+
+#include <tuple>
+#include <type_traits>
+
+#include "BLI_utildefines.h"
+
+namespace blender {
+
+/**
+ * A type that encodes a specific value.
+ */
+template<typename T, T Element> struct TypeForValue {
+  static constexpr T value = Element;
+};
+
+/**
+ * A type that encodes a list of values of the same type.
+ * This is similar to #std::integer_sequence, but a bit more general. It's main purpose it to also
+ * support enums instead of just ints.
+ */
+template<typename T, T... Elements> struct ValueSequence {
+  /**
+   * Get the number of elements in the sequence.
+   */
+  static constexpr size_t size() noexcept
+  {
+    return sizeof...(Elements);
+  }
+
+  /**
+   * Get the element at a specific index.
+   */
+  template<size_t I> static constexpr T at_index()
+  {
+    static_assert(I < sizeof...(Elements));
+    return std::tuple_element_t<I, std::tuple<TypeForValue<T, Elements>...>>::value;
+  }
+
+  /**
+   * Return true if the element is in the sequence.
+   */
+  template<T Element> static constexpr bool contains()
+  {
+    return ((Element == Elements) || ...);
+  }
+};
+
+/**
+ * A type that encodes a list of types.
+ * #std::tuple can also encode a list of types, but has a much more complex implementation.
+ */
+template<typename... T> struct TypeSequence {
+  /**
+   * Get the number of types in the sequence.
+   */
+  static constexpr size_t size() noexcept
+  {
+    return sizeof...(T);
+  }
+
+  /**
+   * Get the type at a specific index.
+   */
+  template<size_t I> using at_index = std::tuple_element_t<I, std::tuple<T...>>;
+};
+
+namespace detail {
+
+template<typename T, T Value, size_t... I>
+inline ValueSequence<T, ((I == 0) ? Value : Value)...> make_value_sequence_impl(
+    std::index_sequence<I...> /* indices */)
+{
+  return {};
+}
+
+template<typename T, T Value1, T Value2, size_t... Value1Indices, size_t... I>
+inline ValueSequence<T,
+                     (ValueSequence<size_t, Value1Indices...>::template contains<I>() ? Value1 :
+                                                                                        Value2)...>
+    make_two_value_sequence_impl(ValueSequence<size_t, Value1Indices...> /* value1_indices */,
+                                 std::index_sequence<I...> /* indices */)
+{
+  return {};
+};
+
+}  // namespace detail
+
+/**
+ * Utility to create a #ValueSequence that has the same value at every index.
+ */
+template<typename T, T Value, size_t Size>
+using make_value_sequence = decltype(detail::make_value_sequence_impl<T, Value>(
+    std::make_index_sequence<Size>()));
+
+/**
+ * Utility to create a #ValueSequence that contains two different values. The indices of where the
+ * first value should be used are passed in.
+ */
+template<typename T, T Value1, T Value2, size_t Size, size_t... Value1Indices>
+using make_two_value_sequence = decltype(detail::make_two_value_sequence_impl<T, Value1, Value2>(
+    ValueSequence<size_t, Value1Indices...>(), std::make_index_sequence<Size>()));
+
+namespace parameter_pack_utils_static_tests {
+enum class MyEnum { A, B };
+static_assert(std::is_same_v<make_value_sequence<MyEnum, MyEnum::A, 3>,
+                             ValueSequence<MyEnum, MyEnum::A, MyEnum::A, MyEnum::A>>);
+static_assert(
+    std::is_same_v<make_two_value_sequence<MyEnum, MyEnum::A, MyEnum::B, 5, 1, 2>,
+                   ValueSequence<MyEnum, MyEnum::B, MyEnum::A, MyEnum::A, MyEnum::B, MyEnum::B>>);
+}  // namespace parameter_pack_utils_static_tests
+
+}  // namespace blender
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh
index 41a73b45853..7aa221f62ce 100644
--- a/source/blender/blenlib/BLI_virtual_array.hh
+++ b/source/blender/blenlib/BLI_virtual_array.hh
@@ -1089,6 +1089,12 @@ template<typename T> class VMutableArray : public VArrayCommon<T> {
   }
 };
 
+template<typename T> static constexpr bool is_VArray_v = false;
+template<typename T> static constexpr bool is_VArray_v<VArray<T>> = true;
+
+template<typename T> static constexpr bool is_VMutableArray_v = false;
+template<typename T> static constexpr bool is_VMutableArray_v<VMutableArray<T>> = true;
+
 /**
  * In many cases a virtual array is a span internally. In those cases, access to individual could
  * be much more efficient than calling a virtual method. When the underlying virtual array is not a
@@ -1207,69 +1213,4 @@ template<typename T> class SingleAsSpan {
   }
 };
 
-/**
- * Generate multiple versions of the given function optimized for different virtual arrays.
- * One has to be careful with nesting multiple devirtualizations, because that results in an
- * exponential number of function instantiations (increasing compile time and binary size).
- *
- * Generally, this function should only be used when the virtual method call overhead to get an
- * element from a virtual array is significant.
- */
-template<typename T, typename Func>
-inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
-{
-  /* Support disabling the devirtualization to simplify benchmarking. */
-  if (enable) {
-    if (varray.is_single()) {
-      func(SingleAsSpan<T>(varray));
-      return;
-    }
-    if (varray.is_span()) {
-      func(varray.get_internal_span());
-      return;
-    }
-  }
-  func(varray);
-}
-
-/**
- * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
- * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
- * cases.
- */
-template<typename T1, typename T2, typename Func>
-inline void devirtualize_varray2(const VArray<T1> &varray1,
-                                 const VArray<T2> &varray2,
-                                 const Func &func,
-                                 bool enable = true)
-{
-  /* Support disabling the devirtualization to simplify benchmarking. */
-  if (enable) {
-    const bool is_span1 = varray1.is_span();
-    const bool is_span2 = varray2.is_span();
-    const bool is_single1 = varray1.is_single();
-    const bool is_single2 = varray2.is_single();
-    if (is_span1 && is_span2) {
-      func(varray1.get_internal_span(), varray2.get_internal_span());
-      return;
-    }
-    if (is_span1 && is_single2) {
-      func(varray1.get_internal_span(), SingleAsSpan(varray2));
-      return;
-    }
-    if (is_single1 && is_span2) {
-      func(SingleAsSpan(varray1), varray2.get_internal_span());
-      return;
-    }
-    if (is_single1 && is_single2) {
-      func(SingleAsSpan(varray1), SingleAsSpan(varray2));
-      return;
-    }
-  }
-  /* This fallback is used even when one of the inputs could be optimized. It's probably not worth
-   * it to optimize just one of the inputs, because then the compiler still has to call into
-   * unknown code, which inhibits many compiler optimizations. */
-  func(varray1, varray2);
-}
-
 }  // namespace blender
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 446a027b03b..e0f28522d6c 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -180,6 +180,7 @@ set(SRC
   BLI_cpp_type.hh
   BLI_cpp_type_make.hh
   BLI_delaunay_2d.h
+  BLI_devirtualize_parameters.hh
   BLI_dial_2d.h
   BLI_disjoint_set.hh
   BLI_dlrbTree.h
@@ -274,6 +275,7 @@ set(SRC
   BLI_noise.h
   BLI_noise.hh
   BLI_path_util.h
+  BLI_parameter_pack_utils.hh
   BLI_polyfill_2d.h
   BLI_polyfill_2d_beautify.h
   BLI_probing_strategies.hh