diff options
-rw-r--r-- | source/blender/blenlib/BLI_virtual_array.hh | 92 | ||||
-rw-r--r-- | source/blender/functions/FN_multi_function_builder.hh | 14 |
2 files changed, 98 insertions, 8 deletions
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh index 2f23dbac32f..3e5e5378cf4 100644 --- a/source/blender/blenlib/BLI_virtual_array.hh +++ b/source/blender/blenlib/BLI_virtual_array.hh @@ -144,8 +144,11 @@ template<typename T> class VArray { } }; -/* A virtual array implementation for a span. */ -template<typename T> class VArrayForSpan : public VArray<T> { +/** + * A virtual array implementation for a span. This class is final so that it can be devirtualized + * by the compiler in some cases (e.g. when #devirtualize_varray is used). + */ +template<typename T> class VArrayForSpan final : public VArray<T> { private: const T *data_; @@ -171,8 +174,12 @@ template<typename T> class VArrayForSpan : public VArray<T> { } }; -/* A virtual array implementation that returns the same value for every index. */ -template<typename T> class VArrayForSingle : public VArray<T> { +/** + * A virtual array implementation that returns the same value for every index. This class is final + * so that it can be devirtualized by the compiler in some cases (e.g. when #devirtualize_varray is + * used). + */ +template<typename T> class VArrayForSingle final : public VArray<T> { private: T value_; @@ -208,4 +215,81 @@ template<typename T> class VArrayForSingle : public VArray<T> { } }; +/** + * Generate multiple versions of the given function optimized for different virtual arrays. + * One has to be careful with nesting multiple devirtualizations, because that results in an + * exponential number of function instantiations (increasing compile time and binary size). + * + * Generally, this function should only be used when the virtual method call overhead to get an + * element from a virtual array is significant. + */ +template<typename T, typename Func> +inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true) +{ + /* Support disabling the devirtualization to simplify benchmarking. 
*/ + if (enable) { + if (varray.is_single()) { + /* `VArrayForSingle` can be used for devirtualization, because it is declared `final`. */ + const VArrayForSingle<T> varray_single{varray.get_single(), varray.size()}; + func(varray_single); + return; + } + if (varray.is_span()) { + /* `VArrayForSpan` can be used for devirtualization, because it is declared `final`. */ + const VArrayForSpan<T> varray_span{varray.get_span()}; + func(varray_span); + return; + } + } + func(varray); +} + +/** + * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time. + * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer + * cases. + */ +template<typename T1, typename T2, typename Func> +inline void devirtualize_varray2(const VArray<T1> &varray1, + const VArray<T2> &varray2, + const Func &func, + bool enable = true) +{ + /* Support disabling the devirtualization to simplify benchmarking. */ + if (enable) { + const bool is_span1 = varray1.is_span(); + const bool is_span2 = varray2.is_span(); + const bool is_single1 = varray1.is_single(); + const bool is_single2 = varray2.is_single(); + if (is_span1 && is_span2) { + const VArrayForSpan<T1> varray1_span{varray1.get_span()}; + const VArrayForSpan<T2> varray2_span{varray2.get_span()}; + func(varray1_span, varray2_span); + return; + } + if (is_span1 && is_single2) { + const VArrayForSpan<T1> varray1_span{varray1.get_span()}; + const VArrayForSingle<T2> varray2_single{varray2.get_single(), varray2.size()}; + func(varray1_span, varray2_single); + return; + } + if (is_single1 && is_span2) { + const VArrayForSingle<T1> varray1_single{varray1.get_single(), varray1.size()}; + const VArrayForSpan<T2> varray2_span{varray2.get_span()}; + func(varray1_single, varray2_span); + return; + } + if (is_single1 && is_single2) { + const VArrayForSingle<T1> varray1_single{varray1.get_single(), varray1.size()}; + const VArrayForSingle<T2> varray2_single{varray2.get_single(), 
varray2.size()}; + func(varray1_single, varray2_single); + return; + } + } + /* This fallback is used even when one of the inputs could be optimized. It's probably not worth + * it to optimize just one of the inputs, because then the compiler still has to call into + * unknown code, which inhibits many compiler optimizations. */ + func(varray1, varray2); +} + } // namespace blender diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh index 389d0b14bb5..95a9f52e29e 100644 --- a/source/blender/functions/FN_multi_function_builder.hh +++ b/source/blender/functions/FN_multi_function_builder.hh @@ -61,8 +61,11 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn) { return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) { - mask.foreach_index( - [&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); }); + /* Devirtualization results in a 2-3x speedup for some simple functions. */ + devirtualize_varray(in1, [&](const auto &in1) { + mask.foreach_index( + [&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); }); + }); }; } @@ -111,8 +114,11 @@ class CustomMF_SI_SI_SO : public MultiFunction { const VArray<In1> &in1, const VArray<In2> &in2, MutableSpan<Out1> out1) { - mask.foreach_index( - [&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); }); + /* Devirtualization results in a 2-3x speedup for some simple functions. */ + devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) { + mask.foreach_index( + [&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); }); + }); }; } |