From ae94e36cfb2f3bc9a99b638782092d9c71d4b3c7 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Tue, 26 Apr 2022 17:12:34 +0200 Subject: Geometry Nodes: refactor array devirtualization Goals: * Better high-level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so-called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual array implementations, access to individual elements is slower than it would be if everything was just a normal array (access goes through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting SIMD instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now that didn't before. Those got a 2-4x speedup in common cases. 
* Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628 --- source/blender/blenlib/BLI_virtual_array.hh | 71 +++-------------------------- 1 file changed, 6 insertions(+), 65 deletions(-) (limited to 'source/blender/blenlib/BLI_virtual_array.hh') diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh index 41a73b45853..7aa221f62ce 100644 --- a/source/blender/blenlib/BLI_virtual_array.hh +++ b/source/blender/blenlib/BLI_virtual_array.hh @@ -1089,6 +1089,12 @@ template class VMutableArray : public VArrayCommon { } }; +template static constexpr bool is_VArray_v = false; +template static constexpr bool is_VArray_v> = true; + +template static constexpr bool is_VMutableArray_v = false; +template static constexpr bool is_VMutableArray_v> = true; + /** * In many cases a virtual array is a span internally. In those cases, access to individual could * be much more efficient than calling a virtual method. When the underlying virtual array is not a @@ -1207,69 +1213,4 @@ template class SingleAsSpan { } }; -/** - * Generate multiple versions of the given function optimized for different virtual arrays. - * One has to be careful with nesting multiple devirtualizations, because that results in an - * exponential number of function instantiations (increasing compile time and binary size). - * - * Generally, this function should only be used when the virtual method call overhead to get an - * element from a virtual array is significant. - */ -template -inline void devirtualize_varray(const VArray &varray, const Func &func, bool enable = true) -{ - /* Support disabling the devirtualization to simplify benchmarking. 
*/ - if (enable) { - if (varray.is_single()) { - func(SingleAsSpan(varray)); - return; - } - if (varray.is_span()) { - func(varray.get_internal_span()); - return; - } - } - func(varray); -} - -/** - * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time. - * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer - * cases. - */ -template -inline void devirtualize_varray2(const VArray &varray1, - const VArray &varray2, - const Func &func, - bool enable = true) -{ - /* Support disabling the devirtualization to simplify benchmarking. */ - if (enable) { - const bool is_span1 = varray1.is_span(); - const bool is_span2 = varray2.is_span(); - const bool is_single1 = varray1.is_single(); - const bool is_single2 = varray2.is_single(); - if (is_span1 && is_span2) { - func(varray1.get_internal_span(), varray2.get_internal_span()); - return; - } - if (is_span1 && is_single2) { - func(varray1.get_internal_span(), SingleAsSpan(varray2)); - return; - } - if (is_single1 && is_span2) { - func(SingleAsSpan(varray1), varray2.get_internal_span()); - return; - } - if (is_single1 && is_single2) { - func(SingleAsSpan(varray1), SingleAsSpan(varray2)); - return; - } - } - /* This fallback is used even when one of the inputs could be optimized. It's probably not worth - * it to optimize just one of the inputs, because then the compiler still has to call into - * unknown code, which inhibits many compiler optimizations. */ - func(varray1, varray2); -} - } // namespace blender -- cgit v1.2.3