Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <jacques@blender.org>2022-04-26 18:12:34 +0300
committerJacques Lucke <jacques@blender.org>2022-04-26 18:12:34 +0300
commitae94e36cfb2f3bc9a99b638782092d9c71d4b3c7 (patch)
treedc54dc643a2c498af1d3de97b471115607a8d3b4 /source/blender/nodes/NOD_math_functions.hh
parent9a53599180041cf9501e2ac6150c9f900a3a3fc0 (diff)
Geometry Nodes: refactor array devirtualization
Goals: * Better high-level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so-called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual array implementations, access to individual elements is slower than it would be if everything was just a normal array (access goes through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting SIMD instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now that didn't before. Those got a 2-4x speedup in common cases. * Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628
Diffstat (limited to 'source/blender/nodes/NOD_math_functions.hh')
-rw-r--r--source/blender/nodes/NOD_math_functions.hh251
1 files changed, 129 insertions, 122 deletions
diff --git a/source/blender/nodes/NOD_math_functions.hh b/source/blender/nodes/NOD_math_functions.hh
index e58c1068368..51057f600f4 100644
--- a/source/blender/nodes/NOD_math_functions.hh
+++ b/source/blender/nodes/NOD_math_functions.hh
@@ -9,6 +9,8 @@
#include "BLI_math_vector.hh"
#include "BLI_string_ref.hh"
+#include "FN_multi_function_builder.hh"
+
namespace blender::nodes {
struct FloatMathOperationInfo {
@@ -49,55 +51,58 @@ inline bool try_dispatch_float_math_fl_to_fl(const int operation, Callback &&cal
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just an utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_MATH_EXPONENT:
- return dispatch([](float a) { return expf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return expf(a); });
case NODE_MATH_SQRT:
- return dispatch([](float a) { return safe_sqrtf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return safe_sqrtf(a); });
case NODE_MATH_INV_SQRT:
- return dispatch([](float a) { return safe_inverse_sqrtf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return safe_inverse_sqrtf(a); });
case NODE_MATH_ABSOLUTE:
- return dispatch([](float a) { return fabs(a); });
+ return dispatch(exec_preset_fast, [](float a) { return fabs(a); });
case NODE_MATH_RADIANS:
- return dispatch([](float a) { return (float)DEG2RAD(a); });
+ return dispatch(exec_preset_fast, [](float a) { return (float)DEG2RAD(a); });
case NODE_MATH_DEGREES:
- return dispatch([](float a) { return (float)RAD2DEG(a); });
+ return dispatch(exec_preset_fast, [](float a) { return (float)RAD2DEG(a); });
case NODE_MATH_SIGN:
- return dispatch([](float a) { return compatible_signf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return compatible_signf(a); });
case NODE_MATH_ROUND:
- return dispatch([](float a) { return floorf(a + 0.5f); });
+ return dispatch(exec_preset_fast, [](float a) { return floorf(a + 0.5f); });
case NODE_MATH_FLOOR:
- return dispatch([](float a) { return floorf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return floorf(a); });
case NODE_MATH_CEIL:
- return dispatch([](float a) { return ceilf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return ceilf(a); });
case NODE_MATH_FRACTION:
- return dispatch([](float a) { return a - floorf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return a - floorf(a); });
case NODE_MATH_TRUNC:
- return dispatch([](float a) { return a >= 0.0f ? floorf(a) : ceilf(a); });
+ return dispatch(exec_preset_fast, [](float a) { return a >= 0.0f ? floorf(a) : ceilf(a); });
case NODE_MATH_SINE:
- return dispatch([](float a) { return sinf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return sinf(a); });
case NODE_MATH_COSINE:
- return dispatch([](float a) { return cosf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return cosf(a); });
case NODE_MATH_TANGENT:
- return dispatch([](float a) { return tanf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return tanf(a); });
case NODE_MATH_SINH:
- return dispatch([](float a) { return sinhf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return sinhf(a); });
case NODE_MATH_COSH:
- return dispatch([](float a) { return coshf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return coshf(a); });
case NODE_MATH_TANH:
- return dispatch([](float a) { return tanhf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return tanhf(a); });
case NODE_MATH_ARCSINE:
- return dispatch([](float a) { return safe_asinf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return safe_asinf(a); });
case NODE_MATH_ARCCOSINE:
- return dispatch([](float a) { return safe_acosf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return safe_acosf(a); });
case NODE_MATH_ARCTANGENT:
- return dispatch([](float a) { return atanf(a); });
+ return dispatch(exec_preset_slow, [](float a) { return atanf(a); });
}
return false;
}
@@ -113,41 +118,45 @@ inline bool try_dispatch_float_math_fl_fl_to_fl(const int operation, Callback &&
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just an utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_MATH_ADD:
- return dispatch([](float a, float b) { return a + b; });
+ return dispatch(exec_preset_fast, [](float a, float b) { return a + b; });
case NODE_MATH_SUBTRACT:
- return dispatch([](float a, float b) { return a - b; });
+ return dispatch(exec_preset_fast, [](float a, float b) { return a - b; });
case NODE_MATH_MULTIPLY:
- return dispatch([](float a, float b) { return a * b; });
+ return dispatch(exec_preset_fast, [](float a, float b) { return a * b; });
case NODE_MATH_DIVIDE:
- return dispatch([](float a, float b) { return safe_divide(a, b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return safe_divide(a, b); });
case NODE_MATH_POWER:
- return dispatch([](float a, float b) { return safe_powf(a, b); });
+ return dispatch(exec_preset_slow, [](float a, float b) { return safe_powf(a, b); });
case NODE_MATH_LOGARITHM:
- return dispatch([](float a, float b) { return safe_logf(a, b); });
+ return dispatch(exec_preset_slow, [](float a, float b) { return safe_logf(a, b); });
case NODE_MATH_MINIMUM:
- return dispatch([](float a, float b) { return std::min(a, b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return std::min(a, b); });
case NODE_MATH_MAXIMUM:
- return dispatch([](float a, float b) { return std::max(a, b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return std::max(a, b); });
case NODE_MATH_LESS_THAN:
- return dispatch([](float a, float b) { return (float)(a < b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return (float)(a < b); });
case NODE_MATH_GREATER_THAN:
- return dispatch([](float a, float b) { return (float)(a > b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return (float)(a > b); });
case NODE_MATH_MODULO:
- return dispatch([](float a, float b) { return safe_modf(a, b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return safe_modf(a, b); });
case NODE_MATH_SNAP:
- return dispatch([](float a, float b) { return floorf(safe_divide(a, b)) * b; });
+ return dispatch(exec_preset_fast,
+ [](float a, float b) { return floorf(safe_divide(a, b)) * b; });
case NODE_MATH_ARCTAN2:
- return dispatch([](float a, float b) { return atan2f(a, b); });
+ return dispatch(exec_preset_slow, [](float a, float b) { return atan2f(a, b); });
case NODE_MATH_PINGPONG:
- return dispatch([](float a, float b) { return pingpongf(a, b); });
+ return dispatch(exec_preset_fast, [](float a, float b) { return pingpongf(a, b); });
}
return false;
}
@@ -164,57 +173,29 @@ inline bool try_dispatch_float_math_fl_fl_fl_to_fl(const int operation, Callback
}
/* This is just an utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_MATH_MULTIPLY_ADD:
- return dispatch([](float a, float b, float c) { return a * b + c; });
+ return dispatch(fn::CustomMF_presets::AllSpanOrSingle(),
+ [](float a, float b, float c) { return a * b + c; });
case NODE_MATH_COMPARE:
- return dispatch([](float a, float b, float c) -> float {
- return ((a == b) || (fabsf(a - b) <= fmaxf(c, FLT_EPSILON))) ? 1.0f : 0.0f;
- });
+ return dispatch(fn::CustomMF_presets::SomeSpanOrSingle<0, 1>(),
+ [](float a, float b, float c) -> float {
+ return ((a == b) || (fabsf(a - b) <= fmaxf(c, FLT_EPSILON))) ? 1.0f : 0.0f;
+ });
case NODE_MATH_SMOOTH_MIN:
- return dispatch([](float a, float b, float c) { return smoothminf(a, b, c); });
+ return dispatch(fn::CustomMF_presets::SomeSpanOrSingle<0, 1>(),
+ [](float a, float b, float c) { return smoothminf(a, b, c); });
case NODE_MATH_SMOOTH_MAX:
- return dispatch([](float a, float b, float c) { return -smoothminf(-a, -b, c); });
+ return dispatch(fn::CustomMF_presets::SomeSpanOrSingle<0, 1>(),
+ [](float a, float b, float c) { return -smoothminf(-a, -b, c); });
case NODE_MATH_WRAP:
- return dispatch([](float a, float b, float c) { return wrapf(a, b, c); });
- }
- return false;
-}
-
-/**
- * This is similar to try_dispatch_float_math_fl_to_fl, just with a different callback signature.
- */
-template<typename Callback>
-inline bool try_dispatch_float_math_fl_fl_to_bool(const NodeCompareOperation operation,
- Callback &&callback)
-{
- const FloatMathOperationInfo *info = get_float_compare_operation_info(operation);
- if (info == nullptr) {
- return false;
- }
-
- /* This is just an utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
- return true;
- };
-
- switch (operation) {
- case NODE_COMPARE_LESS_THAN:
- return dispatch([](float a, float b) { return a < b; });
- case NODE_COMPARE_LESS_EQUAL:
- return dispatch([](float a, float b) { return a <= b; });
- case NODE_COMPARE_GREATER_THAN:
- return dispatch([](float a, float b) { return a > b; });
- case NODE_COMPARE_GREATER_EQUAL:
- return dispatch([](float a, float b) { return a >= b; });
- default:
- return false;
+ return dispatch(fn::CustomMF_presets::SomeSpanOrSingle<0>(),
+ [](float a, float b, float c) { return wrapf(a, b, c); });
}
return false;
}
@@ -233,35 +214,41 @@ inline bool try_dispatch_float_math_fl3_fl3_to_fl3(const NodeVectorMathOperation
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_ADD:
- return dispatch([](float3 a, float3 b) { return a + b; });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return a + b; });
case NODE_VECTOR_MATH_SUBTRACT:
- return dispatch([](float3 a, float3 b) { return a - b; });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return a - b; });
case NODE_VECTOR_MATH_MULTIPLY:
- return dispatch([](float3 a, float3 b) { return a * b; });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return a * b; });
case NODE_VECTOR_MATH_DIVIDE:
- return dispatch([](float3 a, float3 b) { return safe_divide(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return safe_divide(a, b); });
case NODE_VECTOR_MATH_CROSS_PRODUCT:
- return dispatch([](float3 a, float3 b) { return cross_high_precision(a, b); });
+ return dispatch(exec_preset_fast,
+ [](float3 a, float3 b) { return cross_high_precision(a, b); });
case NODE_VECTOR_MATH_PROJECT:
- return dispatch([](float3 a, float3 b) { return project(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return project(a, b); });
case NODE_VECTOR_MATH_REFLECT:
- return dispatch([](float3 a, float3 b) { return reflect(a, normalize(b)); });
+ return dispatch(exec_preset_fast,
+ [](float3 a, float3 b) { return reflect(a, normalize(b)); });
case NODE_VECTOR_MATH_SNAP:
- return dispatch([](float3 a, float3 b) { return floor(safe_divide(a, b)) * b; });
+ return dispatch(exec_preset_fast,
+ [](float3 a, float3 b) { return floor(safe_divide(a, b)) * b; });
case NODE_VECTOR_MATH_MODULO:
- return dispatch([](float3 a, float3 b) { return mod(a, b); });
+ return dispatch(exec_preset_slow, [](float3 a, float3 b) { return mod(a, b); });
case NODE_VECTOR_MATH_MINIMUM:
- return dispatch([](float3 a, float3 b) { return min(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return min(a, b); });
case NODE_VECTOR_MATH_MAXIMUM:
- return dispatch([](float3 a, float3 b) { return max(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return max(a, b); });
default:
return false;
}
@@ -282,17 +269,19 @@ inline bool try_dispatch_float_math_fl3_fl3_to_fl(const NodeVectorMathOperation
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_DOT_PRODUCT:
- return dispatch([](float3 a, float3 b) { return dot(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return dot(a, b); });
case NODE_VECTOR_MATH_DISTANCE:
- return dispatch([](float3 a, float3 b) { return distance(a, b); });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b) { return distance(a, b); });
default:
return false;
}
@@ -313,21 +302,25 @@ inline bool try_dispatch_float_math_fl3_fl3_fl3_to_fl3(const NodeVectorMathOpera
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_MULTIPLY_ADD:
- return dispatch([](float3 a, float3 b, float3 c) { return a * b + c; });
+ return dispatch(exec_preset_fast, [](float3 a, float3 b, float3 c) { return a * b + c; });
case NODE_VECTOR_MATH_WRAP:
- return dispatch([](float3 a, float3 b, float3 c) {
+ return dispatch(exec_preset_slow, [](float3 a, float3 b, float3 c) {
return float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z));
});
case NODE_VECTOR_MATH_FACEFORWARD:
- return dispatch([](float3 a, float3 b, float3 c) { return faceforward(a, b, c); });
+ return dispatch(exec_preset_fast,
+ [](float3 a, float3 b, float3 c) { return faceforward(a, b, c); });
default:
return false;
}
@@ -348,15 +341,18 @@ inline bool try_dispatch_float_math_fl3_fl3_fl_to_fl3(const NodeVectorMathOperat
return false;
}
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_REFRACT:
- return dispatch([](float3 a, float3 b, float c) { return refract(a, normalize(b), c); });
+ return dispatch(exec_preset_slow,
+ [](float3 a, float3 b, float c) { return refract(a, normalize(b), c); });
default:
return false;
}
@@ -377,15 +373,17 @@ inline bool try_dispatch_float_math_fl3_to_fl(const NodeVectorMathOperation oper
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_LENGTH:
- return dispatch([](float3 in) { return length(in); });
+ return dispatch(exec_preset_fast, [](float3 in) { return length(in); });
default:
return false;
}
@@ -404,15 +402,17 @@ inline bool try_dispatch_float_math_fl3_fl_to_fl3(const NodeVectorMathOperation
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_SCALE:
- return dispatch([](float3 a, float b) { return a * b; });
+ return dispatch(exec_preset_fast, [](float3 a, float b) { return a * b; });
default:
return false;
}
@@ -433,29 +433,36 @@ inline bool try_dispatch_float_math_fl3_to_fl3(const NodeVectorMathOperation ope
return false;
}
+ static auto exec_preset_fast = fn::CustomMF_presets::AllSpanOrSingle();
+ static auto exec_preset_slow = fn::CustomMF_presets::Materialized();
+
/* This is just a utility function to keep the individual cases smaller. */
- auto dispatch = [&](auto math_function) -> bool {
- callback(math_function, *info);
+ auto dispatch = [&](auto exec_preset, auto math_function) -> bool {
+ callback(exec_preset, math_function, *info);
return true;
};
switch (operation) {
case NODE_VECTOR_MATH_NORMALIZE:
- return dispatch([](float3 in) { return normalize(in); }); /* Should be safe. */
+ return dispatch(exec_preset_fast,
+ [](float3 in) { return normalize(in); }); /* Should be safe. */
case NODE_VECTOR_MATH_FLOOR:
- return dispatch([](float3 in) { return floor(in); });
+ return dispatch(exec_preset_fast, [](float3 in) { return floor(in); });
case NODE_VECTOR_MATH_CEIL:
- return dispatch([](float3 in) { return ceil(in); });
+ return dispatch(exec_preset_fast, [](float3 in) { return ceil(in); });
case NODE_VECTOR_MATH_FRACTION:
- return dispatch([](float3 in) { return fract(in); });
+ return dispatch(exec_preset_fast, [](float3 in) { return fract(in); });
case NODE_VECTOR_MATH_ABSOLUTE:
- return dispatch([](float3 in) { return abs(in); });
+ return dispatch(exec_preset_fast, [](float3 in) { return abs(in); });
case NODE_VECTOR_MATH_SINE:
- return dispatch([](float3 in) { return float3(sinf(in.x), sinf(in.y), sinf(in.z)); });
+ return dispatch(exec_preset_slow,
+ [](float3 in) { return float3(sinf(in.x), sinf(in.y), sinf(in.z)); });
case NODE_VECTOR_MATH_COSINE:
- return dispatch([](float3 in) { return float3(cosf(in.x), cosf(in.y), cosf(in.z)); });
+ return dispatch(exec_preset_slow,
+ [](float3 in) { return float3(cosf(in.x), cosf(in.y), cosf(in.z)); });
case NODE_VECTOR_MATH_TANGENT:
- return dispatch([](float3 in) { return float3(tanf(in.x), tanf(in.y), tanf(in.z)); });
+ return dispatch(exec_preset_slow,
+ [](float3 in) { return float3(tanf(in.x), tanf(in.y), tanf(in.z)); });
default:
return false;
}