Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jacques Lucke <jacques@blender.org> 2022-04-04 12:57:39 +0300
committer: Jacques Lucke <jacques@blender.org> 2022-04-04 12:57:58 +0300
commit: ee3f71d747e3ffd5091335437d52b3ec518d7b67 (patch)
tree: c32ef6ba1260ec7864a484301083d924c6b9a696 /source/blender/functions
parent: 992d51bbcfef73bc577e3161739b1b16540dad00 (diff)
Functions: allow for better compiler optimization
This extracts the inner loops into separate functions. There are two main reasons for this:
* It allows using `__restrict` to indicate that no other parameter aliases with the output array, which allows for better optimization.
* It makes it easier to search for the generated assembly code, especially in combination with `BLI_NOINLINE`.
Diffstat (limited to 'source/blender/functions')
-rw-r--r-- source/blender/functions/FN_multi_function_builder.hh 37
1 files changed, 27 insertions, 10 deletions
diff --git a/source/blender/functions/FN_multi_function_builder.hh b/source/blender/functions/FN_multi_function_builder.hh
index 2eaada5dea0..dfdd152e62a 100644
--- a/source/blender/functions/FN_multi_function_builder.hh
+++ b/source/blender/functions/FN_multi_function_builder.hh
@@ -49,15 +49,23 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
/* Devirtualization results in a 2-3x speedup for some simple functions. */
devirtualize_varray(in1, [&](const auto &in1) {
- mask.to_best_mask_type([&](const auto &mask) {
- for (const int64_t i : mask) {
- new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i]));
- }
- });
+ mask.to_best_mask_type(
+ [&](const auto &mask) { execute_SI_SO(element_fn, mask, in1, out1.data()); });
});
};
}
+ template<typename ElementFuncT, typename MaskT, typename In1Array>
+ BLI_NOINLINE static void execute_SI_SO(const ElementFuncT &element_fn,
+ MaskT mask,
+ const In1Array &in1,
+ Out1 *__restrict r_out)
+ {
+ for (const int64_t i : mask) {
+ new (r_out + i) Out1(element_fn(in1[i]));
+ }
+ }
+
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
{
const VArray<In1> &in1 = params.readonly_single_input<In1>(0);
@@ -105,15 +113,24 @@ class CustomMF_SI_SI_SO : public MultiFunction {
MutableSpan<Out1> out1) {
/* Devirtualization results in a 2-3x speedup for some simple functions. */
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
- mask.to_best_mask_type([&](const auto &mask) {
- for (const int64_t i : mask) {
- new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i]));
- }
- });
+ mask.to_best_mask_type(
+ [&](const auto &mask) { execute_SI_SI_SO(element_fn, mask, in1, in2, out1.data()); });
});
};
}
+ template<typename ElementFuncT, typename MaskT, typename In1Array, typename In2Array>
+ BLI_NOINLINE static void execute_SI_SI_SO(const ElementFuncT &element_fn,
+ MaskT mask,
+ const In1Array &in1,
+ const In2Array &in2,
+ Out1 *__restrict r_out)
+ {
+ for (const int64_t i : mask) {
+ new (r_out + i) Out1(element_fn(in1[i], in2[i]));
+ }
+ }
+
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
{
const VArray<In1> &in1 = params.readonly_single_input<In1>(0);