Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <jacques@blender.org>2022-06-18 14:32:46 +0300
committerJacques Lucke <jacques@blender.org>2022-06-18 14:41:08 +0300
commitb8bd304bd45397b8c5a5ce850c4ceb2fdefe4961 (patch)
tree72fba0495a0582f52a51576362a6b09d96dc8d06
parent7d030213b259420d176228f6a57af3ccc99d08f8 (diff)
Geometry Nodes: speedup Separate XYZ node
This speeds up the node ~20% in common cases, e.g. when only the X axis is used. The main optimization comes from not writing to memory that's not used afterwards anymore anyway. The "optimal code" for just extracting the x axis in a separate loop was not faster for me. That indicates that the node is bottlenecked by memory bandwidth, which seems reasonable.
-rw-r--r--source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc38
1 files changed, 29 insertions, 9 deletions
diff --git a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
index 94a6febe92e..d4413036555 100644
--- a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
+++ b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
@@ -48,16 +48,36 @@ class MF_SeparateXYZ : public fn::MultiFunction {
void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
{
const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");
- MutableSpan<float> xs = params.uninitialized_single_output<float>(1, "X");
- MutableSpan<float> ys = params.uninitialized_single_output<float>(2, "Y");
- MutableSpan<float> zs = params.uninitialized_single_output<float>(3, "Z");
-
- for (int64_t i : mask) {
- float3 xyz = vectors[i];
- xs[i] = xyz.x;
- ys[i] = xyz.y;
- zs[i] = xyz.z;
+ MutableSpan<float> xs = params.uninitialized_single_output_if_required<float>(1, "X");
+ MutableSpan<float> ys = params.uninitialized_single_output_if_required<float>(2, "Y");
+ MutableSpan<float> zs = params.uninitialized_single_output_if_required<float>(3, "Z");
+
+ std::array<MutableSpan<float>, 3> outputs = {xs, ys, zs};
+ Vector<int> used_outputs;
+ if (!xs.is_empty()) {
+ used_outputs.append(0);
}
+ if (!ys.is_empty()) {
+ used_outputs.append(1);
+ }
+ if (!zs.is_empty()) {
+ used_outputs.append(2);
+ }
+
+ devirtualize_varray(vectors, [&](auto vectors) {
+ mask.to_best_mask_type([&](auto mask) {
+ const int used_outputs_num = used_outputs.size();
+ const int *used_outputs_data = used_outputs.data();
+
+ for (const int64_t i : mask) {
+ const float3 &vector = vectors[i];
+ for (const int out_i : IndexRange(used_outputs_num)) {
+ const int coordinate = used_outputs_data[out_i];
+ outputs[coordinate][i] = vector[coordinate];
+ }
+ }
+ });
+ });
}
};