diff options
author | Jacques Lucke <jacques@blender.org> | 2022-06-18 14:32:46 +0300 |
---|---|---|
committer | Jacques Lucke <jacques@blender.org> | 2022-06-18 14:41:08 +0300 |
commit | b8bd304bd45397b8c5a5ce850c4ceb2fdefe4961 (patch) | |
tree | 72fba0495a0582f52a51576362a6b09d96dc8d06 /source/blender/nodes | |
parent | 7d030213b259420d176228f6a57af3ccc99d08f8 (diff) |
Geometry Nodes: speed up Separate XYZ node
This speeds up the node by ~20% in common cases, e.g. when only the
X axis is used. The main optimization comes from not writing to memory
that is never read afterwards.
The "optimal code" that just extracts the X axis in a separate loop was
not faster in my tests. That indicates that the node is bottlenecked by
memory bandwidth, which seems reasonable.
Diffstat (limited to 'source/blender/nodes')
-rw-r--r-- | source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc | 38 |
1 files changed, 29 insertions, 9 deletions
diff --git a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
index 94a6febe92e..d4413036555 100644
--- a/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
+++ b/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
@@ -48,16 +48,36 @@ class MF_SeparateXYZ : public fn::MultiFunction {
   void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
   {
     const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");
-    MutableSpan<float> xs = params.uninitialized_single_output<float>(1, "X");
-    MutableSpan<float> ys = params.uninitialized_single_output<float>(2, "Y");
-    MutableSpan<float> zs = params.uninitialized_single_output<float>(3, "Z");
-
-    for (int64_t i : mask) {
-      float3 xyz = vectors[i];
-      xs[i] = xyz.x;
-      ys[i] = xyz.y;
-      zs[i] = xyz.z;
+    MutableSpan<float> xs = params.uninitialized_single_output_if_required<float>(1, "X");
+    MutableSpan<float> ys = params.uninitialized_single_output_if_required<float>(2, "Y");
+    MutableSpan<float> zs = params.uninitialized_single_output_if_required<float>(3, "Z");
+
+    std::array<MutableSpan<float>, 3> outputs = {xs, ys, zs};
+    Vector<int> used_outputs;
+    if (!xs.is_empty()) {
+      used_outputs.append(0);
     }
+    if (!ys.is_empty()) {
+      used_outputs.append(1);
+    }
+    if (!zs.is_empty()) {
+      used_outputs.append(2);
+    }
+
+    devirtualize_varray(vectors, [&](auto vectors) {
+      mask.to_best_mask_type([&](auto mask) {
+        const int used_outputs_num = used_outputs.size();
+        const int *used_outputs_data = used_outputs.data();
+
+        for (const int64_t i : mask) {
+          const float3 &vector = vectors[i];
+          for (const int out_i : IndexRange(used_outputs_num)) {
+            const int coordinate = used_outputs_data[out_i];
+            outputs[coordinate][i] = vector[coordinate];
+          }
+        }
+      });
+    });
   }
 };
 