From 55a332da6470b504a489cce59048324a1ea6d161 Mon Sep 17 00:00:00 2001 From: Iliay Katueshenock Date: Wed, 3 Aug 2022 10:17:36 -0500 Subject: Attribute Math: Improve performance of mixer in some cases The `DefaultMixer` for mixing generic data types has some issues: 1. The full buffer is always zeroed, even if only some is used. 2. Finalizing also works on all values, even if only some are used. 3. "mixing" doesn't allow setting the first value, requiring that everything is cleared beforehand. This commit adds the following functionality: 1. Constructor with the specified `IndexMask` for preliminary zeroing. 2. `set` method to overwrite the value. 3. `finalize` with the specified mask to process a subset of values. This is useful in situations where you want to use the DefaultMixer without having to overwrite all the values many times. A performance improvement was observed for NURBS curve evaluation and attribute interpolation from the point to curve domain of about 15% and 35% respectively (100,000 curves). Differential Revision: https://developer.blender.org/D15434 --- source/blender/blenkernel/BKE_attribute_math.hh | 97 ++++++++++++++++++++-- source/blender/blenkernel/intern/attribute_math.cc | 69 ++++++++++++--- source/blender/blenkernel/intern/curve_nurbs.cc | 40 ++++----- .../blender/blenkernel/intern/curves_geometry.cc | 14 ++-- .../blender/geometry/intern/add_curves_on_mesh.cc | 3 +- 5 files changed, 181 insertions(+), 42 deletions(-) (limited to 'source/blender') diff --git a/source/blender/blenkernel/BKE_attribute_math.hh b/source/blender/blenkernel/BKE_attribute_math.hh index 01c2ef988f2..0a8e013abf2 100644 --- a/source/blender/blenkernel/BKE_attribute_math.hh +++ b/source/blender/blenkernel/BKE_attribute_math.hh @@ -188,10 +188,28 @@ template class SimpleMixer { * \param default_value: Output value for an element that has not been affected by a #mix_in. */ SimpleMixer(MutableSpan buffer, T default_value = {}) + : SimpleMixer(buffer, buffer.index_range(), default_value) + { + } + + /** + * \param mask: Only initialize these indices. Other indices in the buffer will be invalid. + */ + SimpleMixer(MutableSpan buffer, const IndexMask mask, T default_value = {}) : buffer_(buffer), default_value_(default_value), total_weights_(buffer.size(), 0.0f) { BLI_STATIC_ASSERT(std::is_trivial_v, ""); - memset(buffer_.data(), 0, sizeof(T) * buffer_.size()); + mask.foreach_index([&](const int64_t i) { buffer_[i] = default_value_; }); + } + + /** + * Set a #value into the element with the given #index. + */ + void set(const int64_t index, const T &value, const float weight = 1.0f) + { + BLI_assert(weight >= 0.0f); + buffer_[index] = value * weight; + total_weights_[index] = weight; } /** @@ -209,7 +227,12 @@ template class SimpleMixer { */ void finalize() { - for (const int64_t i : buffer_.index_range()) { + this->finalize(IndexMask(buffer_.size())); + } + + void finalize(const IndexMask mask) + { + mask.foreach_index([&](const int64_t i) { const float weight = total_weights_[i]; if (weight > 0.0f) { buffer_[i] *= 1.0f / weight; @@ -217,7 +240,7 @@ template class SimpleMixer { else { buffer_[i] = default_value_; } - } + }); } }; @@ -237,9 +260,25 @@ class BooleanPropagationMixer { /** * \param buffer: Span where the interpolated values should be stored. */ - BooleanPropagationMixer(MutableSpan buffer) : buffer_(buffer) + BooleanPropagationMixer(MutableSpan buffer) + : BooleanPropagationMixer(buffer, buffer.index_range()) + { + } + + /** + * \param mask: Only initialize these indices. Other indices in the buffer will be invalid. + */ + BooleanPropagationMixer(MutableSpan buffer, const IndexMask mask) : buffer_(buffer) + { + mask.foreach_index([&](const int64_t i) { buffer_[i] = false; }); + } + + /** + * Set a #value into the element with the given #index. + */ + void set(const int64_t index, const bool value, [[maybe_unused]] const float weight = 1.0f) { - buffer_.fill(false); + buffer_[index] = value; } /** @@ -256,6 +295,10 @@ class BooleanPropagationMixer { void finalize() { } + + void finalize(const IndexMask /*mask*/) + { + } }; /** @@ -277,8 +320,27 @@ class SimpleMixerWithAccumulationType { public: SimpleMixerWithAccumulationType(MutableSpan buffer, T default_value = {}) + : SimpleMixerWithAccumulationType(buffer, buffer.index_range(), default_value) + { + } + + /** + * \param mask: Only initialize these indices. Other indices in the buffer will be invalid. + */ + SimpleMixerWithAccumulationType(MutableSpan buffer, + const IndexMask mask, + T default_value = {}) : buffer_(buffer), default_value_(default_value), accumulation_buffer_(buffer.size()) { + mask.foreach_index([&](const int64_t index) { buffer_[index] = default_value_; }); + } + + void set(const int64_t index, const T &value, const float weight = 1.0f) + { + const AccumulationT converted_value = static_cast(value); + Item &item = accumulation_buffer_[index]; + item.value = converted_value * weight; + item.weight = weight; } void mix_in(const int64_t index, const T &value, const float weight = 1.0f) @@ -291,7 +353,12 @@ class SimpleMixerWithAccumulationType { void finalize() { - for (const int64_t i : buffer_.index_range()) { + this->finalize(buffer_.index_range()); + } + + void finalize(const IndexMask mask) + { + mask.foreach_index([&](const int64_t i) { const Item &item = accumulation_buffer_[i]; if (item.weight > 0.0f) { const float weight_inv = 1.0f / item.weight; @@ -301,7 +368,7 @@ class SimpleMixerWithAccumulationType { else { buffer_[i] = default_value_; } - } + }); } }; @@ -314,8 +381,16 @@ class ColorGeometry4fMixer { public: ColorGeometry4fMixer(MutableSpan buffer, ColorGeometry4f default_color = ColorGeometry4f(0.0f, 0.0f, 0.0f, 1.0f)); + /** + * \param mask: Only initialize these indices. Other indices in the buffer will be invalid. + */ + ColorGeometry4fMixer(MutableSpan buffer, + IndexMask mask, + ColorGeometry4f default_color = ColorGeometry4f(0.0f, 0.0f, 0.0f, 1.0f)); + void set(int64_t index, const ColorGeometry4f &color, float weight = 1.0f); void mix_in(int64_t index, const ColorGeometry4f &color, float weight = 1.0f); void finalize(); + void finalize(IndexMask mask); }; class ColorGeometry4bMixer { @@ -328,8 +403,16 @@ class ColorGeometry4bMixer { public: ColorGeometry4bMixer(MutableSpan buffer, ColorGeometry4b default_color = ColorGeometry4b(0, 0, 0, 255)); + /** + * \param mask: Only initialize these indices. Other indices in the buffer will be invalid. + */ + ColorGeometry4bMixer(MutableSpan buffer, + IndexMask mask, + ColorGeometry4b default_color = ColorGeometry4b(0, 0, 0, 255)); + void set(int64_t index, const ColorGeometry4b &color, float weight = 1.0f); void mix_in(int64_t index, const ColorGeometry4b &color, float weight = 1.0f); void finalize(); + void finalize(IndexMask mask); }; template struct DefaultMixerStruct { diff --git a/source/blender/blenkernel/intern/attribute_math.cc b/source/blender/blenkernel/intern/attribute_math.cc index c38df2a2969..d8102b4eeb8 100644 --- a/source/blender/blenkernel/intern/attribute_math.cc +++ b/source/blender/blenkernel/intern/attribute_math.cc @@ -4,13 +4,31 @@ namespace blender::attribute_math { -ColorGeometry4fMixer::ColorGeometry4fMixer(MutableSpan output_buffer, +ColorGeometry4fMixer::ColorGeometry4fMixer(MutableSpan buffer, ColorGeometry4f default_color) - : buffer_(output_buffer), - default_color_(default_color), - total_weights_(output_buffer.size(), 0.0f) + : ColorGeometry4fMixer(buffer, buffer.index_range(), default_color) +{ +} + +ColorGeometry4fMixer::ColorGeometry4fMixer(MutableSpan buffer, + const IndexMask mask, + const ColorGeometry4f default_color) + : buffer_(buffer), default_color_(default_color), total_weights_(buffer.size(), 0.0f) { - buffer_.fill(ColorGeometry4f(0.0f, 0.0f, 0.0f, 0.0f)); + const ColorGeometry4f zero{0.0f, 0.0f, 0.0f, 0.0f}; + mask.foreach_index([&](const int64_t i) { buffer_[i] = zero; }); +} + +void ColorGeometry4fMixer::set(const int64_t index, + const ColorGeometry4f &color, + const float weight) +{ + BLI_assert(weight >= 0.0f); + buffer_[index].r = color.r * weight; + buffer_[index].g = color.g * weight; + buffer_[index].b = color.b * weight; + buffer_[index].a = color.a * weight; + total_weights_[index] = weight; } void ColorGeometry4fMixer::mix_in(const int64_t index, @@ -28,7 +46,12 @@ void ColorGeometry4fMixer::mix_in(const int64_t index, void ColorGeometry4fMixer::finalize() { - for (const int64_t i : buffer_.index_range()) { + this->finalize(buffer_.index_range()); +} + +void ColorGeometry4fMixer::finalize(const IndexMask mask) +{ + mask.foreach_index([&](const int64_t i) { const float weight = total_weights_[i]; ColorGeometry4f &output_color = buffer_[i]; if (weight > 0.0f) { @@ -41,16 +64,37 @@ void ColorGeometry4fMixer::finalize() else { output_color = default_color_; } - } + }); } ColorGeometry4bMixer::ColorGeometry4bMixer(MutableSpan buffer, - ColorGeometry4b default_color) + const ColorGeometry4b default_color) + : ColorGeometry4bMixer(buffer, buffer.index_range(), default_color) +{ +} + +ColorGeometry4bMixer::ColorGeometry4bMixer(MutableSpan buffer, + const IndexMask mask, + const ColorGeometry4b default_color) : buffer_(buffer), default_color_(default_color), total_weights_(buffer.size(), 0.0f), accumulation_buffer_(buffer.size(), float4(0, 0, 0, 0)) { + const ColorGeometry4b zero{0, 0, 0, 0}; + mask.foreach_index([&](const int64_t i) { buffer_[i] = zero; }); +} + +void ColorGeometry4bMixer::ColorGeometry4bMixer::set(int64_t index, + const ColorGeometry4b &color, + const float weight) +{ + BLI_assert(weight >= 0.0f); + accumulation_buffer_[index][0] = color.r * weight; + accumulation_buffer_[index][1] = color.g * weight; + accumulation_buffer_[index][2] = color.b * weight; + accumulation_buffer_[index][3] = color.a * weight; + total_weights_[index] = weight; } void ColorGeometry4bMixer::mix_in(int64_t index, const ColorGeometry4b &color, float weight) @@ -66,7 +110,12 @@ void ColorGeometry4bMixer::mix_in(int64_t index, const ColorGeometry4b &color, f void ColorGeometry4bMixer::finalize() { - for (const int64_t i : buffer_.index_range()) { + this->finalize(buffer_.index_range()); +} + +void ColorGeometry4bMixer::finalize(const IndexMask mask) +{ + mask.foreach_index([&](const int64_t i) { const float weight = total_weights_[i]; const float4 &accum_value = accumulation_buffer_[i]; ColorGeometry4b &output_color = buffer_[i]; @@ -80,7 +129,7 @@ void ColorGeometry4bMixer::finalize() else { output_color = default_color_; } - } + }); } } // namespace blender::attribute_math diff --git a/source/blender/blenkernel/intern/curve_nurbs.cc b/source/blender/blenkernel/intern/curve_nurbs.cc index 3ab6fb01ea5..62d5682da0f 100644 --- a/source/blender/blenkernel/intern/curve_nurbs.cc +++ b/source/blender/blenkernel/intern/curve_nurbs.cc @@ -4,8 +4,9 @@ * \ingroup bke */ -#include "BKE_attribute_math.hh" +#include "BLI_task.hh" +#include "BKE_attribute_math.hh" #include "BKE_curves.hh" namespace blender::bke::curves::nurbs { @@ -192,16 +193,16 @@ static void interpolate_to_evaluated(const BasisCache &basis_cache, { attribute_math::DefaultMixer mixer{dst}; - for (const int i : dst.index_range()) { - Span point_weights = basis_cache.weights.as_span().slice(i * order, order); - - for (const int j : point_weights.index_range()) { - const int point_index = (basis_cache.start_indices[i] + j) % src.size(); - mixer.mix_in(i, src[point_index], point_weights[j]); + threading::parallel_for(dst.index_range(), 128, [&](const IndexRange range) { + for (const int i : range) { + Span point_weights = basis_cache.weights.as_span().slice(i * order, order); + for (const int j : point_weights.index_range()) { + const int point_index = (basis_cache.start_indices[i] + j) % src.size(); + mixer.mix_in(i, src[point_index], point_weights[j]); + } } - } - - mixer.finalize(); + mixer.finalize(range); + }); } template @@ -213,17 +214,18 @@ static void interpolate_to_evaluated_rational(const BasisCache &basis_cache, { attribute_math::DefaultMixer mixer{dst}; - for (const int i : dst.index_range()) { - Span point_weights = basis_cache.weights.as_span().slice(i * order, order); + threading::parallel_for(dst.index_range(), 128, [&](const IndexRange range) { + for (const int i : range) { + Span point_weights = basis_cache.weights.as_span().slice(i * order, order); - for (const int j : point_weights.index_range()) { - const int point_index = (basis_cache.start_indices[i] + j) % src.size(); - const float weight = point_weights[j] * control_weights[point_index]; - mixer.mix_in(i, src[point_index], weight); + for (const int j : point_weights.index_range()) { + const int point_index = (basis_cache.start_indices[i] + j) % src.size(); + const float weight = point_weights[j] * control_weights[point_index]; + mixer.mix_in(i, src[point_index], weight); + } } - } - - mixer.finalize(); + mixer.finalize(range); + }); } void interpolate_to_evaluated(const BasisCache &basis_cache, diff --git a/source/blender/blenkernel/intern/curves_geometry.cc b/source/blender/blenkernel/intern/curves_geometry.cc index 7fb4e37f956..c37634b5d3d 100644 --- a/source/blender/blenkernel/intern/curves_geometry.cc +++ b/source/blender/blenkernel/intern/curves_geometry.cc @@ -13,6 +13,7 @@ #include "BLI_index_mask_ops.hh" #include "BLI_length_parameterize.hh" #include "BLI_math_rotation.hh" +#include "BLI_task.hh" #include "DNA_curves_types.h" @@ -1467,12 +1468,15 @@ static void adapt_curve_domain_point_to_curve_impl(const CurvesGeometry &curves, MutableSpan r_values) { attribute_math::DefaultMixer mixer(r_values); - for (const int i_curve : IndexRange(curves.curves_num())) { - for (const int i_point : curves.points_for_curve(i_curve)) { - mixer.mix_in(i_curve, old_values[i_point]); + + threading::parallel_for(curves.curves_range(), 128, [&](const IndexRange range) { + for (const int i_curve : range) { + for (const int i_point : curves.points_for_curve(i_curve)) { + mixer.mix_in(i_curve, old_values[i_point]); + } } - } - mixer.finalize(); + mixer.finalize(range); + }); } /** diff --git a/source/blender/geometry/intern/add_curves_on_mesh.cc b/source/blender/geometry/intern/add_curves_on_mesh.cc index 7184d774a22..299040d4d32 100644 --- a/source/blender/geometry/intern/add_curves_on_mesh.cc +++ b/source/blender/geometry/intern/add_curves_on_mesh.cc @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include "BLI_length_parameterize.hh" +#include "BLI_task.hh" #include "BKE_attribute_math.hh" #include "BKE_mesh_sample.hh" @@ -102,8 +103,8 @@ void interpolate_from_neighbors(const Span neighbors_per_curve, } } } + mixer.finalize(range); }); - mixer.finalize(); } static void interpolate_position_without_interpolation( -- cgit v1.2.3