From 6a71b2af66cf10556b21361cc609d54e45be5e3b Mon Sep 17 00:00:00 2001
From: Hans Goudey <h.goudey@me.com>
Date: Wed, 22 Dec 2021 11:04:03 -0600
Subject: Mesh: Parallelize bounding box calculation (WIP)

This replaces the single-threaded calculation of mesh min and max
positions with a `parallel_reduce` loop. Since the bounding box
of a mesh is retrieved quite often (at the end of each evaluation,
currently 2(?!) times when leaving edit mode, etc.), this makes for a
quite noticeable speedup actually.

On my Ryzen 3700x and a 4.2 million vertex mesh, I observed
a 4.4x performance increase, from 14 ms to 4.4 ms.

I added some methods to `float3` so they would be inlined, but
they're also a nice addition, since they're used often anyway.

Differential Revision: https://developer.blender.org/D13572
---
 source/blender/blenkernel/intern/mesh.cc | 35 +++++++++++++++++++++++++++-----
 source/blender/blenlib/BLI_float3.hh     | 16 +++++++++++++++
 2 files changed, 46 insertions(+), 5 deletions(-)

(limited to 'source/blender')

diff --git a/source/blender/blenkernel/intern/mesh.cc b/source/blender/blenkernel/intern/mesh.cc
index 05aa9111fa3..334ec0c5423 100644
--- a/source/blender/blenkernel/intern/mesh.cc
+++ b/source/blender/blenkernel/intern/mesh.cc
@@ -36,13 +36,16 @@
 #include "BLI_bitmap.h"
 #include "BLI_edgehash.h"
 #include "BLI_endian_switch.h"
+#include "BLI_float3.hh"
 #include "BLI_ghash.h"
 #include "BLI_hash.h"
+#include "BLI_index_range.hh"
 #include "BLI_linklist.h"
 #include "BLI_listbase.h"
 #include "BLI_math.h"
 #include "BLI_memarena.h"
 #include "BLI_string.h"
+#include "BLI_task.hh"
 #include "BLI_utildefines.h"
 
 #include "BLT_translation.h"
@@ -1577,13 +1580,35 @@ void BKE_mesh_looptri_get_real_edges(const Mesh *mesh, const MLoopTri *looptri,
 
 bool BKE_mesh_minmax(const Mesh *me, float r_min[3], float r_max[3])
 {
-  int i = me->totvert;
-  MVert *mvert;
-  for (mvert = me->mvert; i--; mvert++) {
-    minmax_v3v3_v3(r_min, r_max, mvert->co);
+  using namespace blender;
+  if (me->totvert == 0) {
+    return false;
   }
 
-  return (me->totvert != 0);
+  struct Result {
+    float3 min;
+    float3 max;
+  };
+
+  const Result minmax = threading::parallel_reduce(
+      IndexRange(me->totvert),
+      1024,
+      Result{float3(FLT_MAX), float3(-FLT_MAX)},
+      [&](IndexRange range, const Result &init) {
+        Result result = init;
+        for (const int i : range) {
+          float3::min_max(me->mvert[i].co, result.min, result.max);
+        }
+        return result;
+      },
+      [](const Result &a, const Result &b) {
+        return Result{float3::min(a.min, b.min), float3::max(a.max, b.max)};
+      });
+
+  copy_v3_v3(r_min, float3::min(minmax.min, r_min));
+  copy_v3_v3(r_max, float3::max(minmax.max, r_max));
+
+  return true;
 }
 
 void BKE_mesh_transform(Mesh *me, const float mat[4][4], bool do_keys)
diff --git a/source/blender/blenlib/BLI_float3.hh b/source/blender/blenlib/BLI_float3.hh
index 6ee0c4b973b..765f524fb31 100644
--- a/source/blender/blenlib/BLI_float3.hh
+++ b/source/blender/blenlib/BLI_float3.hh
@@ -228,6 +228,22 @@ struct float3 {
     return result;
   }
 
+  static float3 min(const float3 &a, const float3 &b)
+  {
+    return {a.x < b.x ? a.x : b.x, a.y < b.y ? a.y : b.y, a.z < b.z ? a.z : b.z};
+  }
+
+  static float3 max(const float3 &a, const float3 &b)
+  {
+    return {a.x > b.x ? a.x : b.x, a.y > b.y ? a.y : b.y, a.z > b.z ? a.z : b.z};
+  }
+
+  static void min_max(const float3 &vector, float3 &min, float3 &max)
+  {
+    min = float3::min(vector, min);
+    max = float3::max(vector, max);
+  }
+
   static float3 safe_divide(const float3 &a, const float b)
   {
     return (b != 0.0f) ? a / b : float3(0.0f);
-- 
cgit v1.2.3