From 967664d1ee2f40a85301b1a8ccdb0ba3fe811d5d Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 7 Sep 2022 13:15:34 +0200 Subject: BLI: new C++ BitVector data structure This adds a new `blender::BitVector` data structure that was requested a couple of times. It also replaces usages of `BLI_bitmap` in C++ code. See the comment in `BLI_bit_vector.hh` for more details about the advantages and disadvantages of using a bit-vector and how the new data structure compares to `std::vector` and `BLI_bitmap`. Differential Revision: https://developer.blender.org/D14006 --- source/blender/blenkernel/intern/mesh.cc | 27 +- source/blender/blenkernel/intern/mesh_normals.cc | 55 ++- source/blender/blenlib/BLI_bit_vector.hh | 529 +++++++++++++++++++++ source/blender/blenlib/BLI_index_range.hh | 15 + source/blender/blenlib/CMakeLists.txt | 2 + source/blender/blenlib/intern/BLI_index_range.cc | 30 ++ .../blender/blenlib/tests/BLI_bit_vector_test.cc | 186 ++++++++ .../blender/blenlib/tests/BLI_index_range_test.cc | 46 ++ 8 files changed, 847 insertions(+), 43 deletions(-) create mode 100644 source/blender/blenlib/BLI_bit_vector.hh create mode 100644 source/blender/blenlib/tests/BLI_bit_vector_test.cc (limited to 'source/blender') diff --git a/source/blender/blenkernel/intern/mesh.cc b/source/blender/blenkernel/intern/mesh.cc index 361eb9bcbbe..1bf068fcd45 100644 --- a/source/blender/blenkernel/intern/mesh.cc +++ b/source/blender/blenkernel/intern/mesh.cc @@ -17,7 +17,7 @@ #include "DNA_meshdata_types.h" #include "DNA_object_types.h" -#include "BLI_bitmap.h" +#include "BLI_bit_vector.hh" #include "BLI_edgehash.h" #include "BLI_endian_switch.h" #include "BLI_ghash.h" @@ -64,6 +64,7 @@ #include "BLO_read_write.h" +using blender::BitVector; using blender::float3; using blender::MutableSpan; using blender::Span; @@ -1892,8 +1893,8 @@ static int split_faces_prepare_new_verts(Mesh *mesh, BKE_mesh_vertex_normals_ensure(mesh); float(*vert_normals)[3] = BKE_mesh_vertex_normals_for_write(mesh); - BLI_bitmap *verts_used = BLI_BITMAP_NEW(verts_len, __func__); - BLI_bitmap *done_loops = BLI_BITMAP_NEW(loops_len, __func__); + BitVector<> verts_used(verts_len, false); + BitVector<> done_loops(loops_len, false); MLoop *ml = loops.data(); MLoopNorSpace **lnor_space = lnors_spacearr->lspacearr; @@ -1901,9 +1902,9 @@ static int split_faces_prepare_new_verts(Mesh *mesh, BLI_assert(lnors_spacearr->data_type == MLNOR_SPACEARR_LOOP_INDEX); for (int loop_idx = 0; loop_idx < loops_len; loop_idx++, ml++, lnor_space++) { - if (!BLI_BITMAP_TEST(done_loops, loop_idx)) { + if (!done_loops[loop_idx]) { const int vert_idx = ml->v; - const bool vert_used = BLI_BITMAP_TEST_BOOL(verts_used, vert_idx); + const bool vert_used = verts_used[vert_idx]; /* If vert is already used by another smooth fan, we need a new vert for this one. */ const int new_vert_idx = vert_used ? verts_len++ : vert_idx; @@ -1912,7 +1913,7 @@ static int split_faces_prepare_new_verts(Mesh *mesh, if ((*lnor_space)->flags & MLNOR_SPACE_IS_SINGLE) { /* Single loop in this fan... 
*/ BLI_assert(POINTER_AS_INT((*lnor_space)->loops) == loop_idx); - BLI_BITMAP_ENABLE(done_loops, loop_idx); + done_loops[loop_idx].set(); if (vert_used) { ml->v = new_vert_idx; } @@ -1920,7 +1921,7 @@ static int split_faces_prepare_new_verts(Mesh *mesh, else { for (LinkNode *lnode = (*lnor_space)->loops; lnode; lnode = lnode->next) { const int ml_fan_idx = POINTER_AS_INT(lnode->link); - BLI_BITMAP_ENABLE(done_loops, ml_fan_idx); + done_loops[ml_fan_idx].set(); if (vert_used) { loops[ml_fan_idx].v = new_vert_idx; } @@ -1928,7 +1929,7 @@ static int split_faces_prepare_new_verts(Mesh *mesh, } if (!vert_used) { - BLI_BITMAP_ENABLE(verts_used, vert_idx); + verts_used[vert_idx].set(); /* We need to update that vertex's normal here, we won't go over it again. */ /* This is important! *DO NOT* set vnor to final computed lnor, * vnor should always be defined to 'automatic normal' value computed from its polys, @@ -1949,9 +1950,6 @@ static int split_faces_prepare_new_verts(Mesh *mesh, } } - MEM_freeN(done_loops); - MEM_freeN(verts_used); - return verts_len - mesh->totvert; } @@ -1967,7 +1965,7 @@ static int split_faces_prepare_new_edges(Mesh *mesh, MutableSpan loops = mesh->loops_for_write(); const Span polys = mesh->polys(); - BLI_bitmap *edges_used = BLI_BITMAP_NEW(num_edges, __func__); + BitVector<> edges_used(num_edges, false); EdgeHash *edges_hash = BLI_edgehash_new_ex(__func__, num_edges); const MPoly *mp = polys.data(); @@ -1980,7 +1978,7 @@ static int split_faces_prepare_new_edges(Mesh *mesh, const int edge_idx = ml_prev->e; /* That edge has not been encountered yet, define it. */ - if (BLI_BITMAP_TEST(edges_used, edge_idx)) { + if (edges_used[edge_idx]) { /* Original edge has already been used, we need to define a new one. */ const int new_edge_idx = num_edges++; *eval = POINTER_FROM_INT(new_edge_idx); @@ -2000,7 +1998,7 @@ static int split_faces_prepare_new_edges(Mesh *mesh, edges[edge_idx].v1 = ml_prev->v; edges[edge_idx].v2 = ml->v; *eval = POINTER_FROM_INT(edge_idx); - BLI_BITMAP_ENABLE(edges_used, edge_idx); + edges_used[edge_idx].set(); } } else { @@ -2012,7 +2010,6 @@ static int split_faces_prepare_new_edges(Mesh *mesh, } } - MEM_freeN(edges_used); BLI_edgehash_free(edges_hash, nullptr); return num_edges - mesh->totedge; diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc index 706026f072b..a88ff4e9d90 100644 --- a/source/blender/blenkernel/intern/mesh_normals.cc +++ b/source/blender/blenkernel/intern/mesh_normals.cc @@ -17,8 +17,7 @@ #include "DNA_meshdata_types.h" #include "BLI_alloca.h" -#include "BLI_bitmap.h" - +#include "BLI_bit_vector.hh" #include "BLI_linklist.h" #include "BLI_linklist_stack.h" #include "BLI_math.h" @@ -37,6 +36,7 @@ #include "atomic_ops.h" +using blender::BitVector; using blender::MutableSpan; using blender::Span; @@ -856,7 +856,10 @@ static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data, int(*edge_to_loops)[2] = data->edge_to_loops; int *loop_to_poly = data->loop_to_poly; - BLI_bitmap *sharp_edges = do_sharp_edges_tag ? BLI_BITMAP_NEW(numEdges, __func__) : nullptr; + BitVector sharp_edges; + if (do_sharp_edges_tag) { + sharp_edges.resize(numEdges, false); + } const MPoly *mp; int mp_index; @@ -908,7 +911,7 @@ static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data, /* We want to avoid tagging edges as sharp when it is already defined as such by * other causes than angle threshold. 
*/ if (do_sharp_edges_tag && is_angle_sharp) { - BLI_BITMAP_SET(sharp_edges, ml_curr->e, true); + sharp_edges[ml_curr->e].set(); } } else { @@ -922,7 +925,7 @@ static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data, /* We want to avoid tagging edges as sharp when it is already defined as such by * other causes than angle threshold. */ if (do_sharp_edges_tag) { - BLI_BITMAP_SET(sharp_edges, ml_curr->e, false); + sharp_edges[ml_curr->e].reset(); } } /* Else, edge is already 'disqualified' (i.e. sharp)! */ @@ -934,12 +937,10 @@ static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data, MEdge *me; int me_index; for (me = (MEdge *)medges, me_index = 0; me_index < numEdges; me++, me_index++) { - if (BLI_BITMAP_TEST(sharp_edges, me_index)) { + if (sharp_edges[me_index]) { me->flag |= ME_SHARP; } } - - MEM_freeN(sharp_edges); } } @@ -1361,7 +1362,7 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const MLoop *mloops, const int (*edge_to_loops)[2], const int *loop_to_poly, const int *e2l_prev, - BLI_bitmap *skip_loops, + BitVector<> &skip_loops, const MLoop *ml_curr, const MLoop *ml_prev, const int ml_curr_index, @@ -1390,8 +1391,8 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const MLoop *mloops, BLI_assert(mlfan_vert_index >= 0); BLI_assert(mpfan_curr_index >= 0); - BLI_assert(!BLI_BITMAP_TEST(skip_loops, mlfan_vert_index)); - BLI_BITMAP_ENABLE(skip_loops, mlfan_vert_index); + BLI_assert(!skip_loops[mlfan_vert_index]); + skip_loops[mlfan_vert_index].set(); while (true) { /* Find next loop of the smooth fan. */ @@ -1412,7 +1413,7 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const MLoop *mloops, return false; } /* Smooth loop/edge. */ - if (BLI_BITMAP_TEST(skip_loops, mlfan_vert_index)) { + if (skip_loops[mlfan_vert_index]) { if (mlfan_vert_index == ml_curr_index) { /* We walked around a whole cyclic smooth fan without finding any already-processed loop, * means we can use initial `ml_curr` / `ml_prev` edge as start for this smooth fan. */ @@ -1423,7 +1424,7 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const MLoop *mloops, } /* We can skip it in future, and keep checking the smooth fan. */ - BLI_BITMAP_ENABLE(skip_loops, mlfan_vert_index); + skip_loops[mlfan_vert_index].set(); } } @@ -1447,7 +1448,7 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common int ml_curr_index; int ml_prev_index; - BLI_bitmap *skip_loops = BLI_BITMAP_NEW(numLoops, __func__); + BitVector<> skip_loops(numLoops, false); LoopSplitTaskData *data_buff = nullptr; int data_idx = 0; @@ -1489,7 +1490,7 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common ml_curr->e, ml_curr->v, IS_EDGE_SHARP(e2l_curr), - BLI_BITMAP_TEST_BOOL(skip_loops, ml_curr_index)); + skip_loops[ml_curr_index]); #endif /* A smooth edge, we have to check for cyclic smooth fan case. @@ -1502,7 +1503,7 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common * However, this would complicate the code, add more memory usage, and despite its logical * complexity, #loop_manifold_fan_around_vert_next() is quite cheap in term of CPU cycles, * so really think it's not worth it. 
*/ - if (!IS_EDGE_SHARP(e2l_curr) && (BLI_BITMAP_TEST(skip_loops, ml_curr_index) || + if (!IS_EDGE_SHARP(e2l_curr) && (skip_loops[ml_curr_index] || !loop_split_generator_check_cyclic_smooth_fan(mloops, mpolys, edge_to_loops, @@ -1597,7 +1598,6 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common if (edge_vectors) { BLI_stack_free(edge_vectors); } - MEM_freeN(skip_loops); #ifdef DEBUG_TIME TIMEIT_END_AVERAGED(loop_split_generator); @@ -1778,7 +1778,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, * (and perhaps from some editing tools later?). * So better to keep some simplicity here, and just call #BKE_mesh_normals_loop_split() twice! */ MLoopNorSpaceArray lnors_spacearr = {nullptr}; - BLI_bitmap *done_loops = BLI_BITMAP_NEW((size_t)numLoops, __func__); + BitVector<> done_loops(numLoops, false); float(*lnors)[3] = (float(*)[3])MEM_calloc_arrayN((size_t)numLoops, sizeof(*lnors), __func__); int *loop_to_poly = (int *)MEM_malloc_arrayN((size_t)numLoops, sizeof(int), __func__); /* In this case we always consider split nors as ON, @@ -1836,14 +1836,14 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, /* This should not happen in theory, but in some rare case (probably ugly geometry) * we can get some nullptr loopspacearr at this point. :/ * Maybe we should set those loops' edges as sharp? */ - BLI_BITMAP_ENABLE(done_loops, i); + done_loops[i].set(); if (G.debug & G_DEBUG) { printf("WARNING! Getting invalid nullptr loop space for loop %d!\n", i); } continue; } - if (!BLI_BITMAP_TEST(done_loops, i)) { + if (!done_loops[i]) { /* Notes: * - In case of mono-loop smooth fan, we have nothing to do. * - Loops in this linklist are ordered (in reversed order compared to how they were @@ -1854,7 +1854,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, * to avoid small differences adding up into a real big one in the end! */ if (lnors_spacearr.lspacearr[i]->flags & MLNOR_SPACE_IS_SINGLE) { - BLI_BITMAP_ENABLE(done_loops, i); + done_loops[i].set(); continue; } @@ -1886,7 +1886,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, prev_ml = ml; loops = loops->next; - BLI_BITMAP_ENABLE(done_loops, lidx); + done_loops[lidx].set(); } /* We also have to check between last and first loops, @@ -1930,14 +1930,14 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, loop_to_poly); } else { - BLI_bitmap_set_all(done_loops, true, (size_t)numLoops); + done_loops.fill(true); } /* And we just have to convert plain object-space custom normals to our * lnor space-encoded ones. */ for (int i = 0; i < numLoops; i++) { if (!lnors_spacearr.lspacearr[i]) { - BLI_BITMAP_DISABLE(done_loops, i); + done_loops[i].reset(); if (G.debug & G_DEBUG) { printf("WARNING! Still getting invalid nullptr loop space in second loop for loop %d!\n", i); @@ -1945,7 +1945,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts, continue; } - if (BLI_BITMAP_TEST_BOOL(done_loops, i)) { + if (done_loops[i]) { /* Note we accumulate and average all custom normals in current smooth fan, * to avoid getting different clnors data (tiny differences in plain custom normals can * give rather huge differences in computed 2D factors). 
 */
@@ -1956,7 +1956,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts,
         float *nor = r_custom_loopnors[nidx];
 
         BKE_lnor_space_custom_normal_to_data(lnors_spacearr.lspacearr[i], nor, r_clnors_data[i]);
-        BLI_BITMAP_DISABLE(done_loops, i);
+        done_loops[i].reset();
       }
       else {
         int avg_nor_count = 0;
@@ -1974,7 +1974,7 @@ static void mesh_normals_loop_custom_set(const MVert *mverts,
           BLI_SMALLSTACK_PUSH(clnors_data, (short *)r_clnors_data[lidx]);
 
           loops = loops->next;
-          BLI_BITMAP_DISABLE(done_loops, lidx);
+          done_loops[lidx].reset();
         }
 
         mul_v3_fl(avg_nor, 1.0f / (float)avg_nor_count);
@@ -1990,7 +1990,6 @@ static void mesh_normals_loop_custom_set(const MVert *mverts,
 
   MEM_freeN(lnors);
   MEM_freeN(loop_to_poly);
-  MEM_freeN(done_loops);
 
   BKE_lnor_spacearr_free(&lnors_spacearr);
 }
diff --git a/source/blender/blenlib/BLI_bit_vector.hh b/source/blender/blenlib/BLI_bit_vector.hh
new file mode 100644
index 00000000000..3cbd2483a31
--- /dev/null
+++ b/source/blender/blenlib/BLI_bit_vector.hh
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * A `blender::BitVector` is a dynamically growing contiguous array of bits. Its main purpose is
+ * to provide a compact way to map indices to bools. It requires 8 times less memory compared to a
+ * `blender::Vector<bool>`.
+ *
+ * Advantages of using a bit-vector instead of a byte-vector are:
+ * - Uses less memory.
+ * - Allows checking the state of many elements at the same time (8 times more bits than bytes fit
+ *   into a CPU register). This can improve performance.
+ *
+ * The compact nature of storing bools in individual bits has some downsides that have to be kept
+ * in mind:
+ * - Writing to separate bits in the same byte is not thread-safe. Therefore, an existing vector of
+ *   bool can't easily be replaced with a bit vector if it is written to from multiple threads.
+ *   Read-only access from multiple threads is fine though.
+ * - Writing individual elements is more expensive when the array is in cache already. That is
+ *   because changing a bit is always a read-modify-write operation on the byte the bit resides in.
+ * - Reading individual elements is more expensive when the array is in cache already. That is
+ *   because additional bit-wise operations have to be applied after the corresponding byte is
+ *   read.
+ *
+ * Comparison to `std::vector<bool>`:
+ * - `blender::BitVector` has an interface that is more optimized for dealing with bits.
+ * - `blender::BitVector` has an inline buffer that is used to avoid allocations when the vector is
+ *   small.
+ *
+ * Comparison to `BLI_bitmap`:
+ * - `blender::BitVector` offers a more C++ friendly interface.
+ * - `BLI_bitmap` should only be used in C code that cannot use `blender::BitVector`.
+ */
+
+#include <cstring>
+
+#include "BLI_allocator.hh"
+#include "BLI_index_range.hh"
+#include "BLI_memory_utils.hh"
+#include "BLI_span.hh"
+
+namespace blender {
+
+/**
+ * This is a read-only pointer to a specific bit. The value of the bit can be retrieved, but not
+ * changed.
+ */
+class BitRef {
+ private:
+  /** Points to the exact byte that the bit is in. */
+  const uint8_t *byte_ptr_;
+  /** All zeros except for a single one at the bit that is referenced. */
+  uint8_t mask_;
+
+  friend class MutableBitRef;
+
+ public:
+  BitRef() = default;
+
+  /**
+   * Reference a specific bit in a byte array. Note that #byte_ptr does *not* have to point to the
+   * exact byte the bit is in.
+ */ + BitRef(const uint8_t *byte_ptr, const int64_t bit_index) + { + byte_ptr_ = byte_ptr + (bit_index >> 3); + mask_ = 1 << (bit_index & 7); + } + + /** + * Return true when the bit is currently 1 and false otherwise. + */ + bool test() const + { + const uint8_t byte = *byte_ptr_; + const uint8_t masked_byte = byte & mask_; + return masked_byte != 0; + } + + operator bool() const + { + return this->test(); + } +}; + +/** + * Similar to #BitRef, but also allows changing the referenced bit. + */ +class MutableBitRef { + private: + /** Points to the exact byte that the bit is in. */ + uint8_t *byte_ptr_; + /** All zeros except for a single one at the bit that is referenced. */ + uint8_t mask_; + + public: + MutableBitRef() = default; + + /** + * Reference a specific bit in a byte array. Note that #byte_ptr does *not* have to point to the + * exact byte the bit is in. + */ + MutableBitRef(uint8_t *byte_ptr, const int64_t bit_index) + { + byte_ptr_ = byte_ptr + (bit_index >> 3); + mask_ = 1 << static_cast(bit_index & 7); + } + + /** + * Support implicitly casting to a read-only #BitRef. + */ + operator BitRef() const + { + BitRef bit_ref; + bit_ref.byte_ptr_ = byte_ptr_; + bit_ref.mask_ = mask_; + return bit_ref; + } + + /** + * Return true when the bit is currently 1 and false otherwise. + */ + bool test() const + { + const uint8_t byte = *byte_ptr_; + const uint8_t masked_byte = byte & mask_; + return masked_byte != 0; + } + + operator bool() const + { + return this->test(); + } + + /** + * Change the bit to a 1. + */ + void set() + { + *byte_ptr_ |= mask_; + } + + /** + * Change the bit to a 0. + */ + void reset() + { + *byte_ptr_ &= ~mask_; + } + + /** + * Change the bit to a 1 if #value is true and 0 otherwise. + */ + void set(const bool value) + { + if (value) { + this->set(); + } + else { + this->reset(); + } + } +}; + +template< + /** + * Number of bits that can be stored in the vector without doing an allocation. + */ + int64_t InlineBufferCapacity = 32, + /** + * The allocator used by this vector. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> +class BitVector { + private: + static constexpr int64_t required_bytes_for_bits(const int64_t number_of_bits) + { + return (number_of_bits + BitsPerByte - 1) / BitsPerByte; + } + + static constexpr int64_t BitsPerByte = 8; + static constexpr int64_t BytesInInlineBuffer = required_bytes_for_bits(InlineBufferCapacity); + static constexpr int64_t BitsInInlineBuffer = BytesInInlineBuffer * BitsPerByte; + static constexpr int64_t AllocationAlignment = 8; + + /** + * Points to the first byte used by the vector. It might point to the memory in the inline + * buffer. + */ + uint8_t *data_; + + /** Current size of the vector in bits. */ + int64_t size_in_bits_; + + /** Number of bits that fit into the vector until a reallocation has to occure. */ + int64_t capacity_in_bits_; + + /** Used for allocations when the inline buffer is too small. */ + Allocator allocator_; + + /** Contains the bits as long as the vector is small enough. 
+  TypedBuffer<uint8_t, BytesInInlineBuffer> inline_buffer_;
+
+ public:
+  BitVector(Allocator allocator = {}) noexcept : allocator_(allocator)
+  {
+    data_ = inline_buffer_;
+    size_in_bits_ = 0;
+    capacity_in_bits_ = BitsInInlineBuffer;
+    uninitialized_fill_n(data_, BytesInInlineBuffer, static_cast<uint8_t>(0));
+  }
+
+  BitVector(NoExceptConstructor, Allocator allocator = {}) noexcept : BitVector(allocator)
+  {
+  }
+
+  BitVector(const BitVector &other) : BitVector(NoExceptConstructor(), other.allocator_)
+  {
+    const int64_t bytes_to_copy = other.used_bytes_amount();
+    if (other.size_in_bits_ <= BitsInInlineBuffer) {
+      /* The data is copied into the owned inline buffer. */
+      data_ = inline_buffer_;
+      capacity_in_bits_ = BitsInInlineBuffer;
+    }
+    else {
+      /* Allocate a new byte array because the inline buffer is too small. */
+      data_ = static_cast<uint8_t *>(
+          allocator_.allocate(bytes_to_copy, AllocationAlignment, __func__));
+      capacity_in_bits_ = bytes_to_copy * BitsPerByte;
+    }
+    size_in_bits_ = other.size_in_bits_;
+    uninitialized_copy_n(other.data_, bytes_to_copy, data_);
+  }
+
+  BitVector(BitVector &&other) noexcept : BitVector(NoExceptConstructor(), other.allocator_)
+  {
+    if (other.is_inline()) {
+      /* Copy the data into the inline buffer. */
+      const int64_t bytes_to_copy = other.used_bytes_amount();
+      data_ = inline_buffer_;
+      uninitialized_copy_n(other.data_, bytes_to_copy, data_);
+    }
+    else {
+      /* Steal the pointer. */
+      data_ = other.data_;
+    }
+    size_in_bits_ = other.size_in_bits_;
+    capacity_in_bits_ = other.capacity_in_bits_;
+
+    /* Clear the other vector because it has been moved from. */
+    other.data_ = other.inline_buffer_;
+    other.size_in_bits_ = 0;
+    other.capacity_in_bits_ = BitsInInlineBuffer;
+  }
+
+  /**
+   * Create a new vector with the given size and fill it with #value.
+   */
+  BitVector(const int64_t size_in_bits, const bool value = false, Allocator allocator = {})
+      : BitVector(NoExceptConstructor(), allocator)
+  {
+    this->resize(size_in_bits, value);
+  }
+
+  /**
+   * Create a bit vector based on an array of bools. Each byte of the input array maps to one bit.
+   */
+  explicit BitVector(const Span<bool> values, Allocator allocator = {})
+      : BitVector(NoExceptConstructor(), allocator)
+  {
+    this->resize(values.size());
+    for (const int64_t i : this->index_range()) {
+      (*this)[i].set(values[i]);
+    }
+  }
+
+  ~BitVector()
+  {
+    if (!this->is_inline()) {
+      allocator_.deallocate(data_);
+    }
+  }
+
+  BitVector &operator=(const BitVector &other)
+  {
+    return copy_assign_container(*this, other);
+  }
+
+  BitVector &operator=(BitVector &&other)
+  {
+    return move_assign_container(*this, std::move(other));
+  }
+
+  /**
+   * Number of bits in the bit vector.
+   */
+  int64_t size() const
+  {
+    return size_in_bits_;
+  }
+
+  /**
+   * Get a read-only reference to a specific bit.
+   */
+  BitRef operator[](const int64_t index) const
+  {
+    BLI_assert(index >= 0);
+    BLI_assert(index < size_in_bits_);
+    return {data_, index};
+  }
+
+  /**
+   * Get a mutable reference to a specific bit.
+   */
+  MutableBitRef operator[](const int64_t index)
+  {
+    BLI_assert(index >= 0);
+    BLI_assert(index < size_in_bits_);
+    return {data_, index};
+  }
+
+  IndexRange index_range() const
+  {
+    return {0, size_in_bits_};
+  }
+
+  /**
+   * Add a new bit to the end of the vector.
+ */ + void append(const bool value) + { + this->ensure_space_for_one(); + MutableBitRef bit{data_, size_in_bits_}; + bit.set(value); + size_in_bits_++; + } + + class Iterator { + private: + const BitVector *vector_; + int64_t index_; + + public: + Iterator(const BitVector &vector, const int64_t index) : vector_(&vector), index_(index) + { + } + + Iterator &operator++() + { + index_++; + return *this; + } + + friend bool operator!=(const Iterator &a, const Iterator &b) + { + BLI_assert(a.vector_ == b.vector_); + return a.index_ != b.index_; + } + + BitRef operator*() const + { + return (*vector_)[index_]; + } + }; + + class MutableIterator { + private: + BitVector *vector_; + int64_t index_; + + public: + MutableIterator(BitVector &vector, const int64_t index) : vector_(&vector), index_(index) + { + } + + MutableIterator &operator++() + { + index_++; + return *this; + } + + friend bool operator!=(const MutableIterator &a, const MutableIterator &b) + { + BLI_assert(a.vector_ == b.vector_); + return a.index_ != b.index_; + } + + MutableBitRef operator*() const + { + return (*vector_)[index_]; + } + }; + + Iterator begin() const + { + return {*this, 0}; + } + + Iterator end() const + { + return {*this, size_in_bits_}; + } + + MutableIterator begin() + { + return {*this, 0}; + } + + MutableIterator end() + { + return {*this, size_in_bits_}; + } + + /** + * Change the size of the vector. If the new vector is larger than the old one, the new elements + * are filled with #value. + */ + void resize(const int64_t new_size_in_bits, const bool value = false) + { + BLI_assert(new_size_in_bits >= 0); + const int64_t old_size_in_bits = size_in_bits_; + if (new_size_in_bits > old_size_in_bits) { + this->reserve(new_size_in_bits); + } + size_in_bits_ = new_size_in_bits; + if (old_size_in_bits < new_size_in_bits) { + this->fill_range(IndexRange(old_size_in_bits, new_size_in_bits - old_size_in_bits), value); + } + } + + /** + * Set #value for every element in #range. + */ + void fill_range(const IndexRange range, const bool value) + { + const AlignedIndexRanges aligned_ranges = split_index_range_by_alignment(range, BitsPerByte); + + /* Fill first few bits. */ + for (const int64_t i : aligned_ranges.prefix) { + (*this)[i].set(value); + } + + /* Fill entire bytes at once. */ + const int64_t start_fill_byte_index = aligned_ranges.aligned.start() / BitsPerByte; + const int64_t bytes_to_fill = aligned_ranges.aligned.size() / BitsPerByte; + const uint8_t fill_value = value ? (uint8_t)0xff : (uint8_t)0x00; + initialized_fill_n(data_ + start_fill_byte_index, bytes_to_fill, fill_value); + + /* Fill bits in the end that don't cover a full byte. */ + for (const int64_t i : aligned_ranges.suffix) { + (*this)[i].set(value); + } + } + + /** + * Set #value on every element. + */ + void fill(const bool value) + { + this->fill_range(IndexRange(0, size_in_bits_), value); + } + + /** + * Make sure that the capacity of the vector is large enough to hold the given amount of bits. + * The actual size is not changed. 
+ */ + void reserve(const int new_capacity_in_bits) + { + this->realloc_to_at_least(new_capacity_in_bits); + } + + private: + void ensure_space_for_one() + { + if (UNLIKELY(size_in_bits_ >= capacity_in_bits_)) { + this->realloc_to_at_least(size_in_bits_ + 1); + } + } + + BLI_NOINLINE void realloc_to_at_least(const int64_t min_capacity_in_bits, + const uint8_t initial_value_for_new_bytes = 0x00) + { + if (capacity_in_bits_ >= min_capacity_in_bits) { + return; + } + + const int64_t min_capacity_in_bytes = this->required_bytes_for_bits(min_capacity_in_bits); + + /* At least double the size of the previous allocation. */ + const int64_t min_new_capacity_in_bytes = capacity_in_bits_ * 2; + + const int64_t new_capacity_in_bytes = std::max(min_capacity_in_bytes, + min_new_capacity_in_bytes); + const int64_t bytes_to_copy = this->used_bytes_amount(); + + uint8_t *new_data = static_cast( + allocator_.allocate(new_capacity_in_bytes, AllocationAlignment, __func__)); + uninitialized_copy_n(data_, bytes_to_copy, new_data); + /* Always initialize new capacity even if it isn't used yet. That's necessary to avoid warnings + * caused by using uninitialized memory. This happens when e.g. setting a clearing a bit in an + * uninitialized byte. */ + uninitialized_fill_n(new_data + bytes_to_copy, + new_capacity_in_bytes - bytes_to_copy, + (uint8_t)initial_value_for_new_bytes); + + if (!this->is_inline()) { + allocator_.deallocate(data_); + } + + data_ = new_data; + capacity_in_bits_ = new_capacity_in_bytes * BitsPerByte; + } + + bool is_inline() const + { + return data_ == inline_buffer_; + } + + int64_t used_bytes_amount() const + { + return this->required_bytes_for_bits(size_in_bits_); + } +}; + +} // namespace blender diff --git a/source/blender/blenlib/BLI_index_range.hh b/source/blender/blenlib/BLI_index_range.hh index 2b290e1ba7d..a0f129c7324 100644 --- a/source/blender/blenlib/BLI_index_range.hh +++ b/source/blender/blenlib/BLI_index_range.hh @@ -318,4 +318,19 @@ class IndexRange { Span as_span_internal() const; }; +struct AlignedIndexRanges { + IndexRange prefix; + IndexRange aligned; + IndexRange suffix; +}; + +/** + * Split a range into three parts so that the boundaries of the middle part are aligned to some + * power of two. + * + * This can be used when an algorithm can be optimized on aligned indices/memory. The algorithm + * then needs a slow path for the beginning and end, and a fast path for the aligned elements. 
+ */ +AlignedIndexRanges split_index_range_by_alignment(const IndexRange range, const int64_t alignment); + } // namespace blender diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index ded4127a0d8..d87c60e6099 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -170,6 +170,7 @@ set(SRC BLI_astar.h BLI_bitmap.h BLI_bitmap_draw_2d.h + BLI_bit_vector.hh BLI_blenlib.h BLI_bounds.hh BLI_boxpack_2d.h @@ -429,6 +430,7 @@ if(WITH_GTESTS) tests/BLI_array_store_test.cc tests/BLI_array_test.cc tests/BLI_array_utils_test.cc + tests/BLI_bit_vector_test.cc tests/BLI_bitmap_test.cc tests/BLI_bounds_test.cc tests/BLI_color_test.cc diff --git a/source/blender/blenlib/intern/BLI_index_range.cc b/source/blender/blenlib/intern/BLI_index_range.cc index 398228ab461..624dcc39fc5 100644 --- a/source/blender/blenlib/intern/BLI_index_range.cc +++ b/source/blender/blenlib/intern/BLI_index_range.cc @@ -44,4 +44,34 @@ Span IndexRange::as_span_internal() const return Span(s_current_array + start_, size_); } +AlignedIndexRanges split_index_range_by_alignment(const IndexRange range, const int64_t alignment) +{ + BLI_assert(is_power_of_2_i(alignment)); + const int64_t mask = alignment - 1; + + AlignedIndexRanges aligned_ranges; + + const int64_t start_chunk = range.start() & ~mask; + const int64_t end_chunk = range.one_after_last() & ~mask; + if (start_chunk == end_chunk) { + aligned_ranges.prefix = range; + } + else { + int64_t prefix_size = 0; + int64_t suffix_size = 0; + if (range.start() != start_chunk) { + prefix_size = alignment - (range.start() & mask); + } + if (range.one_after_last() != end_chunk) { + suffix_size = range.one_after_last() - end_chunk; + } + aligned_ranges.prefix = IndexRange(range.start(), prefix_size); + aligned_ranges.suffix = IndexRange(end_chunk, suffix_size); + aligned_ranges.aligned = IndexRange(aligned_ranges.prefix.one_after_last(), + range.size() - prefix_size - suffix_size); + } + + return aligned_ranges; +} + } // namespace blender diff --git a/source/blender/blenlib/tests/BLI_bit_vector_test.cc b/source/blender/blenlib/tests/BLI_bit_vector_test.cc new file mode 100644 index 00000000000..c477b464f0c --- /dev/null +++ b/source/blender/blenlib/tests/BLI_bit_vector_test.cc @@ -0,0 +1,186 @@ +/* Apache License, Version 2.0 */ + +#include "BLI_bit_vector.hh" +#include "BLI_exception_safety_test_utils.hh" +#include "BLI_strict_flags.h" + +#include "testing/testing.h" + +namespace blender::tests { + +TEST(bit_vector, DefaultConstructor) +{ + BitVector vec; + EXPECT_EQ(vec.size(), 0); +} + +TEST(bit_vector, CopyConstructorInline) +{ + BitVector<> vec({false, false, true, true, false}); + BitVector<> vec2 = vec; + + EXPECT_EQ(vec.size(), 5); + EXPECT_EQ(vec2.size(), 5); + + vec2[1].set(); + EXPECT_FALSE(vec[1]); + + EXPECT_FALSE(vec2[0]); + EXPECT_TRUE(vec2[1]); + EXPECT_TRUE(vec2[2]); + EXPECT_TRUE(vec2[3]); + EXPECT_FALSE(vec2[4]); +} + +TEST(bit_vector, CopyConstructorLarge) +{ + BitVector<> vec(500, false); + vec[1].set(); + + BitVector<> vec2 = vec; + + EXPECT_EQ(vec.size(), 500); + EXPECT_EQ(vec2.size(), 500); + + vec2[2].set(); + EXPECT_FALSE(vec[2]); + + EXPECT_FALSE(vec2[0]); + EXPECT_TRUE(vec2[1]); + EXPECT_TRUE(vec2[2]); +} + +TEST(bit_vector, MoveConstructorInline) +{ + BitVector<> vec({false, false, true, true, false}); + BitVector<> vec2 = std::move(vec); + + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec2.size(), 5); + + EXPECT_FALSE(vec2[0]); + EXPECT_FALSE(vec2[1]); + EXPECT_TRUE(vec2[2]); + 
EXPECT_TRUE(vec2[3]); + EXPECT_FALSE(vec2[4]); +} + +TEST(bit_vector, MoveConstructorLarge) +{ + BitVector<> vec(500, false); + vec[3].set(); + + BitVector<> vec2 = std::move(vec); + + EXPECT_EQ(vec.size(), 0); + EXPECT_EQ(vec2.size(), 500); + + EXPECT_FALSE(vec2[0]); + EXPECT_FALSE(vec2[1]); + EXPECT_FALSE(vec2[2]); + EXPECT_TRUE(vec2[3]); + EXPECT_FALSE(vec2[4]); +} + +TEST(bit_vector, SizeConstructor) +{ + { + BitVector<> vec(0); + EXPECT_EQ(vec.size(), 0); + } + { + BitVector<> vec(5); + EXPECT_EQ(vec.size(), 5); + for (BitRef bit : vec) { + EXPECT_FALSE(bit); + } + } + { + BitVector<> vec(123); + EXPECT_EQ(vec.size(), 123); + for (BitRef bit : vec) { + EXPECT_FALSE(bit); + } + } +} + +TEST(bit_vector, SizeFillConstructor) +{ + { + BitVector<> vec(5, false); + for (const int64_t i : IndexRange(5)) { + EXPECT_FALSE(vec[i]); + } + } + { + BitVector<> vec(123, true); + for (const int64_t i : IndexRange(123)) { + EXPECT_TRUE(vec[i]); + } + } +} + +TEST(bit_vector, IndexAccess) +{ + BitVector<> vec(100, false); + vec[55].set(); + EXPECT_FALSE(vec[50]); + EXPECT_FALSE(vec[51]); + EXPECT_FALSE(vec[52]); + EXPECT_FALSE(vec[53]); + EXPECT_FALSE(vec[54]); + EXPECT_TRUE(vec[55]); + EXPECT_FALSE(vec[56]); + EXPECT_FALSE(vec[57]); + EXPECT_FALSE(vec[58]); +} + +TEST(bit_vector, Iterator) +{ + BitVector<> vec(100, false); + { + int64_t index = 0; + for (MutableBitRef bit : vec) { + bit.set(ELEM(index, 0, 4, 7, 10, 11)); + index++; + } + } + { + int64_t index = 0; + for (BitRef bit : const_cast &>(vec)) { + EXPECT_EQ(bit, ELEM(index, 0, 4, 7, 10, 11)); + index++; + } + } +} + +TEST(bit_vector, Append) +{ + BitVector<> vec; + vec.append(false); + vec.append(true); + vec.append(true); + vec.append(false); + + EXPECT_EQ(vec.size(), 4); + EXPECT_FALSE(vec[0]); + EXPECT_TRUE(vec[1]); + EXPECT_TRUE(vec[2]); + EXPECT_FALSE(vec[3]); +} + +TEST(bit_vector, AppendMany) +{ + BitVector<> vec; + for (const int64_t i : IndexRange(1000)) { + vec.append(i % 2); + } + EXPECT_FALSE(vec[0]); + EXPECT_TRUE(vec[1]); + EXPECT_FALSE(vec[2]); + EXPECT_TRUE(vec[3]); + EXPECT_FALSE(vec[4]); + EXPECT_TRUE(vec[5]); +} + +} // namespace blender::tests diff --git a/source/blender/blenlib/tests/BLI_index_range_test.cc b/source/blender/blenlib/tests/BLI_index_range_test.cc index f5b994d409a..955691b3430 100644 --- a/source/blender/blenlib/tests/BLI_index_range_test.cc +++ b/source/blender/blenlib/tests/BLI_index_range_test.cc @@ -235,4 +235,50 @@ TEST(index_range, GenericAlgorithms) EXPECT_EQ(std::count_if(range.begin(), range.end(), [](int v) { return v < 7; }), 3); } +TEST(index_range, SplitByAlignment) +{ + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(0, 0), 16); + EXPECT_EQ(ranges.prefix, IndexRange()); + EXPECT_EQ(ranges.aligned, IndexRange()); + EXPECT_EQ(ranges.suffix, IndexRange()); + } + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(0, 24), 8); + EXPECT_EQ(ranges.prefix, IndexRange()); + EXPECT_EQ(ranges.aligned, IndexRange(0, 24)); + EXPECT_EQ(ranges.suffix, IndexRange()); + } + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(1, 2), 4); + EXPECT_EQ(ranges.prefix, IndexRange(1, 2)); + EXPECT_EQ(ranges.aligned, IndexRange()); + EXPECT_EQ(ranges.suffix, IndexRange()); + } + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(3, 50), 8); + EXPECT_EQ(ranges.prefix, IndexRange(3, 5)); + EXPECT_EQ(ranges.aligned, IndexRange(8, 40)); + EXPECT_EQ(ranges.suffix, IndexRange(48, 5)); + } + { + AlignedIndexRanges ranges = 
split_index_range_by_alignment(IndexRange(3, 50), 1); + EXPECT_EQ(ranges.prefix, IndexRange()); + EXPECT_EQ(ranges.aligned, IndexRange(3, 50)); + EXPECT_EQ(ranges.suffix, IndexRange()); + } + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(64, 16), 16); + EXPECT_EQ(ranges.prefix, IndexRange()); + EXPECT_EQ(ranges.aligned, IndexRange(64, 16)); + EXPECT_EQ(ranges.suffix, IndexRange()); + } + { + AlignedIndexRanges ranges = split_index_range_by_alignment(IndexRange(3, 5), 8); + EXPECT_EQ(ranges.prefix, IndexRange(3, 5)); + EXPECT_EQ(ranges.aligned, IndexRange()); + EXPECT_EQ(ranges.suffix, IndexRange()); + } +} + } // namespace blender::tests -- cgit v1.2.3
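
Usage sketch (editor's addition, not part of the patch): the snippet below illustrates the `BitVector` API introduced in `BLI_bit_vector.hh` above — size-and-fill construction, `operator[]` returning `BitRef`/`MutableBitRef`, `append()`, `fill()`, and range-based iteration. It assumes a Blender build where the blenlib headers are on the include path; the function name is hypothetical and exists only for illustration.

#include "BLI_bit_vector.hh"

/* Hypothetical example function, not part of the commit. */
static void bit_vector_usage_example()
{
  using blender::BitVector;
  using blender::MutableBitRef;

  /* 100 bits, all initialized to false. Sizes up to the inline capacity (32 bits by default)
   * stay in the inline buffer; 100 bits require a heap allocation. */
  BitVector<> visited(100, false);

  /* operator[] on a non-const vector returns a MutableBitRef that can modify the bit. */
  visited[7].set();
  visited[20].set(true);
  visited[7].reset();

  /* Reading goes through BitRef/MutableBitRef, which implicitly convert to bool. */
  const bool is_visited = visited[20];
  (void)is_visited;

  /* Grow one bit at a time, or set all bits at once. */
  visited.append(true);
  visited.fill(false);

  /* Range-based iteration yields MutableBitRef on a non-const vector. */
  for (MutableBitRef bit : visited) {
    bit.set(false);
  }
}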