diff options
author | Jacques Lucke <jacques@blender.org> | 2021-03-07 16:15:20 +0300 |
---|---|---|
committer | Jacques Lucke <jacques@blender.org> | 2021-03-07 16:15:20 +0300 |
commit | 456d3cc85e9f408341b12eb0d4f2c3a925855e8c (patch) | |
tree | 7eef9a785facb9cf251d292d1bb72a062e45e935 /source | |
parent | 84da76a96c5c2b59aeb67fa905398f656af25649 (diff) |
BLI: reduce wasted memory in linear allocator
The main change is that large allocations are done separately now.
Also, buffers that small allocations are packed into now have a
maximum size. Using larger buffers does not really provide performance
benefits, but increases wasted memory.
Diffstat (limited to 'source')
-rw-r--r-- | source/blender/blenlib/BLI_linear_allocator.hh | 41 | ||||
-rw-r--r-- | source/blender/blenlib/tests/BLI_linear_allocator_test.cc | 21 |
2 files changed, 49 insertions, 13 deletions
diff --git a/source/blender/blenlib/BLI_linear_allocator.hh b/source/blender/blenlib/BLI_linear_allocator.hh index a616ec5cf28..11022a03d69 100644 --- a/source/blender/blenlib/BLI_linear_allocator.hh +++ b/source/blender/blenlib/BLI_linear_allocator.hh @@ -38,18 +38,20 @@ template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopya uintptr_t current_begin_; uintptr_t current_end_; - int64_t next_min_alloc_size_; #ifdef DEBUG int64_t debug_allocated_amount_ = 0; #endif + /* Buffers larger than that are not packed together with smaller allocations to avoid wasting + * memory. */ + constexpr static inline int64_t large_buffer_threshold = 4096; + public: LinearAllocator() { current_begin_ = 0; current_end_ = 0; - next_min_alloc_size_ = 64; } ~LinearAllocator() @@ -71,23 +73,23 @@ template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopya BLI_assert(alignment >= 1); BLI_assert(is_power_of_2_i(alignment)); -#ifdef DEBUG - debug_allocated_amount_ += size; -#endif - const uintptr_t alignment_mask = alignment - 1; const uintptr_t potential_allocation_begin = (current_begin_ + alignment_mask) & ~alignment_mask; const uintptr_t potential_allocation_end = potential_allocation_begin + size; if (potential_allocation_end <= current_end_) { +#ifdef DEBUG + debug_allocated_amount_ += size; +#endif current_begin_ = potential_allocation_end; return reinterpret_cast<void *>(potential_allocation_begin); } - else { - this->allocate_new_buffer(size + alignment); + if (size <= large_buffer_threshold) { + this->allocate_new_buffer(size + alignment, alignment); return this->allocate(size, alignment); } + return this->allocator_large_buffer(size, alignment); }; /** @@ -195,7 +197,7 @@ template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopya } private: - void allocate_new_buffer(int64_t min_allocation_size) + void allocate_new_buffer(int64_t min_allocation_size, int64_t min_alignment) { for (int64_t i : 
unused_borrowed_buffers_.index_range()) { Span<char> buffer = unused_borrowed_buffers_[i]; @@ -207,15 +209,28 @@ template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopya } } - const int64_t size_in_bytes = power_of_2_min_u( - std::max(min_allocation_size, next_min_alloc_size_)); - next_min_alloc_size_ = size_in_bytes * 2; + /* Possibly allocate more bytes than necessary for the current allocation. This way more small + * allocations can be packed together. Large buffers are allocated exactly to avoid wasting too + * much memory. */ + int64_t size_in_bytes = min_allocation_size; + if (size_in_bytes <= large_buffer_threshold) { + /* Gradually grow buffer size with each allocation, up to a maximum. */ + const int64_t grow_size = 1 << std::min<int64_t>(owned_buffers_.size() + 6, 20); + size_in_bytes = std::min(large_buffer_threshold, std::max(size_in_bytes, grow_size)); + } - void *buffer = allocator_.allocate(size_in_bytes, 8, AT); + void *buffer = allocator_.allocate(size_in_bytes, min_alignment, __func__); owned_buffers_.append(buffer); current_begin_ = (uintptr_t)buffer; current_end_ = current_begin_ + size_in_bytes; } + + void *allocator_large_buffer(const int64_t size, const int64_t alignment) + { + void *buffer = allocator_.allocate(size, alignment, __func__); + owned_buffers_.append(buffer); + return buffer; + } }; } // namespace blender diff --git a/source/blender/blenlib/tests/BLI_linear_allocator_test.cc b/source/blender/blenlib/tests/BLI_linear_allocator_test.cc index a35fbf70711..95156ae5c0c 100644 --- a/source/blender/blenlib/tests/BLI_linear_allocator_test.cc +++ b/source/blender/blenlib/tests/BLI_linear_allocator_test.cc @@ -1,6 +1,7 @@ /* Apache License, Version 2.0 */ #include "BLI_linear_allocator.hh" +#include "BLI_rand.hh" #include "BLI_strict_flags.h" #include "testing/testing.h" @@ -115,4 +116,24 @@ TEST(linear_allocator, ConstructArrayCopy) EXPECT_EQ(span2[2], 3); } +TEST(linear_allocator, AllocateLarge) +{ + 
LinearAllocator<> allocator; + void *buffer1 = allocator.allocate(1024 * 1024, 8); + void *buffer2 = allocator.allocate(1024 * 1024, 8); + EXPECT_NE(buffer1, buffer2); +} + +TEST(linear_allocator, ManyAllocations) +{ + LinearAllocator<> allocator; + RandomNumberGenerator rng; + for (int i = 0; i < 1000; i++) { + int size = rng.get_int32(10000); + int alignment = 1 << (rng.get_int32(7)); + void *buffer = allocator.allocate(size, alignment); + EXPECT_NE(buffer, nullptr); + } +} + } // namespace blender::tests |