
git.blender.org/blender.git
author    Campbell Barton <ideasman42@gmail.com>  2016-05-30 08:25:36 +0300
committer Campbell Barton <ideasman42@gmail.com>  2016-05-30 09:18:24 +0300
commit    53b60eed45098cdb5e0a6f4a05e242f5d6524635 (patch)
tree      a6fc111557c3beee2a3b17a2f2550232f004c562 /source/blender/blenlib/intern/array_store.c
parent    11b0874db003fed55578e3a49d6c5377ac49902e (diff)
Add BLI_array_store copy-on-write API
This supports in-memory de-duplication, useful to avoid inefficient memory use when storing multiple, similar arrays.
Diffstat (limited to 'source/blender/blenlib/intern/array_store.c')
-rw-r--r-- source/blender/blenlib/intern/array_store.c | 1731
1 file changed, 1731 insertions, 0 deletions
diff --git a/source/blender/blenlib/intern/array_store.c b/source/blender/blenlib/intern/array_store.c
new file mode 100644
index 00000000000..8cbabddb1af
--- /dev/null
+++ b/source/blender/blenlib/intern/array_store.c
@@ -0,0 +1,1731 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/blenlib/intern/array_store.c
+ * \ingroup bli
+ * \brief Array storage to minimize duplication.
+ *
+ * This is done by splitting arrays into chunks and using copy-on-write (COW)
+ * to de-duplicate chunks;
+ * from the user's perspective this is an implementation detail.
+ *
+ *
+ * Overview
+ * ========
+ *
+ *
+ * Data Structure
+ * --------------
+ *
+ * This diagram is an overview of the structure of a single array-store.
+ *
+ * \note The only two structures here which are referenced externally are:
+ *
+ * - BArrayStore: The whole array store.
+ * - BArrayState: Represents a single state (array) of data.
+ * These can be added using a reference state; while this could be considered the previous or parent state,
+ * no relationship is kept, so the caller is free to add any state from the same BArrayStore as a reference.
+ *
+ * <pre>
+ * <+> BArrayStore: root data-structure,
+ * | can store many 'states', which share memory.
+ * |
+ * | This can store many arrays, however they must share the same 'stride'.
+ * | Arrays of different types will need to use a new BArrayStore.
+ * |
+ * +- <+> states (Collection of BArrayState's):
+ * | | Each represents an array added by the user of this API.
+ * | | and references a chunk_list (each state is a chunk_list user).
+ * | | Note that the list order has no significance.
+ * | |
+ * | +- <+> chunk_list (BChunkList):
+ * | | The chunks that make up this state.
+ * | | Each state is a chunk_list user,
+ * | | avoids duplicating lists when there is no change between states.
+ * | |
+ * | +- chunk_refs (List of BChunkRef): Each chunk_ref links to a BChunk.
+ * | Each reference is a chunk user,
+ * | avoids duplicating smaller chunks of memory found in multiple states.
+ * |
+ * +- info (BArrayInfo):
+ * | Sizes and offsets for this array-store.
+ * | Also caches some variables for reuse.
+ * |
+ * +- <+> memory (BArrayMemory):
+ * | Memory pools for storing BArrayStore data.
+ * |
+ * +- chunk_list (Pool of BChunkList):
+ * | All chunk_lists, (reference counted, used by BArrayState).
+ * |
+ * +- chunk_ref (Pool of BChunkRef):
+ * | All chunk_refs (link between BChunkList & BChunk).
+ * |
+ * +- chunks (Pool of BChunk):
+ * All chunks, (reference counted, used by BChunkList).
+ * These have their headers hashed for reuse so we can quickly check for duplicates.
+ * </pre>
+ *
+ *
+ * De-Duplication
+ * --------------
+ *
+ * When creating a new state, a previous state can be given as a reference,
+ * matching chunks from this state are re-used in the new state.
+ *
+ * First matches at either end of the array are detected.
+ * For identical arrays this is all that's needed.
+ *
+ * De-duplication is performed on any remaining chunks, by hashing the first few bytes of the chunk
+ * (see: BCHUNK_HASH_TABLE_ACCUMULATE_STEPS).
+ *
+ * \note This is cached for reuse since the referenced data never changes.
+ *
+ * An array is created to store hash values at every 'stride',
+ * then stepped over to search for matching chunks.
+ *
+ * Once a match is found, there is a high chance the next chunks match too,
+ * so they are checked to avoid performing so many hash-lookups.
+ * Otherwise new chunks are created.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_listbase.h"
+#include "BLI_mempool.h"
+
+#include "BLI_strict_flags.h"
+
+#include "BLI_array_store.h" /* own include */
+
+/* only for BLI_array_store_is_valid */
+#include "BLI_ghash.h"
+
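+/* Example usage of the public API described in the overview above
+ * (a compiled-out sketch; 'data_a', 'data_b', their element counts 'len_a'/'len_b'
+ * and the chunk size of 32 elements are illustrative values, not something this file prescribes). */
+#if 0
+static void array_store_example(
+ const int *data_a, const size_t len_a,
+ const int *data_b, const size_t len_b)
+{
+ /* all arrays stored here must share the same stride, sizeof(int);
+ * the API takes sizes in bytes, 'len_a'/'len_b' are element counts */
+ BArrayStore *bs = BLI_array_store_create(sizeof(int), 32);
+
+ /* the first state has no reference */
+ BArrayState *state_a = BLI_array_store_state_add(bs, data_a, len_a * sizeof(int), NULL);
+ /* similar data, pass 'state_a' as a reference so matching chunks are shared */
+ BArrayState *state_b = BLI_array_store_state_add(bs, data_b, len_b * sizeof(int), state_a);
+
+ /* expand a state back into a regular allocation */
+ size_t data_len;
+ int *data_copy = BLI_array_store_state_data_get_alloc(state_b, &data_len);
+ /* ... use 'data_copy' ... */
+ MEM_freeN(data_copy);
+
+ /* states may be removed in any order, the rest is freed with the store */
+ BLI_array_store_state_remove(bs, state_a);
+ BLI_array_store_destroy(bs);
+}
+#endif
+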
+/** \name Defines
+ *
+ * Some of the logic for merging is quite involved,
+ * support disabling some parts of this.
+ * \{ */
+
+/* Scan first chunks (the happy path when the beginning of the array matches).
+ * When the array is a perfect match, we can re-use the entire list.
+ *
+ * Note that disabling makes some tests fail that check for output-size.
+ */
+#define USE_FASTPATH_CHUNKS_FIRST
+
+/* Scan last chunks (the happy path when the end of the array matches).
+ * When the end of the array matches, we can quickly add these chunks.
+ * Note that contiguous matching chunks will be added in any case,
+ * so this isn't as useful as USE_FASTPATH_CHUNKS_FIRST;
+ * however it avoids adding matching chunks into the lookup table,
+ * so creating the lookup table won't be as expensive.
+ */
+#ifdef USE_FASTPATH_CHUNKS_FIRST
+# define USE_FASTPATH_CHUNKS_LAST
+#endif
+
+/* For arrays of matching length, test that *enough* of the chunks are aligned,
+ * and simply step over both arrays, using matching chunks.
+ * This avoids the overhead of using a lookup table for cases where we can assume they're mostly aligned.
+ */
+#define USE_ALIGN_CHUNKS_TEST
+
+/* Accumulate hashes from right to left so we can create a hash for the chunk-start.
+ * This serves to increase uniqueness and will help when there are many values which are the same.
+ */
+#define USE_HASH_TABLE_ACCUMULATE
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+/* Number of times to propagate hashes back.
+ * Effectively a 'triangle-number',
+ * so 4 -> 7, 5 -> 10, 6 -> 15... etc.
+ */
+# define BCHUNK_HASH_TABLE_ACCUMULATE_STEPS 4
+#else
+/* How many items to hash (multiplied by stride)
+ */
+# define BCHUNK_HASH_LEN 4
+#endif
+
+/* Calculate the key once and reuse it
+ */
+#define USE_HASH_TABLE_KEY_CACHE
+#ifdef USE_HASH_TABLE_KEY_CACHE
+# define HASH_TABLE_KEY_UNSET ((uint64_t)-1)
+# define HASH_TABLE_KEY_FALLBACK ((uint64_t)-2)
+#endif
+
+/* How much smaller the table is than the total number of steps.
+ */
+#define BCHUNK_HASH_TABLE_DIV 16
+
+/* Merge too small/large chunks:
+ *
+ * Using this means chunks below a threshold will be merged together.
+ * Even though short term this uses more memory,
+ * long term the overhead of maintaining many small chunks is reduced.
+ * This is defined by setting the minimum chunk size (as a fraction of the regular chunk size).
+ *
+ * Chunks may also become too large (when incrementally growing an array),
+ * this also enables chunk splitting.
+ */
+#define USE_MERGE_CHUNKS
+
+#ifdef USE_MERGE_CHUNKS
+/* Merge chunks smaller than: (chunk_size / BCHUNK_SIZE_MIN_DIV)
+ */
+# define BCHUNK_SIZE_MIN_DIV 8
+
+/* Disallow chunks bigger than the regular chunk size scaled by this value.
+ * Note: must be at least 2!
+ * However, the splitting code won't run in tests unless this is ~1.1 (ugh),
+ * so lower it only to check that splitting works.
+ */
+# define BCHUNK_SIZE_MAX_MUL 2
+#endif /* USE_MERGE_CHUNKS */
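+
+/* A worked example of the two thresholds above, assuming a store whose
+ * 'chunk_byte_size' is 4096 bytes (purely illustrative):
+ * - a chunk smaller than roughly 4096 / BCHUNK_SIZE_MIN_DIV = 512 bytes is merged into its neighbor,
+ * - the merged result may not exceed 4096 * BCHUNK_SIZE_MAX_MUL = 8192 bytes, otherwise it is split again.
+ */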
+
+/* slow (keep disabled), but handy for debugging */
+// #define USE_VALIDATE_LIST_SIZE
+
+// #define USE_VALIDATE_LIST_DATA_PARTIAL
+
+// #define USE_PARANOID_CHECKS
+
+/** \} */
+
+
+/** \name Internal Structs
+ * \{ */
+
+typedef unsigned int uint;
+typedef unsigned char ubyte;
+
+typedef uint64_t hash_key;
+
+
+typedef struct BArrayInfo {
+ size_t chunk_stride;
+ uint chunk_count;
+
+ /* pre-calculated */
+ size_t chunk_byte_size;
+ size_t chunk_byte_size_min;
+
+ size_t accum_read_ahead_bytes;
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ size_t accum_steps;
+ size_t accum_read_ahead_len;
+#endif
+} BArrayInfo;
+
+typedef struct BArrayMemory {
+ BLI_mempool *chunk_list; /* BChunkList */
+ BLI_mempool *chunk_ref; /* BChunkRef */
+ BLI_mempool *chunk; /* BChunk */
+} BArrayMemory;
+
+/**
+ * Main storage for all states
+ */
+typedef struct BArrayStore {
+ /* static */
+ BArrayInfo info;
+
+ /* memory storage */
+ BArrayMemory memory;
+
+ /**
+ * #BArrayState may be in any order (logic should never depend on state order).
+ */
+ ListBase states;
+} BArrayStore;
+
+/**
+ * A single instance of an array.
+ *
+ * This is how external APIs hold a reference to an in-memory state,
+ * although the struct is private.
+ *
+ * \note Currently each 'state' is allocated separately.
+ * While this could be moved to a memory pool,
+ * separate allocations make it easier to trace invalid usage, so leave as-is for now.
+ */
+typedef struct BArrayState {
+ /** linked list in #BArrayStore.states */
+ struct BArrayState *next, *prev;
+
+ struct BChunkList *chunk_list; /* BChunkList's */
+
+} BArrayState;
+
+typedef struct BChunkList {
+ ListBase chunk_refs; /* BChunkRef's */
+ uint chunk_refs_len; /* BLI_listbase_count(chunks), store for reuse. */
+ size_t total_size; /* size of all chunks */
+
+ /** number of #BArrayState using this. */
+ int users;
+} BChunkList;
+
+/* a chunk of an array */
+typedef struct BChunk {
+ const ubyte *data;
+ size_t data_len;
+ /** number of #BChunkList using this. */
+ int users;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ hash_key key;
+#endif
+} BChunk;
+
+/**
+ * Links to store #BChunk data in #BChunkList.chunk_refs.
+ */
+typedef struct BChunkRef {
+ struct BChunkRef *next, *prev;
+ BChunk *link;
+} BChunkRef;
+
+/**
+ * Singly linked list used when putting chunks into a temporary table,
+ * used for lookups.
+ *
+ * Point to the #BChunkRef, not the #BChunk,
+ * to allow walking down the chunks in-order until a mismatch is found;
+ * this avoids having to do so many table lookups.
+ */
+typedef struct BTableRef {
+ struct BTableRef *next;
+ const BChunkRef *cref;
+} BTableRef;
+
+/** \} */
+
+
+static size_t bchunk_list_size(const BChunkList *chunk_list);
+
+
+/** \name Internal BChunk API
+ * \{ */
+
+static BChunk *bchunk_new(
+ BArrayMemory *bs_mem, const ubyte *data, const size_t data_len)
+{
+ BChunk *chunk = BLI_mempool_alloc(bs_mem->chunk);
+ chunk->data = data;
+ chunk->data_len = data_len;
+ chunk->users = 0;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ chunk->key = HASH_TABLE_KEY_UNSET;
+#endif
+ return chunk;
+}
+
+static BChunk *bchunk_new_copydata(
+ BArrayMemory *bs_mem, const ubyte *data, const size_t data_len)
+{
+ ubyte *data_copy = MEM_mallocN(data_len, __func__);
+ memcpy(data_copy, data, data_len);
+ return bchunk_new(bs_mem, data_copy, data_len);
+}
+
+static void bchunk_decref(
+ BArrayMemory *bs_mem, BChunk *chunk)
+{
+ BLI_assert(chunk->users > 0);
+ if (chunk->users == 1) {
+ MEM_freeN((void *)chunk->data);
+ BLI_mempool_free(bs_mem->chunk, chunk);
+ }
+ else {
+ chunk->users -= 1;
+ }
+}
+
+static bool bchunk_data_compare(
+ const BChunk *chunk,
+ const ubyte *data_base, const size_t data_base_len,
+ const size_t offset)
+{
+ if (offset + (size_t)chunk->data_len <= data_base_len) {
+ return (memcmp(&data_base[offset], chunk->data, chunk->data_len) == 0);
+ }
+ else {
+ return false;
+ }
+}
+
+/** \} */
+
+
+/** \name Internal BChunkList API
+ * \{ */
+
+static BChunkList *bchunk_list_new(
+ BArrayMemory *bs_mem, size_t total_size)
+{
+ BChunkList *chunk_list = BLI_mempool_alloc(bs_mem->chunk_list);
+
+ BLI_listbase_clear(&chunk_list->chunk_refs);
+ chunk_list->chunk_refs_len = 0;
+ chunk_list->total_size = total_size;
+ chunk_list->users = 0;
+ return chunk_list;
+}
+
+static void bchunk_list_decref(
+ BArrayMemory *bs_mem, BChunkList *chunk_list)
+{
+ BLI_assert(chunk_list->users > 0);
+ if (chunk_list->users == 1) {
+ for (BChunkRef *cref = chunk_list->chunk_refs.first, *cref_next; cref; cref = cref_next) {
+ cref_next = cref->next;
+ bchunk_decref(bs_mem, cref->link);
+ BLI_mempool_free(bs_mem->chunk_ref, cref);
+ }
+
+ BLI_mempool_free(bs_mem->chunk_list, chunk_list);
+ }
+ else {
+ chunk_list->users -= 1;
+ }
+}
+
+#ifdef USE_VALIDATE_LIST_SIZE
+# ifndef NDEBUG
+# define ASSERT_CHUNKLIST_SIZE(chunk_list, n) BLI_assert(bchunk_list_size(chunk_list) == n)
+# endif
+#endif
+#ifndef ASSERT_CHUNKLIST_SIZE
+# define ASSERT_CHUNKLIST_SIZE(chunk_list, n) (EXPR_NOP(chunk_list), EXPR_NOP(n))
+#endif
+
+
+#ifdef USE_VALIDATE_LIST_DATA_PARTIAL
+static bool bchunk_list_data_check(
+ const BChunkList *chunk_list, const ubyte *data)
+{
+ size_t total_size = 0;
+ for (BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+ if (memcmp(&data[total_size], cref->link->data, cref->link->data_len) != 0) {
+ return false;
+ }
+ total_size += cref->link->data_len;
+ }
+ return true;
+}
+# define ASSERT_CHUNKLIST_DATA(chunk_list, data) BLI_assert(bchunk_list_data_check(chunk_list, data))
+#else
+# define ASSERT_CHUNKLIST_DATA(chunk_list, data) (EXPR_NOP(chunk_list), EXPR_NOP(data))
+#endif
+
+
+#ifdef USE_MERGE_CHUNKS
+static void bchunk_list_ensure_min_size_last(
+ const BArrayInfo *info, BArrayMemory *bs_mem,
+ BChunkList *chunk_list)
+{
+ BChunkRef *cref = chunk_list->chunk_refs.last;
+ if (cref && cref->prev) {
+ /* both are decref'd after use (end of this block) */
+ BChunk *chunk_curr = cref->link;
+ BChunk *chunk_prev = cref->prev->link;
+
+ if (MIN2(chunk_prev->data_len, chunk_curr->data_len) < info->chunk_byte_size_min) {
+ const size_t data_merge_len = chunk_prev->data_len + chunk_curr->data_len;
+ /* we could pass, but no need */
+ if (data_merge_len <= (info->chunk_byte_size * BCHUNK_SIZE_MAX_MUL)) {
+ /* we have enough space to merge */
+
+ /* remove last from linklist */
+ BLI_assert(chunk_list->chunk_refs.last != chunk_list->chunk_refs.first);
+ cref->prev->next = NULL;
+ chunk_list->chunk_refs.last = cref->prev;
+ chunk_list->chunk_refs_len -= 1;
+
+ ubyte *data_merge = MEM_mallocN(data_merge_len, __func__);
+ memcpy(data_merge, chunk_prev->data, chunk_prev->data_len);
+ memcpy(&data_merge[chunk_prev->data_len], chunk_curr->data, chunk_curr->data_len);
+
+ cref->prev->link = bchunk_new(bs_mem, data_merge, data_merge_len);
+ cref->prev->link->users += 1;
+
+ BLI_mempool_free(bs_mem->chunk_ref, cref);
+ }
+ else {
+ /* If we always merge small slices, we should _almost_ never end up having very large chunks.
+ * Gradual expanding or contracting will cause this.
+ *
+ * if we do, the code below works (test by setting 'BCHUNK_SIZE_MAX_MUL = 1.2') */
+
+ /* keep chunk on the left hand side a regular size */
+ const size_t split = info->chunk_byte_size;
+
+ /* merge and split */
+ const size_t data_prev_len = split;
+ const size_t data_curr_len = data_merge_len - split;
+ ubyte *data_prev = MEM_mallocN(data_prev_len, __func__);
+ ubyte *data_curr = MEM_mallocN(data_curr_len, __func__);
+
+ if (data_prev_len <= chunk_prev->data_len) {
+ const size_t data_curr_shrink_len = chunk_prev->data_len - data_prev_len;
+
+ /* setup 'data_prev' */
+ memcpy(data_prev, chunk_prev->data, data_prev_len);
+
+ /* setup 'data_curr' */
+ memcpy(data_curr, &chunk_prev->data[data_prev_len], data_curr_shrink_len);
+ memcpy(&data_curr[data_curr_shrink_len], chunk_curr->data, chunk_curr->data_len);
+ }
+ else {
+ BLI_assert(data_curr_len <= chunk_curr->data_len);
+ BLI_assert(data_prev_len >= chunk_prev->data_len);
+
+ const size_t data_prev_grow_len = data_prev_len - chunk_prev->data_len;
+
+ /* setup 'data_prev' */
+ memcpy(data_prev, chunk_prev->data, chunk_prev->data_len);
+ memcpy(&data_prev[chunk_prev->data_len], chunk_curr->data, data_prev_grow_len);
+
+ /* setup 'data_curr' */
+ memcpy(data_curr, &chunk_curr->data[data_prev_grow_len], data_curr_len);
+ }
+
+ cref->prev->link = bchunk_new(bs_mem, data_prev, data_prev_len);
+ cref->prev->link->users += 1;
+
+ cref->link = bchunk_new(bs_mem, data_curr, data_curr_len);
+ cref->link->users += 1;
+ }
+
+ /* free zero users */
+ bchunk_decref(bs_mem, chunk_curr);
+ bchunk_decref(bs_mem, chunk_prev);
+ }
+ }
+}
+#endif /* USE_MERGE_CHUNKS */
+
+/**
+ * Append and don't manage merging small chunks.
+ */
+static BChunk *bchunk_list_append_only(
+ BArrayMemory *bs_mem,
+ BChunkList *chunk_list, BChunk *chunk)
+{
+ BChunkRef *cref = BLI_mempool_alloc(bs_mem->chunk_ref);
+ BLI_addtail(&chunk_list->chunk_refs, cref);
+ cref->link = chunk;
+ chunk_list->chunk_refs_len += 1;
+ chunk->users += 1;
+ return chunk;
+}
+
+static void bchunk_list_append_data(
+ const BArrayInfo *info, BArrayMemory *bs_mem,
+ BChunkList *chunk_list,
+ const ubyte *data, const size_t data_len)
+{
+ BLI_assert(data_len != 0);
+ // printf("data_len: %d\n", data_len);
+#ifdef USE_MERGE_CHUNKS
+ if (!BLI_listbase_is_empty(&chunk_list->chunk_refs)) {
+ BChunkRef *cref = chunk_list->chunk_refs.last;
+ BChunk *chunk_prev = cref->link;
+
+ if (MIN2(chunk_prev->data_len, data_len) < info->chunk_byte_size_min) {
+ const size_t data_merge_len = chunk_prev->data_len + data_len;
+ /* realloc for single user */
+ if (cref->link->users == 1) {
+ ubyte *data_merge = MEM_reallocN((void *)cref->link->data, data_merge_len);
+ memcpy(&data_merge[chunk_prev->data_len], data, data_len);
+ cref->link->data = data_merge;
+ cref->link->data_len = data_merge_len;
+ }
+ else {
+ ubyte *data_merge = MEM_mallocN(data_merge_len, __func__);
+ memcpy(data_merge, chunk_prev->data, chunk_prev->data_len);
+ memcpy(&data_merge[chunk_prev->data_len], data, data_len);
+ cref->link = bchunk_new(bs_mem, data_merge, data_merge_len);
+ cref->link->users += 1;
+ bchunk_decref(bs_mem, chunk_prev);
+ }
+ return;
+ }
+ }
+#else
+ UNUSED_VARS(info);
+#endif /* USE_MERGE_CHUNKS */
+
+ BChunk *chunk = bchunk_new_copydata(bs_mem, data, data_len);
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+
+ /* don't run this, instead preemptively avoid creating a chunk only to merge it (above). */
+#if 0
+#ifdef USE_MERGE_CHUNKS
+ bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#endif
+#endif
+}
+
+static void bchunk_list_append(
+ const BArrayInfo *info, BArrayMemory *bs_mem,
+ BChunkList *chunk_list,
+ BChunk *chunk)
+{
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+
+#ifdef USE_MERGE_CHUNKS
+ bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#else
+ UNUSED_VARS(info);
+#endif
+}
+
+static void bchunk_list_fill_from_array(
+ const BArrayInfo *info, BArrayMemory *bs_mem,
+ BChunkList *chunk_list,
+ const ubyte *data,
+ const size_t data_len)
+{
+ BLI_assert(BLI_listbase_is_empty(&chunk_list->chunk_refs));
+
+ size_t data_last_chunk_len = 0;
+ size_t data_trim_len = data_len;
+
+#ifdef USE_MERGE_CHUNKS
+ /* avoid creating too-small chunks,
+ * more efficient than merging after */
+ if (data_len > info->chunk_byte_size) {
+ data_last_chunk_len = (data_trim_len % info->chunk_byte_size);
+ data_trim_len = data_trim_len - data_last_chunk_len;
+ if (data_last_chunk_len) {
+ if (data_last_chunk_len < info->chunk_byte_size_min) {
+ /* may be zero and that's OK */
+ data_trim_len -= info->chunk_byte_size;
+ data_last_chunk_len += info->chunk_byte_size;
+ }
+ }
+ }
+ else {
+ data_trim_len = 0;
+ data_last_chunk_len = data_len;
+ }
+#else
+ data_last_chunk_len = (data_trim_len % info->chunk_byte_size);
+ data_trim_len = data_trim_len - data_last_chunk_len;
+#endif
+
+
+ BLI_assert(data_trim_len + data_last_chunk_len == data_len);
+
+ size_t i_prev = 0;
+ while (i_prev < data_trim_len) {
+ const size_t i = i_prev + info->chunk_byte_size;
+ BChunk *chunk = bchunk_new_copydata(bs_mem, &data[i_prev], i - i_prev);
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+ i_prev = i;
+ }
+
+ if (data_last_chunk_len) {
+ BChunk *chunk = bchunk_new_copydata(bs_mem, &data[i_prev], data_last_chunk_len);
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+ // i_prev = data_len;
+ }
+
+#ifdef USE_MERGE_CHUNKS
+ if (data_len > info->chunk_byte_size) {
+ BLI_assert(((BChunkRef *)chunk_list->chunk_refs.last)->link->data_len >= info->chunk_byte_size_min);
+ }
+#endif
+
+ /* works but better avoid redundant re-alloc */
+#if 0
+#ifdef USE_MERGE_CHUNKS
+ bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#endif
+#endif
+
+ ASSERT_CHUNKLIST_SIZE(chunk_list, data_len);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+}
+
+
+/* ---------------------------------------------------------------------------
+ * Internal Table Lookup Functions
+ */
+
+/** \name Internal Hashing/De-Duplication API
+ *
+ * Only used by #bchunk_list_from_data_merge
+ * \{ */
+
+#define HASH_INIT (5381)
+
+BLI_INLINE uint hash_data_single(const ubyte p)
+{
+ return (HASH_INIT << 5) + HASH_INIT + (unsigned int)p;
+}
+
+/* hash bytes, from BLI_ghashutil_strhash_n */
+static uint hash_data(const ubyte *key, size_t n)
+{
+ const signed char *p;
+ unsigned int h = HASH_INIT;
+
+ for (p = (const signed char *)key; n--; p++) {
+ h = (h << 5) + h + (unsigned int)*p;
+ }
+
+ return h;
+}
+
+#undef HASH_INIT
+
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+static void hash_array_from_data(
+ const BArrayInfo *info, const ubyte *data_slice, const size_t data_slice_len,
+ hash_key *hash_array)
+{
+ if (info->chunk_stride != 1) {
+ for (size_t i = 0, i_step = 0; i_step < data_slice_len; i++, i_step += info->chunk_stride) {
+ hash_array[i] = hash_data(&data_slice[i_step], info->chunk_stride);
+ }
+ }
+ else {
+ /* fast-path for bytes */
+ for (size_t i = 0; i < data_slice_len; i++) {
+ hash_array[i] = hash_data_single(data_slice[i]);
+ }
+ }
+}
+
+/*
+ * Similar to hash_array_from_data,
+ * but able to step into the next chunk if we run out of data.
+ */
+static void hash_array_from_cref(
+ const BArrayInfo *info, const BChunkRef *cref, const size_t data_len,
+ hash_key *hash_array)
+{
+ const size_t hash_array_len = data_len / info->chunk_stride;
+ size_t i = 0;
+ do {
+ size_t i_next = hash_array_len - i;
+ size_t data_trim_len = i_next * info->chunk_stride;
+ if (data_trim_len > cref->link->data_len) {
+ data_trim_len = cref->link->data_len;
+ i_next = data_trim_len / info->chunk_stride;
+ }
+ BLI_assert(data_trim_len <= cref->link->data_len);
+ hash_array_from_data(info, cref->link->data, data_trim_len, &hash_array[i]);
+ i += i_next;
+ cref = cref->next;
+ } while ((i < hash_array_len) && (cref != NULL));
+
+ /* If this isn't equal, the caller didn't properly check
+ * that there was enough data left in all chunks */
+ BLI_assert(i == hash_array_len);
+}
+
+static void hash_accum(hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
+{
+ /* _very_ unlikely, can happen if you select a chunk-size of 1 for example. */
+ if (UNLIKELY((iter_steps > hash_array_len))) {
+ iter_steps = hash_array_len;
+ }
+
+ const size_t hash_array_search_len = hash_array_len - iter_steps;
+ while (iter_steps != 0) {
+ const size_t hash_offset = iter_steps;
+ for (uint i = 0; i < hash_array_search_len; i++) {
+ hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
+ }
+ iter_steps -= 1;
+ }
+}
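+
+/* For illustration: with 'iter_steps == 3' (BCHUNK_HASH_TABLE_ACCUMULATE_STEPS - 1),
+ * hash_array[i] ends up combining the original hashes at indices i .. i + (3 + 2 + 1),
+ * i.e. 7 values in total, which is what 'accum_read_ahead_len' accounts for. */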
+
+/**
+ * When we only need a single value, we can use a small optimization:
+ * we can avoid accumulating a little more of the array's tail each iteration.
+ */
+static void hash_accum_single(hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
+{
+ BLI_assert(iter_steps <= hash_array_len);
+ if (UNLIKELY(!(iter_steps <= hash_array_len))) {
+ /* while this shouldn't happen, avoid crashing */
+ iter_steps = hash_array_len;
+ }
+ /* We can increase this value each step to avoid accumulating quite as much
+ * while getting the same results as hash_accum */
+ size_t iter_steps_sub = iter_steps;
+
+ while (iter_steps != 0) {
+ const size_t hash_array_search_len = hash_array_len - iter_steps_sub;
+ const size_t hash_offset = iter_steps;
+ for (uint i = 0; i < hash_array_search_len; i++) {
+ hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
+ }
+ iter_steps -= 1;
+ iter_steps_sub += iter_steps;
+ }
+}
+
+static hash_key key_from_chunk_ref(
+ const BArrayInfo *info, const BChunkRef *cref,
+ /* avoid reallocating each time */
+ hash_key *hash_store, const size_t hash_store_len)
+{
+ /* in C, will fill in a reusable array */
+ BChunk *chunk = cref->link;
+ BLI_assert(info->accum_read_ahead_bytes * info->chunk_stride);
+
+ if (info->accum_read_ahead_bytes <= chunk->data_len) {
+ hash_key key;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ key = chunk->key;
+ if (key != HASH_TABLE_KEY_UNSET) {
+ /* Using key cache!
+ * avoids calculating every time */
+ }
+ else {
+ hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+ hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+ key = hash_store[0];
+
+ /* cache the key */
+ if (key == HASH_TABLE_KEY_UNSET) {
+ key = HASH_TABLE_KEY_FALLBACK;
+ }
+ chunk->key = key;
+ }
+#else
+ hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+ hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+ key = hash_store[0];
+#endif
+ return key;
+ }
+ else {
+ /* corner case - we're too small, calculate the key each time. */
+
+ hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+ hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+ hash_key key = hash_store[0];
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ if (UNLIKELY(key == HASH_TABLE_KEY_UNSET)) {
+ key = HASH_TABLE_KEY_FALLBACK;
+ }
+#endif
+ return key;
+ }
+}
+
+static const BChunkRef *table_lookup(
+ const BArrayInfo *info, BTableRef **table, const size_t table_len, const size_t i_table_start,
+ const ubyte *data, const size_t data_len, const size_t offset, const hash_key *table_hash_array)
+{
+ size_t size_left = data_len - offset;
+ hash_key key = table_hash_array[((offset - i_table_start) / info->chunk_stride)];
+ size_t key_index = (size_t)(key % (hash_key)table_len);
+ for (BTableRef *tref = table[key_index]; tref; tref = tref->next) {
+ const BChunkRef *cref = tref->cref;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ if (cref->link->key == key)
+#endif
+ {
+ BChunk *chunk_test = cref->link;
+ if (chunk_test->data_len <= size_left) {
+ if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+ /* we could remove the chunk from the table, to avoid multiple hits */
+ return cref;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+#else /* USE_HASH_TABLE_ACCUMULATE */
+
+/* NON USE_HASH_TABLE_ACCUMULATE code (simply hash each chunk) */
+
+static hash_key key_from_chunk_ref(const BArrayInfo *info, const BChunkRef *cref)
+{
+ const size_t data_hash_len = BCHUNK_HASH_LEN * info->chunk_stride;
+ hash_key key;
+ BChunk *chunk = cref->link;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ key = chunk->key;
+ if (key != HASH_TABLE_KEY_UNSET) {
+ /* Using key cache!
+ * avoids calculating every time */
+ }
+ else {
+ /* cache the key */
+ key = hash_data(chunk->data, data_hash_len);
+ if (key == HASH_TABLE_KEY_UNSET) {
+ key = HASH_TABLE_KEY_FALLBACK;
+ }
+ chunk->key = key;
+ }
+#else
+ key = hash_data(chunk->data, data_hash_len);
+#endif
+
+ return key;
+}
+
+static const BChunkRef *table_lookup(
+ const BArrayInfo *info, BTableRef **table, const size_t table_len, const uint UNUSED(i_table_start),
+ const ubyte *data, const size_t data_len, const size_t offset, const hash_key *UNUSED(table_hash_array))
+{
+ const size_t data_hash_len = BCHUNK_HASH_LEN * info->chunk_stride; /* TODO, cache */
+
+ size_t size_left = data_len - offset;
+ hash_key key = hash_data(&data[offset], MIN2(data_hash_len, size_left));
+ size_t key_index = (size_t)(key % (hash_key)table_len);
+ for (BTableRef *tref = table[key_index]; tref; tref = tref->next) {
+ const BChunkRef *cref = tref->cref;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+ if (cref->link->key == key)
+#endif
+ {
+ BChunk *chunk_test = cref->link;
+ if (chunk_test->data_len <= size_left) {
+ if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+ /* we could remove the chunk from the table, to avoid multiple hits */
+ return cref;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+#endif /* USE_HASH_TABLE_ACCUMULATE */
+
+/* End Table Lookup
+ * ---------------- */
+
+/** \} */
+
+/**
+ * \param data: Data to store in the returned value.
+ * \param data_len_original: Length of data in bytes.
+ * \param chunk_list_reference: Reuse this list or chunks within it, don't modify its content.
+ * \note Caller is responsible for adding the user.
+ */
+static BChunkList *bchunk_list_from_data_merge(
+ const BArrayInfo *info, BArrayMemory *bs_mem,
+ const ubyte *data, const size_t data_len_original,
+ const BChunkList *chunk_list_reference)
+{
+ ASSERT_CHUNKLIST_SIZE(chunk_list_reference, chunk_list_reference->total_size);
+
+ /* -----------------------------------------------------------------------
+ * Fast-Path for exact match
+ * Check for exact match, if so, return the current list.
+ */
+
+ const BChunkRef *cref_match_first = NULL;
+
+ uint chunk_list_reference_skip_len = 0;
+ size_t chunk_list_reference_skip_bytes = 0;
+ size_t i_prev = 0;
+
+#ifdef USE_FASTPATH_CHUNKS_FIRST
+ bool full_match = false;
+
+ {
+ full_match = true;
+
+ const BChunkRef *cref = chunk_list_reference->chunk_refs.first;
+ while (i_prev < data_len_original) {
+ if (cref != NULL && bchunk_data_compare(cref->link, data, data_len_original, i_prev)) {
+ cref_match_first = cref;
+ chunk_list_reference_skip_len += 1;
+ chunk_list_reference_skip_bytes += cref->link->data_len;
+ i_prev += cref->link->data_len;
+ cref = cref->next;
+ }
+ else {
+ full_match = false;
+ break;
+ }
+ }
+
+ if (full_match) {
+ if (chunk_list_reference->total_size == data_len_original) {
+ return (BChunkList *)chunk_list_reference;
+ }
+ }
+ }
+
+ /* End Fast-Path (first)
+ * --------------------- */
+
+#endif /* USE_FASTPATH_CHUNKS_FIRST */
+
+ /* Copy until we have a mismatch */
+ BChunkList *chunk_list = bchunk_list_new(bs_mem, data_len_original);
+ if (cref_match_first != NULL) {
+ size_t chunk_size_step = 0;
+ const BChunkRef *cref = chunk_list_reference->chunk_refs.first;
+ while (true) {
+ BChunk *chunk = cref->link;
+ chunk_size_step += chunk->data_len;
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, chunk_size_step);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ if (cref == cref_match_first) {
+ break;
+ }
+ else {
+ cref = cref->next;
+ }
+ }
+ /* happens when bytes are removed from the end of the array */
+ if (chunk_size_step == data_len_original) {
+ return chunk_list;
+ }
+
+ i_prev = chunk_size_step;
+ }
+ else {
+ i_prev = 0;
+ }
+
+ /* ------------------------------------------------------------------------
+ * Fast-Path for end chunks
+ *
+ * Check for trailing chunks
+ */
+
+ /* In this case use 'chunk_list_reference_last' to define the last index
+ * index_match_last = -1 */
+
+ /* warning, from now on don't use len(data)
+ * since we want to ignore chunks already matched */
+ size_t data_len = data_len_original;
+#define data_len_original invalid_usage
+#ifdef data_len_original /* quiet warning */
+#endif
+
+ const BChunkRef *chunk_list_reference_last = NULL;
+
+#ifdef USE_FASTPATH_CHUNKS_LAST
+ if (!BLI_listbase_is_empty(&chunk_list_reference->chunk_refs)) {
+ const BChunkRef *cref = chunk_list_reference->chunk_refs.last;
+ while ((cref->prev != NULL) &&
+ (cref != cref_match_first) &&
+ (cref->link->data_len <= data_len - i_prev))
+ {
+ BChunk *chunk_test = cref->link;
+ size_t offset = data_len - chunk_test->data_len;
+ if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+ data_len = offset;
+ chunk_list_reference_last = cref;
+ chunk_list_reference_skip_len += 1;
+ chunk_list_reference_skip_bytes += cref->link->data_len;
+ cref = cref->prev;
+ }
+ else {
+ break;
+ }
+ }
+ }
+
+ /* End Fast-Path (last)
+ * -------------------- */
+#endif /* USE_FASTPATH_CHUNKS_LAST */
+
+ /* -----------------------------------------------------------------------
+ * Check for aligned chunks
+ *
+ * This saves a lot of searching, so use simple heuristics to detect aligned arrays.
+ * (may need to tweak exact method).
+ */
+
+ bool use_aligned = false;
+
+#ifdef USE_ALIGN_CHUNKS_TEST
+ if (chunk_list->total_size == chunk_list_reference->total_size) {
+ /* if we're already a quarter aligned */
+ if (data_len - i_prev <= chunk_list->total_size / 4) {
+ use_aligned = true;
+ }
+ else {
+ /* TODO, walk over chunks and check if some arbitrary amount align */
+ }
+ }
+#endif /* USE_ALIGN_CHUNKS_TEST */
+
+ /* End Aligned Chunk Case
+ * ----------------------- */
+
+ if (use_aligned) {
+ /* Copy matching chunks, creating the new list using the same 'layout' as the reference */
+ const BChunkRef *cref = cref_match_first ? cref_match_first->next : chunk_list_reference->chunk_refs.first;
+ while (i_prev != data_len) {
+ const size_t i = i_prev + cref->link->data_len;
+ BLI_assert(i != i_prev);
+
+ if ((cref != chunk_list_reference_last) &&
+ bchunk_data_compare(cref->link, data, data_len, i_prev))
+ {
+ bchunk_list_append(info, bs_mem, chunk_list, cref->link);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ }
+ else {
+ bchunk_list_append_data(info, bs_mem, chunk_list, &data[i_prev], i - i_prev);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ }
+
+ cref = cref->next;
+
+ i_prev = i;
+ }
+ }
+ else if ((data_len - i_prev >= info->chunk_byte_size) &&
+ (chunk_list_reference->chunk_refs_len >= chunk_list_reference_skip_len) &&
+ (chunk_list_reference->chunk_refs.first != NULL))
+ {
+
+ /* --------------------------------------------------------------------
+ * Non-Aligned Chunk De-Duplication */
+
+ /* only create a table if we have at least one chunk to search
+ * otherwise just make a new one.
+ *
+ * Support re-arranged chunks */
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ size_t i_table_start = i_prev;
+ const size_t table_hash_array_len = (data_len - i_prev) / info->chunk_stride;
+ hash_key *table_hash_array = MEM_mallocN(sizeof(*table_hash_array) * table_hash_array_len, __func__);
+ hash_array_from_data(info, &data[i_prev], data_len - i_prev, table_hash_array);
+
+ hash_accum(table_hash_array, table_hash_array_len, info->accum_steps);
+#else
+ /* dummy vars */
+ uint i_table_start = 0;
+ hash_key *table_hash_array = NULL;
+#endif
+
+ const size_t table_len = MAX2(1u, (((data_len - i_prev) / info->chunk_stride)) / BCHUNK_HASH_TABLE_DIV);
+ BTableRef **table = MEM_callocN(table_len * sizeof(*table), __func__);
+
+ const uint chunk_list_reference_remaining_len =
+ (chunk_list_reference->chunk_refs_len - chunk_list_reference_skip_len) + 1;
+ BTableRef *table_ref_stack = MEM_mallocN(chunk_list_reference_remaining_len * sizeof(BTableRef), __func__);
+ uint table_ref_stack_n = 0;
+
+ /* table_make - inline
+ * include one matching chunk, to allow for repeating values */
+ {
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ const size_t hash_store_len = info->accum_read_ahead_len;
+ hash_key *hash_store = MEM_mallocN(sizeof(hash_key) * hash_store_len, __func__);
+#endif
+
+ const BChunkRef *cref;
+ size_t chunk_list_reference_bytes_remaining =
+ chunk_list_reference->total_size - chunk_list_reference_skip_bytes;
+
+ if (cref_match_first) {
+ cref = cref_match_first;
+ chunk_list_reference_bytes_remaining += cref->link->data_len;
+ }
+ else {
+ cref = chunk_list_reference->chunk_refs.first;
+ }
+
+#ifdef USE_PARANOID_CHECKS
+ {
+ size_t test_bytes_len = 0;
+ const BChunkRef *cr = cref;
+ while (cr != chunk_list_reference_last) {
+ test_bytes_len += cr->link->data_len;
+ cr = cr->next;
+ }
+ BLI_assert(test_bytes_len == chunk_list_reference_bytes_remaining);
+ }
+#endif
+
+ while ((cref != chunk_list_reference_last) &&
+ (chunk_list_reference_bytes_remaining >= info->accum_read_ahead_bytes))
+ {
+ hash_key key = key_from_chunk_ref(info, cref
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ , hash_store, hash_store_len
+#endif
+ );
+ size_t key_index = (size_t)(key % (hash_key)table_len);
+ BTableRef *tref_prev = table[key_index];
+ BLI_assert(table_ref_stack_n < chunk_list_reference_remaining_len);
+ BTableRef *tref = &table_ref_stack[table_ref_stack_n++];
+ tref->cref = cref;
+ tref->next = tref_prev;
+ table[key_index] = tref;
+
+ chunk_list_reference_bytes_remaining -= cref->link->data_len;
+ cref = cref->next;
+ }
+
+ BLI_assert(table_ref_stack_n <= chunk_list_reference_remaining_len);
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ MEM_freeN(hash_store);
+#endif
+ }
+ /* done making the table */
+
+ BLI_assert(i_prev <= data_len);
+ for (size_t i = i_prev; i < data_len; ) {
+ /* Assumes the existing chunk isn't a match! */
+
+ const BChunkRef *cref_found = table_lookup(
+ info,
+ table, table_len, i_table_start,
+ data, data_len, i, table_hash_array);
+ if (cref_found != NULL) {
+ BLI_assert(i < data_len);
+ if (i != i_prev) {
+ size_t i_step = MIN2(i_prev + info->chunk_byte_size, data_len);
+ BLI_assert(i_step <= data_len);
+
+ while (i_prev != i) {
+ i_step = MIN2(i_step, i);
+ const ubyte *data_slice = &data[i_prev];
+ const size_t data_slice_len = i_step - i_prev;
+ /* First add all previous chunks! */
+ i_prev += data_slice_len;
+ bchunk_list_append_data(info, bs_mem, chunk_list, data_slice, data_slice_len);
+ BLI_assert(i_prev <= data_len);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ i_step += info->chunk_byte_size;
+ }
+ }
+
+ /* now add the reference chunk */
+ {
+ BChunk *chunk_found = cref_found->link;
+ i += chunk_found->data_len;
+ bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
+ }
+ i_prev = i;
+ BLI_assert(i_prev <= data_len);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+ /* it's likely that the next chunk in the list will be a match, so check it! */
+ while ((cref_found->next != NULL) &&
+ (cref_found->next != chunk_list_reference_last))
+ {
+ cref_found = cref_found->next;
+ BChunk *chunk_found = cref_found->link;
+
+ if (bchunk_data_compare(chunk_found, data, data_len, i_prev)) {
+ /* may be useful to remove table data, assuming we don't have repeating memory
+ * where it would be useful to re-use chunks. */
+ i += chunk_found->data_len;
+ bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
+ /* chunk_found may be freed! */
+ i_prev = i;
+ BLI_assert(i_prev <= data_len);
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ }
+ else {
+ break;
+ }
+ }
+ }
+ else {
+ i = i + info->chunk_stride;
+ }
+ }
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ MEM_freeN(table_hash_array);
+#endif
+ MEM_freeN(table);
+ MEM_freeN(table_ref_stack);
+
+ /* End Table Lookup
+ * ---------------- */
+ }
+
+ ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+ /* -----------------------------------------------------------------------
+ * No Duplicates to copy, write new chunks
+ *
+ * Trailing chunks, no matches found in table lookup above.
+ * Write all new data. */
+ BLI_assert(i_prev <= data_len);
+ while (i_prev != data_len) {
+ size_t i = i_prev + info->chunk_byte_size;
+ i = MIN2(i, data_len);
+ BLI_assert(i != i_prev);
+ bchunk_list_append_data(info, bs_mem, chunk_list, &data[i_prev], i - i_prev);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ i_prev = i;
+ }
+
+ BLI_assert(i_prev == data_len);
+
+#ifdef USE_FASTPATH_CHUNKS_LAST
+ if (chunk_list_reference_last != NULL) {
+ /* write chunk_list_reference_last since it hasn't been written yet */
+ const BChunkRef *cref = chunk_list_reference_last;
+ while (cref != NULL) {
+ BChunk *chunk = cref->link;
+ // BLI_assert(bchunk_data_compare(chunk, data, data_len, i_prev));
+ i_prev += chunk->data_len;
+ /* use the simple append since we assume the reference's chunks have already been sized correctly. */
+ bchunk_list_append_only(bs_mem, chunk_list, chunk);
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+ cref = cref->next;
+ }
+ }
+#endif
+
+#undef data_len_original
+
+ BLI_assert(i_prev == data_len_original);
+
+ /* check we're the correct size and that we didn't accidentally modify the reference */
+ ASSERT_CHUNKLIST_SIZE(chunk_list, data_len_original);
+ ASSERT_CHUNKLIST_SIZE(chunk_list_reference, chunk_list_reference->total_size);
+
+ ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+ return chunk_list;
+}
+/* end private API */
+
+/** \} */
+
+
+/** \name Main Array Storage API
+ * \{ */
+
+
+/**
+ * Create a new array store, which can store any number of arrays
+ * as long as their stride matches.
+ *
+ * \param stride: ``sizeof()`` each element.
+ *
+ * \note While a stride of ``1`` will always work,
+ * it's less efficient since duplicate chunks of memory will be searched
+ * at positions unaligned with the array data.
+ *
+ * \param chunk_count: Number of elements to split each chunk into.
+ * - A small value increases the ability to de-duplicate chunks,
+ * but adds overhead by increasing the number of chunks to look-up when searching for duplicates,
+ * as well as some overhead constructing the original array again, with more calls to ``memcpy``.
+ * - Larger values reduce the *book keeping* overhead,
+ * but increase the chance a small, isolated change will cause a larger amount of data to be duplicated.
+ *
+ * \return A new array store, to be freed with #BLI_array_store_destroy.
+ */
+BArrayStore *BLI_array_store_create(
+ uint stride,
+ uint chunk_count)
+{
+ BArrayStore *bs = MEM_callocN(sizeof(BArrayStore), __func__);
+
+ bs->info.chunk_stride = stride;
+ bs->info.chunk_count = chunk_count;
+
+ bs->info.chunk_byte_size = chunk_count * stride;
+#ifdef USE_MERGE_CHUNKS
+ bs->info.chunk_byte_size_min = MAX2(1u, chunk_count / BCHUNK_SIZE_MIN_DIV) * stride;
+#endif
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+ bs->info.accum_steps = BCHUNK_HASH_TABLE_ACCUMULATE_STEPS - 1;
+ /* Triangle number, identifying how much read-ahead we need:
+ * https://en.wikipedia.org/wiki/Triangular_number (+ 1) */
+ bs->info.accum_read_ahead_len = (uint)((((bs->info.accum_steps * (bs->info.accum_steps + 1))) / 2) + 1);
+ bs->info.accum_read_ahead_bytes = bs->info.accum_read_ahead_len * stride;
+#else
+ bs->info.accum_read_ahead_bytes = BCHUNK_HASH_LEN * stride;
+#endif
+
+ bs->memory.chunk_list = BLI_mempool_create(sizeof(BChunkList), 0, 512, BLI_MEMPOOL_NOP);
+ bs->memory.chunk_ref = BLI_mempool_create(sizeof(BChunkRef), 0, 512, BLI_MEMPOOL_NOP);
+ /* allow iteration to simplify freeing, otherwise it's not needed
+ * (we could loop over all states as an alternative). */
+ bs->memory.chunk = BLI_mempool_create(sizeof(BChunk), 0, 512, BLI_MEMPOOL_ALLOW_ITER);
+
+ return bs;
+}
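+
+/* For example, snapshots of a vertex coordinate array might be created with the
+ * call below (a compiled-out sketch; the chunk size of 128 elements is only an
+ * illustration of the trade-off documented above, not a recommended value). */
+#if 0
+static BArrayStore *array_store_create_coords_example(void)
+{
+ /* stride is one 3D coordinate, each chunk holds 128 coordinates */
+ return BLI_array_store_create(sizeof(float[3]), 128);
+}
+#endif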
+
+static void array_store_free_data(BArrayStore *bs)
+{
+ /* free chunk data */
+ {
+ BLI_mempool_iter iter;
+ BChunk *chunk;
+ BLI_mempool_iternew(bs->memory.chunk, &iter);
+ while ((chunk = BLI_mempool_iterstep(&iter))) {
+ BLI_assert(chunk->users > 0);
+ MEM_freeN((void *)chunk->data);
+ }
+ }
+
+ /* free states */
+ for (BArrayState *state = bs->states.first, *state_next; state; state = state_next) {
+ state_next = state->next;
+ MEM_freeN(state);
+ }
+}
+
+/**
+ * Free the #BArrayStore, including all states and chunks.
+ */
+void BLI_array_store_destroy(
+ BArrayStore *bs)
+{
+ array_store_free_data(bs);
+
+ BLI_mempool_destroy(bs->memory.chunk_list);
+ BLI_mempool_destroy(bs->memory.chunk_ref);
+ BLI_mempool_destroy(bs->memory.chunk);
+
+ MEM_freeN(bs);
+}
+
+/**
+ * Clear all contents, allowing reuse of \a bs.
+ */
+void BLI_array_store_clear(
+ BArrayStore *bs)
+{
+ array_store_free_data(bs);
+
+ BLI_listbase_clear(&bs->states);
+
+ BLI_mempool_clear(bs->memory.chunk_list);
+ BLI_mempool_clear(bs->memory.chunk_ref);
+ BLI_mempool_clear(bs->memory.chunk);
+}
+
+/** \name BArrayStore Statistics
+ * \{ */
+
+/**
+ * \return the total amount of memory that would be used by getting the arrays for all states.
+ */
+size_t BLI_array_store_calc_size_expanded_get(
+ const BArrayStore *bs)
+{
+ size_t size_accum = 0;
+ for (const BArrayState *state = bs->states.first; state; state = state->next) {
+ size_accum += state->chunk_list->total_size;
+ }
+ return size_accum;
+}
+
+/**
+ * \return the amount of memory used by all #BChunk.data
+ * (duplicate chunks are only counted once).
+ */
+size_t BLI_array_store_calc_size_compacted_get(
+ const BArrayStore *bs)
+{
+ size_t size_total = 0;
+ BLI_mempool_iter iter;
+ BChunk *chunk;
+ BLI_mempool_iternew(bs->memory.chunk, &iter);
+ while ((chunk = BLI_mempool_iterstep(&iter))) {
+ BLI_assert(chunk->users > 0);
+ size_total += (size_t)chunk->data_len;
+ }
+ return size_total;
+}
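+
+/* These two totals can be compared to measure how well states share memory
+ * (a sketch; 'bs' is any array-store with at least one state added). */
+#if 0
+static double array_store_compression_ratio_example(const BArrayStore *bs)
+{
+ /* 1.0 means no sharing at all, smaller values mean better de-duplication */
+ const size_t size_expanded = BLI_array_store_calc_size_expanded_get(bs);
+ const size_t size_compacted = BLI_array_store_calc_size_compacted_get(bs);
+ return size_expanded ? ((double)size_compacted / (double)size_expanded) : 0.0;
+}
+#endif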
+
+/** \} */
+
+
+/** \name BArrayState Access
+ * \{ */
+
+/**
+ *
+ * \param data: Data used to create the new state.
+ * \param state_reference: The state to use as a reference when adding the new state,
+ * typically this is the previous state,
+ * however it can be any previously created state from this \a bs.
+ *
+ * \return The new state, which is used by the caller as a handle to get back the contents of \a data.
+ * This may be removed using #BLI_array_store_state_remove,
+ * otherwise it will be removed with #BLI_array_store_destroy.
+ */
+BArrayState *BLI_array_store_state_add(
+ BArrayStore *bs,
+ const void *data, const size_t data_len,
+ const BArrayState *state_reference)
+{
+ /* ensure we're aligned to the stride */
+ BLI_assert((data_len % bs->info.chunk_stride) == 0);
+
+#ifdef USE_PARANOID_CHECKS
+ if (state_reference) {
+ BLI_assert(BLI_findindex(&bs->states, state_reference) != -1);
+ }
+#endif
+
+ BChunkList *chunk_list;
+ if (state_reference) {
+ chunk_list = bchunk_list_from_data_merge(
+ &bs->info, &bs->memory,
+ (const ubyte *)data, data_len,
+ /* re-use reference chunks */
+ state_reference->chunk_list);
+ }
+ else {
+ chunk_list = bchunk_list_new(&bs->memory, data_len);
+ bchunk_list_fill_from_array(
+ &bs->info, &bs->memory,
+ chunk_list,
+ (const ubyte *)data, data_len);
+ }
+
+ chunk_list->users += 1;
+
+ BArrayState *state = MEM_callocN(sizeof(BArrayState), __func__);
+ state->chunk_list = chunk_list;
+
+ BLI_addtail(&bs->states, state);
+
+#ifdef USE_PARANOID_CHECKS
+ {
+ size_t data_test_len;
+ void *data_test = BLI_array_store_state_data_get_alloc(state, &data_test_len);
+ BLI_assert(data_test_len == data_len);
+ BLI_assert(memcmp(data_test, data, data_len) == 0);
+ MEM_freeN(data_test);
+ }
+#endif
+
+ return state;
+}
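+
+/* A sketch of the intended pattern for an undo-history style user: each new
+ * snapshot passes the previous state as its reference so unchanged chunks are
+ * shared ('history', 'history_len' and 'snapshot' are illustrative names).
+ * Old entries can later be trimmed in any order with BLI_array_store_state_remove. */
+#if 0
+static void array_store_history_push_example(
+ BArrayStore *bs, BArrayState **history, uint *history_len,
+ const void *snapshot, const size_t snapshot_len_bytes)
+{
+ const BArrayState *state_reference = *history_len ? history[*history_len - 1] : NULL;
+ history[*history_len] = BLI_array_store_state_add(bs, snapshot, snapshot_len_bytes, state_reference);
+ *history_len += 1;
+}
+#endif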
+
+/**
+ * Remove a state and free any unused #BChunk data.
+ *
+ * The states can be freed in any order.
+ */
+void BLI_array_store_state_remove(
+ BArrayStore *bs,
+ BArrayState *state)
+{
+#ifdef USE_PARANOID_CHECKS
+ BLI_assert(BLI_findindex(&bs->states, state) != -1);
+#endif
+
+ bchunk_list_decref(&bs->memory, state->chunk_list);
+ BLI_remlink(&bs->states, state);
+
+ MEM_freeN(state);
+}
+
+/**
+ * \return the expanded size of the array,
+ * use this to know how much memory to allocate for #BLI_array_store_state_data_get's argument.
+ */
+size_t BLI_array_store_state_size_get(
+ BArrayState *state)
+{
+ return state->chunk_list->total_size;
+}
+
+/**
+ * Fill in existing allocated memory with the contents of \a state.
+ */
+void BLI_array_store_state_data_get(
+ BArrayState *state,
+ void *data)
+{
+#ifdef USE_PARANOID_CHECKS
+ size_t data_test_len = 0;
+ for (BChunkRef *cref = state->chunk_list->chunk_refs.first; cref; cref = cref->next) {
+ data_test_len += cref->link->data_len;
+ }
+ BLI_assert(data_test_len == state->chunk_list->total_size);
+#endif
+
+ ubyte *data_step = (ubyte *)data;
+ for (BChunkRef *cref = state->chunk_list->chunk_refs.first; cref; cref = cref->next) {
+ BLI_assert(cref->link->users > 0);
+ memcpy(data_step, cref->link->data, cref->link->data_len);
+ data_step += cref->link->data_len;
+ }
+}
+
+/**
+ * Allocate an array for \a state and return it.
+ */
+void *BLI_array_store_state_data_get_alloc(
+ BArrayState *state,
+ size_t *r_data_len)
+{
+ void *data = MEM_mallocN(state->chunk_list->total_size, __func__);
+ BLI_array_store_state_data_get(state, data);
+ *r_data_len = state->chunk_list->total_size;
+ return data;
+}
+
+/** \} */
+
+
+/** \name Debugging API (for testing).
+ * \{ */
+
+/* only for test validation */
+static size_t bchunk_list_size(const BChunkList *chunk_list)
+{
+ size_t total_size = 0;
+ for (BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+ total_size += cref->link->data_len;
+ }
+ return total_size;
+}
+
+bool BLI_array_store_is_valid(
+ BArrayStore *bs)
+{
+ bool ok = true;
+
+ /* Check Length
+ * ------------ */
+
+ for (BArrayState *state = bs->states.first; state; state = state->next) {
+ BChunkList *chunk_list = state->chunk_list;
+ if (!(bchunk_list_size(chunk_list) == chunk_list->total_size)) {
+ return false;
+ }
+ }
+
+ {
+ BLI_mempool_iter iter;
+ BChunk *chunk;
+ BLI_mempool_iternew(bs->memory.chunk, &iter);
+ while ((chunk = BLI_mempool_iterstep(&iter))) {
+ if (!(MEM_allocN_len(chunk->data) >= chunk->data_len)) {
+ return false;
+ }
+ }
+ }
+
+ /* Check User Count & Lost References
+ * ---------------------------------- */
+ {
+ GHashIterator gh_iter;
+
+#define GHASH_PTR_ADD_USER(gh, pt) \
+ { \
+ void **val; \
+ if (BLI_ghash_ensure_p((gh), (pt), &val)) { \
+ *((int *)val) += 1; \
+ } \
+ else { \
+ *((int *)val) = 1; \
+ } \
+ } ((void)0)
+
+
+ int totrefs = 0;
+ GHash *chunk_list_map = BLI_ghash_ptr_new(__func__);
+ /* created up-front so the early 'goto user_finally' paths below can free it safely */
+ GHash *chunk_map = BLI_ghash_ptr_new(__func__);
+
+ /* count chunk_list's */
+ for (BArrayState *state = bs->states.first; state; state = state->next) {
+ GHASH_PTR_ADD_USER(chunk_list_map, state->chunk_list);
+ }
+ GHASH_ITER (gh_iter, chunk_list_map) {
+ const struct BChunkList *chunk_list = BLI_ghashIterator_getKey(&gh_iter);
+ const int users = GET_INT_FROM_POINTER(BLI_ghashIterator_getValue(&gh_iter));
+ if (!(chunk_list->users == users)) {
+ ok = false;
+ goto user_finally;
+ }
+ }
+ if (!(BLI_mempool_count(bs->memory.chunk_list) == (int)BLI_ghash_size(chunk_list_map))) {
+ ok = false;
+ goto user_finally;
+ }
+
+ /* count chunk's */
+ GHASH_ITER (gh_iter, chunk_list_map) {
+ const struct BChunkList *chunk_list = BLI_ghashIterator_getKey(&gh_iter);
+ for (const BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+ GHASH_PTR_ADD_USER(chunk_map, cref->link);
+ totrefs += 1;
+ }
+ }
+ if (!(BLI_mempool_count(bs->memory.chunk) == (int)BLI_ghash_size(chunk_map))) {
+ ok = false;
+ goto user_finally;
+ }
+ if (!(BLI_mempool_count(bs->memory.chunk_ref) == totrefs)) {
+ ok = false;
+ goto user_finally;
+ }
+
+ GHASH_ITER (gh_iter, chunk_map) {
+ const struct BChunk *chunk = BLI_ghashIterator_getKey(&gh_iter);
+ const int users = GET_INT_FROM_POINTER(BLI_ghashIterator_getValue(&gh_iter));
+ if (!(chunk->users == users)) {
+ ok = false;
+ goto user_finally;
+ }
+ }
+
+#undef GHASH_PTR_ADD_USER
+
+user_finally:
+ BLI_ghash_free(chunk_list_map, NULL, NULL);
+ BLI_ghash_free(chunk_map, NULL, NULL);
+ }
+
+
+ return ok;
+ /* TODO, dangling pointer checks */
+}
+
+/** \} */