IDManagement: Speedup ID unique name assignment by tracking used names/basenames/suffixes

An implementation of T73412, roughly as outlined there: Track the names that are in use, as well as base names (before numeric suffix) plus a bit map for each base name, indicating which numeric suffixes are already used. This is done per-Main/Library, per-object-type. Timings (Windows, VS2022 Release build, AMD Ryzen 5950X): - Scene with 10k cubes, Shift+D to duplicate them all: 8.7s -> 1.9s. Name map memory usage for resulting 20k objects: 4.3MB. - Importing a 2.5GB .obj file of exported Blender 3.0 splash scene (24k objects), using the new C++ importer: 34.2s-> 22.0s. Name map memory usage for resulting scene: 8.6MB. - Importing Disney Moana USD scene (almost half a million objects): 56min -> 10min. Name map usage: ~100MB. Blender crashes later on when trying to render it, in the same place in both cases, but that's for another day. Reviewed By: Bastien Montagne Differential Revision: https://developer.blender.org/D14162
author: Aras Pranckevicius <aras@nesnausk.org> 2022-07-20 14:27:14 +0300
committer: Aras Pranckevicius <aras@nesnausk.org> 2022-07-20 14:27:14 +0300
commit: 7f8d05131a7738327ae125d065df44be492ff1f2 (patch)
tree: 2e3e13596dee6bc747ed285374ea83d977b7c745 /source/blender/blenkernel/intern
parent: 8d69c6c4e7c5417f88603d5ccb2c4bb0e156aa1e (diff)
6 files changed, 433 insertions, 275 deletions
diff --git a/source/blender/blenkernel/intern/lib_id.c b/source/blender/blenkernel/intern/lib_id.c
index 90a4853fd3e..affa1e72ad0 100644
--- a/source/blender/blenkernel/intern/lib_id.c
+++ b/source/blender/blenkernel/intern/lib_id.c
@@ -53,6 +53,7 @@
 #include "BKE_lib_query.h"
 #include "BKE_lib_remap.h"
 #include "BKE_main.h"
+#include "BKE_main_namemap.h"
 #include "BKE_node.h"
 #include "BKE_rigidbody.h"
 
@@ -186,7 +187,7 @@ void BKE_lib_id_clear_library_data(Main *bmain, ID *id, const int flags)
   id->tag &= ~(LIB_TAG_INDIRECT | LIB_TAG_EXTERN);
   id->flag &= ~LIB_INDIRECT_WEAK_LINK;
   if (id_in_mainlist) {
-    if (BKE_id_new_name_validate(which_libbase(bmain, GS(id->name)), id, NULL, false)) {
+    if (BKE_id_new_name_validate(bmain, which_libbase(bmain, GS(id->name)), id, NULL, false)) {
       bmain->is_memfile_undo_written = false;
     }
   }
@@ -842,7 +843,7 @@ void BKE_libblock_management_main_add(Main *bmain, void *idv)
   BLI_addtail(lb, id);
   /* We need to allow adding extra datablocks into libraries too, e.g. to support generating new
    * overrides for recursive resync. */
-  BKE_id_new_name_validate(lb, id, NULL, true);
+  BKE_id_new_name_validate(bmain, lb, id, NULL, true);
   /* alphabetic insertion: is in new_id */
   id->tag &= ~(LIB_TAG_NO_MAIN | LIB_TAG_NO_USER_REFCOUNT);
   bmain->is_memfile_undo_written = false;
@@ -865,6 +866,7 @@ void BKE_libblock_management_main_remove(Main *bmain, void *idv)
   ListBase *lb = which_libbase(bmain, GS(id->name));
   BKE_main_lock(bmain);
   BLI_remlink(lb, id);
+  BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   id->tag |= LIB_TAG_NO_MAIN;
   bmain->is_memfile_undo_written = false;
   BKE_main_unlock(bmain);
@@ -958,7 +960,7 @@ void BKE_main_id_flag_all(Main *bmain, const int flag, const bool value)
   }
 }
 
-void BKE_main_id_repair_duplicate_names_listbase(ListBase *lb)
+void BKE_main_id_repair_duplicate_names_listbase(Main *bmain, ListBase *lb)
 {
   int lb_len = 0;
   LISTBASE_FOREACH (ID *, id, lb) {
@@ -982,7 +984,7 @@ void BKE_main_id_repair_duplicate_names_listbase(ListBase *lb)
   }
   for (i = 0; i < lb_len; i++) {
     if (!BLI_gset_add(gset, id_array[i]->name + 2)) {
-      BKE_id_new_name_validate(lb, id_array[i], NULL, false);
+      BKE_id_new_name_validate(bmain, lb, id_array[i], NULL, false);
     }
   }
   BLI_gset_free(gset, NULL);
@@ -1073,7 +1075,7 @@ void *BKE_libblock_alloc(Main *bmain, short type, const char *name, const int fl
 
       BKE_main_lock(bmain);
       BLI_addtail(lb, id);
-      BKE_id_new_name_validate(lb, id, name, false);
+      BKE_id_new_name_validate(bmain, lb, id, name, false);
       bmain->is_memfile_undo_written = false;
       /* alphabetic insertion: is in new_id */
       BKE_main_unlock(bmain);
@@ -1415,255 +1417,8 @@ void id_sort_by_name(ListBase *lb, ID *id, ID *id_sorting_hint)
 #undef ID_SORT_STEP_SIZE
 }
 
-/* NOTE: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
-#define MAX_NUMBER 1000000000
-/* We do not want to get "name.000", so minimal number is 1. */
-#define MIN_NUMBER 1
-/* The maximum value up to which we search for the actual smallest unused number. Beyond that
- * value, we will only use the first biggest unused number, without trying to 'fill the gaps'
- * in-between already used numbers... */
-#define MAX_NUMBERS_IN_USE 1024
-
-/**
- * Helper building final ID name from given base_name and number.
- *
- * If everything goes well and we do generate a valid final ID name in given name, we return
- * true. In case the final name would overflow the allowed ID name length, or given number is
- * bigger than maximum allowed value, we truncate further the base_name (and given name, which is
- * assumed to have the same 'base_name' part), and return false.
- */
-static bool id_name_final_build(char *name, char *base_name, size_t base_name_len, int number)
-{
-  char number_str[11]; /* Dot + nine digits + NULL terminator. */
-  size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
-
-  /* If the number would lead to an overflow of the maximum ID name length, we need to truncate
-   * the base name part and do all the number checks again. */
-  if (base_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
-    if (base_name_len + number_str_len >= MAX_ID_NAME - 2) {
-      base_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
-    }
-    else {
-      base_name_len--;
-    }
-    base_name[base_name_len] = '\0';
-
-    /* Code above may have generated invalid utf-8 string, due to raw truncation.
-     * Ensure we get a valid one now. */
-    base_name_len -= (size_t)BLI_str_utf8_invalid_strip(base_name, base_name_len);
-
-    /* Also truncate orig name, and start the whole check again. */
-    name[base_name_len] = '\0';
-    return false;
-  }
-
-  /* We have our final number, we can put it in name and exit the function. */
-  BLI_strncpy(name + base_name_len, number_str, number_str_len + 1);
-  return true;
-}
-
-/**
- * Check to see if an ID name is already used, and find a new one if so.
- * Return true if a new name was created (returned in name).
- *
- * Normally the ID that's being checked is already in the ListBase, so ID *id points at the new
- * entry. The Python Library module needs to know what the name of a data-block will be before it
- * is appended, in this case ID *id is NULL.
- */
-static bool check_for_dupid(ListBase *lb, ID *id, char *name, ID **r_id_sorting_hint)
-{
-  BLI_assert(strlen(name) < MAX_ID_NAME - 2);
-
-  *r_id_sorting_hint = NULL;
-
-  ID *id_test = lb->first;
-  bool is_name_changed = false;
-
-  if (id_test == NULL) {
-    return is_name_changed;
-  }
-
-  const short id_type = (short)GS(id_test->name);
-
-  /* Static storage of previous handled ID/name info, used to perform a quicker test and optimize
-   * creation of huge number of IDs using the same given base name. */
-  static char prev_orig_base_name[MAX_ID_NAME - 2] = {0};
-  static char prev_final_base_name[MAX_ID_NAME - 2] = {0};
-  static short prev_id_type = ID_LINK_PLACEHOLDER; /* Should never exist in actual ID list. */
-  static int prev_number = MIN_NUMBER - 1;
-
-  /* Initial test to check whether we can 'shortcut' the more complex loop of the main code
-   * below. Note that we do not do that for low numbers, as that would prevent using actual
-   * smallest available number in some cases, and benefits of this special case handling mostly
-   * show up with high numbers anyway. */
-  if (id_type == prev_id_type && prev_number >= MAX_NUMBERS_IN_USE &&
-      prev_number < MAX_NUMBER - 1 && name[0] == prev_final_base_name[0]) {
-
-    /* Get the name and number parts ("name.number"). */
-    char base_name[MAX_ID_NAME - 2];
-    int number = MIN_NUMBER;
-    size_t base_name_len = BLI_split_name_num(base_name, &number, name, '.');
-    size_t prev_final_base_name_len = strlen(prev_final_base_name);
-    size_t prev_orig_base_name_len = strlen(prev_orig_base_name);
-
-    if (base_name_len == prev_orig_base_name_len &&
-        STREQLEN(base_name, prev_orig_base_name, prev_orig_base_name_len)) {
-      /* Once we have ensured given base_name and original previous one are the same, we can
-       * check that previously used number is actually used, and that next one is free. */
-      /* Note that from now on, we only used previous final base name, as it might have been
-       * truncated from original one due to number suffix length. */
-      char final_name[MAX_ID_NAME - 2];
-      char prev_final_name[MAX_ID_NAME - 2];
-      BLI_strncpy(final_name, prev_final_base_name, prev_final_base_name_len + 1);
-      BLI_strncpy(prev_final_name, prev_final_base_name, prev_final_base_name_len + 1);
-
-      if (id_name_final_build(final_name, base_name, prev_final_base_name_len, prev_number + 1) &&
-          id_name_final_build(prev_final_name, base_name, prev_final_base_name_len, prev_number)) {
-        /* We successfully built valid final names of previous and current iterations,
-         * now we have to ensure that previous final name is indeed used in current ID list,
-         * and that current one is not. */
-        bool is_valid = false;
-        for (id_test = lb->first; id_test; id_test = id_test->next) {
-          if (id != id_test && id_test->lib == id->lib) {
-            if (id_test->name[2] == final_name[0] && STREQ(final_name, id_test->name + 2)) {
-              /* We expect final_name to not be already used, so this is a failure. */
-              is_valid = false;
-              break;
-            }
-            /* Previous final name should only be found once in the list, so if it was found
-             * already, no need to do a string comparison again. */
-            if (!is_valid && id_test->name[2] == prev_final_name[0] &&
-                STREQ(prev_final_name, id_test->name + 2)) {
-              is_valid = true;
-              *r_id_sorting_hint = id_test;
-            }
-          }
-        }
-
-        if (is_valid) {
-          /* Only the number changed, prev_orig_base_name, prev_final_base_name and prev_id_type
-           * remain the same. */
-          prev_number++;
-
-          strcpy(name, final_name);
-          return true;
-        }
-      }
-    }
-  }
-
-  /* To speed up finding smallest unused number within [0 .. MAX_NUMBERS_IN_USE - 1].
-   * We do not bother beyond that point. */
-  ID *ids_in_use[MAX_NUMBERS_IN_USE] = {NULL};
-
-  bool is_first_run = true;
-  while (true) {
-    /* Get the name and number parts ("name.number"). */
-    char base_name[MAX_ID_NAME - 2];
-    int number = MIN_NUMBER;
-    size_t base_name_len = BLI_split_name_num(base_name, &number, name, '.');
-
-    /* Store previous original given base name now, as we might alter it later in code below. */
-    if (is_first_run) {
-      strcpy(prev_orig_base_name, base_name);
-      is_first_run = false;
-    }
-
-    /* In case we get an insane initial number suffix in given name. */
-    /* NOTE: BLI_split_name_num() cannot return negative numbers, so we do not have to check for
-     * that here. */
-    if (number >= MAX_NUMBER || number < MIN_NUMBER) {
-      number = MIN_NUMBER;
-    }
-
-    bool is_orig_name_used = false;
-    for (id_test = lb->first; id_test; id_test = id_test->next) {
-      char base_name_test[MAX_ID_NAME - 2];
-      int number_test;
-      if ((id != id_test) && (id_test->lib == id->lib) && (name[0] == id_test->name[2]) &&
-          (ELEM(id_test->name[base_name_len + 2], '.', '\0')) &&
-          STREQLEN(name, id_test->name + 2, base_name_len) &&
-          (BLI_split_name_num(base_name_test, &number_test, id_test->name + 2, '.') ==
-           base_name_len)) {
-        /* If we did not yet encounter exact same name as the given one, check the remaining
-         * parts of the strings. */
-        if (!is_orig_name_used) {
-          is_orig_name_used = STREQ(name + base_name_len, id_test->name + 2 + base_name_len);
-        }
-        /* Mark number of current id_test name as used, if possible. */
-        if (number_test < MAX_NUMBERS_IN_USE) {
-          ids_in_use[number_test] = id_test;
-        }
-        /* Keep track of first largest unused number. */
-        if (number <= number_test) {
-          *r_id_sorting_hint = id_test;
-          number = number_test + 1;
-        }
-      }
-    }
-
-    /* If there is no double, we are done.
-     * Note however that name might have been changed (truncated) in a previous iteration
-     * already.
-     */
-    if (!is_orig_name_used) {
-      /* Don't bother updating `prev_*` static variables here, this case is not supposed to happen
-       * that often, and is not straight-forward here, so just ignore and reset them to default. */
-      prev_id_type = ID_LINK_PLACEHOLDER;
-      prev_final_base_name[0] = '\0';
-      prev_number = MIN_NUMBER - 1;
-
-      /* Value set previously is meaningless in that case. */
-      *r_id_sorting_hint = NULL;
-
-      return is_name_changed;
-    }
-
-    /* Decide which value of number to use, either the smallest unused one if possible, or
-     * default to the first largest unused one we got from previous loop. */
-    for (int i = MIN_NUMBER; i < MAX_NUMBERS_IN_USE; i++) {
-      if (ids_in_use[i] == NULL) {
-        number = i;
-        if (i > 0) {
-          *r_id_sorting_hint = ids_in_use[i - 1];
-        }
-        break;
-      }
-    }
-    /* At this point, number is either the lowest unused number within
-     * [MIN_NUMBER .. MAX_NUMBERS_IN_USE - 1], or 1 greater than the largest used number if all
-     * those low ones are taken.
-     * We can't be bothered to look for the lowest unused number beyond
-     * (MAX_NUMBERS_IN_USE - 1).
-     */
-    /* We know for wure that name will be changed. */
-    is_name_changed = true;
-
-    /* If id_name_final_build helper returns false, it had to truncate further given name, hence
-     * we have to go over the whole check again. */
-    if (!id_name_final_build(name, base_name, base_name_len, number)) {
-      /* We have to clear our list of small used numbers before we do the whole check again. */
-      memset(ids_in_use, 0, sizeof(ids_in_use));
-
-      continue;
-    }
-
-    /* Update `prev_*` static variables, in case next call is for the same type of IDs and with the
-     * same initial base name, we can skip a lot of above process. */
-    prev_id_type = id_type;
-    strcpy(prev_final_base_name, base_name);
-    prev_number = number;
-
-    return is_name_changed;
-  }
-
-#undef MAX_NUMBERS_IN_USE
-}
-
-#undef MIN_NUMBER
-#undef MAX_NUMBER
-
-bool BKE_id_new_name_validate(ListBase *lb, ID *id, const char *tname, const bool do_linked_data)
+bool BKE_id_new_name_validate(
+    struct Main *bmain, ListBase *lb, ID *id, const char *tname, const bool do_linked_data)
 {
   bool result = false;
   char name[MAX_ID_NAME - 2];
@@ -1693,22 +1448,10 @@ bool BKE_id_new_name_validate(ListBase *lb, ID *id, const char *tname, const boo
     BLI_str_utf8_invalid_strip(name, strlen(name));
   }
 
-  ID *id_sorting_hint = NULL;
-  result = check_for_dupid(lb, id, name, &id_sorting_hint);
-  strcpy(id->name + 2, name);
-
-  /* This was in 2.43 and previous releases
-   * however all data in blender should be sorted, not just duplicate names
-   * sorting should not hurt, but noting just in case it alters the way other
-   * functions work, so sort every time. */
-#if 0
-  if (result) {
-    id_sort_by_name(lb, id, id_sorting_hint);
-  }
-#endif
-
-  id_sort_by_name(lb, id, id_sorting_hint);
+  result = BKE_main_namemap_get_name(bmain, id, name);
 
+  strcpy(id->name + 2, name);
+  id_sort_by_name(lb, id, NULL);
   return result;
 }
 
@@ -2074,7 +1817,7 @@ void BLI_libblock_ensure_unique_name(Main *bmain, const char *name)
   idtest = BLI_findstring(lb, name + 2, offsetof(ID, name) + 2);
   if (idtest != NULL && !ID_IS_LINKED(idtest)) {
     /* BKE_id_new_name_validate also takes care of sorting. */
-    BKE_id_new_name_validate(lb, idtest, NULL, false);
+    BKE_id_new_name_validate(bmain, lb, idtest, NULL, false);
     bmain->is_memfile_undo_written = false;
   }
 }
@@ -2082,8 +1825,9 @@ void BLI_libblock_ensure_unique_name(Main *bmain, const char *name)
 void BKE_libblock_rename(Main *bmain, ID *id, const char *name)
 {
   BLI_assert(!ID_IS_LINKED(id));
+  BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   ListBase *lb = which_libbase(bmain, GS(id->name));
-  if (BKE_id_new_name_validate(lb, id, name, false)) {
+  if (BKE_id_new_name_validate(bmain, lb, id, name, false)) {
     bmain->is_memfile_undo_written = false;
   }
 }
diff --git a/source/blender/blenkernel/intern/lib_id_delete.c b/source/blender/blenkernel/intern/lib_id_delete.c
index f14c11a949e..405b0be70f9 100644
--- a/source/blender/blenkernel/intern/lib_id_delete.c
+++ b/source/blender/blenkernel/intern/lib_id_delete.c
@@ -28,6 +28,7 @@
 #include "BKE_lib_remap.h"
 #include "BKE_library.h"
 #include "BKE_main.h"
+#include "BKE_main_namemap.h"
 
 #include "lib_intern.h"
 
@@ -151,6 +152,7 @@ void BKE_id_free_ex(Main *bmain, void *idv, int flag, const bool use_flag_from_i
   if ((flag & LIB_ID_FREE_NO_MAIN) == 0) {
     ListBase *lb = which_libbase(bmain, type);
     BLI_remlink(lb, id);
+    BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   }
 
   BKE_libblock_free_data(id, (flag & LIB_ID_FREE_NO_USER_REFCOUNT) == 0);
@@ -237,6 +239,7 @@ static size_t id_delete(Main *bmain, const bool do_tagged_deletion)
           /* NOTE: in case we delete a library, we also delete all its datablocks! */
           if ((id->tag & tag) || (ID_IS_LINKED(id) && (id->lib->id.tag & tag))) {
             BLI_remlink(lb, id);
+            BKE_main_namemap_remove_name(bmain, id, id->name + 2);
             BLI_addtail(&tagged_deleted_ids, id);
             /* Do not tag as no_main now, we want to unlink it first (lower-level ID management
              * code has some specific handling of 'no main' IDs that would be a problem in that
diff --git a/source/blender/blenkernel/intern/lib_id_test.cc b/source/blender/blenkernel/intern/lib_id_test.cc
index 1aba78eed8f..ea3f5395f1f 100644
--- a/source/blender/blenkernel/intern/lib_id_test.cc
+++ b/source/blender/blenkernel/intern/lib_id_test.cc
@@ -10,6 +10,7 @@
 #include "BKE_idtype.h"
 #include "BKE_lib_id.h"
 #include "BKE_main.h"
+#include "BKE_main_namemap.h"
 
 #include "DNA_ID.h"
 #include "DNA_mesh_types.h"
@@ -57,15 +58,20 @@ TEST(lib_id_main_sort, local_ids_1)
   test_lib_id_main_sort_check_order({id_a, id_b, id_c});
 }
 
-static void change_lib(Main * /*bmain*/, ID *id, Library *lib)
+static void change_lib(Main *bmain, ID *id, Library *lib)
 {
+  if (id->lib == lib) {
+    return;
+  }
+  BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   id->lib = lib;
 }
 
 static void change_name(Main *bmain, ID *id, const char *name)
 {
+  BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   BLI_strncpy(id->name + 2, name, MAX_NAME);
-  BKE_id_new_name_validate(&bmain->objects, id, nullptr, true);
+  BKE_id_new_name_validate(bmain, &bmain->objects, id, nullptr, true);
 }
 
 TEST(lib_id_main_sort, linked_ids_1)
@@ -389,4 +395,17 @@ TEST(lib_id_main_unique_name, names_are_unique_per_id_type)
   EXPECT_STREQ(id_c->name + 2, "Foo.001");
 }
 
+TEST(lib_id_main_unique_name, name_huge_number_suffix)
+{
+  LibIDMainSortTestContext ctx;
+
+  /* Use numeric suffix that is really large: should come through
+   * fine, since no duplicates with other names. */
+  ID *id_a = static_cast<ID *>(BKE_id_new(ctx.bmain, ID_OB, "SuperLong.1234567890"));
+  EXPECT_STREQ(id_a->name + 2, "SuperLong.1234567890");
+  /* Now create with the same name again: should get 001 suffix. */
+  ID *id_b = static_cast<ID *>(BKE_id_new(ctx.bmain, ID_OB, "SuperLong.1234567890"));
+  EXPECT_STREQ(id_b->name + 2, "SuperLong.001");
+}
+
 }  // namespace blender::bke::tests
diff --git a/source/blender/blenkernel/intern/library.c b/source/blender/blenkernel/intern/library.c
index 4962b1c448e..fee4cae2701 100644
--- a/source/blender/blenkernel/intern/library.c
+++ b/source/blender/blenkernel/intern/library.c
@@ -26,14 +26,26 @@
 #include "BKE_lib_query.h"
 #include "BKE_library.h"
 #include "BKE_main.h"
+#include "BKE_main_namemap.h"
 #include "BKE_packedFile.h"
 
 /* Unused currently. */
 // static CLG_LogRef LOG = {.identifier = "bke.library"};
 
+struct BlendWriter;
+struct BlendDataReader;
+
+static void library_runtime_reset(Library *lib)
+{
+  if (lib->runtime.name_map) {
+    BKE_main_namemap_destroy(&lib->runtime.name_map);
+  }
+}
+
 static void library_free_data(ID *id)
 {
   Library *library = (Library *)id;
+  library_runtime_reset(library);
   if (library->packedfile) {
     BKE_packedfile_free(library->packedfile);
   }
@@ -61,6 +73,20 @@ static void library_foreach_path(ID *id, BPathForeachPathData *bpath_data)
   }
 }
 
+static void library_blend_write(struct BlendWriter *UNUSED(writer),
+                                ID *id,
+                                const void *UNUSED(id_address))
+{
+  Library *lib = (Library *)id;
+  library_runtime_reset(lib);
+}
+
+static void library_blend_read_data(struct BlendDataReader *UNUSED(reader), ID *id)
+{
+  Library *lib = (Library *)id;
+  library_runtime_reset(lib);
+}
+
 IDTypeInfo IDType_ID_LI = {
     .id_code = ID_LI,
     .id_filter = FILTER_ID_LI,
@@ -81,8 +107,8 @@ IDTypeInfo IDType_ID_LI = {
     .foreach_path = library_foreach_path,
     .owner_get = NULL,
 
-    .blend_write = NULL,
-    .blend_read_data = NULL,
+    .blend_write = library_blend_write,
+    .blend_read_data = library_blend_read_data,
     .blend_read_lib = NULL,
     .blend_read_expand = NULL,
 
diff --git a/source/blender/blenkernel/intern/main.c b/source/blender/blenkernel/intern/main.c
index b9ed783fa8c..239aacf28d6 100644
--- a/source/blender/blenkernel/intern/main.c
+++ b/source/blender/blenkernel/intern/main.c
@@ -24,6 +24,7 @@
 #include "BKE_lib_query.h"
 #include "BKE_main.h"
 #include "BKE_main_idmap.h"
+#include "BKE_main_namemap.h"
 
 #include "IMB_imbuf.h"
 #include "IMB_imbuf_types.h"
@@ -184,6 +185,10 @@ void BKE_main_free(Main *mainvar)
     BKE_main_idmap_destroy(mainvar->id_map);
   }
 
+  if (mainvar->name_map) {
+    BKE_main_namemap_destroy(&mainvar->name_map);
+  }
+
   BLI_spin_end((SpinLock *)mainvar->lock);
   MEM_freeN(mainvar->lock);
   MEM_freeN(mainvar);
diff --git a/source/blender/blenkernel/intern/main_namemap.cc b/source/blender/blenkernel/intern/main_namemap.cc
new file mode 100644
index 00000000000..32f4c6be639
--- /dev/null
+++ b/source/blender/blenkernel/intern/main_namemap.cc
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup bke
+ */
+
+#include "BKE_idtype.h"
+#include "BKE_main.h"
+#include "BKE_main_namemap.h"
+
+#include "BLI_assert.h"
+#include "BLI_bitmap.h"
+#include "BLI_ghash.h"
+#include "BLI_listbase.h"
+#include "BLI_map.hh"
+#include "BLI_math_base.hh"
+#include "BLI_set.hh"
+#include "BLI_string_utf8.h"
+#include "BLI_string_utils.h"
+
+#include "DNA_ID.h"
+
+#include "MEM_guardedalloc.h"
+
+//#define DEBUG_PRINT_MEMORY_USAGE
+
+using namespace blender;
+
+/* Assumes and ensure that the suffix number can never go beyond 1 billion. */
+#define MAX_NUMBER 1000000000
+/* We do not want to get "name.000", so minimal number is 1. */
+#define MIN_NUMBER 1
+
+/**
+ * Helper building final ID name from given base_name and number.
+ *
+ * If everything goes well and we do generate a valid final ID name in given name, we return
+ * true. In case the final name would overflow the allowed ID name length, or given number is
+ * bigger than maximum allowed value, we truncate further the base_name (and given name, which is
+ * assumed to have the same 'base_name' part), and return false.
+ */
+static bool id_name_final_build(char *name, char *base_name, size_t base_name_len, int number)
+{
+  char number_str[11]; /* Dot + nine digits + NULL terminator. */
+  size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
+
+  /* If the number would lead to an overflow of the maximum ID name length, we need to truncate
+   * the base name part and do all the number checks again. */
+  if (base_name_len + number_str_len >= MAX_NAME || number >= MAX_NUMBER) {
+    if (base_name_len + number_str_len >= MAX_NAME) {
+      base_name_len = MAX_NAME - number_str_len - 1;
+    }
+    else {
+      base_name_len--;
+    }
+    base_name[base_name_len] = '\0';
+
+    /* Code above may have generated invalid utf-8 string, due to raw truncation.
+     * Ensure we get a valid one now. */
+    base_name_len -= (size_t)BLI_str_utf8_invalid_strip(base_name, base_name_len);
+
+    /* Also truncate orig name, and start the whole check again. */
+    name[base_name_len] = '\0';
+    return false;
+  }
+
+  /* We have our final number, we can put it in name and exit the function. */
+  BLI_strncpy(name + base_name_len, number_str, number_str_len + 1);
+  return true;
+}
+
+/* Key used in set/map lookups: just a string name. */
+struct UniqueName_Key {
+  char name[MAX_NAME];
+  uint64_t hash() const
+  {
+    return BLI_ghashutil_strhash_n(name, MAX_NAME);
+  }
+  bool operator==(const UniqueName_Key &o) const
+  {
+    return !BLI_ghashutil_strcmp(name, o.name);
+  }
+};
+
+/* Tracking of used numeric suffixes. For each base name:
+ *
+ * - Exactly track which of the lowest 1024 suffixes are in use,
+ *   whenever there is a name collision we pick the lowest "unused"
+ *   one. This is done with a bit map.
+ * - Above 1024, do not track them exactly, just track the maximum
+ *   suffix value seen so far. Upon collision, assign number that is
+ *   one larger.
+ */
+struct UniqueName_Value {
+  static constexpr unsigned max_exact_tracking = 1024;
+  BLI_BITMAP_DECLARE(mask, max_exact_tracking);
+  int max_value = 0;
+
+  void mark_used(int number)
+  {
+    if (number >= 0 && number < max_exact_tracking) {
+      BLI_BITMAP_ENABLE(mask, number);
+    }
+    if (number < MAX_NUMBER) {
+      math::max_inplace(max_value, number);
+    }
+  }
+
+  void mark_unused(int number)
+  {
+    if (number >= 0 && number < max_exact_tracking) {
+      BLI_BITMAP_DISABLE(mask, number);
+    }
+    if (number > 0 && number == max_value) {
+      --max_value;
+    }
+  }
+
+  bool use_if_unused(int number)
+  {
+    if (number >= 0 && number < max_exact_tracking) {
+      if (!BLI_BITMAP_TEST_BOOL(mask, number)) {
+        BLI_BITMAP_ENABLE(mask, number);
+        math::max_inplace(max_value, number);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  int use_smallest_unused()
+  {
+    /* Find the smallest available one <1k.
+     * However we never want to pick zero ("none") suffix, even if it is
+     * available, e.g. if Foo.001 was used and we want to create another
+     * Foo.001, we should return Foo.002 and not Foo.
+     * So while searching, mark #0 as "used" to make sure we don't find it,
+     * and restore the value afterwards. */
+
+    BLI_bitmap prev_first = mask[0];
+    mask[0] |= 1;
+    int result = BLI_bitmap_find_first_unset(mask, max_exact_tracking);
+    if (result >= 0) {
+      BLI_BITMAP_ENABLE(mask, result);
+      math::max_inplace(max_value, result);
+    }
+    mask[0] |= prev_first & 1; /* Restore previous value of #0 bit. */
+    return result;
+  }
+};
+
+/* Tracking of names for a single ID type. */
+struct UniqueName_TypeMap {
+  /* Set of full names that are in use. */
+  Set<UniqueName_Key> full_names;
+  /* For each base name (i.e. without numeric suffix), track the
+   * numeric suffixes that are in use. */
+  Map<UniqueName_Key, UniqueName_Value> base_name_to_num_suffix;
+};
+
+struct UniqueName_Map {
+  UniqueName_TypeMap type_maps[INDEX_ID_MAX];
+
+  UniqueName_TypeMap *find_by_type(short id_type)
+  {
+    int index = BKE_idtype_idcode_to_index(id_type);
+    return index >= 0 ? &type_maps[index] : nullptr;
+  }
+};
+
+struct UniqueName_Map *BKE_main_namemap_create()
+{
+  struct UniqueName_Map *map = MEM_new<UniqueName_Map>(__func__);
+  return map;
+}
+
+void BKE_main_namemap_destroy(struct UniqueName_Map **r_name_map)
+{
+#ifdef DEBUG_PRINT_MEMORY_USAGE
+  int64_t size_sets = 0;
+  int64_t size_maps = 0;
+  for (const UniqueName_TypeMap &type_map : (*r_name_map)->type_maps) {
+    size_sets += type_map.full_names.size_in_bytes();
+    size_maps += type_map.base_name_to_num_suffix.size_in_bytes();
+  }
+  printf(
+      "NameMap memory usage: sets %.1fKB, maps %.1fKB\n", size_sets / 1024.0, size_maps / 1024.0);
+#endif
+  MEM_delete<UniqueName_Map>(*r_name_map);
+  *r_name_map = nullptr;
+}
+
+static void main_namemap_populate(UniqueName_Map *name_map, struct Main *bmain, ID *ignore_id)
+{
+  BLI_assert_msg(name_map != nullptr, "name_map should not be null");
+  for (UniqueName_TypeMap &type_map : name_map->type_maps) {
+    type_map.base_name_to_num_suffix.clear();
+  }
+  Library *library = ignore_id->lib;
+  ID *id;
+  FOREACH_MAIN_ID_BEGIN (bmain, id) {
+    if ((id == ignore_id) || (id->lib != library)) {
+      continue;
+    }
+    UniqueName_TypeMap *type_map = name_map->find_by_type(GS(id->name));
+    BLI_assert(type_map != nullptr);
+
+    /* Insert the full name into the set. */
+    UniqueName_Key key;
+    strncpy(key.name, id->name + 2, MAX_NAME);
+    type_map->full_names.add(key);
+
+    /* Get the name and number parts ("name.number"). */
+    int number = MIN_NUMBER;
+    BLI_split_name_num(key.name, &number, id->name + 2, '.');
+
+    /* Get and update the entry for this base name. */
+    UniqueName_Value &val = type_map->base_name_to_num_suffix.lookup_or_add_default(key);
+    val.mark_used(number);
+  }
+  FOREACH_MAIN_ID_END;
+}
+
+/* Get the name map object used for the given Main/ID.
+ * Lazily creates and populates the contents of the name map, if ensure_created is true.
+ * Note: if the contents are populated, the name of the given ID itself is not added. */
+static UniqueName_Map *get_namemap_for(Main *bmain, ID *id, bool ensure_created)
+{
+  if (id->lib != nullptr) {
+    if (ensure_created && id->lib->runtime.name_map == nullptr) {
+      id->lib->runtime.name_map = BKE_main_namemap_create();
+      main_namemap_populate(id->lib->runtime.name_map, bmain, id);
+    }
+    return id->lib->runtime.name_map;
+  }
+  if (ensure_created && bmain->name_map == nullptr) {
+    bmain->name_map = BKE_main_namemap_create();
+    main_namemap_populate(bmain->name_map, bmain, id);
+  }
+  return bmain->name_map;
+}
+
+bool BKE_main_namemap_get_name(struct Main *bmain, struct ID *id, char *name)
+{
+  BLI_assert(bmain != nullptr);
+  BLI_assert(id != nullptr);
+  UniqueName_Map *name_map = get_namemap_for(bmain, id, true);
+  BLI_assert(name_map != nullptr);
+  BLI_assert(strlen(name) < MAX_NAME);
+  UniqueName_TypeMap *type_map = name_map->find_by_type(GS(id->name));
+  BLI_assert(type_map != nullptr);
+
+  bool is_name_changed = false;
+
+  UniqueName_Key key;
+  while (true) {
+    /* Check if the full original name has a duplicate. */
+    strncpy(key.name, name, MAX_NAME);
+    const bool has_dup = type_map->full_names.contains(key);
+
+    /* Get the name and number parts ("name.number"). */
+    int number = MIN_NUMBER;
+    size_t base_name_len = BLI_split_name_num(key.name, &number, name, '.');
+
+    bool added_new = false;
+    UniqueName_Value &val = type_map->base_name_to_num_suffix.lookup_or_add_cb(key, [&]() {
+      added_new = true;
+      return UniqueName_Value();
+    });
+    if (added_new || !has_dup) {
+      /* This base name is not used at all yet, or the full original
+       * name has no duplicates. The latter could happen if splitting
+       * by number would produce the same values, for different name
+       * strings (e.g. Foo.001 and Foo.1). */
+      val.mark_used(number);
+
+      if (!has_dup) {
+        strncpy(key.name, name, MAX_NAME);
+        type_map->full_names.add(key);
+      }
+      return is_name_changed;
+    }
+
+    /* The base name is already used. But our number suffix might not be used yet. */
+    int number_to_use = -1;
+    if (val.use_if_unused(number)) {
+      /* Our particular number suffix is not used yet: use it. */
+      number_to_use = number;
+    }
+    else {
+      /* Find lowest free under 1k and use it. */
+      number_to_use = val.use_smallest_unused();
+
+      /* Did not find one under 1k. */
+      if (number_to_use == -1) {
+        if (number >= MIN_NUMBER && number > val.max_value) {
+          val.max_value = number;
+          number_to_use = number;
+        }
+        else {
+          val.max_value++;
+          number_to_use = val.max_value;
+        }
+      }
+    }
+
+    /* Try to build final name from the current base name and the number.
+     * Note that this can fail due to too long base name, or a too large number,
+     * in which case it will shorten the base name, and we'll start again. */
+    BLI_assert(number_to_use >= MIN_NUMBER);
+    if (id_name_final_build(name, key.name, base_name_len, number_to_use)) {
+      /* All good, add final name to the set. */
+      strncpy(key.name, name, MAX_NAME);
+      type_map->full_names.add(key);
+      break;
+    }
+
+    /* Name had to be truncated, or number too large: mark
+     * the output name as definitely changed, and proceed with the
+     * truncated name again. */
+    is_name_changed = true;
+  }
+  return is_name_changed;
+}
+
+void BKE_main_namemap_remove_name(struct Main *bmain, struct ID *id, const char *name)
+{
+  BLI_assert(bmain != nullptr);
+  BLI_assert(id != nullptr);
+  BLI_assert(name != nullptr);
+  /* Name is empty or not initialized yet, nothing to remove. */
+  if (name[0] == '\0') {
+    return;
+  }
+
+  struct UniqueName_Map *name_map = get_namemap_for(bmain, id, false);
+  if (name_map == nullptr) {
+    return;
+  }
+  BLI_assert(strlen(name) < MAX_NAME);
+  UniqueName_TypeMap *type_map = name_map->find_by_type(GS(id->name));
+  BLI_assert(type_map != nullptr);
+
+  UniqueName_Key key;
+  /* Remove full name from the set. */
+  strncpy(key.name, name, MAX_NAME);
+  type_map->full_names.remove(key);
+
+  int number = MIN_NUMBER;
+  BLI_split_name_num(key.name, &number, name, '.');
+  UniqueName_Value *val = type_map->base_name_to_num_suffix.lookup_ptr(key);
+  if (val == nullptr) {
+    return;
+  }
+  if (number == 0 && val->max_value == 0) {
+    /* This was the only base name usage, remove whole key. */
+    type_map->base_name_to_num_suffix.remove(key);
+    return;
+  }
+  val->mark_unused(number);
+}
author	Aras Pranckevicius <aras@nesnausk.org>	2022-07-20 14:27:14 +0300
committer	Aras Pranckevicius <aras@nesnausk.org>	2022-07-20 14:27:14 +0300
commit	7f8d05131a7738327ae125d065df44be492ff1f2 (patch)
tree	2e3e13596dee6bc747ed285374ea83d977b7c745 /source/blender/blenkernel/intern
parent	8d69c6c4e7c5417f88603d5ccb2c4bb0e156aa1e (diff)