From 4cc8201a651007b7308a4468a550dbbd97c6c346 Mon Sep 17 00:00:00 2001
From: Bastien Montagne <montagne29@wanadoo.fr>
Date: Thu, 19 Dec 2019 16:27:13 +0100
Subject: ID Management: Improve speed of code used when creating/renaming and
 ID.

This commit affects `check_for_dupid()` helper:
* Add a special, quicker code path dedicated to sequential addition of a
  large number of IDs using the same base name.

This gives a significant speedup to adding 'randomly'-named IDs:

| Number and type of names of IDs  | old code | new code | speed improvement |
| -------------------------------- | -------- | -------- | ----------------- |
| 40K, mixed (14k rand, 26k const) |      49s |      39s |               26% |
| 40K, fully random                |      51s |      51s |                0% |
| 40K, fully constant              |      71s |      40s |               78% |

Note that 'random' names give no improvement as expected, since this new code
path will never be used in such cases.
---
 source/blender/blenkernel/intern/library.c | 182 +++++++++++++++++++++++------
 1 file changed, 147 insertions(+), 35 deletions(-)

(limited to 'source/blender/blenkernel/intern/library.c')

diff --git a/source/blender/blenkernel/intern/library.c b/source/blender/blenkernel/intern/library.c
index 986c28c057c..2006c5af6f0 100644
--- a/source/blender/blenkernel/intern/library.c
+++ b/source/blender/blenkernel/intern/library.c
@@ -1612,6 +1612,53 @@ void id_sort_by_name(ListBase *lb, ID *id)
 }
 #undef ID_SORT_STEP_SIZE
 
+/* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
+#define MAX_NUMBER 1000000000
+/* We do not want to get "name.000", so minimal number is 1. */
+#define MIN_NUMBER 1
+/* The maximum value up to which we search for the actual smallest unused number. Beyond that
+ * value, we will only use the first biggest unused number, without trying to 'fill the gaps'
+ * in-between already used numbers... */
+#define MAX_NUMBERS_IN_USE 1024
+
+/**
+ * Helper building final ID name from given root_name and number.
+ *
+ * If everything goes well and we do generate a valid final ID anme in given name, we return true.
+ * In case the final name would overflow the allowed ID name length, or given number is bigger than
+ * maximum allowed value, we truncate further the root_name (and given name, which is assumed to
+ * have the same 'root_name' part), and return false.
+ */
+static bool id_name_final_build(char *name, char *root_name, size_t root_name_len, int number)
+{
+  char number_str[11]; /* Dot + nine digits + NULL terminator. */
+  size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
+
+  /* If the number would lead to an overflow of the maximum ID name length, we need to truncate
+   * the root name part and do all the number checks again. */
+  if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
+    if (root_name_len + number_str_len >= MAX_ID_NAME - 2) {
+      root_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
+    }
+    else {
+      root_name_len--;
+    }
+    root_name[root_name_len] = '\0';
+
+    /* Code above may have generated invalid utf-8 string, due to raw truncation.
+     * Ensure we get a valid one now. */
+    root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len);
+
+    /* Also truncate orig name, and start the whole check again. */
+    name[root_name_len] = '\0';
+    return false;
+  }
+
+  /* We have our final number, we can put it in name and exit the function. */
+  BLI_strncpy(name + root_name_len, number_str, number_str_len + 1);
+  return true;
+}
+
 /**
  * Check to see if an ID name is already used, and find a new one if so.
  * Return true if a new name was created (returned in name).
@@ -1622,28 +1669,100 @@ void id_sort_by_name(ListBase *lb, ID *id)
  */
 static bool check_for_dupid(ListBase *lb, ID *id, char *name)
 {
-  /* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
-#define MAX_NUMBER 1000000000
-  /* We do not want to get "name.000", so minimal number is 1. */
-#define MIN_NUMBER 1
-
   BLI_assert(strlen(name) < MAX_ID_NAME - 2);
 
+  ID *id_test = lb->first;
   bool is_name_changed = false;
 
+  if (id_test == NULL) {
+    return is_name_changed;
+  }
+
+  const short id_type = (short)GS(id_test->name);
+
+  /* Static storage of previous handled ID/name info, used to perform a quicker test and optimize
+   * creation of huge number of IDs using the same given root name. */
+  static char prev_orig_root_name[MAX_ID_NAME - 2] = {0};
+  static char prev_final_root_name[MAX_ID_NAME - 2] = {0};
+  static short prev_id_type = ID_LINK_PLACEHOLDER; /* Should never exist in actual ID list. */
+  static int prev_number = MIN_NUMBER - 1;
+
+  /* Initial test to check whether we can 'shortcut' the more complex loop of the main code below.
+   * Note that we do not do that for low numbers, as that would prevent using actual smallest
+   * available number in some cases, and benefits of this special case handling mostly show up with
+   * high numbers anyway. */
+  if (id_type == prev_id_type && prev_number >= MAX_NUMBERS_IN_USE &&
+      prev_number < MAX_NUMBER - 1 && name[0] == prev_final_root_name[0]) {
+
+    /* Get the name and number parts ("name.number"). */
+    char root_name[MAX_ID_NAME - 2];
+    int number = MIN_NUMBER;
+    size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.');
+    size_t prev_final_root_name_len = strlen(prev_final_root_name);
+    size_t prev_orig_root_name_len = strlen(prev_orig_root_name);
+
+    if (root_name_len == prev_orig_root_name_len &&
+        STREQLEN(root_name, prev_orig_root_name, prev_orig_root_name_len)) {
+      /* Once we have ensured given root_name and original previous one are the same, we can check
+       * that previously used number is actually used, and that next one is free. */
+      /* Note that from now on, we only used previous final root name, as it might have been
+       * truncated from original one due to number suffix length. */
+      char final_name[MAX_ID_NAME - 2];
+      char prev_final_name[MAX_ID_NAME - 2];
+      BLI_strncpy(final_name, prev_final_root_name, prev_final_root_name_len + 1);
+      BLI_strncpy(prev_final_name, prev_final_root_name, prev_final_root_name_len + 1);
+
+      if (id_name_final_build(final_name, root_name, prev_final_root_name_len, prev_number + 1) &&
+          id_name_final_build(prev_final_name, root_name, prev_final_root_name_len, prev_number)) {
+        /* We succeffuly built valid final names of previous and current iterations, now we have to
+         * ensure that previous final name is indeed used in curent ID list, and that current one
+         * is not. */
+        bool is_valid = false;
+        for (id_test = lb->first; id_test; id_test = id_test->next) {
+          if (id != id_test && !ID_IS_LINKED(id_test)) {
+            if (id_test->name[2] == final_name[0] && STREQ(final_name, id_test->name + 2)) {
+              /* We expect final_name to not be already used, so this is a failure. */
+              is_valid = false;
+              break;
+            }
+            /* Previous final name should only be found once in the list, so if it was found
+             * already, no need to do a string comparison again. */
+            if (!is_valid && id_test->name[2] == prev_final_name[0] &&
+                STREQ(prev_final_name, id_test->name + 2)) {
+              is_valid = true;
+            }
+          }
+        }
+
+        if (is_valid) {
+          /* Only the number changed, prev_orig_root_name, prev_root_name and prev_id_type remain
+           * the same. */
+          prev_number++;
+
+          strcpy(name, final_name);
+          return true;
+        }
+      }
+    }
+  }
+
   /* To speed up finding smallest unused number within [0 .. MAX_NUMBERS_IN_USE - 1].
    * We do not bother beyond that point. */
-#define MAX_NUMBERS_IN_USE 1024
   BLI_bitmap *numbers_in_use = BLI_BITMAP_NEW_ALLOCA(MAX_NUMBERS_IN_USE);
 
-  ID *id_test;
-
+  bool is_first_run = true;
   while (true) {
     /* Get the name and number parts ("name.number"). */
     char root_name[MAX_ID_NAME - 2];
     int number = MIN_NUMBER;
     size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.');
 
+    /* Store previous original given root name now, as we might alter it later in code below. */
+    if (is_first_run) {
+      strcpy(prev_orig_root_name, root_name);
+      is_first_run = false;
+    }
+
     /* In case we get an insane initial number suffix in given name. */
     /* Note: BLI_split_name_num() cannot return negative numbers, so we do not have to check for
      * that here. */
@@ -1680,11 +1799,17 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name)
      * Note however that name might have been changed (truncated) in a previous iteration already.
      */
     if (!is_orig_name_used) {
+      /* Don't bother updating prev_ static variables here, this case is not supposed to happen
+       * that often, and is not straight-forward here, so just ignore and reset them to default. */
+      prev_id_type = ID_LINK_PLACEHOLDER;
+      prev_final_root_name[0] = '\0';
+      prev_number = MIN_NUMBER - 1;
+
       return is_name_changed;
     }
 
-    /* Decide which value of nr to use, either the smallest unused one if possible, or default to
-     * the first largest unused one we got from previous loop. */
+    /* Decide which value of number to use, either the smallest unused one if possible, or default
+     * to the first largest unused one we got from previous loop. */
     for (int i = MIN_NUMBER; i < MAX_NUMBERS_IN_USE; i++) {
       if (!BLI_BITMAP_TEST_BOOL(numbers_in_use, i)) {
         number = i;
@@ -1697,45 +1822,32 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name)
      * We can't be bothered to look for the lowest unused number beyond
      * (MAX_NUMBERS_IN_USE - 1).
      */
+    /* We know for wure that name will be changed. */
+    is_name_changed = true;
 
-    char number_str[11]; /* Dot + nine digits + NULL char. */
-    size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
-
-    /* If the number would lead to an overflow of the maximum ID name length, we need to truncate
-     * the root name part and do all the number checks again. */
-    if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
-      if (root_name_len + number_str_len >= MAX_ID_NAME - 2) {
-        root_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
-      }
-      else {
-        root_name_len--;
-      }
-      root_name[root_name_len] = '\0';
-
-      /* Code above may have generated invalid utf-8 string, due to raw truncation.
-       * Ensure we get a valid one now. */
-      root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len);
-
+    /* If id_name_final_build helper returns false, it had to truncate further given name, hence we
+     * have to go over the whole check again. */
+    if (!id_name_final_build(name, root_name, root_name_len, number)) {
       /* We have to clear our list of small used numbers before we do the whole check again. */
       BLI_bitmap_set_all(numbers_in_use, false, MAX_NUMBERS_IN_USE);
 
-      /* Copy back truncated root name into orig name, and start the whole check again. */
-      BLI_strncpy(name, root_name, root_name_len + 1);
-      is_name_changed = true;
       continue;
     }
 
-    /* We have our final number, we can put it in name and exit the function. */
-    BLI_strncpy(name + root_name_len, number_str, number_str_len + 1);
-    is_name_changed = true;
+    /* Update prev_ static variables, in case next call is for the same type of IDs and with the
+     * same initial root name, we can skip a lot of above process. */
+    prev_id_type = id_type;
+    strcpy(prev_final_root_name, root_name);
+    prev_number = number;
 
     return is_name_changed;
   }
 
 #undef MAX_NUMBERS_IN_USE
+}
+
 #undef MIN_NUMBER
 #undef MAX_NUMBER
-}
 
 /**
  * Ensures given ID has a unique name in given listbase.
-- 
cgit v1.2.3