From 4cc8201a651007b7308a4468a550dbbd97c6c346 Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Thu, 19 Dec 2019 16:27:13 +0100 Subject: ID Management: Improve speed of code used when creating/renaming and ID. This commit affects `check_for_dupid()` helper: * Add a special, quicker code path dedicated to sequential addition of a large number of IDs using the same base name. This gives a significant speedup to adding 'randomly'-named IDs: | Number and type of names of IDs | old code | new code | speed improvement | | -------------------------------- | -------- | -------- | ----------------- | | 40K, mixed (14k rand, 26k const) | 49s | 39s | 26% | | 40K, fully random | 51s | 51s | 0% | | 40K, fully constant | 71s | 40s | 78% | Note that 'random' names give no improvement as expected, since this new code path will never be used in such cases. --- source/blender/blenkernel/intern/library.c | 182 +++++++++++++++++++++++------ 1 file changed, 147 insertions(+), 35 deletions(-) (limited to 'source/blender/blenkernel/intern/library.c') diff --git a/source/blender/blenkernel/intern/library.c b/source/blender/blenkernel/intern/library.c index 986c28c057c..2006c5af6f0 100644 --- a/source/blender/blenkernel/intern/library.c +++ b/source/blender/blenkernel/intern/library.c @@ -1612,6 +1612,53 @@ void id_sort_by_name(ListBase *lb, ID *id) } #undef ID_SORT_STEP_SIZE +/* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */ +#define MAX_NUMBER 1000000000 +/* We do not want to get "name.000", so minimal number is 1. */ +#define MIN_NUMBER 1 +/* The maximum value up to which we search for the actual smallest unused number. Beyond that + * value, we will only use the first biggest unused number, without trying to 'fill the gaps' + * in-between already used numbers... */ +#define MAX_NUMBERS_IN_USE 1024 + +/** + * Helper building final ID name from given root_name and number. + * + * If everything goes well and we do generate a valid final ID anme in given name, we return true. + * In case the final name would overflow the allowed ID name length, or given number is bigger than + * maximum allowed value, we truncate further the root_name (and given name, which is assumed to + * have the same 'root_name' part), and return false. + */ +static bool id_name_final_build(char *name, char *root_name, size_t root_name_len, int number) +{ + char number_str[11]; /* Dot + nine digits + NULL terminator. */ + size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number); + + /* If the number would lead to an overflow of the maximum ID name length, we need to truncate + * the root name part and do all the number checks again. */ + if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) { + if (root_name_len + number_str_len >= MAX_ID_NAME - 2) { + root_name_len = MAX_ID_NAME - 2 - number_str_len - 1; + } + else { + root_name_len--; + } + root_name[root_name_len] = '\0'; + + /* Code above may have generated invalid utf-8 string, due to raw truncation. + * Ensure we get a valid one now. */ + root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len); + + /* Also truncate orig name, and start the whole check again. */ + name[root_name_len] = '\0'; + return false; + } + + /* We have our final number, we can put it in name and exit the function. */ + BLI_strncpy(name + root_name_len, number_str, number_str_len + 1); + return true; +} + /** * Check to see if an ID name is already used, and find a new one if so. * Return true if a new name was created (returned in name). @@ -1622,28 +1669,100 @@ void id_sort_by_name(ListBase *lb, ID *id) */ static bool check_for_dupid(ListBase *lb, ID *id, char *name) { - /* Note: this code assumes and ensures that the suffix number can never go beyond 1 billion. */ -#define MAX_NUMBER 1000000000 - /* We do not want to get "name.000", so minimal number is 1. */ -#define MIN_NUMBER 1 - BLI_assert(strlen(name) < MAX_ID_NAME - 2); + ID *id_test = lb->first; bool is_name_changed = false; + if (id_test == NULL) { + return is_name_changed; + } + + const short id_type = (short)GS(id_test->name); + + /* Static storage of previous handled ID/name info, used to perform a quicker test and optimize + * creation of huge number of IDs using the same given root name. */ + static char prev_orig_root_name[MAX_ID_NAME - 2] = {0}; + static char prev_final_root_name[MAX_ID_NAME - 2] = {0}; + static short prev_id_type = ID_LINK_PLACEHOLDER; /* Should never exist in actual ID list. */ + static int prev_number = MIN_NUMBER - 1; + + /* Initial test to check whether we can 'shortcut' the more complex loop of the main code below. + * Note that we do not do that for low numbers, as that would prevent using actual smallest + * available number in some cases, and benefits of this special case handling mostly show up with + * high numbers anyway. */ + if (id_type == prev_id_type && prev_number >= MAX_NUMBERS_IN_USE && + prev_number < MAX_NUMBER - 1 && name[0] == prev_final_root_name[0]) { + + /* Get the name and number parts ("name.number"). */ + char root_name[MAX_ID_NAME - 2]; + int number = MIN_NUMBER; + size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.'); + size_t prev_final_root_name_len = strlen(prev_final_root_name); + size_t prev_orig_root_name_len = strlen(prev_orig_root_name); + + if (root_name_len == prev_orig_root_name_len && + STREQLEN(root_name, prev_orig_root_name, prev_orig_root_name_len)) { + /* Once we have ensured given root_name and original previous one are the same, we can check + * that previously used number is actually used, and that next one is free. */ + /* Note that from now on, we only used previous final root name, as it might have been + * truncated from original one due to number suffix length. */ + char final_name[MAX_ID_NAME - 2]; + char prev_final_name[MAX_ID_NAME - 2]; + BLI_strncpy(final_name, prev_final_root_name, prev_final_root_name_len + 1); + BLI_strncpy(prev_final_name, prev_final_root_name, prev_final_root_name_len + 1); + + if (id_name_final_build(final_name, root_name, prev_final_root_name_len, prev_number + 1) && + id_name_final_build(prev_final_name, root_name, prev_final_root_name_len, prev_number)) { + /* We succeffuly built valid final names of previous and current iterations, now we have to + * ensure that previous final name is indeed used in curent ID list, and that current one + * is not. */ + bool is_valid = false; + for (id_test = lb->first; id_test; id_test = id_test->next) { + if (id != id_test && !ID_IS_LINKED(id_test)) { + if (id_test->name[2] == final_name[0] && STREQ(final_name, id_test->name + 2)) { + /* We expect final_name to not be already used, so this is a failure. */ + is_valid = false; + break; + } + /* Previous final name should only be found once in the list, so if it was found + * already, no need to do a string comparison again. */ + if (!is_valid && id_test->name[2] == prev_final_name[0] && + STREQ(prev_final_name, id_test->name + 2)) { + is_valid = true; + } + } + } + + if (is_valid) { + /* Only the number changed, prev_orig_root_name, prev_root_name and prev_id_type remain + * the same. */ + prev_number++; + + strcpy(name, final_name); + return true; + } + } + } + } + /* To speed up finding smallest unused number within [0 .. MAX_NUMBERS_IN_USE - 1]. * We do not bother beyond that point. */ -#define MAX_NUMBERS_IN_USE 1024 BLI_bitmap *numbers_in_use = BLI_BITMAP_NEW_ALLOCA(MAX_NUMBERS_IN_USE); - ID *id_test; - + bool is_first_run = true; while (true) { /* Get the name and number parts ("name.number"). */ char root_name[MAX_ID_NAME - 2]; int number = MIN_NUMBER; size_t root_name_len = BLI_split_name_num(root_name, &number, name, '.'); + /* Store previous original given root name now, as we might alter it later in code below. */ + if (is_first_run) { + strcpy(prev_orig_root_name, root_name); + is_first_run = false; + } + /* In case we get an insane initial number suffix in given name. */ /* Note: BLI_split_name_num() cannot return negative numbers, so we do not have to check for * that here. */ @@ -1680,11 +1799,17 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name) * Note however that name might have been changed (truncated) in a previous iteration already. */ if (!is_orig_name_used) { + /* Don't bother updating prev_ static variables here, this case is not supposed to happen + * that often, and is not straight-forward here, so just ignore and reset them to default. */ + prev_id_type = ID_LINK_PLACEHOLDER; + prev_final_root_name[0] = '\0'; + prev_number = MIN_NUMBER - 1; + return is_name_changed; } - /* Decide which value of nr to use, either the smallest unused one if possible, or default to - * the first largest unused one we got from previous loop. */ + /* Decide which value of number to use, either the smallest unused one if possible, or default + * to the first largest unused one we got from previous loop. */ for (int i = MIN_NUMBER; i < MAX_NUMBERS_IN_USE; i++) { if (!BLI_BITMAP_TEST_BOOL(numbers_in_use, i)) { number = i; @@ -1697,45 +1822,32 @@ static bool check_for_dupid(ListBase *lb, ID *id, char *name) * We can't be bothered to look for the lowest unused number beyond * (MAX_NUMBERS_IN_USE - 1). */ + /* We know for wure that name will be changed. */ + is_name_changed = true; - char number_str[11]; /* Dot + nine digits + NULL char. */ - size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number); - - /* If the number would lead to an overflow of the maximum ID name length, we need to truncate - * the root name part and do all the number checks again. */ - if (root_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) { - if (root_name_len + number_str_len >= MAX_ID_NAME - 2) { - root_name_len = MAX_ID_NAME - 2 - number_str_len - 1; - } - else { - root_name_len--; - } - root_name[root_name_len] = '\0'; - - /* Code above may have generated invalid utf-8 string, due to raw truncation. - * Ensure we get a valid one now. */ - root_name_len -= (size_t)BLI_utf8_invalid_strip(root_name, root_name_len); - + /* If id_name_final_build helper returns false, it had to truncate further given name, hence we + * have to go over the whole check again. */ + if (!id_name_final_build(name, root_name, root_name_len, number)) { /* We have to clear our list of small used numbers before we do the whole check again. */ BLI_bitmap_set_all(numbers_in_use, false, MAX_NUMBERS_IN_USE); - /* Copy back truncated root name into orig name, and start the whole check again. */ - BLI_strncpy(name, root_name, root_name_len + 1); - is_name_changed = true; continue; } - /* We have our final number, we can put it in name and exit the function. */ - BLI_strncpy(name + root_name_len, number_str, number_str_len + 1); - is_name_changed = true; + /* Update prev_ static variables, in case next call is for the same type of IDs and with the + * same initial root name, we can skip a lot of above process. */ + prev_id_type = id_type; + strcpy(prev_final_root_name, root_name); + prev_number = number; return is_name_changed; } #undef MAX_NUMBERS_IN_USE +} + #undef MIN_NUMBER #undef MAX_NUMBER -} /** * Ensures given ID has a unique name in given listbase. -- cgit v1.2.3