diff options
author | Jacques Lucke <jacques@blender.org> | 2022-03-02 19:25:39 +0300 |
---|---|---|
committer | Jacques Lucke <jacques@blender.org> | 2022-03-02 19:25:39 +0300 |
commit | 9789a6c6d96817489474bde2fa1da05f7868a02f (patch) | |
tree | 260d771a44936f020764bd43d2e5416ab3cd7871 /source/blender/blenlib/intern | |
parent | ac45540a348ec8662e4e27002c64176c402fe549 (diff) |
Search: take word order into account in string search
Differential Revision: https://developer.blender.org/D14223
Diffstat (limited to 'source/blender/blenlib/intern')
-rw-r--r-- | source/blender/blenlib/intern/string_search.cc | 53 |
1 files changed, 37 insertions, 16 deletions
diff --git a/source/blender/blenlib/intern/string_search.cc b/source/blender/blenlib/intern/string_search.cc index d32737f2a4b..14d85b99739 100644 --- a/source/blender/blenlib/intern/string_search.cc +++ b/source/blender/blenlib/intern/string_search.cc @@ -151,6 +151,8 @@ int get_fuzzy_match_errors(StringRef query, StringRef full) } } +static constexpr int unused_word = -1; + /** * Takes a query and tries to match it with the first characters of some words. For example, "msfv" * matches "Mark Sharp from Vertices". Multiple letters of the beginning of a word can be matched @@ -163,7 +165,7 @@ int get_fuzzy_match_errors(StringRef query, StringRef full) */ static bool match_word_initials(StringRef query, Span<StringRef> words, - Span<bool> word_is_usable, + Span<int> word_match_map, MutableSpan<bool> r_word_is_matched, int start = 0) { @@ -189,13 +191,13 @@ static bool match_word_initials(StringRef query, /* Try starting to match at another word. In some cases one can still find matches this * way. */ return match_word_initials( - query, words, word_is_usable, r_word_is_matched, first_found_word_index + 1); + query, words, word_match_map, r_word_is_matched, first_found_word_index + 1); } return false; } /* Skip words that the caller does not want us to use. */ - if (!word_is_usable[word_index]) { + if (word_match_map[word_index] != unused_word) { word_index++; BLI_assert(char_index == 0); continue; @@ -225,12 +227,12 @@ static bool match_word_initials(StringRef query, static int get_shortest_word_index_that_startswith(StringRef query, Span<StringRef> words, - Span<bool> word_is_usable) + Span<int> word_match_map) { int best_word_size = INT32_MAX; int best_word_index = -1; for (const int i : words.index_range()) { - if (!word_is_usable[i]) { + if (word_match_map[i] != unused_word) { continue; } StringRef word = words[i]; @@ -246,11 +248,11 @@ static int get_shortest_word_index_that_startswith(StringRef query, static int get_word_index_that_fuzzy_matches(StringRef query, Span<StringRef> words, - Span<bool> word_is_usable, + Span<int> word_match_map, int *r_error_count) { for (const int i : words.index_range()) { - if (!word_is_usable[i]) { + if (word_match_map[i] != unused_word) { continue; } StringRef word = words[i]; @@ -269,20 +271,22 @@ static int get_word_index_that_fuzzy_matches(StringRef query, */ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef> result_words) { - /* Remember which words have been matched, so that they are not matched again. */ - Array<bool, 64> word_is_usable(result_words.size(), true); + /* A mapping from #result_words to #query_words. It's mainly used to determine if a word has been + * matched already to avoid matching it again. */ + Array<int, 64> word_match_map(result_words.size(), unused_word); /* Start with some high score, because otherwise the final score might become negative. */ int total_match_score = 1000; - for (StringRef query_word : query_words) { + for (const int query_word_index : query_words.index_range()) { + const StringRef query_word = query_words[query_word_index]; { /* Check if any result word begins with the query word. */ const int word_index = get_shortest_word_index_that_startswith( - query_word, result_words, word_is_usable); + query_word, result_words, word_match_map); if (word_index >= 0) { total_match_score += 10; - word_is_usable[word_index] = false; + word_match_map[word_index] = query_word_index; continue; } } @@ -290,12 +294,12 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef /* Try to match against word initials. */ Array<bool, 64> matched_words(result_words.size()); const bool success = match_word_initials( - query_word, result_words, word_is_usable, matched_words); + query_word, result_words, word_match_map, matched_words); if (success) { total_match_score += 3; for (const int i : result_words.index_range()) { if (matched_words[i]) { - word_is_usable[i] = false; + word_match_map[i] = query_word_index; } } continue; @@ -305,10 +309,10 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef /* Fuzzy match against words. */ int error_count = 0; const int word_index = get_word_index_that_fuzzy_matches( - query_word, result_words, word_is_usable, &error_count); + query_word, result_words, word_match_map, &error_count); if (word_index >= 0) { total_match_score += 3 - error_count; - word_is_usable[word_index] = false; + word_match_map[word_index] = query_word_index; continue; } } @@ -317,6 +321,23 @@ static int score_query_against_words(Span<StringRef> query_words, Span<StringRef return -1; } + { + /* Add penalty when query words are not in the correct order. */ + Vector<int> match_indices; + for (const int index : word_match_map) { + if (index != unused_word) { + match_indices.append(index); + } + } + if (!match_indices.is_empty()) { + for (const int i : IndexRange(match_indices.size() - 1)) { + if (match_indices[i] > match_indices[i + 1]) { + total_match_score -= 1; + } + } + } + } + return total_match_score; } |