Welcome to mirror list, hosted at ThFree Co, Russian Federation.

fts_fuzzy_match.h « GUI « slic3r « src - github.com/prusa3d/PrusaSlicer.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 379fd9c74ea8f193fb6da6a3f90aae19669fd908 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
// LICENSE
//
//   This software is dual-licensed to the public domain and under the following
//   license: you are granted a perpetual, irrevocable license to copy, modify,
//   publish, and distribute this file as you see fit.
//
// VERSION 
//   0.2.0  (2017-02-18)  Scored matches perform exhaustive search for best score
//   0.1.0  (2016-03-28)  Initial release
//
// AUTHOR
//   Forrest Smith
//
// NOTES
//   Compiling
//     You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
//
//   fuzzy_match_simple(...)
//     Returns true if each character in pattern is found sequentially within str
//
//   fuzzy_match(...)
//     Returns true if pattern is found AND calculates a score.
//     Performs exhaustive search via recursion to find all possible matches and match with highest score.
//     Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
//     Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
//     Uses uint8_t for match indices. Therefore patterns are limited to max_matches characters.
//     Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.


#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H


#include <cstdint> // uint8_t
#include <ctype.h> // ::tolower, ::toupper
#include <cstring> // memcpy

#include <cstdio>

#include "../Utils/ASCIIFolding.hpp"

// Public interface
namespace fts {
	using 						char_type 	= wchar_t;
	using 						pos_type  	= uint16_t;
	static constexpr pos_type 	stopper 	= pos_type(-1);
	static constexpr int 		max_matches = 255;

    static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore);
    static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches);
}

#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
namespace fts {

    // Forward declarations for "private" implementation
    namespace fuzzy_internal {
        static bool fuzzy_match_recursive(const char_type * pattern, const char_type * str, int & outScore, const char_type * const strBegin,          
            pos_type const * srcMatches,  pos_type * newMatches, int nextMatch, 
            int recursionCount, const int recursionLimit);
        static void copy_matches(pos_type * dst, pos_type const* src);
    }

    // Public interface
    [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore) {
        pos_type matches[max_matches + 1]; // with the room for the stopper
        matches[0] = stopper;
        return fuzzy_match(pattern, str, outScore, matches);
    }

    [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches) {
        int recursionCount = 0;
        static constexpr int recursionLimit = 10;
        return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, 0, recursionCount, recursionLimit);
    }

    // Private implementation
    static bool fuzzy_internal::fuzzy_match_recursive(
    	// Pattern to match over str.
    	const char_type * 		pattern, 
    	// Text to match the pattern over.
    	const char_type * 		str, 
    	// Score of the pattern matching str. Output variable.
    	int & 					outScore, 
    	// The very start of str, for calculating indices of matches and for calculating matches from the start of the input string.
        const char_type * const	strBegin, 
        // Matches when entering this function.
        pos_type const * 		srcMatches,
        // Output matches.
        pos_type * 				matches,
        // Number of matched characters stored in srcMatches when entering this function, also tracking the successive matches.
        int 					nextMatch,
        // Recursion count is input / output to track the maximum depth reached.
        // Was given by reference &recursionCount, see discussion in https://github.com/forrestthewoods/lib_fts/issues/21
//        int & 					recursionCount, 
        int				        recursionCount, 
        const int				recursionLimit)
    {
        // Count recursions
        if (++ recursionCount >= recursionLimit)
            return false;

        // Detect end of strings
        if (*pattern == '\0' || *str == '\0')
            return false;

        // Recursion params
        bool recursiveMatch = false;
        pos_type bestRecursiveMatches[max_matches + 1]; // with the room for the stopper
        int bestRecursiveScore = 0;

        // Loop through pattern and str looking for a match
        bool first_match = true;
        while (*pattern != '\0' && *str != '\0') {

        	int  num_matched  = std::towlower(*pattern) == std::towlower(*str) ? 1 : 0;
            // bool folded_match = false;

            if (! num_matched) {
                wchar_t tmp[4];
                wchar_t *end = Slic3r::fold_to_ascii(*str, tmp);
                wchar_t *c = tmp;
                for (const wchar_t* d = pattern; c != end && *d != 0 && std::towlower(*c) == std::towlower(*d); ++c, ++d);
                if (c == end) {
                    // folded_match = true;
        			num_matched = end - tmp;
        		}
	        }
            
            // Found match
            if (num_matched) {

                // Supplied matches buffer was too short
                if (nextMatch + num_matched > max_matches)
                    return false;

                // "Copy-on-Write" srcMatches into matches
                if (first_match && srcMatches) {
                    memcpy(matches, srcMatches, sizeof(pos_type) * (nextMatch + 1)); // including the stopper
                    first_match = false;
                }

                // Recursive call that "skips" this match
                pos_type recursiveMatches[max_matches + 1]; // with the room for the stopper
                int recursiveScore;
                if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, nextMatch, recursionCount, recursionLimit)) {
                    
                    // Pick best recursive score
                    if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
                    	copy_matches(bestRecursiveMatches, recursiveMatches);
                		bestRecursiveScore = recursiveScore;
                    }
                    recursiveMatch = true;
                }

                // Advance
                matches[nextMatch++] = (pos_type)(str - strBegin);
                // Write a stopper sign.
                matches[nextMatch] = stopper;
                // Advance pattern by the number of matched characters (could be more if ASCII folding triggers in).
                pattern += num_matched;
            } 
            ++str;
        }

        // Determine if full pattern was matched
        bool matched = *pattern == '\0';

        // Calculate score
        if (matched) {
            static constexpr int sequential_bonus = 15;            // bonus for adjacent matches
            static constexpr int separator_bonus = 10/*30*/;             // bonus if match occurs after a separator
            // static constexpr int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
            static constexpr int first_letter_bonus = 15;          // bonus if the first letter is matched

            static constexpr int leading_letter_penalty = -1/*-5*/;      // penalty applied for every letter in str before the first match
            static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters
            static constexpr int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't matter

            // Iterate str to end
            while (*str != '\0')
                ++str;

            // Initialize score
            outScore = 100;

            // Start of the first group that contains matches[0].
            const char_type *group_start = strBegin + matches[0];
            for (const char_type *c = group_start; c >= strBegin && *c != ':'; -- c)
            	if (*c != ' ' && *c != '\t')
            		group_start = c;

            // Apply leading letter penalty or bonus.
            outScore += matches[0] == int(group_start - strBegin) ?
            	first_letter_bonus :
            	std::max((matches[0] - int(group_start - strBegin)) * leading_letter_penalty, max_leading_letter_penalty);

            // Apply unmatched letters after the end penalty
//            outScore += (int(str - group_start) - matches[nextMatch-1] + 1) * unmatched_letter_penalty;
            // Apply unmatched penalty
            outScore += (int(str - group_start) - nextMatch) * unmatched_letter_penalty;

            // Apply ordering bonuses
            int sequential_state = sequential_bonus;
            for (int i = 0; i < nextMatch; ++i) {
                pos_type currIdx = matches[i];

                // Check for bonuses based on neighbor character value
                if (currIdx > 0) {
                    if (i > 0 && currIdx == matches[i - 1] + 1) {
	                    // Sequential
                        outScore += sequential_state;
                        // Exponential grow of the sequential bonus.
                    	sequential_state = std::min(5 * sequential_bonus, sequential_state + sequential_state / 3);
                    } else {
                    	// Reset the sequential bonus exponential grow.
                    	sequential_state = sequential_bonus;
                    }
					char_type prev = strBegin[currIdx - 1];
/*
                    // Camel case
                    if (std::islower(prev) && std::isupper(strBegin[currIdx]))
                        outScore += camel_bonus;
*/
                    // Separator
                    if (prev == '_' || prev == ' ')
                        outScore += separator_bonus;
                }
            }
        }

        // Return best result
        if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
            // Recursive score is better than "this"
            copy_matches(matches, bestRecursiveMatches);
            outScore = bestRecursiveScore;
            return true;
        }
        else if (matched) {
            // "this" score is better than recursive
            return true;
        }
        else {
            // no match
            return false;
        }
    }

    // Copy matches up to a stopper.
    static void fuzzy_internal::copy_matches(pos_type * dst, pos_type const* src)
    {
        while (*src != stopper)
            *dst++ = *src++;
        *dst = stopper;
    }

} // namespace fts

#endif // FTS_FUZZY_MATCH_IMPLEMENTATION

#endif // FTS_FUZZY_MATCH_H