From 987387534aba3f23346e5b076058fcb567651389 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Sat, 5 May 2018 19:25:07 +0200 Subject: src: minor refactor to string_search.h - Use `std::max` instead of a custom variant - Use member method pointers to avoid an extra layer of indirection - Stop transferring `Vector` into the `node` namespace PR-URL: https://github.com/nodejs/node/pull/20546 Reviewed-By: James M Snell Reviewed-By: Daniel Bevenius Reviewed-By: Tiancheng "Timothy" Gu --- src/string_search.h | 145 +++++++++++++++++++--------------------------------- 1 file changed, 54 insertions(+), 91 deletions(-) (limited to 'src/string_search.h') diff --git a/src/string_search.h b/src/string_search.h index 51f72062a99..6a45eb36662 100644 --- a/src/string_search.h +++ b/src/string_search.h @@ -9,18 +9,11 @@ #include "node_internals.h" #include +#include namespace node { namespace stringsearch { - -// Returns the maximum of the two parameters. -template -T Max(T a, T b) { - return a < b ? b : a; -} - - static const uint32_t kMaxOneByteCharCodeU = 0xff; template @@ -98,7 +91,9 @@ class StringSearchBase { template class StringSearch : private StringSearchBase { public: - explicit StringSearch(Vector pattern) + typedef stringsearch::Vector Vector; + + explicit StringSearch(Vector pattern) : pattern_(pattern), start_(0) { if (pattern.length() >= kBMMaxShift) { start_ = pattern.length() - kBMMaxShift; @@ -108,17 +103,17 @@ class StringSearch : private StringSearchBase { CHECK_GT(pattern_length, 0); if (pattern_length < kBMMinPatternLength) { if (pattern_length == 1) { - strategy_ = &SingleCharSearch; + strategy_ = &StringSearch::SingleCharSearch; return; } - strategy_ = &LinearSearch; + strategy_ = &StringSearch::LinearSearch; return; } - strategy_ = &InitialSearch; + strategy_ = &StringSearch::InitialSearch; } - size_t Search(Vector subject, size_t index) { - return strategy_(this, subject, index); + size_t Search(Vector subject, size_t index) { + return (this->*strategy_)(subject, index); } static inline int AlphabetSize() { @@ -136,31 +131,12 @@ class StringSearch : private StringSearchBase { } private: - typedef size_t (*SearchFunction)( - StringSearch*, - Vector, - size_t); - - static size_t SingleCharSearch(StringSearch* search, - Vector subject, - size_t start_index); - - static size_t LinearSearch(StringSearch* search, - Vector subject, - size_t start_index); - - static size_t InitialSearch(StringSearch* search, - Vector subject, - size_t start_index); - - static size_t BoyerMooreHorspoolSearch( - StringSearch* search, - Vector subject, - size_t start_index); - - static size_t BoyerMooreSearch(StringSearch* search, - Vector subject, - size_t start_index); + typedef size_t (StringSearch::*SearchFunction)(Vector, size_t); + size_t SingleCharSearch(Vector subject, size_t start_index); + size_t LinearSearch(Vector subject, size_t start_index); + size_t InitialSearch(Vector subject, size_t start_index); + size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index); + size_t BoyerMooreSearch(Vector subject, size_t start_index); void PopulateBoyerMooreHorspoolTable(); @@ -197,7 +173,7 @@ class StringSearch : private StringSearchBase { } // The pattern to search for. - Vector pattern_; + Vector pattern_; // Pointer to implementation of the search. SearchFunction strategy_; // Cache value of Max(0, pattern_length() - kBMMaxShift) @@ -213,8 +189,8 @@ inline T AlignDown(T value, U alignment) { inline uint8_t GetHighestValueByte(uint16_t character) { - return Max(static_cast(character & 0xFF), - static_cast(character >> 8)); + return std::max(static_cast(character & 0xFF), + static_cast(character >> 8)); } @@ -319,11 +295,10 @@ inline size_t FindFirstCharacter(Vector pattern, template size_t StringSearch::SingleCharSearch( - StringSearch* search, - Vector subject, + Vector subject, size_t index) { - CHECK_EQ(1, search->pattern_.length()); - return FindFirstCharacter(search->pattern_, subject, index); + CHECK_EQ(1, pattern_.length()); + return FindFirstCharacter(pattern_, subject, index); } //--------------------------------------------------------------------- @@ -333,22 +308,19 @@ size_t StringSearch::SingleCharSearch( // Simple linear search for short patterns. Never bails out. template size_t StringSearch::LinearSearch( - StringSearch* search, - Vector subject, + Vector subject, size_t index) { - Vector pattern = search->pattern_; - CHECK_GT(pattern.length(), 1); - const size_t pattern_length = pattern.length(); - const size_t n = subject.length() - pattern_length; + CHECK_GT(pattern_.length(), 1); + const size_t n = subject.length() - pattern_.length(); for (size_t i = index; i <= n; i++) { - i = FindFirstCharacter(pattern, subject, i); + i = FindFirstCharacter(pattern_, subject, i); if (i == subject.length()) return subject.length(); CHECK_LE(i, n); bool matches = true; - for (size_t j = 1; j < pattern_length; j++) { - if (pattern[j] != subject[i + j]) { + for (size_t j = 1; j < pattern_.length(); j++) { + if (pattern_[j] != subject[i + j]) { matches = false; break; } @@ -366,19 +338,17 @@ size_t StringSearch::LinearSearch( template size_t StringSearch::BoyerMooreSearch( - StringSearch* search, - Vector subject, + Vector subject, size_t start_index) { - Vector pattern = search->pattern_; const size_t subject_length = subject.length(); - const size_t pattern_length = pattern.length(); + const size_t pattern_length = pattern_.length(); // Only preprocess at most kBMMaxShift last characters of pattern. - size_t start = search->start_; + size_t start = start_; - int* bad_char_occurrence = search->bad_char_table(); - int* good_suffix_shift = search->good_suffix_shift_table(); + int* bad_char_occurrence = bad_char_table(); + int* good_suffix_shift = good_suffix_shift_table(); - Char last_char = pattern[pattern_length - 1]; + Char last_char = pattern_[pattern_length - 1]; size_t index = start_index; // Continue search from i. while (index <= subject_length - pattern_length) { @@ -391,7 +361,7 @@ size_t StringSearch::BoyerMooreSearch( return subject.length(); } } - while (pattern[j] == (c = subject[index + j])) { + while (pattern_[j] == (c = subject[index + j])) { if (j == 0) { return index; } @@ -420,7 +390,6 @@ size_t StringSearch::BoyerMooreSearch( template void StringSearch::PopulateBoyerMooreTable() { const size_t pattern_length = pattern_.length(); - Vector pattern = pattern_; // Only look at the last kBMMaxShift characters of pattern (from start_ // to pattern_length). const size_t start = start_; @@ -448,8 +417,8 @@ void StringSearch::PopulateBoyerMooreTable() { { size_t i = pattern_length; while (i > start) { - Char c = pattern[i - 1]; - while (suffix <= pattern_length && c != pattern[suffix - 1]) { + Char c = pattern_[i - 1]; + while (suffix <= pattern_length && c != pattern_[suffix - 1]) { if (static_cast(shift_table[suffix]) == length) { shift_table[suffix] = suffix - i; } @@ -458,7 +427,7 @@ void StringSearch::PopulateBoyerMooreTable() { suffix_table[--i] = --suffix; if (suffix == pattern_length) { // No suffix to extend, so we check against last_char only. - while ((i > start) && (pattern[i - 1] != last_char)) { + while ((i > start) && (pattern_[i - 1] != last_char)) { if (static_cast(shift_table[pattern_length]) == length) { shift_table[pattern_length] = pattern_length - i; } @@ -489,17 +458,15 @@ void StringSearch::PopulateBoyerMooreTable() { template size_t StringSearch::BoyerMooreHorspoolSearch( - StringSearch* search, - Vector subject, + Vector subject, size_t start_index) { - Vector pattern = search->pattern_; const size_t subject_length = subject.length(); - const size_t pattern_length = pattern.length(); - int* char_occurrences = search->bad_char_table(); + const size_t pattern_length = pattern_.length(); + int* char_occurrences = bad_char_table(); int64_t badness = -pattern_length; // How bad we are doing without a good-suffix table. - Char last_char = pattern[pattern_length - 1]; + Char last_char = pattern_[pattern_length - 1]; int last_char_shift = pattern_length - 1 - CharOccurrence(char_occurrences, static_cast(last_char)); @@ -519,7 +486,7 @@ size_t StringSearch::BoyerMooreHorspoolSearch( } } j--; - while (pattern[j] == (subject[index + j])) { + while (pattern_[j] == (subject[index + j])) { if (j == 0) { return index; } @@ -532,9 +499,9 @@ size_t StringSearch::BoyerMooreHorspoolSearch( // compared to reading each character exactly once. badness += (pattern_length - j) - last_char_shift; if (badness > 0) { - search->PopulateBoyerMooreTable(); - search->strategy_ = &BoyerMooreSearch; - return BoyerMooreSearch(search, subject, index); + PopulateBoyerMooreTable(); + strategy_ = &StringSearch::BoyerMooreSearch; + return BoyerMooreSearch(subject, index); } } return subject.length(); @@ -575,11 +542,9 @@ void StringSearch::PopulateBoyerMooreHorspoolTable() { // isn't found very early in the subject. Upgrades to BoyerMooreHorspool. template size_t StringSearch::InitialSearch( - StringSearch* search, - Vector subject, + Vector subject, size_t index) { - Vector pattern = search->pattern_; - const size_t pattern_length = pattern.length(); + const size_t pattern_length = pattern_.length(); // Badness is a count of how much work we have done. When we have // done enough work we decide it's probably worth switching to a better // algorithm. @@ -590,13 +555,13 @@ size_t StringSearch::InitialSearch( for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) { badness++; if (badness <= 0) { - i = FindFirstCharacter(pattern, subject, i); + i = FindFirstCharacter(pattern_, subject, i); if (i == subject.length()) return subject.length(); CHECK_LE(i, n); size_t j = 1; do { - if (pattern[j] != subject[i + j]) { + if (pattern_[j] != subject[i + j]) { break; } j++; @@ -606,9 +571,9 @@ size_t StringSearch::InitialSearch( } badness += j; } else { - search->PopulateBoyerMooreHorspoolTable(); - search->strategy_ = &BoyerMooreHorspoolSearch; - return BoyerMooreHorspoolSearch(search, subject, i); + PopulateBoyerMooreHorspoolTable(); + strategy_ = &StringSearch::BoyerMooreHorspoolSearch; + return BoyerMooreHorspoolSearch(subject, i); } } return subject.length(); @@ -629,7 +594,6 @@ size_t SearchString(Vector subject, } // namespace node namespace node { -using node::stringsearch::Vector; template size_t SearchString(const Char* haystack, @@ -643,9 +607,8 @@ size_t SearchString(const Char* haystack, // code, create two vectors that are reversed views into the input strings. // For example, v_needle[0] would return the *last* character of the needle. // So we're searching for the first instance of rev(needle) in rev(haystack) - Vector v_needle = Vector( - needle, needle_length, is_forward); - Vector v_haystack = Vector( + stringsearch::Vector v_needle(needle, needle_length, is_forward); + stringsearch::Vector v_haystack( haystack, haystack_length, is_forward); size_t diff = haystack_length - needle_length; size_t relative_start_index; -- cgit v1.2.3