diff options
author | Yuri Gorshenin <y@maps.me> | 2017-02-27 12:40:20 +0300 |
---|---|---|
committer | Yuri Gorshenin <y@maps.me> | 2017-02-27 12:40:20 +0300 |
commit | 28bad4ed8b1f421088a9287ca86081e76ce6a59a (patch) | |
tree | d16b85583b626e2e4ac528f2c31709250e1b76c3 /indexer/search_string_utils.cpp | |
parent | 36f7f7c9d6db473c37e85e4af1df332e4ee007b3 (diff) |
[search] Use MemTrie for street synonyms.
Diffstat (limited to 'indexer/search_string_utils.cpp')
-rw-r--r-- | indexer/search_string_utils.cpp | 84 |
1 files changed, 76 insertions, 8 deletions
diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 216b5b59b8..0503d2a591 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -1,7 +1,9 @@ #include "indexer/search_string_utils.hpp" #include "indexer/string_set.hpp" +#include "base/assert.hpp" #include "base/macros.hpp" +#include "base/mem_trie.hpp" #include "std/algorithm.hpp" @@ -94,11 +96,67 @@ char const * kStreetTokensSeparator = "\t -,."; /// @todo Move prefixes, suffixes into separate file (autogenerated). /// It's better to distinguish synonyms comparison according to language/region. - class StreetsSynonymsHolder { public: - using TStrings = search::StringSet<UniChar, 8>; + struct BooleanSum + { + void Add(bool value) { m_value = m_value || value; } + + template <typename ToDo> + void ForEach(ToDo && toDo) const + { + if (m_value) + toDo(m_value); + } + + void Clear() { m_value = false; } + + bool m_value = false; + }; + + template <typename Char, typename SubTree> + class Moves + { + public: + template <typename ToDo> + void ForEach(ToDo && toDo) const + { + for (auto const & subtree : m_subtrees) + toDo(subtree.first, *subtree.second); + } + + SubTree * GetSubTree(Char const & c) const + { + for (auto const & subtree : m_subtrees) + { + if (subtree.first == c) + return subtree.second.get(); + } + return nullptr; + } + + SubTree & GetOrCreateSubTree(Char const & c, bool & created) + { + for (size_t i = 0; i < m_subtrees.size(); ++i) + { + if (m_subtrees[i].first == c) + { + created = false; + return *m_subtrees[i].second; + } + } + + created = true; + m_subtrees.emplace_back(c, make_unique<SubTree>()); + return *m_subtrees.back().second; + } + + void Clear() { m_subtrees.clear(); } + + private: + buffer_vector<pair<Char, std::unique_ptr<SubTree>>, 8> m_subtrees; + }; StreetsSynonymsHolder() { @@ -168,24 +226,34 @@ public: for (auto const * s : affics) { UniString const us = NormalizeAndSimplifyString(s); - m_strings.Add(us.begin(), us.end()); + m_strings.Add(us, true); } } bool MatchPrefix(UniString const & s) const { - auto const status = m_strings.Has(s.begin(), s.end()); - return status != TStrings::Status::Absent; + bool found = false; + m_strings.ForEachInNode(s, [&](UniString const & prefix, bool value) { + ASSERT_EQUAL(s, prefix, ()); + ASSERT(value, ()); + found = true; + }); + return found; } bool FullMatch(UniString const & s) const { - auto const status = m_strings.Has(s.begin(), s.end()); - return status == TStrings::Status::Full; + bool found = false; + m_strings.ForEachInNode(s, [&](UniString const & prefix, bool value) { + ASSERT_EQUAL(s, prefix, ()); + ASSERT(value, ()); + found = value; + }); + return found; } private: - TStrings m_strings; + my::MemTrie<UniString, BooleanSum, Moves> m_strings; }; StreetsSynonymsHolder g_streets; |