Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'search/locality_scorer.cpp')
-rw-r--r--search/locality_scorer.cpp137
1 files changed, 137 insertions, 0 deletions
diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp
new file mode 100644
index 0000000000..48eff3e136
--- /dev/null
+++ b/search/locality_scorer.cpp
@@ -0,0 +1,137 @@
+#include "search/locality_scorer.hpp"
+
+#include "search/token_slice.hpp"
+
+#include "std/algorithm.hpp"
+
+namespace search
+{
+// static
+size_t const LocalityScorer::kDefaultReadLimit = 100;
+
+namespace
+{
+bool IsAlmostFullMatch(NameScore score)
+{
+ return score == NAME_SCORE_FULL_MATCH_PREFIX || score == NAME_SCORE_FULL_MATCH;
+}
+} // namespace
+
+// LocalityScorer::ExLocality ----------------------------------------------------------------------
+LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScore(NAME_SCORE_ZERO)
+{
+}
+
+LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality)
+ : m_locality(locality)
+ , m_numTokens(locality.m_endToken - locality.m_startToken)
+ , m_rank(0)
+ , m_nameScore(NAME_SCORE_ZERO)
+{
+}
+
+// LocalityScorer ----------------------------------------------------------------------------------
+LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate)
+ : m_params(params), m_delegate(delegate)
+{
+}
+
+void LocalityScorer::GetTopLocalities(size_t limit, vector<Geocoder::Locality> & localities) const
+{
+ vector<ExLocality> ls;
+ ls.reserve(localities.size());
+ for (auto const & locality : localities)
+ ls.emplace_back(locality);
+
+ RemoveDuplicates(ls);
+ LeaveTopByRank(std::max(limit, kDefaultReadLimit), ls);
+ SortByName(ls);
+ if (ls.size() > limit)
+ ls.resize(limit);
+
+ localities.clear();
+ localities.reserve(ls.size());
+ for (auto const & l : ls)
+ localities.push_back(l.m_locality);
+}
+
+void LocalityScorer::RemoveDuplicates(vector<ExLocality> & ls) const
+{
+ sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+ {
+ if (lhs.GetId() != rhs.GetId())
+ return lhs.GetId() < rhs.GetId();
+ return lhs.m_numTokens > rhs.m_numTokens;
+ });
+ ls.erase(unique(ls.begin(), ls.end(),
+ [](ExLocality const & lhs, ExLocality const & rhs)
+ {
+ return lhs.GetId() == rhs.GetId();
+ }),
+ ls.end());
+}
+
+void LocalityScorer::LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const
+{
+ if (ls.size() <= limit)
+ return;
+
+ for (auto & l : ls)
+ l.m_rank = m_delegate.GetRank(l.GetId());
+
+ sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+ {
+ if (lhs.m_rank != rhs.m_rank)
+ return lhs.m_rank > rhs.m_rank;
+ return lhs.m_numTokens > rhs.m_numTokens;
+ });
+ ls.resize(limit);
+}
+
+void LocalityScorer::SortByName(vector<ExLocality> & ls) const
+{
+ vector<string> names;
+ for (auto & l : ls)
+ {
+ names.clear();
+ m_delegate.GetNames(l.GetId(), names);
+
+ auto score = NAME_SCORE_ZERO;
+ for (auto const & name : names)
+ {
+ score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_startToken,
+ l.m_locality.m_endToken)));
+ }
+ l.m_nameScore = score;
+ }
+
+ sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+ {
+ if (IsAlmostFullMatch(lhs.m_nameScore) && IsAlmostFullMatch(rhs.m_nameScore))
+ {
+ // When both localities match well, e.g. full or full prefix
+ // match, the one with larger number of tokens is selected. In
+ // case of tie, the one with better score is selected.
+ if (lhs.m_numTokens != rhs.m_numTokens)
+ return lhs.m_numTokens > rhs.m_numTokens;
+ if (lhs.m_nameScore != rhs.m_nameScore)
+ return lhs.m_nameScore > rhs.m_nameScore;
+ }
+ else
+ {
+ // When name scores differ, the one with better name score is
+ // selected. In case of tie, the one with larger number of
+ // matched tokens is selected.
+ if (lhs.m_nameScore != rhs.m_nameScore)
+ return lhs.m_nameScore > rhs.m_nameScore;
+ if (lhs.m_numTokens != rhs.m_numTokens)
+ return lhs.m_numTokens > rhs.m_numTokens;
+ }
+
+ // Okay, in case of tie we select the one with better rank. This
+ // is a quite arbitrary decision and definitely may be improved.
+ return lhs.m_rank > rhs.m_rank;
+ });
+}
+
+} // namespace search