Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Viktor Govako <viktor.govako@gmail.com>, 2016-03-24 13:34:08 +0300
committer: Viktor Govako <viktor.govako@gmail.com>, 2016-03-24 13:34:08 +0300
commit: 76cb6e84ab2e0ea0ff34790ff1c3ed2ac6331120 (patch)
tree: fd52eb65f575e06f197bec4dafa98cbefb706d29
parent: 0b296f2e59501fb8163a525e5a09006d0e312c14 (diff)
parent: 891794420c62ccee307559649a4ded26f31cba6b (diff)
Merge pull request #2478 from ygorshenin/fix-name-scoring-calculation
[search] Fixed name score calculation.
-rw-r--r-- search/search_query.cpp 53
-rw-r--r-- search/search_query_params.cpp 1
-rw-r--r-- search/search_query_params.hpp 2
-rw-r--r-- search/search_tests/ranking_tests.cpp 2
-rw-r--r-- search/v2/locality_scorer.cpp 4
-rw-r--r-- search/v2/ranking_info.cpp 2
-rw-r--r-- search/v2/ranking_utils.cpp 58
-rw-r--r-- search/v2/ranking_utils.hpp 130
8 files changed, 169 insertions, 83 deletions
diff --git a/search/search_query.cpp b/search/search_query.cpp
index 682419c431..48e523e4cd 100644
--- a/search/search_query.cpp
+++ b/search/search_query.cpp
@@ -181,6 +181,32 @@ m2::RectD GetRectAroundPosition(m2::PointD const & position)
double constexpr kMaxPositionRadiusM = 50.0 * 1000;
return MercatorBounds::RectByCenterXYAndSizeInMeters(position, kMaxPositionRadiusM);
}
+
+template <typename TSlice>
+void UpdateNameScore(string const & name, TSlice const & slice, v2::NameScore & bestScore)
+{
+ auto const score = v2::GetNameScore(name, slice);
+ if (score > bestScore)
+ bestScore = score;
+}
+
+template <typename TSlice>
+void UpdateNameScore(vector<strings::UniString> const & tokens, TSlice const & slice,
+ v2::NameScore & bestScore, double & bestCoverage)
+{
+ auto const score = v2::GetNameScore(tokens, slice);
+ auto const coverage =
+ tokens.empty() ? 0 : static_cast<double>(slice.Size()) / static_cast<double>(tokens.size());
+ if (score > bestScore)
+ {
+ bestScore = score;
+ bestCoverage = coverage;
+ }
+ else if (score == bestScore && coverage > bestCoverage)
+ {
+ bestCoverage = coverage;
+ }
+}
} // namespace
// static
@@ -591,6 +617,9 @@ class PreResult2Maker
info.m_searchType = preInfo.m_searchType;
info.m_nameScore = v2::NAME_SCORE_ZERO;
+
+ v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
+
for (auto const & lang : m_params.m_langs)
{
string name;
@@ -599,28 +628,11 @@ class PreResult2Maker
vector<strings::UniString> tokens;
SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
- auto score = GetNameScore(tokens, m_params, preInfo.m_startToken, preInfo.m_endToken);
- auto coverage =
- tokens.empty() ? 0 : static_cast<double>(preInfo.m_endToken - preInfo.m_startToken) /
- static_cast<double>(tokens.size());
- if (score > info.m_nameScore)
- {
- info.m_nameScore = score;
- info.m_nameCoverage = coverage;
- }
- else if (score == info.m_nameScore && coverage > info.m_nameCoverage)
- {
- info.m_nameCoverage = coverage;
- }
+ UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage);
}
if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING)
- {
- string const houseNumber = ft.GetHouseNumber();
- auto score = GetNameScore(houseNumber, m_params, preInfo.m_startToken, preInfo.m_endToken);
- if (score > info.m_nameScore)
- info.m_nameScore = score;
- }
+ UpdateNameScore(ft.GetHouseNumber(), slice, info.m_nameScore);
}
uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center,
@@ -1164,6 +1176,8 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params)
for (size_t i = 0; i < tokensCount; ++i)
params.m_tokens[i].push_back(m_tokens[i]);
+ params.m_isCategorySynonym.assign(tokensCount + (m_prefix.empty() ? 0 : 1), false);
+
// Add names of categories (and synonyms).
if (!localitySearch)
{
@@ -1175,6 +1189,7 @@ void Query::InitParams(bool localitySearch, SearchQueryParams & params)
uint32_t const index = cl.GetIndexForType(t);
v.push_back(FeatureTypeToString(index));
+ params.m_isCategorySynonym[i] = true;
// v2-version MWM has raw classificator types in search index prefix, so
// do the hack: add synonyms for old convention if needed.
diff --git a/search/search_query_params.cpp b/search/search_query_params.cpp
index 3de965be4a..c358bce01f 100644
--- a/search/search_query_params.cpp
+++ b/search/search_query_params.cpp
@@ -63,6 +63,7 @@ void SearchQueryParams::Clear()
{
m_tokens.clear();
m_prefixTokens.clear();
+ m_isCategorySynonym.clear();
m_langs.clear();
m_scale = scales::GetUpperScale();
}
diff --git a/search/search_query_params.hpp b/search/search_query_params.hpp
index 4b3f3ebb66..38a26b7011 100644
--- a/search/search_query_params.hpp
+++ b/search/search_query_params.hpp
@@ -16,6 +16,8 @@ struct SearchQueryParams
vector<TSynonymsVector> m_tokens;
TSynonymsVector m_prefixTokens;
+ vector<bool> m_isCategorySynonym;
+
TLangsSet m_langs;
int m_scale;
diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp
index 605900dfbb..b74e35a284 100644
--- a/search/search_tests/ranking_tests.cpp
+++ b/search/search_tests/ranking_tests.cpp
@@ -32,7 +32,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
params.m_prefixTokens.swap(params.m_tokens.back());
params.m_tokens.pop_back();
}
- return GetNameScore(name, params, startToken, endToken);
+ return GetNameScore(name, TokensSlice(params, startToken, endToken));
}
UNIT_TEST(NameTest_Smoke)
diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp
index 66f5d92d90..49e7e8a817 100644
--- a/search/v2/locality_scorer.cpp
+++ b/search/v2/locality_scorer.cpp
@@ -99,8 +99,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const
auto score = NAME_SCORE_ZERO;
for (auto const & name : names)
{
- score = max(score,
- GetNameScore(name, m_params, l.m_locality.m_startToken, l.m_locality.m_endToken));
+ score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken,
+ l.m_locality.m_endToken)));
}
l.m_nameScore = score;
}
diff --git a/search/v2/ranking_info.cpp b/search/v2/ranking_info.cpp
index 59c571ec2e..f612e6be87 100644
--- a/search/v2/ranking_info.cpp
+++ b/search/v2/ranking_info.cpp
@@ -14,7 +14,7 @@ namespace
double const kDistanceToPivot = 24.443;
double const kRank = 11.010;
double const kNameScore = 1.0;
-double const kNameCoverage = 0.0;
+double const kNameCoverage = 1.0;
double const kSearchType = 22.378;
double TransformDistance(double distance)
diff --git a/search/v2/ranking_utils.cpp b/search/v2/ranking_utils.cpp
index f2dc94bd06..f865f3336e 100644
--- a/search/v2/ranking_utils.cpp
+++ b/search/v2/ranking_utils.cpp
@@ -1,12 +1,5 @@
#include "search/v2/ranking_utils.hpp"
-#include "search/search_query_params.hpp"
-
-#include "indexer/search_delimiters.hpp"
-#include "indexer/search_string_utils.hpp"
-
-#include "base/stl_add.hpp"
-
#include "std/algorithm.hpp"
using namespace strings;
@@ -15,7 +8,7 @@ namespace search
{
namespace v2
{
-namespace
+namespace impl
{
bool Match(vector<UniString> const & tokens, UniString const & token)
{
@@ -31,54 +24,7 @@ bool PrefixMatch(vector<UniString> const & prefixes, UniString const & token)
}
return false;
}
-} // namespace
-
-NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken,
- size_t endToken)
-{
- if (startToken >= endToken)
- return NAME_SCORE_ZERO;
-
- vector<UniString> tokens;
- SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
- return GetNameScore(tokens, params, startToken, endToken);
-}
-
-NameScore GetNameScore(vector<UniString> const & tokens, SearchQueryParams const & params,
- size_t startToken, size_t endToken)
-{
- if (startToken >= endToken)
- return NAME_SCORE_ZERO;
-
- size_t const n = tokens.size();
- size_t const m = endToken - startToken;
-
- bool const lastTokenIsPrefix = (endToken == params.m_tokens.size() + 1);
-
- NameScore score = NAME_SCORE_ZERO;
- for (int offset = 0; offset + m <= n; ++offset)
- {
- bool match = true;
- for (int i = 0; i + 1 < m && match; ++i)
- match = match && Match(params.GetTokens(startToken + i), tokens[offset + i]);
- if (!match)
- continue;
-
- if (Match(params.GetTokens(endToken - 1), tokens[offset + m - 1]))
- {
- if (m == n)
- return NAME_SCORE_FULL_MATCH;
- score = max(score, NAME_SCORE_SUBSTRING);
- }
- if (lastTokenIsPrefix && PrefixMatch(params.GetTokens(endToken - 1), tokens[offset + m - 1]))
- {
- if (m == n)
- return NAME_SCORE_FULL_MATCH_PREFIX;
- score = max(score, NAME_SCORE_SUBSTRING_PREFIX);
- }
- }
- return score;
-}
+} // namespace impl
string DebugPrint(NameScore score)
{
diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp
index 4dd5099c6c..a559192930 100644
--- a/search/v2/ranking_utils.hpp
+++ b/search/v2/ranking_utils.hpp
@@ -1,8 +1,14 @@
#pragma once
+#include "search/search_query_params.hpp"
#include "search/v2/geocoder.hpp"
#include "search/v2/search_model.hpp"
+#include "indexer/search_delimiters.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "base/assert.hpp"
+#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
#include "std/cstdint.hpp"
@@ -16,6 +22,13 @@ struct SearchQueryParams;
namespace v2
{
+namespace impl
+{
+bool Match(vector<strings::UniString> const & tokens, strings::UniString const & token);
+
+bool PrefixMatch(vector<strings::UniString> const & prefixes, strings::UniString const & token);
+} // namespace impl
+
// The order and numeric values are important here. Please, check all
// use-cases before changing this enum.
enum NameScore
@@ -29,11 +42,120 @@ enum NameScore
NAME_SCORE_COUNT
};
-NameScore GetNameScore(string const & name, SearchQueryParams const & params, size_t startToken,
- size_t endToken);
+class TokensSlice
+{
+public:
+ TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
+ : m_params(params), m_offset(startToken), m_size(endToken - startToken)
+ {
+ ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
+ }
+
+ inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
+ {
+ ASSERT_LESS(i, Size(), ());
+ return m_params.GetTokens(m_offset + i);
+ }
+
+ inline size_t Size() const { return m_size; }
+
+ inline bool Empty() const { return Size() == 0; }
+
+ inline bool IsPrefix(size_t i) const
+ {
+ ASSERT_LESS(i, Size(), ());
+ return m_offset + i == m_params.m_tokens.size();
+ }
+
+private:
+ SearchQueryParams const & m_params;
+ size_t const m_offset;
+ size_t const m_size;
+};
+
+class TokensSliceNoCategories
+{
+public:
+ TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
+ : m_params(params)
+ {
+ ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
+
+ m_indexes.reserve(endToken - startToken);
+ for (size_t i = startToken; i < endToken; ++i)
+ {
+ if (!m_params.m_isCategorySynonym[i])
+ m_indexes.push_back(i);
+ }
+ }
+
+ inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
+ {
+ ASSERT_LESS(i, Size(), ());
+ return m_params.GetTokens(m_indexes[i]);
+ }
+
+ inline size_t Size() const { return m_indexes.size(); }
+
+ inline bool Empty() const { return Size() == 0; }
+
+ inline bool IsPrefix(size_t i) const
+ {
+ ASSERT_LESS(i, Size(), ());
+ return m_indexes[i] == m_params.m_tokens.size();
+ }
+
+private:
+ SearchQueryParams const & m_params;
+ vector<size_t> m_indexes;
+};
+
+template <typename TSlice>
+NameScore GetNameScore(string const & name, TSlice const & slice)
+{
+ if (slice.Empty())
+ return NAME_SCORE_ZERO;
+
+ vector<strings::UniString> tokens;
+ SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
+ return GetNameScore(tokens, slice);
+}
+
+template <typename TSlice>
+NameScore GetNameScore(vector<strings::UniString> const & tokens, TSlice const & slice)
+{
+ if (slice.Empty())
+ return NAME_SCORE_ZERO;
+
+ size_t const n = tokens.size();
+ size_t const m = slice.Size();
+
+ bool const lastTokenIsPrefix = slice.IsPrefix(m - 1);
+
+ NameScore score = NAME_SCORE_ZERO;
+ for (int offset = 0; offset + m <= n; ++offset)
+ {
+ bool match = true;
+ for (int i = 0; i < m - 1 && match; ++i)
+ match = match && impl::Match(slice.Get(i), tokens[offset + i]);
+ if (!match)
+ continue;
-NameScore GetNameScore(vector<strings::UniString> const & tokens, SearchQueryParams const & params,
- size_t startToken, size_t endToken);
+ if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1]))
+ {
+ if (m == n)
+ return NAME_SCORE_FULL_MATCH;
+ score = max(score, NAME_SCORE_SUBSTRING);
+ }
+ if (lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1]))
+ {
+ if (m == n)
+ return NAME_SCORE_FULL_MATCH_PREFIX;
+ score = max(score, NAME_SCORE_SUBSTRING_PREFIX);
+ }
+ }
+ return score;
+}
string DebugPrint(NameScore score);
} // namespace v2