Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: tatiana-yan <tatiana.kondakova@gmail.com> 2019-04-23 16:23:46 +0300
committer: mpimenov <mpimenov@users.noreply.github.com> 2019-04-23 19:05:04 +0300
commit: ad0bca72b5f0adad982fffee1b5bf7e75d87f439 (patch)
tree: 869c9d372085f1c829e5746e52c6bcf522b94efd
parent: e77592b68e345c8cec11324be0c03c1b5a8df6bb (diff)
[search] Heuristics for rating.
-rw-r--r-- search/intermediate_result.hpp 1
-rw-r--r-- search/pre_ranker.cpp 7
-rw-r--r-- search/pre_ranking_info.cpp 9
-rw-r--r-- search/pre_ranking_info.hpp 11
-rw-r--r-- search/ranker.cpp 1
-rw-r--r-- search/ranking_info.cpp 25
-rw-r--r-- search/ranking_info.hpp 4
-rwxr-xr-x search/search_quality/scoring_model.py 2
8 files changed, 55 insertions, 5 deletions
diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp
index b8ff62c324..6a865354f1 100644
--- a/search/intermediate_result.hpp
+++ b/search/intermediate_result.hpp
@@ -51,6 +51,7 @@ public:
double GetDistance() const { return m_info.m_distanceToPivot; }
uint8_t GetRank() const { return m_info.m_rank; }
uint8_t GetPopularity() const { return m_info.m_popularity; }
+ std::pair<uint8_t, float> GetRating() const { return m_info.m_rating; }
PreRankingInfo & GetInfo() { return m_info; }
PreRankingInfo const & GetInfo() const { return m_info; }
std::vector<ResultTracer::Branch> const & GetProvenance() const { return m_provenance; }
diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp
index 711526d17c..5d6c16b309 100644
--- a/search/pre_ranker.cpp
+++ b/search/pre_ranker.cpp
@@ -5,6 +5,8 @@
#include "search/pre_ranking_info.hpp"
#include "search/tracer.hpp"
+#include "ugc/types.hpp"
+
#include "indexer/data_source.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/rank_table.hpp"
@@ -77,6 +79,7 @@ void PreRanker::FillMissingFieldsInPreResults()
MwmSet::MwmHandle mwmHandle;
unique_ptr<RankTable> ranks = make_unique<DummyRankTable>();
unique_ptr<RankTable> popularityRanks = make_unique<DummyRankTable>();
+ unique_ptr<RankTable> ratings = make_unique<DummyRankTable>();
unique_ptr<LazyCentersTable> centers;
bool pivotFeaturesInitialized = false;
@@ -94,16 +97,20 @@ void PreRanker::FillMissingFieldsInPreResults()
ranks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, SEARCH_RANKS_FILE_TAG);
popularityRanks = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont,
POPULARITY_RANKS_FILE_TAG);
+ ratings = RankTable::Load(mwmHandle.GetValue<MwmValue>()->m_cont, RATINGS_FILE_TAG);
centers = make_unique<LazyCentersTable>(*mwmHandle.GetValue<MwmValue>());
}
if (!ranks)
ranks = make_unique<DummyRankTable>();
if (!popularityRanks)
popularityRanks = make_unique<DummyRankTable>();
+ if (!ratings)
+ ratings = make_unique<DummyRankTable>();
}
info.m_rank = ranks->Get(id.m_index);
info.m_popularity = popularityRanks->Get(id.m_index);
+ info.m_rating = ugc::UGC::UnpackRating(ratings->Get(id.m_index));
m2::PointD center;
if (centers && centers->Get(id.m_index, center))
diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp
index 2b05f68d5c..c5a4afa668 100644
--- a/search/pre_ranking_info.cpp
+++ b/search/pre_ranking_info.cpp
@@ -8,17 +8,18 @@ std::string DebugPrint(PreRankingInfo const & info)
{
std::ostringstream os;
os << "PreRankingInfo [";
- os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
+ os << "m_distanceToPivot:" << info.m_distanceToPivot << ", ";
for (size_t i = 0; i < static_cast<size_t>(Model::TYPE_COUNT); ++i)
{
if (info.m_tokenRange[i].Empty())
continue;
auto const type = static_cast<Model::Type>(i);
- os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ",";
+ os << "m_tokenRange[" << DebugPrint(type) << "]:" << DebugPrint(info.m_tokenRange[i]) << ", ";
}
- os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
- os << "m_popularity:" << static_cast<int>(info.m_popularity) << ",";
+ os << "m_rank:" << static_cast<int>(info.m_rank) << ", ";
+ os << "m_popularity:" << static_cast<int>(info.m_popularity) << ", ";
+ os << "m_rating: [" << static_cast<int>(info.m_rating.first) << ", "<< info.m_rating.second << "], ";
os << "m_type:" << info.m_type;
os << "]";
return os.str();
diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp
index b19a76dd1d..8150bf0e36 100644
--- a/search/pre_ranking_info.hpp
+++ b/search/pre_ranking_info.hpp
@@ -12,6 +12,7 @@
#include <cstdint>
#include <string>
+#include <utility>
namespace search
{
@@ -59,6 +60,16 @@ struct PreRankingInfo
// Popularity rank of the feature.
uint8_t m_popularity = 0;
+ // Confidence and UGC rating.
+ // Confidence: 0 - no information
+ // 1 - based on few reviews
+ // 2 - based on average reviews number
+ // 3 - based on large number of reviews.
+ // Rating [4.0 ... 10.0]:
+ // 4.0 and lower represented as 4.0
+ // higher ratings saved as is from UGC.
+ std::pair<uint8_t, float> m_rating = {0, 0.0f};
+
// Search type for the feature.
Model::Type m_type = Model::TYPE_COUNT;
};
diff --git a/search/ranker.cpp b/search/ranker.cpp
index 8a1c3fada5..2e5bb0fd73 100644
--- a/search/ranker.cpp
+++ b/search/ranker.cpp
@@ -298,6 +298,7 @@ class RankerResultMaker
info.m_distanceToPivot = MercatorBounds::DistanceOnEarth(center, pivot);
info.m_rank = preInfo.m_rank;
info.m_popularity = preInfo.m_popularity;
+ info.m_rating = preInfo.m_rating;
info.m_type = preInfo.m_type;
info.m_allTokensUsed = preInfo.m_allTokensUsed;
info.m_categorialRequest = m_params.IsCategorialRequest();
diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp
index fdb4f675aa..a80cd70cb2 100644
--- a/search/ranking_info.cpp
+++ b/search/ranking_info.cpp
@@ -1,5 +1,7 @@
#include "search/ranking_info.hpp"
+#include "ugc/types.hpp"
+
#include <iomanip>
#include <limits>
#include <sstream>
@@ -16,10 +18,13 @@ double constexpr kDistanceToPivot = -1.0000000;
double constexpr kRank = 1.0000000;
// todo: (@t.yan) Adjust.
double constexpr kPopularity = 0.0500000;
+// todo: (@t.yan) Adjust.
+double constexpr kRating = 0.0500000;
double constexpr kFalseCats = -0.3691859;
double constexpr kErrorsMade = -0.0579812;
double constexpr kAllTokensUsed = 0.0000000;
double constexpr kHasName = 0.5;
+
double constexpr kNameScore[NameScore::NAME_SCORE_COUNT] = {
-0.7245815 /* Zero */,
0.1853727 /* Substring */,
@@ -48,6 +53,20 @@ double TransformDistance(double distance)
{
return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters;
}
+
+double TransformRating(pair<uint8_t, float> const & rating)
+{
+ double r = 0.0;
+ // From statistics.
+ double constexpr kAverageRating = 7.6;
+ if (rating.first != 0)
+ {
+ r = (static_cast<double>(rating.second) - kAverageRating) /
+ (ugc::UGC::kMaxRating - ugc::UGC::kRatingDetalizationThreshold);
+ r *= static_cast<double>(rating.first) / 3.0 /* maximal confidence */;
+ }
+ return r;
+}
} // namespace
// static
@@ -59,6 +78,7 @@ void RankingInfo::PrintCSVHeader(ostream & os)
os << "DistanceToPivot"
<< ",Rank"
<< ",Popularity"
+ << ",Rating"
<< ",NameScore"
<< ",ErrorsMade"
<< ",SearchType"
@@ -75,6 +95,8 @@ string DebugPrint(RankingInfo const & info)
os << "m_distanceToPivot:" << info.m_distanceToPivot;
os << ", m_rank:" << static_cast<int>(info.m_rank);
os << ", m_popularity:" << static_cast<int>(info.m_popularity);
+ os << ", m_rating:[" << static_cast<int>(info.m_rating.first) << ", " << info.m_rating.second
+ << "]";
os << ", m_nameScore:" << DebugPrint(info.m_nameScore);
os << ", m_errorsMade:" << DebugPrint(info.m_errorsMade);
os << ", m_type:" << DebugPrint(info.m_type);
@@ -93,6 +115,7 @@ void RankingInfo::ToCSV(ostream & os) const
os << m_distanceToPivot << ",";
os << static_cast<int>(m_rank) << ",";
os << static_cast<int>(m_popularity) << ",";
+ os << TransformRating(m_rating) << ",";
os << DebugPrint(m_nameScore) << ",";
os << GetErrorsMade() << ",";
os << DebugPrint(m_type) << ",";
@@ -112,6 +135,7 @@ double RankingInfo::GetLinearModelRank() const
double const distanceToPivot = TransformDistance(m_distanceToPivot);
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
double const popularity = static_cast<double>(m_popularity) / numeric_limits<uint8_t>::max();
+ double const rating = TransformRating(m_rating);
auto nameScore = m_nameScore;
if (m_pureCats || m_falseCats)
@@ -129,6 +153,7 @@ double RankingInfo::GetLinearModelRank() const
result += kDistanceToPivot * distanceToPivot;
result += kRank * rank;
result += kPopularity * popularity;
+ result += kRating * rating;
result += m_falseCats * kFalseCats;
if (!m_categorialRequest)
{
diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp
index 2b47b23632..2668ea0f45 100644
--- a/search/ranking_info.hpp
+++ b/search/ranking_info.hpp
@@ -8,6 +8,7 @@
#include <cstdint>
#include <ostream>
#include <string>
+#include <utility>
class FeatureType;
@@ -36,6 +37,9 @@ struct RankingInfo
// Popularity rank of the feature.
uint8_t m_popularity = 0;
+ // Confidence and UGC rating.
+ std::pair<uint8_t, float> m_rating = {0, 0.0f};
+
// Score for the feature's name.
NameScore m_nameScore = NAME_SCORE_ZERO;
diff --git a/search/search_quality/scoring_model.py b/search/search_quality/scoring_model.py
index ab010dfd64..20b77314d6 100755
--- a/search/search_quality/scoring_model.py
+++ b/search/search_quality/scoring_model.py
@@ -20,7 +20,7 @@ MAX_POPULARITY = 255
RELEVANCES = {'Harmful': -3, 'Irrelevant': 0, 'Relevant': 1, 'Vital': 3}
NAME_SCORES = ['Zero', 'Substring', 'Prefix', 'Full Match']
SEARCH_TYPES = ['POI', 'Building', 'Street', 'Unclassified', 'Village', 'City', 'State', 'Country']
-FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
+FEATURES = ['DistanceToPivot', 'Rank', 'Popularity', 'Rating', 'FalseCats', 'ErrorsMade', 'AllTokensUsed',
'CategorialRequest', 'HasName'] + NAME_SCORES + SEARCH_TYPES
BOOTSTRAP_ITERATIONS = 10000