Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Magidovich <mgsergio@mapswithme.com>, 2016-07-11 22:42:43 +0300
committer: Sergey Magidovich <mgsergio@mapswithme.com>, 2016-07-12 10:31:59 +0300
commit: 88d5775c230e5ac2caa06ad2266b893b300a2211 (patch)
tree: 27515e79567c3f1883df47cdf068d246a5724584 /generator
parent: 6f423fbac4dc92d0bbca7f4d2a7c34c600a59633 (diff)
Refactor. Add Matching by name.
Diffstat (limited to 'generator')
-rw-r--r-- generator/booking_dataset.cpp | 56
-rw-r--r-- generator/booking_dataset.hpp | 5
-rw-r--r-- generator/booking_quality_check/booking_quality_check.cpp | 9
-rw-r--r-- generator/booking_scoring.cpp | 97
-rw-r--r-- generator/booking_scoring.hpp | 21
-rw-r--r-- generator/osm_element.cpp | 14
-rw-r--r-- generator/osm_element.hpp | 3
7 files changed, 142 insertions, 63 deletions
diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp
index 8239f68344..fec4af1689 100644
--- a/generator/booking_dataset.cpp
+++ b/generator/booking_dataset.cpp
@@ -1,11 +1,11 @@
#include "generator/booking_dataset.hpp"
+#include "generator/booking_scoring.hpp"
+
#include "platform/local_country_file_utils.hpp"
#include "platform/platform.hpp"
#include "indexer/ftypes_matcher.hpp"
-#include "indexer/search_delimiters.hpp"
-#include "indexer/search_string_utils.hpp"
#include "geometry/distance_on_sphere.hpp"
@@ -171,45 +171,6 @@ vector<size_t> BookingDataset::GetNearestHotels(double lat, double lon, size_t l
return indexes;
}
-bool BookingDataset::MatchByName(string const & osmName,
- vector<size_t> const & bookingIndexes) const
-{
- return false;
-
- // Match name.
- // vector<strings::UniString> osmTokens;
- // NormalizeAndTokenizeString(name, osmTokens, search::Delimiters());
- //
- // cout << "\n------------- " << name << endl;
- //
- // bool matched = false;
- // for (auto const & index : indexes)
- // {
- // vector<strings::UniString> bookingTokens;
- // NormalizeAndTokenizeString(m_hotels[index].name, bookingTokens, search::Delimiters());
- //
- // map<size_t, vector<pair<size_t, size_t>>> weightPair;
- //
- // for (size_t j = 0; j < osmTokens.size(); ++j)
- // {
- // for (size_t i = 0; i < bookingTokens.size(); ++i)
- // {
- // size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(),
- // bookingTokens[i].begin(),
- // bookingTokens[i].end());
- // if (distance < 3)
- // weightPair[distance].emplace_back(i, j);
- // }
- // }
- //
- // if (!weightPair.empty())
- // {
- // cout << m_hotels[e.second] << endl;
- // matched = true;
- // }
- // }
-}
-
void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) const
{
for (auto const & hotel : m_hotels)
@@ -302,13 +263,6 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
}
}
-// static
-double BookingDataset::ScoreByLinearNormDistance(double distance)
-{
- distance = my::clamp(distance, 0, kDistanceLimitInMeters);
- return 1.0 - distance / kDistanceLimitInMeters;
-}
-
void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
{
m_hotels.clear();
@@ -374,11 +328,7 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
for (size_t const j : bookingIndexes)
{
- auto const & hotel = GetHotel(j);
- double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
- double score = ScoreByLinearNormDistance(distanceMeters);
- matched = score > kOptimalThreshold;
- if (matched)
+ if (booking_scoring::Match(GetHotel(j), e).IsMatched())
break;
}
diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp
index f56bba4d17..c37859cca6 100644
--- a/generator/booking_dataset.hpp
+++ b/generator/booking_dataset.hpp
@@ -22,9 +22,6 @@ public:
double static constexpr kDistanceLimitInMeters = 150;
size_t static constexpr kMaxSelectedElements = 3;
- // Calculated with tools/python/booking_hotels_quality.py
- double static constexpr kOptimalThreshold = 0.709283;
-
struct Hotel
{
enum class Fields
@@ -92,8 +89,6 @@ public:
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
- static double ScoreByLinearNormDistance(double distance);
-
protected:
vector<Hotel> m_hotels;
diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp
index 6602687ec4..0331a7f9d3 100644
--- a/generator/booking_quality_check/booking_quality_check.cpp
+++ b/generator/booking_quality_check/booking_quality_check.cpp
@@ -1,4 +1,5 @@
#include "generator/booking_dataset.hpp"
+#include "generator/booking_scoring.hpp"
#include "generator/osm_source.hpp"
#include "geometry/distance_on_sphere.hpp"
@@ -73,15 +74,15 @@ int main(int argc, char * argv[])
for (size_t const j : bookingIndexes)
{
auto const & hotel = bookingDataset.GetHotel(j);
- double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
- double const score = BookingDataset::ScoreByLinearNormDistance(distanceMeters);
+ auto const score = booking_scoring::Match(hotel, e);
- bool matched = score > BookingDataset::kOptimalThreshold;
+ double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
+ bool matched = score.IsMatched();
outStream << "# ------------------------------------------" << fixed << setprecision(6)
<< endl;
outStream << (matched ? 'y' : 'n') << " \t" << i << "\t " << j
- << " distance: " << distanceMeters << " score: " << score << endl;
+ << " distance: " << distanceMeters << " score: " << score.GetMatchingScore() << endl;
outStream << "# " << e << endl;
outStream << "# " << hotel << endl;
outStream << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat
diff --git a/generator/booking_scoring.cpp b/generator/booking_scoring.cpp
new file mode 100644
index 0000000000..ff2aaec177
--- /dev/null
+++ b/generator/booking_scoring.cpp
@@ -0,0 +1,97 @@
+#include "generator/booking_scoring.hpp"
+
+#include "generator/booking_dataset.hpp"
+
+#include "indexer/search_string_utils.hpp"
+#include "indexer/search_delimiters.hpp"
+
+#include "geometry/distance_on_sphere.hpp"
+
+#include "base/collection_cast.hpp"
+
+namespace generator
+{
+namespace booking_scoring
+{
+namespace
+{
+// Calculated with tools/python/booking_hotels_quality.py.
+double constexpr kOptimalThreshold = 0.151001;
+
+template <typename T, typename U>
+struct decay_equiv :
+ std::is_same<typename std::decay<T>::type, U>::type
+{};
+
+set<strings::UniString> StringToSetOfWords(string const & str)
+{
+ vector<strings::UniString> result;
+ search::NormalizeAndTokenizeString(str, result, search::Delimiters{});
+ return my::collection_cast<set>(result);
+}
+
+// TODO(mgsergio): Update existing one in base or wherever...
+// Or just use one from boost.
+struct CounterIterator
+{
+ template<typename T, typename = typename enable_if<!decay_equiv<T, CounterIterator>::value>::type>
+ CounterIterator & operator=(T const &) { ++m_count; return *this; }
+ CounterIterator & operator++() { return *this; }
+ CounterIterator & operator++(int) { return *this; }
+ CounterIterator & operator*() { return *this; }
+ uint32_t Count() const { return m_count; }
+
+ uint32_t m_count = 0;
+};
+
+double StringSimilarityScore(string const & a, string const & b)
+{
+ auto const aWords = StringToSetOfWords(a);
+ auto const bWords = StringToSetOfWords(b);
+
+ auto const intersectionCard = set_intersection(begin(aWords), end(aWords),
+ begin(bWords), end(bWords),
+ CounterIterator()).Count();
+ auto const aLikeBScore = static_cast<double>(intersectionCard) / aWords.size();
+ auto const bLikeAScore = static_cast<double>(intersectionCard) / bWords.size();
+
+ return aLikeBScore * bLikeAScore;
+}
+
+double GetLinearNormDistanceScrore(double distance)
+{
+ distance = my::clamp(distance, 0, BookingDataset::kDistanceLimitInMeters);
+ return 1.0 - distance / BookingDataset::kDistanceLimitInMeters;
+}
+
+double GetNameSimilarityScore(string const & booking_name, string const & osm_name)
+{
+ return StringSimilarityScore(booking_name, osm_name);
+}
+} // namespace
+
+double BookingMatchScore::GetMatchingScore() const
+{
+ return m_linearNormDistanceScore * m_nameSimilarityScore;
+}
+
+bool BookingMatchScore::IsMatched() const
+{
+ return GetMatchingScore() > kOptimalThreshold;
+}
+
+BookingMatchScore Match(BookingDataset::Hotel const & h, OsmElement const & e)
+{
+ BookingMatchScore score;
+
+ auto const distance = ms::DistanceOnEarth(e.lat, e.lon, h.lat, h.lon);
+ score.m_linearNormDistanceScore = GetLinearNormDistanceScrore(distance);
+
+ string osmHotelName;
+ score.m_nameSimilarityScore = e.GetTag("name", osmHotelName)
+ ? GetNameSimilarityScore(h.name, osmHotelName) : 0;
+
+ return score;
+}
+} // namespace booking_scoring
+} // namespace generator
diff --git a/generator/booking_scoring.hpp b/generator/booking_scoring.hpp
new file mode 100644
index 0000000000..d92482cf35
--- /dev/null
+++ b/generator/booking_scoring.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "generator/booking_dataset.hpp"
+#include "generator/osm_element.hpp"
+
+namespace generator
+{
+namespace booking_scoring
+{
+struct BookingMatchScore
+{
+ double GetMatchingScore() const;
+ bool IsMatched() const;
+
+ double m_linearNormDistanceScore{};
+ double m_nameSimilarityScore{};
+};
+
+BookingMatchScore Match(BookingDataset::Hotel const & h, OsmElement const & e);
+} // namespace booking_scoring
+} // namespace generator
diff --git a/generator/osm_element.cpp b/generator/osm_element.cpp
index 1c65dd599d..8ae3340132 100644
--- a/generator/osm_element.cpp
+++ b/generator/osm_element.cpp
@@ -121,6 +121,20 @@ string OsmElement::ToString(string const & shift) const
return ss.str();
}
+bool OsmElement::GetTag(string const & key, string & value) const
+{
+ auto const it = find_if(begin(m_tags), end(m_tags), [&key](Tag const & tag)
+ {
+ return tag.key == key;
+ });
+
+ if (it == end(m_tags))
+ return false;
+
+ value = it->value;
+ return true;
+}
+
string DebugPrint(OsmElement const & e)
{
return e.ToString();
diff --git a/generator/osm_element.hpp b/generator/osm_element.hpp
index fc1187c6a7..c473f7d175 100644
--- a/generator/osm_element.hpp
+++ b/generator/osm_element.hpp
@@ -152,7 +152,8 @@ struct OsmElement
if (!v.empty())
AddTag(k, v);
}
+
+ bool GetTag(string const & key, string & value) const;
};
string DebugPrint(OsmElement const & e);
-