Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Yershov <yershov@corp.mail.ru>, 2016-06-21 12:12:34 +0300
committer: Sergey Yershov <yershov@corp.mail.ru>, 2016-06-30 14:47:46 +0300
commit: cfe43e4cf63397c34cca2b60b6b7cbc6d78645b6 (patch)
tree: 3e14469a66e0b9a292ec5e1481b3e0783883c2b8 /generator
parent: a9d57e8e8aaaba02aebcf9e749e6a653f4ab4442 (diff)
Add booking addresses via geocoder
Diffstat (limited to 'generator')
-rw-r--r-- generator/booking_dataset.cpp 102
-rw-r--r-- generator/booking_dataset.hpp 8
-rw-r--r-- generator/booking_quality_check/booking_addr_match.cpp 135
-rw-r--r-- generator/generate_info.hpp 1
-rw-r--r-- generator/generator_tests/booking_test.cpp 35
-rw-r--r-- generator/generator_tests/generator_tests.pro 9
-rw-r--r-- generator/generator_tool/generator_tool.cpp 2
-rw-r--r-- generator/generator_tool/generator_tool.pro 18
-rw-r--r-- generator/osm_source.cpp 2
9 files changed, 288 insertions, 24 deletions
diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp
index b6328d10e2..899c0fc71b 100644
--- a/generator/booking_dataset.cpp
+++ b/generator/booking_dataset.cpp
@@ -1,5 +1,9 @@
#include "generator/booking_dataset.hpp"
+#include "map/framework.hpp"
+
+#include "platform/platform.hpp"
+
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
@@ -26,6 +30,34 @@ bool CheckForValues(string const & value)
}
return false;
}
+
+// Splits |s| on |delim| and appends the fields to |parts|. Unlike strings::Tokenize, this function allows for empty tokens.
+void Split(string const & s, char delim, vector<string> & parts)
+{
+ stringstream ss;
+ // |parts| is only appended to; elements it already holds are kept.
+ // Workaround for empty last field: getline() yields nothing after a trailing delimiter, so one more is added.
+ ss << s;
+ if (!s.empty() && s.back() == delim)
+ ss << delim;
+ // Each getline() call extracts one field; two adjacent delimiters produce an empty field.
+ string part;
+ while (getline(ss, part, delim))
+ parts.emplace_back(part);
+}
+
+string TabbedString(string const & str)  // Returns a copy of |str| in which every tab character is replaced by a backslash followed by 't'.
+{
+ stringstream ss;
+ for (char c : str)
+ {
+ if (c == '\t')
+ ss << "\\t";  // Two visible characters, not a real tab.
+ else
+ ss << c;  // Every other character is copied unchanged.
+ }
+ return ss.str();
+}
} // namespace
BookingDataset::Hotel::Hotel(string const & src)
@@ -55,21 +87,29 @@ BookingDataset::Hotel::Hotel(string const & src)
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
{
+ s << fixed << setprecision(7);
return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat
<< " lon: " << h.lon;
}
-BookingDataset::BookingDataset(string const & dataPath)
+BookingDataset::BookingDataset(string const & dataPath, string const & addressReferencePath)
{
- LoadHotels(dataPath);
+ if (dataPath.empty())
+ return;
- size_t counter = 0;
- for (auto const & hotel : m_hotels)
+ ifstream dataSource(dataPath);
+ if (!dataSource.is_open())
{
- TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
- m_rtree.insert(std::make_pair(b, counter));
- ++counter;
+ LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
+ return;
}
+
+ LoadHotels(dataSource, addressReferencePath);
+}
+
+BookingDataset::BookingDataset(istream & dataSource, string const & addressReferencePath)
+{
+ LoadHotels(dataSource, addressReferencePath);
}
bool BookingDataset::BookingFilter(OsmElement const & e) const
@@ -184,6 +224,12 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
}
}
+ if (!hotel.street.empty())
+ e.AddTag("addr:street", hotel.street);
+
+ if(!hotel.houseNumber.empty())
+ e.AddTag("addr:housenumber", hotel.houseNumber);
+
switch (hotel.type)
{
case 19:
@@ -244,22 +290,46 @@ double BookingDataset::ScoreByLinearNormDistance(double distance)
return 1.0 - distance / kDistanceLimitInMeters;
}
-void BookingDataset::LoadHotels(string const & path)
+void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath)
{
m_hotels.clear();
- if (path.empty())
- return;
+ for (string line; getline(src, line);)
+ m_hotels.emplace_back(line);
- ifstream src(path);
- if (!src.is_open())
+ if(!addressReferencePath.empty())
{
- LOG(LERROR, ("Error while opening", path, ":", strerror(errno)));
- return;
+ LOG(LINFO, ("Match addresses for booking objects",addressReferencePath));
+ Platform & platform = GetPlatform();
+ string backupPath = platform.WritableDir();
+ platform.SetWritableDirForTests(addressReferencePath);
+
+ Framework f;
+
+ size_t matchedNum = 0;
+ size_t emptyAddr = 0;
+ for (Hotel & hotel : m_hotels)
+ {
+ search::AddressInfo info = f.GetAddressInfoAtPoint(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
+ hotel.street = info.m_street;
+ hotel.houseNumber = info.m_house;
+
+ if (hotel.address.empty())
+ ++emptyAddr;
+ if(!info.FormatAddress().empty())
+ ++matchedNum;
+ }
+ LOG(LINFO, ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "Empty addresses:", emptyAddr));
+ platform.SetWritableDirForTests(backupPath);
}
- for (string line; getline(src, line);)
- m_hotels.emplace_back(line);
+ size_t counter = 0;
+ for (auto const & hotel : m_hotels)
+ {
+ TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon));
+ m_rtree.insert(std::make_pair(b, counter));
+ ++counter;
+ }
}
bool BookingDataset::MatchWithBooking(OsmElement const & e) const
diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp
index cac294db30..09956be10d 100644
--- a/generator/booking_dataset.hpp
+++ b/generator/booking_dataset.hpp
@@ -46,6 +46,8 @@ public:
double lon = 0.0;
string name;
string address;
+ string street;
+ string houseNumber;
uint32_t stars = 0;
uint32_t priceCategory = 0;
double ratingBooking = 0.0;
@@ -59,11 +61,13 @@ public:
explicit Hotel(string const & src);
};
- explicit BookingDataset(string const & dataPath);
+ explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string());
+ explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string());
bool BookingFilter(OsmElement const & e) const;
bool TourismFilter(OsmElement const & e) const;
+ inline size_t Size() const { return m_hotels.size(); }
Hotel const & GetHotel(size_t index) const;
vector<size_t> GetNearestHotels(double lat, double lon, size_t limit,
double maxDistance = 0.0) const;
@@ -83,7 +87,7 @@ protected:
boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree;
- void LoadHotels(string const & path);
+ void LoadHotels(istream & path, string const & addressReferencePath);
bool MatchWithBooking(OsmElement const & e) const;
bool Filter(OsmElement const & e, function<bool(OsmElement const &)> const & fn) const;
};
diff --git a/generator/booking_quality_check/booking_addr_match.cpp b/generator/booking_quality_check/booking_addr_match.cpp
new file mode 100644
index 0000000000..9d063be0bb
--- /dev/null
+++ b/generator/booking_quality_check/booking_addr_match.cpp
@@ -0,0 +1,135 @@
+#include "generator/booking_dataset.hpp"
+#include "generator/osm_source.hpp"
+
+#include "map/framework.hpp"
+
+#include "indexer/classificator_loader.hpp"
+#include "indexer/data_header.hpp"
+#include "indexer/index.hpp"
+#include "indexer/mwm_set.hpp"
+
+#include "platform/country_file.hpp"
+#include "platform/local_country_file.hpp"
+#include "platform/local_country_file_utils.hpp"
+#include "platform/platform.hpp"
+
+#include "geometry/distance_on_sphere.hpp"
+
+#include "search/processor_factory.hpp"
+#include "search/ranking_info.hpp"
+#include "search/result.hpp"
+#include "search/search_quality/helpers.hpp"
+#include "search/search_tests_support/test_search_engine.hpp"
+#include "search/search_tests_support/test_search_request.hpp"
+
+#include "storage/country_info_getter.hpp"
+#include "storage/index.hpp"
+#include "storage/storage.hpp"
+
+#include "coding/reader_wrapper.hpp"
+
+#include "std/fstream.hpp"
+#include "std/iostream.hpp"
+#include "std/numeric.hpp"
+#include "std/shared_ptr.hpp"
+
+#include "3party/gflags/src/gflags/gflags.h"
+
+using namespace generator;
+using namespace storage;
+using namespace search;
+using namespace search::tests_support;
+
+void DidDownload(TCountryId const & /* countryId */,  // Does nothing; both arguments are ignored.
+ shared_ptr<platform::LocalCountryFile> const & /* localFile */)
+{  // NOTE(review): not referenced in the visible part of this file; presumably meant as a storage callback - confirm.
+}
+
+bool WillDelete(TCountryId const & /* countryId */,  // Ignores both arguments.
+ shared_ptr<platform::LocalCountryFile> const & /* localFile */)
+{  // NOTE(review): not referenced in the visible part of this file; presumably meant as a storage callback - confirm.
+ return false;  // Always false, whatever the arguments are.
+}
+
+uint64_t ReadVersionFromHeader(platform::LocalCountryFile const & mwm)  // Returns the version of |mwm|.
+{
+ vector<string> specialFiles = {  // For these names the version comes from |mwm| itself, not from the file contents.
+ WORLD_FILE_NAME,
+ WORLD_COASTS_FILE_NAME,
+ WORLD_COASTS_OBSOLETE_FILE_NAME
+ };
+ for (auto const & name : specialFiles)
+ {
+ if (mwm.GetCountryName() == name)
+ return mwm.GetVersion();
+ }
+ // For any other name the version is read from the VERSION_FILE_TAG section of the map file.
+ ModelReaderPtr reader = FilesContainerR(mwm.GetPath(MapOptions::Map)).GetReader(VERSION_FILE_TAG);
+ ReaderSrc src(reader.GetPtr());
+ // NOTE(review): this function is not called in the visible part of this file.
+ version::MwmVersion version;
+ version::ReadVersion(src, version);
+ return version.GetVersion();
+}
+
+DEFINE_string(booking_data, "", "Path to booking data in .tsv format");
+DEFINE_string(user_resource_path, "", "Path to data directory (resources dir)");
+DEFINE_string(data_path, "", "Path to mwm files (writable dir)");
+DEFINE_string(locale, "en", "Locale of all the search queries");
+DEFINE_int32(num_threads, 1, "Number of search engine threads");
+
+
+// Loads the booking dataset named by --booking_data, asks Framework for the address at each
+// hotel's coordinates and prints every hotel for which a non-empty address was found.
+int main(int argc, char * argv[])
+{
+ google::SetUsageMessage("Takes booking data in .tsv format and prints, for every hotel,"
+ " the address found at its coordinates in the mwm files.");
+ google::ParseCommandLineFlags(&argc, &argv, true);
+
+ Platform & platform = GetPlatform();
+
+ if (!FLAGS_user_resource_path.empty())
+ platform.SetResourceDir(FLAGS_user_resource_path);
+
+ // --data_path overrides both the settings directory and the writable directory.
+ if (!FLAGS_data_path.empty())
+ {
+ platform.SetSettingsDirForTests(FLAGS_data_path);
+ platform.SetWritableDirForTests(FLAGS_data_path);
+ }
+
+ LOG(LINFO, ("writable dir =", platform.WritableDir()));
+ LOG(LINFO, ("resources dir =", platform.ResourcesDir()));
+
+ LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking_data));
+
+ BookingDataset bookingDataset(FLAGS_booking_data);
+
+
+ Framework f;
+
+ // Counters for hotels with a found address and hotels whose dataset address is empty.
+ size_t matchedNum = 0;
+ size_t emptyAddr = 0;
+ for (size_t i = 0; i < bookingDataset.Size(); ++i)
+ {
+ BookingDataset::Hotel const & hotel = bookingDataset.GetHotel(i);
+
+ search::AddressInfo info = f.GetAddressInfoAtPoint(MercatorBounds::FromLatLon(hotel.lat, hotel.lon));
+
+ if (hotel.address.empty())
+ ++emptyAddr;
+
+ if(!info.FormatAddress().empty())
+ {
+ ++matchedNum;
+ cout << "[" << i << "/" << bookingDataset.Size() << "] Hotel: " << hotel.address
+ << " AddLoc: " << hotel.addressLoc << " --> " << info.FormatAddress() << endl;
+ }
+ }
+
+ cout << "Num of hotels: " << bookingDataset.Size() << " matched: " << matchedNum << " Empty addresses: " << emptyAddr << endl;
+
+ return 0;
+}
diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp
index 6acca8fa80..94198d93f0 100644
--- a/generator/generate_info.hpp
+++ b/generator/generate_info.hpp
@@ -43,6 +43,7 @@ struct GenerateInfo
string m_osmFileName;
string m_bookingDatafileName;
+ string m_bookingReferenceDir;
uint32_t m_versionDate = 0;
diff --git a/generator/generator_tests/booking_test.cpp b/generator/generator_tests/booking_test.cpp
new file mode 100644
index 0000000000..f7ed68feed
--- /dev/null
+++ b/generator/generator_tests/booking_test.cpp
@@ -0,0 +1,35 @@
+#include "testing/testing.hpp"
+
+#include "generator/booking_dataset.hpp"
+
+UNIT_TEST(BookingDataset_SmokeTest)  // A dataset built from an empty stream must contain no hotels.
+{
+ stringstream ss;
+ generator::BookingDataset data(ss);
+ TEST_EQUAL(data.Size(), 0, ());
+}
+
+UNIT_TEST(BookingDataset_ParseTest)  // One tab-separated record without a trailing newline must load as exactly one hotel.
+{
+ stringstream ss("1485988\t36.75818960879561\t3.053177244180233\tAppartement Alger Centre\t50 Avenue Ahmed Ghermoul\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/appartement-alger-centre-alger.html\t201\t\t\t");
+ generator::BookingDataset data(ss);
+ TEST_EQUAL(data.Size(), 1, ());
+}
+
+UNIT_TEST(BookingDataset_ParseTest2)  // Ten newline-terminated records must load as ten hotels.
+{
+ stringstream ss(
+ "1485988\t36.75818960879561\t3.053177244180233\tAppartement Alger Centre\t50 Avenue Ahmed Ghermoul\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/appartement-alger-centre-alger.html\t201\t\t\t\n"
+ "357811\t34.86737239675703\t-1.31686270236969\tRenaissance Tlemcen Hotel\tRenaissance Tlemcen Hotel\t5\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/renaissance-tlemcen.html\t204\t\t\t\n" \
+ "1500820\t36.72847621708523\t3.0645270245369147\tMazghana Apartment\tCite Garidi 1 Tours 3 N 53, Kouba\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/mazghana-apartment.html\t201\t\t\t\n" \
+ "1318991\t35.692865978372666\t-0.6278949570083796\tBest Western Hotel Colombe\t6 Bd Zabour Larbi Hai Khaldia Delmonte\t4\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/best-western-colombe.html\t204\t\t\t\n" \
+ "1495828\t36.33835943\t6.626214981\tConstantine Marriott Hotel\tOued Rhumel Street, Cites des Arcades Romaines,\t5\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/constantine-marriott.html\t204\t\t\t\n" \
+ "1411999\t35.73994643933386\t-0.757756233215332\tResidence Nadra\tBoulevard de la plage, Niche 1236 Paradis plage\t0\t1\tNone\tNone\thttp://www.booking.com/hotel/dz/residence-nadra.html\t201\t\t\t\n" \
+ "1497769\t36.80667121575615\t3.231203541069817\tApartment La Pérouse\tLa Pérouse Ain Taya\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/apartment-la-perouse.html\t220\t\t\t\n" \
+ "1668244\t36.715150622433804\t2.8442734479904175\tAZ Hotel Zeralda\t09 Rue de Mahelma - Zeralda - ALGER\t4\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/el-aziz-zeralda.html\t204\t\t\t\n" \
+ "1486823\t36.73432645678891\t3.0335435271263123\tGuest House Marhaba\tResidence Soumam - Bloc B - Appt 17- Said Hamdine\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/marhaba.html\t208\t\t\t\n" \
+ "1759799\t35.73832476589291\t-0.7553583383560181\tHotel la brise\tAngle boulevard de la plage et route nationale niche 1159 paradis plage\t2\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/la-brise.html\t204\t\t\t\n"
+ );
+ generator::BookingDataset data(ss);
+ TEST_EQUAL(data.Size(), 10, ());
+} \ No newline at end of file
diff --git a/generator/generator_tests/generator_tests.pro b/generator/generator_tests/generator_tests.pro
index c55a55bf38..f3b4141758 100644
--- a/generator/generator_tests/generator_tests.pro
+++ b/generator/generator_tests/generator_tests.pro
@@ -4,8 +4,13 @@ CONFIG -= app_bundle
TEMPLATE = app
ROOT_DIR = ../..
-DEPENDENCIES = generator map routing indexer platform geometry coding base \
- expat tess2 protobuf tomcrypt osrm succinct
+#DEPENDENCIES = generator map routing indexer platform geometry coding base \
+# expat tess2 protobuf tomcrypt osrm succinct
+
+DEPENDENCIES = drape_frontend routing search storage indexer drape map platform editor geometry \
+ coding base freetype expat fribidi tomcrypt jansson protobuf osrm stats_client \
+ minizip succinct pugixml tess2 gflags oauthcpp generator
+
include($$ROOT_DIR/common.pri)
diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp
index 9c884917a4..0aff559f45 100644
--- a/generator/generator_tool/generator_tool.cpp
+++ b/generator/generator_tool/generator_tool.cpp
@@ -68,6 +68,7 @@ DEFINE_string(osm_file_name, "", "Input osm area file");
DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m]");
DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc.");
DEFINE_string(booking_data, "", "Path to booking data in .tsv format");
+DEFINE_string(booking_reference_path, "", "Path to mwm dataset for match booking addresses");
DEFINE_uint64(planet_version, my::SecondsSinceEpoch(), "Version as seconds since epoch, by default - now");
int main(int argc, char ** argv)
@@ -102,6 +103,7 @@ int main(int argc, char ** argv)
genInfo.m_failOnCoasts = FLAGS_fail_on_coasts;
genInfo.m_preloadCache = FLAGS_preload_cache;
genInfo.m_bookingDatafileName = FLAGS_booking_data;
+ genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path;
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);
diff --git a/generator/generator_tool/generator_tool.pro b/generator/generator_tool/generator_tool.pro
index b0a0c3c6b5..9e332c77e3 100644
--- a/generator/generator_tool/generator_tool.pro
+++ b/generator/generator_tool/generator_tool.pro
@@ -1,9 +1,17 @@
# Generator binary
ROOT_DIR = ../..
-DEPENDENCIES = generator search routing storage indexer platform editor geometry coding base \
- osrm gflags expat tess2 jansson protobuf tomcrypt \
- succinct stats_client pugixml
+#DEPENDENCIES = generator map search routing storage indexer platform editor geometry coding base \
+# osrm gflags expat tess2 jansson protobuf tomcrypt \
+# succinct stats_client pugixml
+
+#DEPENDENCIES = generator drape map drape_frontend routing search storage indexer platform editor geometry \
+# coding base freetype gflags expat tess2 fribidi tomcrypt jansson protobuf osrm stats_client \
+# minizip succinct pugixml oauthcpp
+
+DEPENDENCIES = drape_frontend routing search storage indexer drape map platform editor geometry \
+ coding base freetype expat fribidi tomcrypt jansson protobuf osrm stats_client \
+ minizip succinct pugixml tess2 gflags oauthcpp generator
include($$ROOT_DIR/common.pri)
@@ -16,6 +24,10 @@ TEMPLATE = app
# needed for Platform::WorkingDir() and unicode combining
QT *= core
+macx-* {
+ LIBS *= "-framework IOKit" "-framework SystemConfiguration"
+}
+
SOURCES += \
generator_tool.cpp \
diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp
index 3992b9016f..2ef84e5dc8 100644
--- a/generator/osm_source.cpp
+++ b/generator/osm_source.cpp
@@ -515,7 +515,7 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info)
TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE);
// If info.m_bookingDatafileName is empty then no data will be loaded.
- generator::BookingDataset bookingDataset(info.m_bookingDatafileName);
+ generator::BookingDataset bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir);
stringstream skippedElements;