diff options
author | Sergey Yershov <yershov@corp.mail.ru> | 2016-06-21 12:12:34 +0300 |
---|---|---|
committer | Sergey Yershov <yershov@corp.mail.ru> | 2016-06-30 14:47:46 +0300 |
commit | cfe43e4cf63397c34cca2b60b6b7cbc6d78645b6 (patch) | |
tree | 3e14469a66e0b9a292ec5e1481b3e0783883c2b8 /generator | |
parent | a9d57e8e8aaaba02aebcf9e749e6a653f4ab4442 (diff) |
Add booking addresses via geocoder
Diffstat (limited to 'generator')
-rw-r--r-- | generator/booking_dataset.cpp | 102 | ||||
-rw-r--r-- | generator/booking_dataset.hpp | 8 | ||||
-rw-r--r-- | generator/booking_quality_check/booking_addr_match.cpp | 135 | ||||
-rw-r--r-- | generator/generate_info.hpp | 1 | ||||
-rw-r--r-- | generator/generator_tests/booking_test.cpp | 35 | ||||
-rw-r--r-- | generator/generator_tests/generator_tests.pro | 9 | ||||
-rw-r--r-- | generator/generator_tool/generator_tool.cpp | 2 | ||||
-rw-r--r-- | generator/generator_tool/generator_tool.pro | 18 | ||||
-rw-r--r-- | generator/osm_source.cpp | 2 |
9 files changed, 288 insertions, 24 deletions
diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index b6328d10e2..899c0fc71b 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -1,5 +1,9 @@ #include "generator/booking_dataset.hpp" +#include "map/framework.hpp" + +#include "platform/platform.hpp" + #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -26,6 +30,34 @@ bool CheckForValues(string const & value) } return false; } + +// Unlike strings::Tokenize, this function allows for empty tokens. +void Split(string const & s, char delim, vector<string> & parts) +{ + stringstream ss; + + // Workaround for empty last field. + ss << s; + if (!s.empty() && s.back() == delim) + ss << delim; + + string part; + while (getline(ss, part, delim)) + parts.emplace_back(part); +} + +string TabbedString(string const & str) +{ + stringstream ss; + for (char c : str) + { + if (c == '\t') + ss << "\\t"; + else + ss << c; + } + return ss.str(); +} } // namespace BookingDataset::Hotel::Hotel(string const & src) @@ -55,21 +87,29 @@ BookingDataset::Hotel::Hotel(string const & src) ostream & operator<<(ostream & s, BookingDataset::Hotel const & h) { + s << fixed << setprecision(7); return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat << " lon: " << h.lon; } -BookingDataset::BookingDataset(string const & dataPath) +BookingDataset::BookingDataset(string const & dataPath, string const & addressReferencePath) { - LoadHotels(dataPath); + if (dataPath.empty()) + return; - size_t counter = 0; - for (auto const & hotel : m_hotels) + ifstream dataSource(dataPath); + if (!dataSource.is_open()) { - TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon)); - m_rtree.insert(std::make_pair(b, counter)); - ++counter; + LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno))); + return; } + + LoadHotels(dataSource, addressReferencePath); +} + +BookingDataset::BookingDataset(istream & dataSource, string const & addressReferencePath) +{ + LoadHotels(dataSource, addressReferencePath); } bool BookingDataset::BookingFilter(OsmElement const & e) const @@ -184,6 +224,12 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons } } + if (!hotel.street.empty()) + e.AddTag("addr:street", hotel.street); + + if(!hotel.houseNumber.empty()) + e.AddTag("addr:housenumber", hotel.houseNumber); + switch (hotel.type) { case 19: @@ -244,22 +290,46 @@ double BookingDataset::ScoreByLinearNormDistance(double distance) return 1.0 - distance / kDistanceLimitInMeters; } -void BookingDataset::LoadHotels(string const & path) +void BookingDataset::LoadHotels(istream & src, string const & addressReferencePath) { m_hotels.clear(); - if (path.empty()) - return; + for (string line; getline(src, line);) + m_hotels.emplace_back(line); - ifstream src(path); - if (!src.is_open()) + if(!addressReferencePath.empty()) { - LOG(LERROR, ("Error while opening", path, ":", strerror(errno))); - return; + LOG(LINFO, ("Match addresses for booking objects",addressReferencePath)); + Platform & platform = GetPlatform(); + string backupPath = platform.WritableDir(); + platform.SetWritableDirForTests(addressReferencePath); + + Framework f; + + size_t matchedNum = 0; + size_t emptyAddr = 0; + for (Hotel & hotel : m_hotels) + { + search::AddressInfo info = f.GetAddressInfoAtPoint(MercatorBounds::FromLatLon(hotel.lat, hotel.lon)); + hotel.street = info.m_street; + hotel.houseNumber = info.m_house; + + if (hotel.address.empty()) + ++emptyAddr; + if(!info.FormatAddress().empty()) + ++matchedNum; + } + LOG(LINFO, ("Num of hotels:", m_hotels.size(), "matched:", matchedNum, "Empty addresses:", emptyAddr)); + platform.SetWritableDirForTests(backupPath); } - for (string line; getline(src, line);) - m_hotels.emplace_back(line); + size_t counter = 0; + for (auto const & hotel : m_hotels) + { + TBox b(TPoint(hotel.lat, hotel.lon), TPoint(hotel.lat, hotel.lon)); + m_rtree.insert(std::make_pair(b, counter)); + ++counter; + } } bool BookingDataset::MatchWithBooking(OsmElement const & e) const diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index cac294db30..09956be10d 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -46,6 +46,8 @@ public: double lon = 0.0; string name; string address; + string street; + string houseNumber; uint32_t stars = 0; uint32_t priceCategory = 0; double ratingBooking = 0.0; @@ -59,11 +61,13 @@ public: explicit Hotel(string const & src); }; - explicit BookingDataset(string const & dataPath); + explicit BookingDataset(string const & dataPath, string const & addressReferencePath = string()); + explicit BookingDataset(istream & dataSource, string const & addressReferencePath = string()); bool BookingFilter(OsmElement const & e) const; bool TourismFilter(OsmElement const & e) const; + inline size_t Size() const { return m_hotels.size(); } Hotel const & GetHotel(size_t index) const; vector<size_t> GetNearestHotels(double lat, double lon, size_t limit, double maxDistance = 0.0) const; @@ -83,7 +87,7 @@ protected: boost::geometry::index::rtree<TValue, boost::geometry::index::quadratic<16>> m_rtree; - void LoadHotels(string const & path); + void LoadHotels(istream & path, string const & addressReferencePath); bool MatchWithBooking(OsmElement const & e) const; bool Filter(OsmElement const & e, function<bool(OsmElement const &)> const & fn) const; }; diff --git a/generator/booking_quality_check/booking_addr_match.cpp b/generator/booking_quality_check/booking_addr_match.cpp new file mode 100644 index 0000000000..9d063be0bb --- /dev/null +++ b/generator/booking_quality_check/booking_addr_match.cpp @@ -0,0 +1,135 @@ +#include "generator/booking_dataset.hpp" +#include "generator/osm_source.hpp" + +#include "map/framework.hpp" + +#include "indexer/classificator_loader.hpp" +#include "indexer/data_header.hpp" +#include "indexer/index.hpp" +#include "indexer/mwm_set.hpp" + +#include "platform/country_file.hpp" +#include "platform/local_country_file.hpp" +#include "platform/local_country_file_utils.hpp" +#include "platform/platform.hpp" + +#include "geometry/distance_on_sphere.hpp" + +#include "search/processor_factory.hpp" +#include "search/ranking_info.hpp" +#include "search/result.hpp" +#include "search/search_quality/helpers.hpp" +#include "search/search_tests_support/test_search_engine.hpp" +#include "search/search_tests_support/test_search_request.hpp" + +#include "storage/country_info_getter.hpp" +#include "storage/index.hpp" +#include "storage/storage.hpp" + +#include "coding/reader_wrapper.hpp" + +#include "std/fstream.hpp" +#include "std/iostream.hpp" +#include "std/numeric.hpp" +#include "std/shared_ptr.hpp" + +#include "3party/gflags/src/gflags/gflags.h" + +using namespace generator; +using namespace storage; +using namespace search; +using namespace search::tests_support; + +void DidDownload(TCountryId const & /* countryId */, + shared_ptr<platform::LocalCountryFile> const & /* localFile */) +{ +} + +bool WillDelete(TCountryId const & /* countryId */, + shared_ptr<platform::LocalCountryFile> const & /* localFile */) +{ + return false; +} + +uint64_t ReadVersionFromHeader(platform::LocalCountryFile const & mwm) +{ + vector<string> specialFiles = { + WORLD_FILE_NAME, + WORLD_COASTS_FILE_NAME, + WORLD_COASTS_OBSOLETE_FILE_NAME + }; + for (auto const & name : specialFiles) + { + if (mwm.GetCountryName() == name) + return mwm.GetVersion(); + } + + ModelReaderPtr reader = FilesContainerR(mwm.GetPath(MapOptions::Map)).GetReader(VERSION_FILE_TAG); + ReaderSrc src(reader.GetPtr()); + + version::MwmVersion version; + version::ReadVersion(src, version); + return version.GetVersion(); +} + +DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); +DEFINE_string(user_resource_path, "", "Path to data directory (resources dir)"); +DEFINE_string(data_path, "", "Path to mwm files (writable dir)"); +DEFINE_string(locale, "en", "Locale of all the search queries"); +DEFINE_int32(num_threads, 1, "Number of search engine threads"); + + +int main(int argc, char * argv[]) +{ + google::SetUsageMessage("Takes OSM XML data from stdin and creates" + " data and index files in several passes."); + google::ParseCommandLineFlags(&argc, &argv, true); + + Platform & platform = GetPlatform(); + + string countriesFile = COUNTRIES_FILE; + if (!FLAGS_user_resource_path.empty()) + { + platform.SetResourceDir(FLAGS_user_resource_path); + countriesFile = my::JoinFoldersToPath(FLAGS_user_resource_path, COUNTRIES_FILE); + } + + if (!FLAGS_data_path.empty()) + { + platform.SetSettingsDirForTests(FLAGS_data_path); + platform.SetWritableDirForTests(FLAGS_data_path); + } + + LOG(LINFO, ("writable dir =", platform.WritableDir())); + LOG(LINFO, ("resources dir =", platform.ResourcesDir())); + + LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking_data)); + + BookingDataset bookingDataset(FLAGS_booking_data); + + + Framework f; + + size_t matchedNum = 0; + size_t emptyAddr = 0; + for (size_t i = 0; i < bookingDataset.Size(); ++i) + { + BookingDataset::Hotel const & hotel = bookingDataset.GetHotel(i); + + search::AddressInfo info = f.GetAddressInfoAtPoint(MercatorBounds::FromLatLon(hotel.lat, hotel.lon)); + + if (hotel.address.empty()) + ++emptyAddr; + + if(!info.FormatAddress().empty()) + { + ++matchedNum; + cout << "[" << i << "/" << bookingDataset.Size() << "] Hotel: " << hotel.address + << " AddLoc: " << hotel.addressLoc << " --> " << info.FormatAddress() << endl; + } + } + + cout << "Num of hotels: " << bookingDataset.Size() << " matched: " << matchedNum << " Empty addresses: " << emptyAddr << endl; + + return 0; +} diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp index 6acca8fa80..94198d93f0 100644 --- a/generator/generate_info.hpp +++ b/generator/generate_info.hpp @@ -43,6 +43,7 @@ struct GenerateInfo string m_osmFileName; string m_bookingDatafileName; + string m_bookingReferenceDir; uint32_t m_versionDate = 0; diff --git a/generator/generator_tests/booking_test.cpp b/generator/generator_tests/booking_test.cpp new file mode 100644 index 0000000000..f7ed68feed --- /dev/null +++ b/generator/generator_tests/booking_test.cpp @@ -0,0 +1,35 @@ +#include "testing/testing.hpp" + +#include "generator/booking_dataset.hpp" + +UNIT_TEST(BookingDataset_SmokeTest) +{ + stringstream ss; + generator::BookingDataset data(ss); + TEST_EQUAL(data.Size(), 0, ()); +} + +UNIT_TEST(BookingDataset_ParseTest) +{ + stringstream ss("1485988\t36.75818960879561\t3.053177244180233\tAppartement Alger Centre\t50 Avenue Ahmed Ghermoul\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/appartement-alger-centre-alger.html\t201\t\t\t"); + generator::BookingDataset data(ss); + TEST_EQUAL(data.Size(), 1, ()); +} + +UNIT_TEST(BookingDataset_ParseTest2) +{ + stringstream ss( + "1485988\t36.75818960879561\t3.053177244180233\tAppartement Alger Centre\t50 Avenue Ahmed Ghermoul\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/appartement-alger-centre-alger.html\t201\t\t\t\n" + "357811\t34.86737239675703\t-1.31686270236969\tRenaissance Tlemcen Hotel\tRenaissance Tlemcen Hotel\t5\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/renaissance-tlemcen.html\t204\t\t\t\n" \ + "1500820\t36.72847621708523\t3.0645270245369147\tMazghana Apartment\tCite Garidi 1 Tours 3 N 53, Kouba\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/mazghana-apartment.html\t201\t\t\t\n" \ + "1318991\t35.692865978372666\t-0.6278949570083796\tBest Western Hotel Colombe\t6 Bd Zabour Larbi Hai Khaldia Delmonte\t4\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/best-western-colombe.html\t204\t\t\t\n" \ + "1495828\t36.33835943\t6.626214981\tConstantine Marriott Hotel\tOued Rhumel Street, Cites des Arcades Romaines,\t5\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/constantine-marriott.html\t204\t\t\t\n" \ + "1411999\t35.73994643933386\t-0.757756233215332\tResidence Nadra\tBoulevard de la plage, Niche 1236 Paradis plage\t0\t1\tNone\tNone\thttp://www.booking.com/hotel/dz/residence-nadra.html\t201\t\t\t\n" \ + "1497769\t36.80667121575615\t3.231203541069817\tApartment La Pérouse\tLa Pérouse Ain Taya\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/apartment-la-perouse.html\t220\t\t\t\n" \ + "1668244\t36.715150622433804\t2.8442734479904175\tAZ Hotel Zeralda\t09 Rue de Mahelma - Zeralda - ALGER\t4\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/el-aziz-zeralda.html\t204\t\t\t\n" \ + "1486823\t36.73432645678891\t3.0335435271263123\tGuest House Marhaba\tResidence Soumam - Bloc B - Appt 17- Said Hamdine\t0\t0\tNone\tNone\thttp://www.booking.com/hotel/dz/marhaba.html\t208\t\t\t\n" \ + "1759799\t35.73832476589291\t-0.7553583383560181\tHotel la brise\tAngle boulevard de la plage et route nationale niche 1159 paradis plage\t2\t2\tNone\tNone\thttp://www.booking.com/hotel/dz/la-brise.html\t204\t\t\t\n" + ); + generator::BookingDataset data(ss); + TEST_EQUAL(data.Size(), 10, ()); +}
\ No newline at end of file diff --git a/generator/generator_tests/generator_tests.pro b/generator/generator_tests/generator_tests.pro index c55a55bf38..f3b4141758 100644 --- a/generator/generator_tests/generator_tests.pro +++ b/generator/generator_tests/generator_tests.pro @@ -4,8 +4,13 @@ CONFIG -= app_bundle TEMPLATE = app ROOT_DIR = ../.. -DEPENDENCIES = generator map routing indexer platform geometry coding base \ - expat tess2 protobuf tomcrypt osrm succinct +#DEPENDENCIES = generator map routing indexer platform geometry coding base \ +# expat tess2 protobuf tomcrypt osrm succinct + +DEPENDENCIES = drape_frontend routing search storage indexer drape map platform editor geometry \ + coding base freetype expat fribidi tomcrypt jansson protobuf osrm stats_client \ + minizip succinct pugixml tess2 gflags oauthcpp generator + include($$ROOT_DIR/common.pri) diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index 9c884917a4..0aff559f45 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -68,6 +68,7 @@ DEFINE_string(osm_file_name, "", "Input osm area file"); DEFINE_string(osm_file_type, "xml", "Input osm area file type [xml, o5m]"); DEFINE_string(user_resource_path, "", "User defined resource path for classificator.txt and etc."); DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); +DEFINE_string(booking_reference_path, "", "Path to mwm dataset for match booking addresses"); DEFINE_uint64(planet_version, my::SecondsSinceEpoch(), "Version as seconds since epoch, by default - now"); int main(int argc, char ** argv) @@ -102,6 +103,7 @@ int main(int argc, char ** argv) genInfo.m_failOnCoasts = FLAGS_fail_on_coasts; genInfo.m_preloadCache = FLAGS_preload_cache; genInfo.m_bookingDatafileName = FLAGS_booking_data; + genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path; genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version); diff --git a/generator/generator_tool/generator_tool.pro b/generator/generator_tool/generator_tool.pro index b0a0c3c6b5..9e332c77e3 100644 --- a/generator/generator_tool/generator_tool.pro +++ b/generator/generator_tool/generator_tool.pro @@ -1,9 +1,17 @@ # Generator binary ROOT_DIR = ../.. -DEPENDENCIES = generator search routing storage indexer platform editor geometry coding base \ - osrm gflags expat tess2 jansson protobuf tomcrypt \ - succinct stats_client pugixml +#DEPENDENCIES = generator map search routing storage indexer platform editor geometry coding base \ +# osrm gflags expat tess2 jansson protobuf tomcrypt \ +# succinct stats_client pugixml + +#DEPENDENCIES = generator drape map drape_frontend routing search storage indexer platform editor geometry \ +# coding base freetype gflags expat tess2 fribidi tomcrypt jansson protobuf osrm stats_client \ +# minizip succinct pugixml oauthcpp + +DEPENDENCIES = drape_frontend routing search storage indexer drape map platform editor geometry \ + coding base freetype expat fribidi tomcrypt jansson protobuf osrm stats_client \ + minizip succinct pugixml tess2 gflags oauthcpp generator include($$ROOT_DIR/common.pri) @@ -16,6 +24,10 @@ TEMPLATE = app # needed for Platform::WorkingDir() and unicode combining QT *= core +macx-* { + LIBS *= "-framework IOKit" "-framework SystemConfiguration" +} + SOURCES += \ generator_tool.cpp \ diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 3992b9016f..2ef84e5dc8 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -515,7 +515,7 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) TagReplacer tagReplacer(GetPlatform().ResourcesDir() + REPLACED_TAGS_FILE); // If info.m_bookingDatafileName is empty then no data will be loaded. - generator::BookingDataset bookingDataset(info.m_bookingDatafileName); + generator::BookingDataset bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir); stringstream skippedElements; |