diff options
author | Arsentiy Milchakov <milcars@mapswithme.com> | 2018-04-27 16:11:06 +0300 |
---|---|---|
committer | Ilya Zverev <ilya@zverev.info> | 2018-04-27 16:24:42 +0300 |
commit | 531389165278edef3a16e8a749b28981a8170a1c (patch) | |
tree | ab6d23f3190b54a9cd88262998e0918b74af5787 | |
parent | b6daae63396b78b4ce5f9c85379d2678a49d14f4 (diff) |
[generator] added possibility to exclude some booking hotels
-rw-r--r-- | base/newtype.hpp | 12 | ||||
-rw-r--r-- | base/string_utils.hpp | 12 | ||||
-rw-r--r-- | data/booking_excluded.txt | 0 | ||||
-rw-r--r-- | defines.hpp | 2 | ||||
-rw-r--r-- | generator/generate_info.hpp | 2 | ||||
-rw-r--r-- | generator/generator_tests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | generator/generator_tests/sponsored_storage_tests.cpp | 37 | ||||
-rw-r--r-- | generator/generator_tool/generator_tool.cpp | 4 | ||||
-rw-r--r-- | generator/osm_source.cpp | 4 | ||||
-rw-r--r-- | generator/sponsored_dataset.hpp | 5 | ||||
-rw-r--r-- | generator/sponsored_dataset_inl.hpp | 32 | ||||
-rw-r--r-- | generator/sponsored_object_storage.hpp | 72 | ||||
-rw-r--r-- | generator/viator_dataset.cpp | 2 |
13 files changed, 114 insertions, 71 deletions
diff --git a/base/newtype.hpp b/base/newtype.hpp index 644f29314e..50a65c8c3b 100644 --- a/base/newtype.hpp +++ b/base/newtype.hpp @@ -1,5 +1,6 @@ #pragma once +#include <functional> #include <iostream> #include <string> #include <type_traits> @@ -13,7 +14,7 @@ using IsConvertibleGuard = std::enable_if_t<std::is_convertible<From, To>::value } // namespace impl /// Creates a typesafe alias to a given numeric Type. -template <typename Type, typename Tag> +template <typename Type, typename Tag, typename Hasher = std::hash<Type>> class NewType { static_assert(std::is_integral<Type>::value || std::is_floating_point<Type>::value, @@ -138,6 +139,15 @@ public: NewType operator|(NewType const & o) const { return NewType(m_value | o.m_value); } NewType operator&(NewType const & o) const { return NewType(m_value & o.m_value); } + struct Hash + { + size_t operator()(NewType const & v) const + { + Hasher h; + return h(v.Get()); + } + }; + private: Type m_value; }; diff --git a/base/string_utils.hpp b/base/string_utils.hpp index fe9e99a6c1..1b2f308de0 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -376,6 +376,18 @@ std::string to_string(T t) return ss.str(); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int & i) { return to_int(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, unsigned int & i) { return to_uint(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, uint64_t & i) { return to_uint64(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int64_t & i) { return to_int64(s, i); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, float & f) { return to_float(s, f); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, double & d) { return to_double(s, d); } +WARN_UNUSED_RESULT inline bool to_any(std::string const & s, std::string & result) +{ + result = s; + return true; +} + namespace impl { template <typename T> diff --git a/data/booking_excluded.txt b/data/booking_excluded.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/data/booking_excluded.txt diff --git a/defines.hpp b/defines.hpp index c9043c3b18..bcc9c5abd6 100644 --- a/defines.hpp +++ b/defines.hpp @@ -102,4 +102,6 @@ #define LOCALIZATION_DESCRIPTION_SUFFIX " Description" +#define BOOKING_EXCLUDED_FILE "booking_excluded.txt" + auto constexpr kInvalidRatingValue = -1.0f; diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp index ed53c53e37..7388bd8a66 100644 --- a/generator/generate_info.hpp +++ b/generator/generate_info.hpp @@ -44,9 +44,7 @@ struct GenerateInfo std::string m_osmFileName; std::string m_bookingDatafileName; - std::string m_bookingReferenceDir; std::string m_opentableDatafileName; - std::string m_opentableReferenceDir; std::string m_viatorDatafileName; std::shared_ptr<generator::OsmIdToBoundariesTable> m_boundariesTable; diff --git a/generator/generator_tests/CMakeLists.txt b/generator/generator_tests/CMakeLists.txt index da83ea4e53..37233ad01f 100644 --- a/generator/generator_tests/CMakeLists.txt +++ b/generator/generator_tests/CMakeLists.txt @@ -19,6 +19,7 @@ set( source_data.cpp source_data.hpp source_to_element_test.cpp + sponsored_storage_tests.cpp srtm_parser_test.cpp tag_admixer_test.cpp tesselator_test.cpp diff --git a/generator/generator_tests/sponsored_storage_tests.cpp b/generator/generator_tests/sponsored_storage_tests.cpp new file mode 100644 index 0000000000..4dbe48c27d --- /dev/null +++ b/generator/generator_tests/sponsored_storage_tests.cpp @@ -0,0 +1,37 @@ +#include "testing/testing.hpp" + +#include "generator/booking_dataset.hpp" +#include "generator/sponsored_object_storage.hpp" + +#include "platform/platform_tests_support/scoped_file.hpp" + +#include "coding/file_name_utils.hpp" + +using platform::tests_support::ScopedFile; + +double const kDummyDistanseForTesting = 1.0; +size_t const kDummyCountOfObjectsForTesting = 1; +std::string const kExcludedContent = "100\n200\n300"; +std::string const kExcludedIdsFileName = "excluded_for_testing.txt"; + +namespace +{ +UNIT_TEST(LoadExcludedIds) +{ + ScopedFile sf(kExcludedIdsFileName, kExcludedContent); + + generator::SponsoredObjectStorage<generator::BookingHotel> storage( + kDummyDistanseForTesting, kDummyCountOfObjectsForTesting); + + auto const & path = my::JoinPath(GetPlatform().WritableDir(), kExcludedIdsFileName); + auto const excludedIds = storage.LoadExcludedIds(path); + generator::BookingHotel::ObjectId id; + TEST_EQUAL(excludedIds.size(), 3, ()); + id.Set(100); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); + id.Set(200); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); + id.Set(300); + TEST(excludedIds.find(id) != excludedIds.cend(), ()); +} +} // namespace diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index 5bba9e4f5b..497d9ff75d 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -121,9 +121,7 @@ DEFINE_string(transit_path, "", "Path to directory with transit graphs in json." // Sponsored-related. DEFINE_string(booking_data, "", "Path to booking data in .tsv format."); -DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching."); DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format."); -DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching."); DEFINE_string(viator_data, "", "Path to viator data in .tsv format."); // UGC @@ -188,9 +186,7 @@ int main(int argc, char ** argv) genInfo.m_failOnCoasts = FLAGS_fail_on_coasts; genInfo.m_preloadCache = FLAGS_preload_cache; genInfo.m_bookingDatafileName = FLAGS_booking_data; - genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path; genInfo.m_opentableDatafileName = FLAGS_opentable_data; - genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path; genInfo.m_viatorDatafileName = FLAGS_viator_data; genInfo.m_boundariesTable = make_shared<generator::OsmIdToBoundariesTable>(); diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 5b7f601e5b..18c5d2f42d 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -332,8 +332,8 @@ public: MainFeaturesEmitter(feature::GenerateInfo const & info) : m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst")) , m_failOnCoasts(info.m_failOnCoasts) - , m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir) - , m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir) + , m_bookingDataset(info.m_bookingDatafileName) + , m_opentableDataset(info.m_opentableDatafileName) , m_viatorDataset(info.m_viatorDatafileName) , m_boundariesTable(info.m_boundariesTable) { diff --git a/generator/sponsored_dataset.hpp b/generator/sponsored_dataset.hpp index 3b8bad6eb7..45322bdc08 100644 --- a/generator/sponsored_dataset.hpp +++ b/generator/sponsored_dataset.hpp @@ -21,8 +21,7 @@ public: static double constexpr kDistanceLimitInMeters = 150; static size_t constexpr kMaxSelectedElements = 3; - explicit SponsoredDataset(std::string const & dataPath, - std::string const & addressReferencePath = std::string()); + explicit SponsoredDataset(std::string const & dataPath); /// @return true if |fb| satisfies some necessary conditions to match one or serveral /// objects from dataset. @@ -39,8 +38,6 @@ public: SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; } private: - void InitStorage(); - void BuildObject(Object const & object, std::function<void(FeatureBuilder1 &)> const & fn) const; diff --git a/generator/sponsored_dataset_inl.hpp b/generator/sponsored_dataset_inl.hpp index 2380a12892..73c3dcfc22 100644 --- a/generator/sponsored_dataset_inl.hpp +++ b/generator/sponsored_dataset_inl.hpp @@ -38,38 +38,10 @@ private: // SponsoredDataset -------------------------------------------------------------------------------- template <typename SponsoredObject> -SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath, - std::string const & addressReferencePath) +SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath) : m_storage(kDistanceLimitInMeters, kMaxSelectedElements) { - InitStorage(); - m_storage.LoadData(dataPath, addressReferencePath); -} - -template <typename SponsoredObject> -void SponsoredDataset<SponsoredObject>::InitStorage() -{ - using Container = typename SponsoredObjectStorage<SponsoredObject>::ObjectsContainer; - - m_storage.SetFillObjects([](Container & objects) { - AddressMatcher addressMatcher; - - size_t matchedCount = 0; - size_t emptyCount = 0; - for (auto & item : objects) - { - auto & object = item.second; - addressMatcher(object); - - if (object.m_address.empty()) - ++emptyCount; - if (object.HasAddresParts()) - ++matchedCount; - } - - LOG(LINFO, ("Num of objects:", objects.size(), "matched:", matchedCount, - "empty addresses:", emptyCount)); - }); + m_storage.LoadData(dataPath); } template <typename SponsoredObject> diff --git a/generator/sponsored_object_storage.hpp b/generator/sponsored_object_storage.hpp index 0f9a46d096..b6b82c6fe7 100644 --- a/generator/sponsored_object_storage.hpp +++ b/generator/sponsored_object_storage.hpp @@ -5,12 +5,16 @@ #include "geometry/distance_on_sphere.hpp" #include "geometry/latlon.hpp" +#include "coding/file_name_utils.hpp" + #include "base/logging.hpp" +#include "base/string_utils.hpp" #include <fstream> #include <functional> #include <map> #include <string> +#include <unordered_set> #include <vector> #include "boost/geometry.hpp" @@ -18,6 +22,8 @@ #include "boost/geometry/geometries/point.hpp" #include "boost/geometry/index/rtree.hpp" +#include "defines.hpp" + namespace generator { template <typename Object> @@ -26,7 +32,7 @@ class SponsoredObjectStorage public: using ObjectId = typename Object::ObjectId; using ObjectsContainer = std::map<ObjectId, Object>; - using FillObject = std::function<void(ObjectsContainer & objects)>; + using ExcludedIdsContainer = std::unordered_set<ObjectId, typename ObjectId::Hash>; SponsoredObjectStorage(double distanceLimitMeters, size_t maxSelectedElements) : m_distanceLimitMeters(distanceLimitMeters) @@ -54,12 +60,7 @@ public: return m_objects.size(); } - void SetFillObjects(FillObject const & fn) - { - m_fillObject = fn; - } - - void LoadData(std::string const & dataPath, std::string const & addressReferencePath) + void LoadData(std::string const & dataPath) { if (dataPath.empty()) return; @@ -71,36 +72,54 @@ public: return; } - LoadData(dataSource, addressReferencePath); + auto const excludedIdsPath = my::JoinPath(GetPlatform().ResourcesDir(), BOOKING_EXCLUDED_FILE); + + LoadData(dataSource, LoadExcludedIds(excludedIdsPath)); } - void LoadData(std::istream & src, std::string const & addressReferencePath) + ExcludedIdsContainer LoadExcludedIds(std::string const & excludedIdsPath) { - m_objects.clear(); - m_rtree.clear(); + if (excludedIdsPath.empty()) + return {}; - for (std::string line; std::getline(src, line);) + std::ifstream source(excludedIdsPath); + if (!source) { - Object object(line); - if (object.m_id != Object::InvalidObjectId()) - m_objects.emplace(object.m_id, object); + LOG(LERROR, ("Error while opening", excludedIdsPath, ":", strerror(errno))); + return {}; } - // Try to get object address from existing MWMs. - if (!addressReferencePath.empty()) + ExcludedIdsContainer result; + for (std::string line; std::getline(source, line);) { - LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath)); - Platform & platform = GetPlatform(); - std::string const backupPath = platform.WritableDir(); + ObjectId id{Object::InvalidObjectId()}; - // MWMs can be loaded only from a writebledir or from a resourcedir, - // changig resourcedir can lead to problems with classificator, so - // we change writebledir. - platform.SetWritableDirForTests(addressReferencePath); + if (!strings::to_any(line, id.Get())) + { + LOG(LWARNING, ("Incorrect excluded sponsored id:", line)); + continue; + } - m_fillObject(m_objects); + if (id != Object::InvalidObjectId()) + result.emplace(id); + } + + return result; + } - platform.SetWritableDirForTests(backupPath); + void LoadData(std::istream & src, ExcludedIdsContainer const & excludedIds) + { + m_objects.clear(); + m_rtree.clear(); + + for (std::string line; std::getline(src, line);) + { + Object object(line); + if (object.m_id != Object::InvalidObjectId() && + excludedIds.find(object.m_id) == excludedIds.cend()) + { + m_objects.emplace(object.m_id, object); + } } for (auto const & item : m_objects) @@ -159,6 +178,5 @@ private: double const m_distanceLimitMeters; size_t const m_maxSelectedElements; - FillObject m_fillObject; }; } // namespace generator diff --git a/generator/viator_dataset.cpp b/generator/viator_dataset.cpp index 7a83bbb58c..12155d87d4 100644 --- a/generator/viator_dataset.cpp +++ b/generator/viator_dataset.cpp @@ -59,7 +59,7 @@ ViatorDataset::ViatorDataset(std::string const & dataPath) LoadIndex(m_index); m_cityFinder = make_unique<search::CityFinder>(m_index); - m_storage.LoadData(dataPath, ""); + m_storage.LoadData(dataPath); } ViatorCity::ObjectId ViatorDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const |