Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arsentiy Milchakov <milcars@mapswithme.com> 2018-04-27 16:11:06 +0300
committer Ilya Zverev <ilya@zverev.info> 2018-04-27 16:24:42 +0300
commit 531389165278edef3a16e8a749b28981a8170a1c (patch)
tree ab6d23f3190b54a9cd88262998e0918b74af5787
parent b6daae63396b78b4ce5f9c85379d2678a49d14f4 (diff)
[generator] added possibility to exclude some booking hotels
-rw-r--r-- base/newtype.hpp 12
-rw-r--r-- base/string_utils.hpp 12
-rw-r--r-- data/booking_excluded.txt 0
-rw-r--r-- defines.hpp 2
-rw-r--r-- generator/generate_info.hpp 2
-rw-r--r-- generator/generator_tests/CMakeLists.txt 1
-rw-r--r-- generator/generator_tests/sponsored_storage_tests.cpp 37
-rw-r--r-- generator/generator_tool/generator_tool.cpp 4
-rw-r--r-- generator/osm_source.cpp 4
-rw-r--r-- generator/sponsored_dataset.hpp 5
-rw-r--r-- generator/sponsored_dataset_inl.hpp 32
-rw-r--r-- generator/sponsored_object_storage.hpp 72
-rw-r--r-- generator/viator_dataset.cpp 2
13 files changed, 114 insertions, 71 deletions
diff --git a/base/newtype.hpp b/base/newtype.hpp
index 644f29314e..50a65c8c3b 100644
--- a/base/newtype.hpp
+++ b/base/newtype.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include <functional>
#include <iostream>
#include <string>
#include <type_traits>
@@ -13,7 +14,7 @@ using IsConvertibleGuard = std::enable_if_t<std::is_convertible<From, To>::value
} // namespace impl
/// Creates a typesafe alias to a given numeric Type.
-template <typename Type, typename Tag>
+template <typename Type, typename Tag, typename Hasher = std::hash<Type>>
class NewType
{
static_assert(std::is_integral<Type>::value || std::is_floating_point<Type>::value,
@@ -138,6 +139,15 @@ public:
NewType operator|(NewType const & o) const { return NewType(m_value | o.m_value); }
NewType operator&(NewType const & o) const { return NewType(m_value & o.m_value); }
+ struct Hash
+ {
+ size_t operator()(NewType const & v) const
+ {
+ Hasher h;
+ return h(v.Get());
+ }
+ };
+
private:
Type m_value;
};
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index fe9e99a6c1..1b2f308de0 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -376,6 +376,18 @@ std::string to_string(T t)
return ss.str();
}
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int & i) { return to_int(s, i); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, unsigned int & i) { return to_uint(s, i); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, uint64_t & i) { return to_uint64(s, i); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, int64_t & i) { return to_int64(s, i); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, float & f) { return to_float(s, f); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, double & d) { return to_double(s, d); }
+WARN_UNUSED_RESULT inline bool to_any(std::string const & s, std::string & result)
+{
+ result = s;
+ return true;
+}
+
namespace impl
{
template <typename T>
diff --git a/data/booking_excluded.txt b/data/booking_excluded.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/data/booking_excluded.txt
diff --git a/defines.hpp b/defines.hpp
index c9043c3b18..bcc9c5abd6 100644
--- a/defines.hpp
+++ b/defines.hpp
@@ -102,4 +102,6 @@
#define LOCALIZATION_DESCRIPTION_SUFFIX " Description"
+#define BOOKING_EXCLUDED_FILE "booking_excluded.txt"
+
auto constexpr kInvalidRatingValue = -1.0f;
diff --git a/generator/generate_info.hpp b/generator/generate_info.hpp
index ed53c53e37..7388bd8a66 100644
--- a/generator/generate_info.hpp
+++ b/generator/generate_info.hpp
@@ -44,9 +44,7 @@ struct GenerateInfo
std::string m_osmFileName;
std::string m_bookingDatafileName;
- std::string m_bookingReferenceDir;
std::string m_opentableDatafileName;
- std::string m_opentableReferenceDir;
std::string m_viatorDatafileName;
std::shared_ptr<generator::OsmIdToBoundariesTable> m_boundariesTable;
diff --git a/generator/generator_tests/CMakeLists.txt b/generator/generator_tests/CMakeLists.txt
index da83ea4e53..37233ad01f 100644
--- a/generator/generator_tests/CMakeLists.txt
+++ b/generator/generator_tests/CMakeLists.txt
@@ -19,6 +19,7 @@ set(
source_data.cpp
source_data.hpp
source_to_element_test.cpp
+ sponsored_storage_tests.cpp
srtm_parser_test.cpp
tag_admixer_test.cpp
tesselator_test.cpp
diff --git a/generator/generator_tests/sponsored_storage_tests.cpp b/generator/generator_tests/sponsored_storage_tests.cpp
new file mode 100644
index 0000000000..4dbe48c27d
--- /dev/null
+++ b/generator/generator_tests/sponsored_storage_tests.cpp
@@ -0,0 +1,37 @@
+#include "testing/testing.hpp"
+
+#include "generator/booking_dataset.hpp"
+#include "generator/sponsored_object_storage.hpp"
+
+#include "platform/platform_tests_support/scoped_file.hpp"
+
+#include "coding/file_name_utils.hpp"
+
+using platform::tests_support::ScopedFile;
+
+double const kDummyDistanseForTesting = 1.0;
+size_t const kDummyCountOfObjectsForTesting = 1;
+std::string const kExcludedContent = "100\n200\n300";
+std::string const kExcludedIdsFileName = "excluded_for_testing.txt";
+
+namespace
+{
+UNIT_TEST(LoadExcludedIds)
+{
+ ScopedFile sf(kExcludedIdsFileName, kExcludedContent);
+
+ generator::SponsoredObjectStorage<generator::BookingHotel> storage(
+ kDummyDistanseForTesting, kDummyCountOfObjectsForTesting);
+
+ auto const & path = my::JoinPath(GetPlatform().WritableDir(), kExcludedIdsFileName);
+ auto const excludedIds = storage.LoadExcludedIds(path);
+ generator::BookingHotel::ObjectId id;
+ TEST_EQUAL(excludedIds.size(), 3, ());
+ id.Set(100);
+ TEST(excludedIds.find(id) != excludedIds.cend(), ());
+ id.Set(200);
+ TEST(excludedIds.find(id) != excludedIds.cend(), ());
+ id.Set(300);
+ TEST(excludedIds.find(id) != excludedIds.cend(), ());
+}
+} // namespace
diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp
index 5bba9e4f5b..497d9ff75d 100644
--- a/generator/generator_tool/generator_tool.cpp
+++ b/generator/generator_tool/generator_tool.cpp
@@ -121,9 +121,7 @@ DEFINE_string(transit_path, "", "Path to directory with transit graphs in json."
// Sponsored-related.
DEFINE_string(booking_data, "", "Path to booking data in .tsv format.");
-DEFINE_string(booking_reference_path, "", "Path to mwm dataset for booking addresses matching.");
DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
-DEFINE_string(opentable_reference_path, "", "Path to mwm dataset for opentable addresses matching.");
DEFINE_string(viator_data, "", "Path to viator data in .tsv format.");
// UGC
@@ -188,9 +186,7 @@ int main(int argc, char ** argv)
genInfo.m_failOnCoasts = FLAGS_fail_on_coasts;
genInfo.m_preloadCache = FLAGS_preload_cache;
genInfo.m_bookingDatafileName = FLAGS_booking_data;
- genInfo.m_bookingReferenceDir = FLAGS_booking_reference_path;
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
- genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path;
genInfo.m_viatorDatafileName = FLAGS_viator_data;
genInfo.m_boundariesTable = make_shared<generator::OsmIdToBoundariesTable>();
diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp
index 5b7f601e5b..18c5d2f42d 100644
--- a/generator/osm_source.cpp
+++ b/generator/osm_source.cpp
@@ -332,8 +332,8 @@ public:
MainFeaturesEmitter(feature::GenerateInfo const & info)
: m_skippedElementsPath(info.GetIntermediateFileName("skipped_elements", ".lst"))
, m_failOnCoasts(info.m_failOnCoasts)
- , m_bookingDataset(info.m_bookingDatafileName, info.m_bookingReferenceDir)
- , m_opentableDataset(info.m_opentableDatafileName, info.m_opentableReferenceDir)
+ , m_bookingDataset(info.m_bookingDatafileName)
+ , m_opentableDataset(info.m_opentableDatafileName)
, m_viatorDataset(info.m_viatorDatafileName)
, m_boundariesTable(info.m_boundariesTable)
{
diff --git a/generator/sponsored_dataset.hpp b/generator/sponsored_dataset.hpp
index 3b8bad6eb7..45322bdc08 100644
--- a/generator/sponsored_dataset.hpp
+++ b/generator/sponsored_dataset.hpp
@@ -21,8 +21,7 @@ public:
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
- explicit SponsoredDataset(std::string const & dataPath,
- std::string const & addressReferencePath = std::string());
+ explicit SponsoredDataset(std::string const & dataPath);
/// @return true if |fb| satisfies some necessary conditions to match one or serveral
/// objects from dataset.
@@ -39,8 +38,6 @@ public:
SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; }
private:
- void InitStorage();
-
void BuildObject(Object const & object,
std::function<void(FeatureBuilder1 &)> const & fn) const;
diff --git a/generator/sponsored_dataset_inl.hpp b/generator/sponsored_dataset_inl.hpp
index 2380a12892..73c3dcfc22 100644
--- a/generator/sponsored_dataset_inl.hpp
+++ b/generator/sponsored_dataset_inl.hpp
@@ -38,38 +38,10 @@ private:
// SponsoredDataset --------------------------------------------------------------------------------
template <typename SponsoredObject>
-SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath,
- std::string const & addressReferencePath)
+SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath)
: m_storage(kDistanceLimitInMeters, kMaxSelectedElements)
{
- InitStorage();
- m_storage.LoadData(dataPath, addressReferencePath);
-}
-
-template <typename SponsoredObject>
-void SponsoredDataset<SponsoredObject>::InitStorage()
-{
- using Container = typename SponsoredObjectStorage<SponsoredObject>::ObjectsContainer;
-
- m_storage.SetFillObjects([](Container & objects) {
- AddressMatcher addressMatcher;
-
- size_t matchedCount = 0;
- size_t emptyCount = 0;
- for (auto & item : objects)
- {
- auto & object = item.second;
- addressMatcher(object);
-
- if (object.m_address.empty())
- ++emptyCount;
- if (object.HasAddresParts())
- ++matchedCount;
- }
-
- LOG(LINFO, ("Num of objects:", objects.size(), "matched:", matchedCount,
- "empty addresses:", emptyCount));
- });
+ m_storage.LoadData(dataPath);
}
template <typename SponsoredObject>
diff --git a/generator/sponsored_object_storage.hpp b/generator/sponsored_object_storage.hpp
index 0f9a46d096..b6b82c6fe7 100644
--- a/generator/sponsored_object_storage.hpp
+++ b/generator/sponsored_object_storage.hpp
@@ -5,12 +5,16 @@
#include "geometry/distance_on_sphere.hpp"
#include "geometry/latlon.hpp"
+#include "coding/file_name_utils.hpp"
+
#include "base/logging.hpp"
+#include "base/string_utils.hpp"
#include <fstream>
#include <functional>
#include <map>
#include <string>
+#include <unordered_set>
#include <vector>
#include "boost/geometry.hpp"
@@ -18,6 +22,8 @@
#include "boost/geometry/geometries/point.hpp"
#include "boost/geometry/index/rtree.hpp"
+#include "defines.hpp"
+
namespace generator
{
template <typename Object>
@@ -26,7 +32,7 @@ class SponsoredObjectStorage
public:
using ObjectId = typename Object::ObjectId;
using ObjectsContainer = std::map<ObjectId, Object>;
- using FillObject = std::function<void(ObjectsContainer & objects)>;
+ using ExcludedIdsContainer = std::unordered_set<ObjectId, typename ObjectId::Hash>;
SponsoredObjectStorage(double distanceLimitMeters, size_t maxSelectedElements)
: m_distanceLimitMeters(distanceLimitMeters)
@@ -54,12 +60,7 @@ public:
return m_objects.size();
}
- void SetFillObjects(FillObject const & fn)
- {
- m_fillObject = fn;
- }
-
- void LoadData(std::string const & dataPath, std::string const & addressReferencePath)
+ void LoadData(std::string const & dataPath)
{
if (dataPath.empty())
return;
@@ -71,36 +72,54 @@ public:
return;
}
- LoadData(dataSource, addressReferencePath);
+ auto const excludedIdsPath = my::JoinPath(GetPlatform().ResourcesDir(), BOOKING_EXCLUDED_FILE);
+
+ LoadData(dataSource, LoadExcludedIds(excludedIdsPath));
}
- void LoadData(std::istream & src, std::string const & addressReferencePath)
+ ExcludedIdsContainer LoadExcludedIds(std::string const & excludedIdsPath)
{
- m_objects.clear();
- m_rtree.clear();
+ if (excludedIdsPath.empty())
+ return {};
- for (std::string line; std::getline(src, line);)
+ std::ifstream source(excludedIdsPath);
+ if (!source)
{
- Object object(line);
- if (object.m_id != Object::InvalidObjectId())
- m_objects.emplace(object.m_id, object);
+ LOG(LERROR, ("Error while opening", excludedIdsPath, ":", strerror(errno)));
+ return {};
}
- // Try to get object address from existing MWMs.
- if (!addressReferencePath.empty())
+ ExcludedIdsContainer result;
+ for (std::string line; std::getline(source, line);)
{
- LOG(LINFO, ("Reference addresses for sponsored objects", addressReferencePath));
- Platform & platform = GetPlatform();
- std::string const backupPath = platform.WritableDir();
+ ObjectId id{Object::InvalidObjectId()};
- // MWMs can be loaded only from a writebledir or from a resourcedir,
- // changig resourcedir can lead to problems with classificator, so
- // we change writebledir.
- platform.SetWritableDirForTests(addressReferencePath);
+ if (!strings::to_any(line, id.Get()))
+ {
+ LOG(LWARNING, ("Incorrect excluded sponsored id:", line));
+ continue;
+ }
- m_fillObject(m_objects);
+ if (id != Object::InvalidObjectId())
+ result.emplace(id);
+ }
+
+ return result;
+ }
- platform.SetWritableDirForTests(backupPath);
+ void LoadData(std::istream & src, ExcludedIdsContainer const & excludedIds)
+ {
+ m_objects.clear();
+ m_rtree.clear();
+
+ for (std::string line; std::getline(src, line);)
+ {
+ Object object(line);
+ if (object.m_id != Object::InvalidObjectId() &&
+ excludedIds.find(object.m_id) == excludedIds.cend())
+ {
+ m_objects.emplace(object.m_id, object);
+ }
}
for (auto const & item : m_objects)
@@ -159,6 +178,5 @@ private:
double const m_distanceLimitMeters;
size_t const m_maxSelectedElements;
- FillObject m_fillObject;
};
} // namespace generator
diff --git a/generator/viator_dataset.cpp b/generator/viator_dataset.cpp
index 7a83bbb58c..12155d87d4 100644
--- a/generator/viator_dataset.cpp
+++ b/generator/viator_dataset.cpp
@@ -59,7 +59,7 @@ ViatorDataset::ViatorDataset(std::string const & dataPath)
LoadIndex(m_index);
m_cityFinder = make_unique<search::CityFinder>(m_index);
- m_storage.LoadData(dataPath, "");
+ m_storage.LoadData(dataPath);
}
ViatorCity::ObjectId ViatorDataset::FindMatchingObjectId(FeatureBuilder1 const & fb) const