Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuri Gorshenin <y@maps.me>2017-10-23 16:49:30 +0300
committerIlya Zverev <ilya@zverev.info>2017-10-24 17:01:06 +0300
commit1453a18f1bbea5450c4b270fcb3075a7db1cb979 (patch)
tree6955df031b7f33db58d4354077d04682784b9c27
parentae8625b92592e0a57bb35fb12b5faab45731b50d (diff)
[search][generator] Fixed cities boundaries generation.
-rw-r--r--base/base_tests/clustering_map_tests.cpp98
-rw-r--r--base/clustering_map.hpp50
-rw-r--r--generator/cities_boundaries_builder.cpp90
-rw-r--r--generator/cities_boundaries_builder.hpp3
-rw-r--r--generator/generator_tool/generator_tool.cpp31
-rw-r--r--map/framework.cpp3
-rw-r--r--search/cities_boundaries_table.cpp3
-rw-r--r--search/engine.cpp2
-rw-r--r--search/engine.hpp7
-rw-r--r--search/processor.cpp8
-rw-r--r--search/search_tests_support/test_search_engine.hpp2
-rwxr-xr-xtools/unix/generate_planet.sh18
12 files changed, 294 insertions, 21 deletions
diff --git a/base/base_tests/clustering_map_tests.cpp b/base/base_tests/clustering_map_tests.cpp
index 7203a4fcc4..e1f9332492 100644
--- a/base/base_tests/clustering_map_tests.cpp
+++ b/base/base_tests/clustering_map_tests.cpp
@@ -3,6 +3,7 @@
#include "base/clustering_map.hpp"
#include <algorithm>
+#include <sstream>
#include <string>
#include <utility>
#include <vector>
@@ -23,6 +24,42 @@ template <typename Key, typename Value, typename Hash = hash<Key>>
class ClusteringMapAdapter
{
public:
+ struct Cluster
+ {
+ Cluster(Key const & key, Value const & value): m_keys({key}), m_values({value}) {}
+
+ Cluster(vector<Key> const & keys, vector<Value> const & values) : m_keys(keys), m_values(values)
+ {
+ sort(m_keys.begin(), m_keys.end());
+ sort(m_values.begin(), m_values.end());
+ }
+
+ bool operator<(Cluster const & rhs) const
+ {
+ if (m_keys != rhs.m_keys)
+ return m_keys < rhs.m_keys;
+ return m_values < rhs.m_values;
+ }
+
+ bool operator==(Cluster const & rhs) const
+ {
+ return m_keys == rhs.m_keys && m_values == rhs.m_values;
+ }
+
+ friend string DebugPrint(Cluster const & cluster)
+ {
+ ostringstream os;
+ os << "Cluster [";
+ os << "keys: " << DebugPrint(cluster.m_keys) << ", ";
+ os << "values: " << DebugPrint(cluster.m_values);
+ os << "]";
+ return os.str();
+ }
+
+ vector<Key> m_keys;
+ vector<Value> m_values;
+ };
+
template <typename V>
void Append(Key const & key, V && value)
{
@@ -33,6 +70,17 @@ public:
vector<Value> Get(Key const & key) { return Sort(m_m.Get(key)); }
+ vector<Cluster> Clusters()
+ {
+ vector<Cluster> clusters;
+
+ m_m.ForEachCluster([&](vector<Key> const & keys, vector<Value> const & values) {
+ clusters.emplace_back(keys, values);
+ });
+ sort(clusters.begin(), clusters.end());
+ return clusters;
+ }
+
private:
ClusteringMap<Key, Value, Hash> m_m;
};
@@ -81,4 +129,54 @@ UNIT_TEST(ClusteringMap_Smoke)
TEST_EQUAL(m.Get(5), vector<string>({"alpha", "beta", "gamma"}), ());
}
}
+
+UNIT_TEST(ClusteringMap_ForEach)
+{
+ using Map = ClusteringMapAdapter<int, string>;
+ using Cluster = Map::Cluster;
+
+ {
+ Map m;
+ auto const clusters = m.Clusters();
+ TEST(clusters.empty(), (clusters));
+ }
+
+ {
+ Map m;
+ m.Append(0, "Hello");
+ m.Append(1, "World!");
+ m.Append(2, "alpha");
+ m.Append(3, "beta");
+ m.Append(4, "gamma");
+
+ {
+ vector<Cluster> const expected = {{Cluster{0, "Hello"}, Cluster{1, "World!"},
+ Cluster{2, "alpha"}, Cluster{3, "beta"},
+ Cluster{4, "gamma"}}};
+ TEST_EQUAL(expected, m.Clusters(), ());
+ }
+
+ m.Union(0, 1);
+ {
+ vector<Cluster> const expected = {{Cluster{{0, 1}, {"Hello", "World!"}}, Cluster{2, "alpha"},
+ Cluster{3, "beta"}, Cluster{4, "gamma"}}};
+ TEST_EQUAL(expected, m.Clusters(), ());
+ }
+
+ m.Union(2, 3);
+ m.Union(3, 4);
+ {
+ vector<Cluster> const expected = {
+ {Cluster{{0, 1}, {"Hello", "World!"}}, Cluster{{2, 3, 4}, {"alpha", "beta", "gamma"}}}};
+ TEST_EQUAL(expected, m.Clusters(), ());
+ }
+
+ m.Union(0, 3);
+ {
+ vector<Cluster> const expected = {
+ {Cluster{{0, 1, 2, 3, 4}, {"Hello", "World!", "alpha", "beta", "gamma"}}}};
+ TEST_EQUAL(expected, m.Clusters(), ());
+ }
+ }
+}
} // namespace
diff --git a/base/clustering_map.hpp b/base/clustering_map.hpp
index 5df48da0dc..f6ff9db60c 100644
--- a/base/clustering_map.hpp
+++ b/base/clustering_map.hpp
@@ -24,6 +24,8 @@ public:
// * m is the total number of values in the map
// * α() is the inverse Ackermann function
// * F is the complexity of find() in unordered_map
+ //
+ // Also, it's assumed that complexity to compare two keys is O(1).
// Appends |value| to the list of values in the cluster
// corresponding to |key|.
@@ -61,6 +63,54 @@ public:
return entry.m_values;
}
+ // Complexity: O(n * log(n)).
+ template <typename Fn>
+ void ForEachCluster(Fn && fn)
+ {
+ struct EntryWithKey : public std::less<Entry const *>
+ {
+ EntryWithKey(Entry const * entry, Key const & key) : m_entry(entry), m_key(key) {}
+
+ bool operator<(EntryWithKey const & rhs) const
+ {
+ return std::less<Entry const *>::operator()(m_entry, rhs.m_entry);
+ }
+
+ Entry const * m_entry;
+ Key m_key;
+ };
+
+ std::vector<EntryWithKey> eks;
+ for (auto const & kv : m_table)
+ {
+ auto const & key = kv.first;
+ auto const & entry = GetRoot(key);
+ eks.emplace_back(&entry, key);
+ }
+ std::sort(eks.begin(), eks.end());
+
+ std::equal_to<Entry const *> equal;
+ size_t i = 0;
+ while (i < eks.size())
+ {
+ std::vector<Key> keys;
+ keys.push_back(eks[i].m_key);
+
+ size_t j = i + 1;
+ while (j < eks.size() && equal(eks[i].m_entry, eks[j].m_entry))
+ {
+ keys.push_back(eks[j].m_key);
+ ++j;
+ }
+
+ fn(keys, eks[i].m_entry->m_values);
+
+ i = j;
+ }
+ }
+
+ void Clear() { m_table.clear(); }
+
private:
struct Entry
{
diff --git a/generator/cities_boundaries_builder.cpp b/generator/cities_boundaries_builder.cpp
index d60f73a9fe..6d93cedf9a 100644
--- a/generator/cities_boundaries_builder.cpp
+++ b/generator/cities_boundaries_builder.cpp
@@ -15,6 +15,10 @@
#include "platform/local_country_file.hpp"
+#include "coding/file_reader.hpp"
+#include "coding/file_writer.hpp"
+#include "coding/reader.hpp"
+
#include "base/assert.hpp"
#include "base/checked_cast.hpp"
#include "base/logging.hpp"
@@ -136,4 +140,90 @@ bool BuildCitiesBoundariesForTesting(string const & dataPath, TestIdToBoundaries
return my::make_unique<Mapping>(move(mapping));
});
}
+
+bool SerializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table)
+{
+ vector<vector<osm::Id>> allIds;
+ vector<vector<CityBoundary>> allBoundaries;
+ table.ForEachCluster([&](vector<osm::Id> const & ids, vector<CityBoundary> const & boundaries) {
+ allIds.push_back(ids);
+ allBoundaries.push_back(boundaries);
+ });
+
+ CHECK_EQUAL(allIds.size(), allBoundaries.size(), ());
+
+ try
+ {
+ FileWriter sink(path);
+
+ indexer::CitiesBoundariesSerDes::Serialize(sink, allBoundaries);
+
+ for (auto const & ids : allIds)
+ {
+ WriteToSink(sink, static_cast<uint64_t>(ids.size()));
+ for (auto const & id : ids)
+ WriteToSink(sink, id.EncodedId());
+ }
+
+ return true;
+ }
+ catch (Writer::Exception const & e)
+ {
+ LOG(LERROR, ("Can't serialize boundaries table:", e.what()));
+ return false;
+ }
+}
+
+bool DeserializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table)
+{
+ vector<vector<osm::Id>> allIds;
+ vector<vector<CityBoundary>> allBoundaries;
+
+ try
+ {
+ FileReader reader(path);
+ NonOwningReaderSource source(reader);
+
+ double precision;
+ indexer::CitiesBoundariesSerDes::Deserialize(source, allBoundaries, precision);
+
+ auto const n = allBoundaries.size();
+ allIds.resize(n);
+
+ for (auto & ids : allIds)
+ {
+ auto const m = ReadPrimitiveFromSource<uint64_t>(source);
+ ids.resize(m);
+ CHECK(m != 0, ());
+
+ for (auto & id : ids)
+ {
+ auto const encodedId = ReadPrimitiveFromSource<uint64_t>(source);
+ id = osm::Id(encodedId);
+ }
+ }
+ }
+ catch (Reader::Exception const & e)
+ {
+ LOG(LERROR, ("Can't deserialize boundaries table:", e.what()));
+ return false;
+ }
+
+ CHECK_EQUAL(allBoundaries.size(), allIds.size(), ());
+ table.Clear();
+ for (size_t i = 0; i < allBoundaries.size(); ++i)
+ {
+ auto const & ids = allIds[i];
+ CHECK(!ids.empty(), ());
+ auto const & id = ids.front();
+
+ for (auto const & b : allBoundaries[i])
+ table.Append(id, b);
+
+ for (size_t j = 1; j < ids.size(); ++j)
+ table.Union(id, ids[j]);
+ }
+
+ return true;
+}
} // namespace generator
diff --git a/generator/cities_boundaries_builder.hpp b/generator/cities_boundaries_builder.hpp
index 6b1b013478..0abd7eb18a 100644
--- a/generator/cities_boundaries_builder.hpp
+++ b/generator/cities_boundaries_builder.hpp
@@ -17,4 +17,7 @@ using TestIdToBoundariesTable = base::ClusteringMap<uint64_t, indexer::CityBound
bool BuildCitiesBoundaries(std::string const & dataPath, std::string const & osmToFeaturePath,
OsmIdToBoundariesTable & table);
bool BuildCitiesBoundariesForTesting(std::string const & dataPath, TestIdToBoundariesTable & table);
+
+bool SerializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table);
+bool DeserializeBoundariesTable(std::string const & path, OsmIdToBoundariesTable & table);
} // namespace generator
diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp
index 8f444958ae..0cd215d872 100644
--- a/generator/generator_tool/generator_tool.cpp
+++ b/generator/generator_tool/generator_tool.cpp
@@ -74,7 +74,11 @@ DEFINE_bool(generate_geometry, false,
"3rd pass - split and simplify geometry and triangles for features.");
DEFINE_bool(generate_index, false, "4rd pass - generate index.");
DEFINE_bool(generate_search_index, false, "5th pass - generate search index.");
-DEFINE_bool(generate_cities_boundaries, false, "Generate section with cities boundaries");
+
+DEFINE_bool(dump_cities_boundaries, false, "Dump cities boundaries to a file");
+DEFINE_bool(generate_cities_boundaries, false, "Generate cities boundaries section");
+DEFINE_string(cities_boundaries_path, "", "Path to collect cities boundaries");
+
DEFINE_bool(generate_world, false, "Generate separate world file.");
DEFINE_bool(split_by_polygons, false,
"Use countries borders to split planet by regions and countries.");
@@ -160,9 +164,7 @@ int main(int argc, char ** argv)
genInfo.m_opentableDatafileName = FLAGS_opentable_data;
genInfo.m_opentableReferenceDir = FLAGS_opentable_reference_path;
genInfo.m_viatorDatafileName = FLAGS_viator_data;
-
- if (FLAGS_generate_cities_boundaries)
- genInfo.m_boundariesTable = make_shared<generator::OsmIdToBoundariesTable>();
+ genInfo.m_boundariesTable = make_shared<generator::OsmIdToBoundariesTable>();
genInfo.m_versionDate = static_cast<uint32_t>(FLAGS_planet_version);
@@ -224,6 +226,17 @@ int main(int argc, char ** argv)
genInfo.m_bucketNames.push_back(WORLD_FILE_NAME);
genInfo.m_bucketNames.push_back(WORLD_COASTS_FILE_NAME);
}
+
+ if (FLAGS_dump_cities_boundaries)
+ {
+ CHECK(!FLAGS_cities_boundaries_path.empty(), ());
+ LOG(LINFO, ("Dumping cities boundaries to", FLAGS_cities_boundaries_path));
+ if (!generator::SerializeBoundariesTable(FLAGS_cities_boundaries_path,
+ *genInfo.m_boundariesTable))
+ {
+ LOG(LCRITICAL, ("Error serializing boundaries table to", FLAGS_cities_boundaries_path));
+ }
+ }
}
else
{
@@ -295,13 +308,13 @@ int main(int argc, char ** argv)
if (FLAGS_generate_cities_boundaries)
{
+ CHECK(!FLAGS_cities_boundaries_path.empty(), ());
LOG(LINFO, ("Generating cities boundaries for", datFile));
- CHECK(genInfo.m_boundariesTable, ());
- if (!generator::BuildCitiesBoundaries(datFile, osmToFeatureFilename,
- *genInfo.m_boundariesTable))
- {
+ generator::OsmIdToBoundariesTable table;
+ if (!generator::DeserializeBoundariesTable(FLAGS_cities_boundaries_path, table))
+ LOG(LCRITICAL, ("Error deserializing boundaries table"));
+ if (!generator::BuildCitiesBoundaries(datFile, osmToFeatureFilename, table))
LOG(LCRITICAL, ("Error generating cities boundaries."));
- }
}
if (!FLAGS_srtm_path.empty())
diff --git a/map/framework.cpp b/map/framework.cpp
index 18043ae75b..2207c8ee9f 100644
--- a/map/framework.cpp
+++ b/map/framework.cpp
@@ -455,6 +455,9 @@ Framework::Framework(FrameworkParams const & params)
RegisterAllMaps();
LOG(LDEBUG, ("Maps initialized"));
+ // Need to reload cities boundaries because maps in indexer were updated.
+ m_searchEngine->LoadCitiesBoundaries();
+
// Init storage with needed callback.
m_storage.Init(
bind(&Framework::OnCountryFileDownloaded, this, _1, _2),
diff --git a/search/cities_boundaries_table.cpp b/search/cities_boundaries_table.cpp
index 97ef366bb5..bd1cff62e3 100644
--- a/search/cities_boundaries_table.cpp
+++ b/search/cities_boundaries_table.cpp
@@ -34,7 +34,10 @@ bool CitiesBoundariesTable::Load()
{
auto handle = FindWorld(m_index);
if (!handle.IsAlive())
+ {
+ LOG(LWARNING, ("Can't find World map file."));
return false;
+ }
// Skip if table was already loaded from this file.
if (handle.GetId() == m_mwmId)
diff --git a/search/engine.cpp b/search/engine.cpp
index 99ac9f380c..9e8bce1d26 100644
--- a/search/engine.cpp
+++ b/search/engine.cpp
@@ -110,8 +110,6 @@ Engine::Engine(Index & index, CategoriesHolder const & categories,
m_threads.reserve(params.m_numThreads);
for (size_t i = 0; i < params.m_numThreads; ++i)
m_threads.emplace_back(&Engine::MainLoop, this, ref(m_contexts[i]));
-
- LoadCitiesBoundaries();
}
Engine::~Engine()
diff --git a/search/engine.hpp b/search/engine.hpp
index 779db4f474..5d16a2c4dc 100644
--- a/search/engine.hpp
+++ b/search/engine.hpp
@@ -106,9 +106,8 @@ public:
// Posts request to clear caches to the queue.
void ClearCaches();
- // Posts request to reload cities boundaries tables. Must be used
- // for testing only.
- void LoadCitiesBoundariesForTesting() { return LoadCitiesBoundaries(); }
+ // Posts request to reload cities boundaries tables.
+ void LoadCitiesBoundaries();
private:
struct Message
@@ -144,8 +143,6 @@ private:
unique_ptr<Processor> m_processor;
};
- void LoadCitiesBoundaries();
-
// *ALL* following methods are executed on the m_threads threads.
// This method executes tasks from a common pool (|tasks|) in a FIFO
diff --git a/search/processor.cpp b/search/processor.cpp
index 2f4d1a78ec..da192a371f 100644
--- a/search/processor.cpp
+++ b/search/processor.cpp
@@ -391,7 +391,13 @@ void Processor::SetViewportByIndex(m2::RectD const & viewport, size_t idx, bool
void Processor::ClearCache(size_t ind) { m_viewport[ind].MakeEmpty(); }
-void Processor::LoadCitiesBoundaries() { m_citiesBoundaries.Load(); }
+void Processor::LoadCitiesBoundaries()
+{
+ if (m_citiesBoundaries.Load())
+ LOG(LINFO, ("Loaded cities boundaries"));
+ else
+ LOG(LWARNING, ("Can't load cities boundaries"));
+}
Locales Processor::GetCategoryLocales() const
{
diff --git a/search/search_tests_support/test_search_engine.hpp b/search/search_tests_support/test_search_engine.hpp
index 58955383bd..c67c252001 100644
--- a/search/search_tests_support/test_search_engine.hpp
+++ b/search/search_tests_support/test_search_engine.hpp
@@ -33,7 +33,7 @@ public:
void SetLocale(string const & locale) { m_engine.SetLocale(locale); }
- void LoadCitiesBoundaries() { m_engine.LoadCitiesBoundariesForTesting(); }
+ void LoadCitiesBoundaries() { m_engine.LoadCitiesBoundaries(); }
weak_ptr<search::ProcessorHandle> Search(search::SearchParams const & params,
m2::RectD const & viewport);
diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh
index d91e3cac13..c4651315dd 100755
--- a/tools/unix/generate_planet.sh
+++ b/tools/unix/generate_planet.sh
@@ -183,6 +183,7 @@ OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}"
VIATOR_SCRIPT="$PYTHON_SCRIPTS_PATH/viator_cities.py"
VIATOR_FILE="${VIATOR_FILE:-$INTDIR/viator.csv}"
+CITIES_BOUNDARIES_FILE="${CITIES_BOUNDARIES_FILE:-$INTDIR/cities_boundaries}"
TESTING_SCRIPT="$SCRIPTS_PATH/test_planet.sh"
PYTHON="$(which python2.7)"
MWM_VERSION_FORMAT="%s"
@@ -460,8 +461,15 @@ if [ "$MODE" == "features" ]; then
[ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
[ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
[ -f "$VIATOR_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --viator_data=$VIATOR_FILE"
- "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --node_storage=$NODE_STORAGE --osm_file_type=o5m --osm_file_name="$PLANET" \
- --data_path="$TARGET" --user_resource_path="$DATA_PATH/" $PARAMS_SPLIT 2>> "$PLANET_LOG"
+ "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" \
+ --node_storage=$NODE_STORAGE \
+ --osm_file_type=o5m \
+ --osm_file_name="$PLANET" \
+ --data_path="$TARGET" \
+ --user_resource_path="$DATA_PATH/" \
+ --dump_cities_boundaries \
+ --cities_boundaries_path="$CITIES_BOUNDARIES_FILE" \
+ $PARAMS_SPLIT 2>> "$PLANET_LOG"
MODE=mwm
fi
@@ -487,7 +495,11 @@ if [ "$MODE" == "mwm" ]; then
if [ -n "$OPT_WORLD" ]; then
(
"$GENERATOR_TOOL" $PARAMS --output=World 2>> "$LOG_PATH/World.log"
- "$GENERATOR_TOOL" --data_path="$TARGET" --user_resource_path="$DATA_PATH/" -generate_search_index --output=World 2>> "$LOG_PATH/World.log"
+ "$GENERATOR_TOOL" --data_path="$TARGET" --user_resource_path="$DATA_PATH/" \
+ --generate_search_index \
+ --generate_cities_boundaries \
+ --cities_boundaries_path="$CITIES_BOUNDARIES_FILE" \
+ --output=World 2>> "$LOG_PATH/World.log"
) &
"$GENERATOR_TOOL" $PARAMS --output=WorldCoasts 2>> "$LOG_PATH/WorldCoasts.log" &
fi