Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author cc-engineering <13055392+cc-engineering@users.noreply.github.com> 2019-04-09 17:24:01 +0300
committer GitHub <noreply@github.com> 2019-04-09 17:24:01 +0300
commit d3c01acd1948d59924b3735add4c593b5cb55b59 (patch)
tree 579e559b899b467322f3c10f2ed0710d0abb5ab8 /generator
parent 8bc91ab1d991f0540dd7727a2cc670b7a222fec8 (diff)
[generator:geo_objects] Refactor: KeyValueMem -> KeyValueStorage (#10646)
* [generator:geo_objects] Refactor: KeyValueMem -> KeyValueStorage, remove KeyValueMap
Diffstat (limited to 'generator')
-rw-r--r-- generator/CMakeLists.txt 2
-rw-r--r-- generator/geo_objects/geo_objects.cpp 180
-rw-r--r-- generator/geo_objects/key_value_storage.cpp 73
-rw-r--r-- generator/geo_objects/key_value_storage.hpp 36
4 files changed, 124 insertions, 167 deletions
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt
index 4eda851148..010345b282 100644
--- a/generator/CMakeLists.txt
+++ b/generator/CMakeLists.txt
@@ -69,6 +69,8 @@ set(SRC
geometry_holder.hpp
geo_objects/geo_objects.cpp
geo_objects/geo_objects.hpp
+ geo_objects/key_value_storage.cpp
+ geo_objects/key_value_storage.hpp
holes.cpp
holes.hpp
intermediate_data.cpp
diff --git a/generator/geo_objects/geo_objects.cpp b/generator/geo_objects/geo_objects.cpp
index 0746065948..06e7b2b38d 100644
--- a/generator/geo_objects/geo_objects.cpp
+++ b/generator/geo_objects/geo_objects.cpp
@@ -1,5 +1,7 @@
#include "generator/geo_objects/geo_objects.hpp"
+#include "generator/geo_objects/key_value_storage.hpp"
+
#include "generator/feature_builder.hpp"
#include "generator/locality_sorter.hpp"
#include "generator/regions/region_base.hpp"
@@ -21,168 +23,19 @@
#include <fstream>
#include <functional>
#include <future>
-#include <unordered_map>
-#include <utility>
#include "platform/platform.hpp"
#include <boost/optional.hpp>
#include "3party/jansson/myjansson.hpp"
-namespace
-{
-using KeyValue = std::pair<uint64_t, base::Json>;
-using IndexReader = ReaderPtr<Reader>;
-
-bool DefaultPred(KeyValue const &) { return true; }
-
-bool ParseKey(std::string const & line, int64_t & key)
-{
- auto const pos = line.find(" ");
- if (pos == std::string::npos)
- {
- LOG(LWARNING, ("Cannot find separator."));
- return false;
- }
-
- if (!strings::to_int64(line.substr(0, pos), key))
- {
- LOG(LWARNING, ("Cannot parse id."));
- return false;
- }
-
- return true;
-}
-
-bool ParseKeyValueLine(std::string const & line, KeyValue & res)
-{
- auto const pos = line.find(" ");
- if (pos == std::string::npos)
- {
- LOG(LWARNING, ("Cannot find separator."));
- return false;
- }
-
- int64_t id;
- if (!strings::to_int64(line.substr(0, pos), id))
- {
- LOG(LWARNING, ("Cannot parse id."));
- return false;
- }
-
- base::Json json;
- try
- {
- json = base::Json(line.substr(pos + 1));
- if (!json.get())
- return false;
- }
- catch (base::Json::Exception const &)
- {
- LOG(LWARNING, ("Cannot create base::Json."));
- return false;
- }
-
- res = std::make_pair(static_cast<uint64_t>(id), json);
- return true;
-}
-
-// An interface for reading key-value storage.
-class KeyValueInterface
+namespace generator
{
-public:
- virtual ~KeyValueInterface() = default;
-
- virtual boost::optional<base::Json> Find(uint64_t key) const = 0;
- virtual size_t Size() const = 0;
-};
-
-// An implementation for reading key-value storage with loading and searching in memory.
-class KeyValueMem : public KeyValueInterface
+namespace geo_objects
{
-public:
- KeyValueMem(std::istream & stream, std::function<bool(KeyValue const &)> pred = DefaultPred)
- {
- std::string line;
- KeyValue kv;
- while (std::getline(stream, line))
- {
- if (!ParseKeyValueLine(line, kv) || !pred(kv))
- continue;
-
- m_map.insert(kv);
- }
-
- }
-
- // KeyValueInterface overrides:
- boost::optional<base::Json> Find(uint64_t key) const override
- {
- boost::optional<base::Json> result;
- auto const it = m_map.find(key);
- if (it != std::end(m_map))
- result = it->second;
-
- return result;
- }
-
- size_t Size() const override { return m_map.size(); }
-
-private:
- std::unordered_map<uint64_t, base::Json> m_map;
-};
-
-// An implementation for reading key-value storage with loading and searching in disk.
-class KeyValueMap : public KeyValueInterface
+namespace
{
-public:
- KeyValueMap(std::istream & stream) : m_stream(stream)
- {
- std::string line;
- std::istream::pos_type pos = 0;
- KeyValue kv;
- while (std::getline(m_stream, line))
- {
- int64_t key;
- if (!ParseKey(line, key))
- continue;
-
- m_map.emplace(key, pos);
- pos = m_stream.tellg();
- }
-
- m_stream.clear();
- }
-
- // KeyValueInterface overrides:
- boost::optional<base::Json> Find(uint64_t key) const override
- {
- boost::optional<base::Json> result;
- auto const it = m_map.find(key);
- if (it == std::end(m_map))
- return result;
-
- m_stream.seekg(it->second);
- std::string line;
- if (!std::getline(m_stream, line))
- {
- LOG(LERROR, ("Cannot read line."));
- return result;
- }
-
- KeyValue kv;
- if (ParseKeyValueLine(line, kv))
- result = kv.second;
-
- return result;
- }
-
- size_t Size() const override { return m_map.size(); }
-
-private:
- std::istream & m_stream;
- std::unordered_map<uint64_t, std::istream::pos_type> m_map;
-};
+using IndexReader = ReaderPtr<Reader>;
bool IsBuilding(FeatureBuilder1 const & fb)
{
@@ -230,7 +83,7 @@ int GetRankFromValue(base::Json json)
}
boost::optional<KeyValue> GetDeepestRegion(std::vector<base::GeoObjectId> const & ids,
- KeyValueInterface const & regionKv)
+ KeyValueStorage const & regionKv)
{
boost::optional<KeyValue> deepest;
int deepestRank = 0;
@@ -310,7 +163,7 @@ base::Json AddAddress(FeatureBuilder1 const & fb, KeyValue const & regionKeyValu
boost::optional<KeyValue>
FindRegion(FeatureBuilder1 const & fb, indexer::RegionsIndex<IndexReader> const & regionIndex,
- KeyValueInterface const & regionKv)
+ KeyValueStorage const & regionKv)
{
auto const ids = SearchObjectsInIndex(fb, regionIndex);
return GetDeepestRegion(ids, regionKv);
@@ -327,7 +180,7 @@ MakeGeoObjectValueWithAddress(FeatureBuilder1 const & fb, KeyValue const & keyVa
boost::optional<base::Json>
FindHousePoi(FeatureBuilder1 const & fb,
indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex,
- KeyValueInterface const & geoObjectsKv)
+ KeyValueStorage const & geoObjectsKv)
{
auto const ids = SearchObjectsInIndex(fb, geoObjectsIndex);
for (auto const & id : ids)
@@ -379,7 +232,7 @@ MakeTempGeoObjectsIndex(std::string const & pathToGeoObjectsTmpMwm)
}
void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & regionIndex,
- KeyValueInterface const & regionKv,
+ KeyValueStorage const & regionKv,
std::string const & pathInGeoObjectsTmpMwm,
std::ostream & streamGeoObjectsKv, bool)
{
@@ -404,7 +257,7 @@ void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & reg
void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex,
std::string const & pathInGeoObjectsTmpMwm,
- KeyValueInterface const & geoObjectsKv,
+ KeyValueStorage const & geoObjectsKv,
std::ostream & streamGeoObjectsKv,
std::ostream & streamIdsWithoutAddress, bool)
{
@@ -432,10 +285,6 @@ void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const
}
} // namespace
-namespace generator
-{
-namespace geo_objects
-{
bool GenerateGeoObjects(std::string const & pathInRegionsIndx,
std::string const & pathInRegionsKv,
std::string const & pathInGeoObjectsTmpMwm,
@@ -454,19 +303,16 @@ bool GenerateGeoObjects(std::string const & pathInRegionsIndx,
indexer::ReadIndex<indexer::RegionsIndexBox<IndexReader>, MmapReader>(pathInRegionsIndx);
// Regions key-value storage is small (~150 Mb). We will load everything into memory.
std::fstream streamRegionKv(pathInRegionsKv);
- KeyValueMem const regionsKv(streamRegionKv);
+ KeyValueStorage const regionsKv(streamRegionKv);
LOG(LINFO, ("Size of regions key-value storage:", regionsKv.Size()));
std::ofstream streamIdsWithoutAddress(pathOutIdsWithoutAddress);
std::ofstream streamGeoObjectsKv(pathOutGeoObjectsKv);
BuildGeoObjectsWithAddresses(regionIndex, regionsKv, pathInGeoObjectsTmpMwm,
streamGeoObjectsKv, verbose);
LOG(LINFO, ("Geo objects with addresses were built."));
- // Regions key-value storage is big (~80 Gb). We will not load the key value into memory.
- // This can be slow.
- // todo(maksimandrianov1): Investigate the issue of performance and if necessary improve.
std::ifstream tempStream(pathOutGeoObjectsKv);
auto const pred = [](KeyValue const & kv) { return HouseHasAddress(kv.second); };
- KeyValueMem const geoObjectsKv(tempStream, pred);
+ KeyValueStorage const geoObjectsKv(tempStream, pred);
LOG(LINFO, ("Size of geo objects key-value storage:", geoObjectsKv.Size()));
auto const geoObjectIndex = geoObjectIndexFuture.get();
LOG(LINFO, ("Index was built."));
diff --git a/generator/geo_objects/key_value_storage.cpp b/generator/geo_objects/key_value_storage.cpp
new file mode 100644
index 0000000000..98efd0284c
--- /dev/null
+++ b/generator/geo_objects/key_value_storage.cpp
@@ -0,0 +1,73 @@
+#include "generator/geo_objects/key_value_storage.hpp"
+
+#include "base/logging.hpp"
+
+namespace generator
+{
+namespace geo_objects
+{
+// Reads |stream| line by line and stores every entry that is both parsed
+// successfully by ParseKeyValueLine and accepted by |pred|; all other lines
+// are skipped. When a key occurs more than once the first accepted entry is
+// kept, because std::unordered_map::insert does not overwrite.
+KeyValueStorage::KeyValueStorage(std::istream & stream, std::function<bool(KeyValue const &)> const & pred)
+{
+  std::streamoff currentLine = 0;
+  for (std::string text; std::getline(stream, text);)
+  {
+    // Lines are counted from 1; the counter is only handed to the parser,
+    // which uses it in its warnings.
+    ++currentLine;
+
+    KeyValue entry;
+    // |pred| is consulted only for entries that parsed successfully.
+    bool const accepted = ParseKeyValueLine(text, entry, currentLine) && pred(entry);
+    if (accepted)
+      m_values.insert(entry);
+  }
+}
+
+// static
+// Splits |line| at the first space into an integer id and a json document and
+// stores the resulting pair in |res|. Returns false, leaving |res| untouched,
+// when there is no separator, when strings::to_int64 rejects the id, or when
+// the json cannot be created. |lineNumber| is only used to label warnings.
+bool KeyValueStorage::ParseKeyValueLine(std::string const & line, KeyValue & res, std::streamoff lineNumber)
+{
+  auto const separatorPos = line.find(" ");
+  if (separatorPos == std::string::npos)
+  {
+    LOG(LWARNING, ("Cannot find separator in line", lineNumber));
+    return false;
+  }
+
+  // The id is read as a signed 64-bit integer and converted to unsigned below.
+  std::string const idText = line.substr(0, separatorPos);
+  int64_t parsedId;
+  if (!strings::to_int64(idText, parsedId))
+  {
+    LOG(LWARNING, ("Cannot parse id", idText, "in line", lineNumber));
+    return false;
+  }
+
+  base::Json value;
+  try
+  {
+    // Everything after the separator is handed to the json parser as is.
+    value = base::Json(line.c_str() + separatorPos + 1);
+    // A Json whose get() yields a null/false value is rejected without a warning.
+    if (!value.get())
+      return false;
+  }
+  catch (base::Json::Exception const & err)
+  {
+    LOG(LWARNING, ("Cannot create base::Json in line", lineNumber, ":", err.Msg()));
+    return false;
+  }
+
+  res = KeyValue(static_cast<uint64_t>(parsedId), value);
+  return true;
+}
+
+// Looks |key| up in the loaded values. Returns the stored json when the key
+// is present and an empty optional otherwise.
+boost::optional<base::Json> KeyValueStorage::Find(uint64_t key) const
+{
+  auto const found = m_values.find(key);
+  if (found != m_values.end())
+    return found->second;
+
+  return {};
+}
+
+// Reports how many key-value pairs the storage holds, i.e. the number of
+// distinct keys inserted by the constructor.
+size_t KeyValueStorage::Size() const { return m_values.size(); }
+} // namespace geo_objects
+} // namespace generator
diff --git a/generator/geo_objects/key_value_storage.hpp b/generator/geo_objects/key_value_storage.hpp
new file mode 100644
index 0000000000..2a3d771b52
--- /dev/null
+++ b/generator/geo_objects/key_value_storage.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <cstdint>
+#include <fstream>
+#include <functional>
+#include <string>
+#include <unordered_map>
+#include <utility>
+
+#include <boost/optional.hpp>
+
+#include "3party/jansson/myjansson.hpp"
+
+namespace generator
+{
+namespace geo_objects
+{
+// A storage entry: an unsigned 64-bit key together with its json value.
+using KeyValue = std::pair<uint64_t, base::Json>;
+
+// A key-value storage that is filled from a text stream in its constructor
+// and afterwards offers only const lookups.
+class KeyValueStorage
+{
+public:
+ // Loads entries from |stream|, keeping only those for which |pred| returns
+ // true. Without an explicit |pred| every successfully parsed entry is kept.
+ explicit KeyValueStorage(std::istream & stream,
+ std::function<bool(KeyValue const &)> const & pred = DefaultPred);
+
+ // Returns the json stored under |key|, or an empty optional if the key is absent.
+ boost::optional<base::Json> Find(uint64_t key) const;
+ // Returns the number of stored entries.
+ size_t Size() const;
+
+private:
+ // Accepts every entry; serves as the default value of the constructor's |pred|.
+ static bool DefaultPred(KeyValue const &) { return true; }
+ // Parses one input line into |res| and returns whether that succeeded;
+ // |lineNumber| identifies the line in diagnostics.
+ static bool ParseKeyValueLine(std::string const & line, KeyValue & res, std::streamoff lineNumber);
+
+ // Loaded values indexed by key.
+ std::unordered_map<uint64_t, base::Json> m_values;
+};
+} // namespace geo_objects
+} // namespace generator