diff options
author | cc-engineering <13055392+cc-engineering@users.noreply.github.com> | 2019-04-09 17:24:01 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-09 17:24:01 +0300 |
commit | d3c01acd1948d59924b3735add4c593b5cb55b59 (patch) | |
tree | 579e559b899b467322f3c10f2ed0710d0abb5ab8 /generator | |
parent | 8bc91ab1d991f0540dd7727a2cc670b7a222fec8 (diff) |
[generator:geo_objects] Refactor: KeyValueMem -> KeyValueStorage (#10646)
* [generator:geo_objects] Refactor: KeyValueMem -> KeyValueStorage, remove KeyValueMap
Diffstat (limited to 'generator')
-rw-r--r-- | generator/CMakeLists.txt | 2 | ||||
-rw-r--r-- | generator/geo_objects/geo_objects.cpp | 180 | ||||
-rw-r--r-- | generator/geo_objects/key_value_storage.cpp | 73 | ||||
-rw-r--r-- | generator/geo_objects/key_value_storage.hpp | 36 |
4 files changed, 124 insertions, 167 deletions
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 4eda851148..010345b282 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt @@ -69,6 +69,8 @@ set(SRC geometry_holder.hpp geo_objects/geo_objects.cpp geo_objects/geo_objects.hpp + geo_objects/key_value_storage.cpp + geo_objects/key_value_storage.hpp holes.cpp holes.hpp intermediate_data.cpp diff --git a/generator/geo_objects/geo_objects.cpp b/generator/geo_objects/geo_objects.cpp index 0746065948..06e7b2b38d 100644 --- a/generator/geo_objects/geo_objects.cpp +++ b/generator/geo_objects/geo_objects.cpp @@ -1,5 +1,7 @@ #include "generator/geo_objects/geo_objects.hpp" +#include "generator/geo_objects/key_value_storage.hpp" + #include "generator/feature_builder.hpp" #include "generator/locality_sorter.hpp" #include "generator/regions/region_base.hpp" @@ -21,168 +23,19 @@ #include <fstream> #include <functional> #include <future> -#include <unordered_map> -#include <utility> #include "platform/platform.hpp" #include <boost/optional.hpp> #include "3party/jansson/myjansson.hpp" -namespace -{ -using KeyValue = std::pair<uint64_t, base::Json>; -using IndexReader = ReaderPtr<Reader>; - -bool DefaultPred(KeyValue const &) { return true; } - -bool ParseKey(std::string const & line, int64_t & key) -{ - auto const pos = line.find(" "); - if (pos == std::string::npos) - { - LOG(LWARNING, ("Cannot find separator.")); - return false; - } - - if (!strings::to_int64(line.substr(0, pos), key)) - { - LOG(LWARNING, ("Cannot parse id.")); - return false; - } - - return true; -} - -bool ParseKeyValueLine(std::string const & line, KeyValue & res) -{ - auto const pos = line.find(" "); - if (pos == std::string::npos) - { - LOG(LWARNING, ("Cannot find separator.")); - return false; - } - - int64_t id; - if (!strings::to_int64(line.substr(0, pos), id)) - { - LOG(LWARNING, ("Cannot parse id.")); - return false; - } - - base::Json json; - try - { - json = base::Json(line.substr(pos + 1)); - if (!json.get()) - return false; - } - catch (base::Json::Exception const &) - { - LOG(LWARNING, ("Cannot create base::Json.")); - return false; - } - - res = std::make_pair(static_cast<uint64_t>(id), json); - return true; -} - -// An interface for reading key-value storage. -class KeyValueInterface +namespace generator { -public: - virtual ~KeyValueInterface() = default; - - virtual boost::optional<base::Json> Find(uint64_t key) const = 0; - virtual size_t Size() const = 0; -}; - -// An implementation for reading key-value storage with loading and searching in memory. -class KeyValueMem : public KeyValueInterface +namespace geo_objects { -public: - KeyValueMem(std::istream & stream, std::function<bool(KeyValue const &)> pred = DefaultPred) - { - std::string line; - KeyValue kv; - while (std::getline(stream, line)) - { - if (!ParseKeyValueLine(line, kv) || !pred(kv)) - continue; - - m_map.insert(kv); - } - - } - - // KeyValueInterface overrides: - boost::optional<base::Json> Find(uint64_t key) const override - { - boost::optional<base::Json> result; - auto const it = m_map.find(key); - if (it != std::end(m_map)) - result = it->second; - - return result; - } - - size_t Size() const override { return m_map.size(); } - -private: - std::unordered_map<uint64_t, base::Json> m_map; -}; - -// An implementation for reading key-value storage with loading and searching in disk. -class KeyValueMap : public KeyValueInterface +namespace { -public: - KeyValueMap(std::istream & stream) : m_stream(stream) - { - std::string line; - std::istream::pos_type pos = 0; - KeyValue kv; - while (std::getline(m_stream, line)) - { - int64_t key; - if (!ParseKey(line, key)) - continue; - - m_map.emplace(key, pos); - pos = m_stream.tellg(); - } - - m_stream.clear(); - } - - // KeyValueInterface overrides: - boost::optional<base::Json> Find(uint64_t key) const override - { - boost::optional<base::Json> result; - auto const it = m_map.find(key); - if (it == std::end(m_map)) - return result; - - m_stream.seekg(it->second); - std::string line; - if (!std::getline(m_stream, line)) - { - LOG(LERROR, ("Cannot read line.")); - return result; - } - - KeyValue kv; - if (ParseKeyValueLine(line, kv)) - result = kv.second; - - return result; - } - - size_t Size() const override { return m_map.size(); } - -private: - std::istream & m_stream; - std::unordered_map<uint64_t, std::istream::pos_type> m_map; -}; +using IndexReader = ReaderPtr<Reader>; bool IsBuilding(FeatureBuilder1 const & fb) { @@ -230,7 +83,7 @@ int GetRankFromValue(base::Json json) } boost::optional<KeyValue> GetDeepestRegion(std::vector<base::GeoObjectId> const & ids, - KeyValueInterface const & regionKv) + KeyValueStorage const & regionKv) { boost::optional<KeyValue> deepest; int deepestRank = 0; @@ -310,7 +163,7 @@ base::Json AddAddress(FeatureBuilder1 const & fb, KeyValue const & regionKeyValu boost::optional<KeyValue> FindRegion(FeatureBuilder1 const & fb, indexer::RegionsIndex<IndexReader> const & regionIndex, - KeyValueInterface const & regionKv) + KeyValueStorage const & regionKv) { auto const ids = SearchObjectsInIndex(fb, regionIndex); return GetDeepestRegion(ids, regionKv); @@ -327,7 +180,7 @@ MakeGeoObjectValueWithAddress(FeatureBuilder1 const & fb, KeyValue const & keyVa boost::optional<base::Json> FindHousePoi(FeatureBuilder1 const & fb, indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex, - KeyValueInterface const & geoObjectsKv) + KeyValueStorage const & geoObjectsKv) { auto const ids = SearchObjectsInIndex(fb, geoObjectsIndex); for (auto const & id : ids) @@ -379,7 +232,7 @@ MakeTempGeoObjectsIndex(std::string const & pathToGeoObjectsTmpMwm) } void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & regionIndex, - KeyValueInterface const & regionKv, + KeyValueStorage const & regionKv, std::string const & pathInGeoObjectsTmpMwm, std::ostream & streamGeoObjectsKv, bool) { @@ -404,7 +257,7 @@ void BuildGeoObjectsWithAddresses(indexer::RegionsIndex<IndexReader> const & reg void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const & geoObjectsIndex, std::string const & pathInGeoObjectsTmpMwm, - KeyValueInterface const & geoObjectsKv, + KeyValueStorage const & geoObjectsKv, std::ostream & streamGeoObjectsKv, std::ostream & streamIdsWithoutAddress, bool) { @@ -432,10 +285,6 @@ void BuildGeoObjectsWithoutAddresses(indexer::GeoObjectsIndex<IndexReader> const } } // namespace -namespace generator -{ -namespace geo_objects -{ bool GenerateGeoObjects(std::string const & pathInRegionsIndx, std::string const & pathInRegionsKv, std::string const & pathInGeoObjectsTmpMwm, @@ -454,19 +303,16 @@ bool GenerateGeoObjects(std::string const & pathInRegionsIndx, indexer::ReadIndex<indexer::RegionsIndexBox<IndexReader>, MmapReader>(pathInRegionsIndx); // Regions key-value storage is small (~150 Mb). We will load everything into memory. std::fstream streamRegionKv(pathInRegionsKv); - KeyValueMem const regionsKv(streamRegionKv); + KeyValueStorage const regionsKv(streamRegionKv); LOG(LINFO, ("Size of regions key-value storage:", regionsKv.Size())); std::ofstream streamIdsWithoutAddress(pathOutIdsWithoutAddress); std::ofstream streamGeoObjectsKv(pathOutGeoObjectsKv); BuildGeoObjectsWithAddresses(regionIndex, regionsKv, pathInGeoObjectsTmpMwm, streamGeoObjectsKv, verbose); LOG(LINFO, ("Geo objects with addresses were built.")); - // Regions key-value storage is big (~80 Gb). We will not load the key value into memory. - // This can be slow. - // todo(maksimandrianov1): Investigate the issue of performance and if necessary improve. std::ifstream tempStream(pathOutGeoObjectsKv); auto const pred = [](KeyValue const & kv) { return HouseHasAddress(kv.second); }; - KeyValueMem const geoObjectsKv(tempStream, pred); + KeyValueStorage const geoObjectsKv(tempStream, pred); LOG(LINFO, ("Size of geo objects key-value storage:", geoObjectsKv.Size())); auto const geoObjectIndex = geoObjectIndexFuture.get(); LOG(LINFO, ("Index was built.")); diff --git a/generator/geo_objects/key_value_storage.cpp b/generator/geo_objects/key_value_storage.cpp new file mode 100644 index 0000000000..98efd0284c --- /dev/null +++ b/generator/geo_objects/key_value_storage.cpp @@ -0,0 +1,73 @@ +#include "generator/geo_objects/key_value_storage.hpp" + +#include "base/logging.hpp" + +namespace generator +{ +namespace geo_objects +{ +KeyValueStorage::KeyValueStorage(std::istream & stream, std::function<bool(KeyValue const &)> const & pred) +{ + std::string line; + std::streamoff lineNumber = 0; + while (std::getline(stream, line)) + { + ++lineNumber; + + KeyValue kv; + if (!ParseKeyValueLine(line, kv, lineNumber) || !pred(kv)) + continue; + + m_values.insert(kv); + } +} + +// static +bool KeyValueStorage::ParseKeyValueLine(std::string const & line, KeyValue & res, std::streamoff lineNumber) +{ + auto const pos = line.find(" "); + if (pos == std::string::npos) + { + LOG(LWARNING, ("Cannot find separator in line", lineNumber)); + return false; + } + + int64_t id; + if (!strings::to_int64(line.substr(0, pos), id)) + { + LOG(LWARNING, ("Cannot parse id", line.substr(0, pos) , "in line", lineNumber)); + return false; + } + + base::Json json; + try + { + json = base::Json(line.c_str() + pos + 1); + if (!json.get()) + return false; + } + catch (base::Json::Exception const & err) + { + LOG(LWARNING, ("Cannot create base::Json in line", lineNumber, ":", err.Msg())); + return false; + } + + res = std::make_pair(static_cast<uint64_t>(id), json); + return true; +} + +boost::optional<base::Json> KeyValueStorage::Find(uint64_t key) const +{ + auto const it = m_values.find(key); + if (it == std::end(m_values)) + return {}; + + return it->second; +} + +size_t KeyValueStorage::Size() const +{ + return m_values.size(); +} +} // namespace geo_objects +} // namespace generator diff --git a/generator/geo_objects/key_value_storage.hpp b/generator/geo_objects/key_value_storage.hpp new file mode 100644 index 0000000000..2a3d771b52 --- /dev/null +++ b/generator/geo_objects/key_value_storage.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include <cstdint> +#include <fstream> +#include <functional> +#include <string> +#include <unordered_map> +#include <utility> + +#include <boost/optional.hpp> + +#include "3party/jansson/myjansson.hpp" + +namespace generator +{ +namespace geo_objects +{ +using KeyValue = std::pair<uint64_t, base::Json>; + +class KeyValueStorage +{ +public: + explicit KeyValueStorage(std::istream & stream, + std::function<bool(KeyValue const &)> const & pred = DefaultPred); + + boost::optional<base::Json> Find(uint64_t key) const; + size_t Size() const; + +private: + static bool DefaultPred(KeyValue const &) { return true; } + static bool ParseKeyValueLine(std::string const & line, KeyValue & res, std::streamoff lineNumber); + + std::unordered_map<uint64_t, base::Json> m_values; +}; +} // namespace geo_objects +} // namespace generator |