diff options
author | Maxim Pimenov <m@maps.me> | 2018-11-27 20:23:53 +0300 |
---|---|---|
committer | Sergey Yershov <syershov@maps.me> | 2018-11-28 13:41:58 +0300 |
commit | 335c44894cb9acfa5f98cf3792edfcca2f04e559 (patch) | |
tree | 921e8deeb550f524a6e09e71f34712a352c8e1a9 /geocoder | |
parent | 47767b1f181d076255c2b627cda21de32f3ff0cc (diff) |
[geocoder] UTF-8 everywhere.
Diffstat (limited to 'geocoder')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | geocoder/geocoder.cpp | 8 |
| -rw-r--r-- | geocoder/geocoder.hpp | 6 |
| -rw-r--r-- | geocoder/geocoder_tests/geocoder_tests.cpp | 4 |
| -rw-r--r-- | geocoder/hierarchy.cpp | 18 |
| -rw-r--r-- | geocoder/hierarchy.hpp | 8 |
| -rw-r--r-- | geocoder/types.hpp | 2 |
6 files changed, 25 insertions, 21 deletions
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp index a9ba123459..64c0cd41e3 100644 --- a/geocoder/geocoder.cpp +++ b/geocoder/geocoder.cpp @@ -91,7 +91,7 @@ bool HasParent(vector<geocoder::Geocoder::Layer> const & layers, strings::UniString MakeHouseNumber(geocoder::Tokens const & tokens) { - return strings::JoinStrings(tokens, strings::MakeUniString("")); + return strings::MakeUniString(strings::JoinStrings(tokens, " ")); } } // namespace @@ -100,7 +100,7 @@ namespace geocoder // Geocoder::Context ------------------------------------------------------------------------------- Geocoder::Context::Context(string const & query) : m_beam(kMaxResults) { - search::NormalizeAndTokenizeString(query, m_tokens); + search::NormalizeAndTokenizeAsUtf8(query, m_tokens); m_tokenTypes.assign(m_tokens.size(), Type::Count); m_numUsedTokens = 0; } @@ -115,7 +115,7 @@ size_t Geocoder::Context::GetNumUsedTokens() const return m_numUsedTokens; } -strings::UniString const & Geocoder::Context::GetToken(size_t id) const +string const & Geocoder::Context::GetToken(size_t id) const { CHECK_LESS(id, m_tokens.size(), ()); return m_tokens[id]; @@ -210,7 +210,7 @@ void Geocoder::Go(Context & ctx, Type type) const if (type == Type::Count) return; - vector<strings::UniString> subquery; + Tokens subquery; for (size_t i = 0; i < ctx.GetNumTokens(); ++i) { subquery.clear(); diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp index 077b4e7402..e9c07b8ab9 100644 --- a/geocoder/geocoder.hpp +++ b/geocoder/geocoder.hpp @@ -8,6 +8,7 @@ #include "base/geo_object_id.hpp" #include "base/string_utils.hpp" +#include <cstddef> #include <string> #include <unordered_map> #include <utility> @@ -55,7 +56,7 @@ public: size_t GetNumTokens() const; size_t GetNumUsedTokens() const; - strings::UniString const & GetToken(size_t id) const; + std::string const & GetToken(size_t id) const; void MarkToken(size_t id, Type type); @@ -74,8 +75,7 @@ public: std::vector<Layer> const & GetLayers() const; 
private: - // todo(@m) std::string? - std::vector<strings::UniString> m_tokens; + Tokens m_tokens; std::vector<Type> m_tokenTypes; size_t m_numUsedTokens = 0; diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp index 52730b8d6e..43430b9da7 100644 --- a/geocoder/geocoder_tests/geocoder_tests.cpp +++ b/geocoder/geocoder_tests/geocoder_tests.cpp @@ -30,7 +30,7 @@ string const kRegionsData = R"#( geocoder::Tokens Split(string const & s) { geocoder::Tokens result; - search::NormalizeAndTokenizeString(s, result); + search::NormalizeAndTokenizeAsUtf8(s, result); return result; } } // namespace @@ -71,7 +71,7 @@ UNIT_TEST(Geocoder_Hierarchy) ScopedFile const regionsJsonFile("regions.jsonl", kRegionsData); Geocoder geocoder(regionsJsonFile.GetFullPath()); - auto entries = geocoder.GetHierarchy().GetEntries({strings::MakeUniString("florencia")}); + auto entries = geocoder.GetHierarchy().GetEntries({("florencia")}); TEST(entries, ()); TEST_EQUAL(entries->size(), 1, ()); diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp index a68d795283..11feca76c0 100644 --- a/geocoder/hierarchy.cpp +++ b/geocoder/hierarchy.cpp @@ -19,6 +19,11 @@ namespace { // Information will be logged for every |kLogBatch| entries. 
size_t const kLogBatch = 100000; + +string MakeIndexKey(geocoder::Tokens const & tokens) +{ + return strings::JoinStrings(tokens, " "); +} } // namespace namespace geocoder @@ -67,7 +72,8 @@ void Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const LOG(LDEBUG, ("Duplicate address field", type, "when parsing", jsonStr)); hasDuplicateAddress = true; } - search::NormalizeAndTokenizeString(levelValue, m_address[i]); + + search::NormalizeAndTokenizeAsUtf8(levelValue, m_address[i]); if (!m_address[i].empty()) m_type = static_cast<Type>(i); @@ -75,7 +81,7 @@ void Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const m_nameTokens.clear(); FromJSONObjectOptionalField(properties, "name", m_name); - search::NormalizeAndTokenizeString(m_name, m_nameTokens); + search::NormalizeAndTokenizeAsUtf8(m_name, m_nameTokens); if (m_name.empty()) ++stats.m_emptyNames; @@ -140,6 +146,7 @@ Hierarchy::Hierarchy(string const & pathToJsonHierarchy) ++stats.m_numLoaded; if (stats.m_numLoaded % kLogBatch == 0) LOG(LINFO, ("Read", stats.m_numLoaded, "entries")); + m_entriesStorage.emplace_back(move(entry)); } @@ -160,10 +167,9 @@ Hierarchy::Hierarchy(string const & pathToJsonHierarchy) LOG(LINFO, ("(End of stats.)")); } -vector<Hierarchy::Entry *> const * const Hierarchy::GetEntries( - vector<strings::UniString> const & tokens) const +vector<Hierarchy::Entry *> const * const Hierarchy::GetEntries(Tokens const & tokens) const { - auto it = m_entriesByTokens.find(tokens); + auto it = m_entriesByTokens.find(MakeIndexKey(tokens)); if (it == m_entriesByTokens.end()) return {}; @@ -181,7 +187,7 @@ void Hierarchy::IndexEntries() continue; size_t const t = static_cast<size_t>(e.m_type); - m_entriesByTokens[e.m_address[t]].emplace_back(&e); + m_entriesByTokens[MakeIndexKey(e.m_address[t])].emplace_back(&e); // Index every token but do not index prefixes. 
// for (auto const & tok : entry.m_address[t]) diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp index d5bad91d6d..5ae3b603b3 100644 --- a/geocoder/hierarchy.hpp +++ b/geocoder/hierarchy.hpp @@ -3,13 +3,12 @@ #include "geocoder/types.hpp" #include "base/geo_object_id.hpp" -#include "base/string_utils.hpp" #include <array> #include <cstddef> #include <cstdint> -#include <map> #include <string> +#include <unordered_map> #include <utility> #include <vector> @@ -85,8 +84,7 @@ public: // todo This method (and the whole class, in fact) is in the // prototype stage and may be too slow. Proper indexing should // be implemented to perform this type of queries. - std::vector<Entry *> const * const GetEntries( - std::vector<strings::UniString> const & tokens) const; + std::vector<Entry *> const * const GetEntries(Tokens const & tokens) const; private: // Adds address information of entries to the index. @@ -95,7 +93,7 @@ private: // Fills |m_buildingsOnStreet| field for all street entries. void IndexHouses(); - std::map<Tokens, std::vector<Entry *>> m_entriesByTokens; + std::unordered_map<std::string, std::vector<Entry *>> m_entriesByTokens; std::vector<Entry> m_entriesStorage; }; diff --git a/geocoder/types.hpp b/geocoder/types.hpp index 41977c2643..8b2f260e39 100644 --- a/geocoder/types.hpp +++ b/geocoder/types.hpp @@ -7,7 +7,7 @@ namespace geocoder { -using Tokens = std::vector<strings::UniString>; +using Tokens = std::vector<std::string>; enum class Type { |