Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Maxim Pimenov <m@maps.me> 2018-11-27 20:23:53 +0300
committer Sergey Yershov <syershov@maps.me> 2018-11-28 13:41:58 +0300
commit 335c44894cb9acfa5f98cf3792edfcca2f04e559 (patch)
tree 921e8deeb550f524a6e09e71f34712a352c8e1a9 /geocoder
parent 47767b1f181d076255c2b627cda21de32f3ff0cc (diff)
[geocoder] UTF-8 everywhere.
Diffstat (limited to 'geocoder')
-rw-r--r-- geocoder/geocoder.cpp 8
-rw-r--r-- geocoder/geocoder.hpp 6
-rw-r--r-- geocoder/geocoder_tests/geocoder_tests.cpp 4
-rw-r--r-- geocoder/hierarchy.cpp 18
-rw-r--r-- geocoder/hierarchy.hpp 8
-rw-r--r-- geocoder/types.hpp 2
6 files changed, 25 insertions, 21 deletions
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp
index a9ba123459..64c0cd41e3 100644
--- a/geocoder/geocoder.cpp
+++ b/geocoder/geocoder.cpp
@@ -91,7 +91,7 @@ bool HasParent(vector<geocoder::Geocoder::Layer> const & layers,
strings::UniString MakeHouseNumber(geocoder::Tokens const & tokens)
{
- return strings::JoinStrings(tokens, strings::MakeUniString(""));
+ return strings::MakeUniString(strings::JoinStrings(tokens, " "));
}
} // namespace
@@ -100,7 +100,7 @@ namespace geocoder
// Geocoder::Context -------------------------------------------------------------------------------
Geocoder::Context::Context(string const & query) : m_beam(kMaxResults)
{
- search::NormalizeAndTokenizeString(query, m_tokens);
+ search::NormalizeAndTokenizeAsUtf8(query, m_tokens);
m_tokenTypes.assign(m_tokens.size(), Type::Count);
m_numUsedTokens = 0;
}
@@ -115,7 +115,7 @@ size_t Geocoder::Context::GetNumUsedTokens() const
return m_numUsedTokens;
}
-strings::UniString const & Geocoder::Context::GetToken(size_t id) const
+string const & Geocoder::Context::GetToken(size_t id) const
{
CHECK_LESS(id, m_tokens.size(), ());
return m_tokens[id];
@@ -210,7 +210,7 @@ void Geocoder::Go(Context & ctx, Type type) const
if (type == Type::Count)
return;
- vector<strings::UniString> subquery;
+ Tokens subquery;
for (size_t i = 0; i < ctx.GetNumTokens(); ++i)
{
subquery.clear();
diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp
index 077b4e7402..e9c07b8ab9 100644
--- a/geocoder/geocoder.hpp
+++ b/geocoder/geocoder.hpp
@@ -8,6 +8,7 @@
#include "base/geo_object_id.hpp"
#include "base/string_utils.hpp"
+#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>
@@ -55,7 +56,7 @@ public:
size_t GetNumTokens() const;
size_t GetNumUsedTokens() const;
- strings::UniString const & GetToken(size_t id) const;
+ std::string const & GetToken(size_t id) const;
void MarkToken(size_t id, Type type);
@@ -74,8 +75,7 @@ public:
std::vector<Layer> const & GetLayers() const;
private:
- // todo(@m) std::string?
- std::vector<strings::UniString> m_tokens;
+ Tokens m_tokens;
std::vector<Type> m_tokenTypes;
size_t m_numUsedTokens = 0;
diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp
index 52730b8d6e..43430b9da7 100644
--- a/geocoder/geocoder_tests/geocoder_tests.cpp
+++ b/geocoder/geocoder_tests/geocoder_tests.cpp
@@ -30,7 +30,7 @@ string const kRegionsData = R"#(
geocoder::Tokens Split(string const & s)
{
geocoder::Tokens result;
- search::NormalizeAndTokenizeString(s, result);
+ search::NormalizeAndTokenizeAsUtf8(s, result);
return result;
}
} // namespace
@@ -71,7 +71,7 @@ UNIT_TEST(Geocoder_Hierarchy)
ScopedFile const regionsJsonFile("regions.jsonl", kRegionsData);
Geocoder geocoder(regionsJsonFile.GetFullPath());
- auto entries = geocoder.GetHierarchy().GetEntries({strings::MakeUniString("florencia")});
+ auto entries = geocoder.GetHierarchy().GetEntries({("florencia")});
TEST(entries, ());
TEST_EQUAL(entries->size(), 1, ());
diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp
index a68d795283..11feca76c0 100644
--- a/geocoder/hierarchy.cpp
+++ b/geocoder/hierarchy.cpp
@@ -19,6 +19,11 @@ namespace
{
// Information will be logged for every |kLogBatch| entries.
size_t const kLogBatch = 100000;
+
+string MakeIndexKey(geocoder::Tokens const & tokens)
+{
+ return strings::JoinStrings(tokens, " ");
+}
} // namespace
namespace geocoder
@@ -67,7 +72,8 @@ void Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const
LOG(LDEBUG, ("Duplicate address field", type, "when parsing", jsonStr));
hasDuplicateAddress = true;
}
- search::NormalizeAndTokenizeString(levelValue, m_address[i]);
+
+ search::NormalizeAndTokenizeAsUtf8(levelValue, m_address[i]);
if (!m_address[i].empty())
m_type = static_cast<Type>(i);
@@ -75,7 +81,7 @@ void Hierarchy::Entry::DeserializeFromJSONImpl(json_t * const root, string const
m_nameTokens.clear();
FromJSONObjectOptionalField(properties, "name", m_name);
- search::NormalizeAndTokenizeString(m_name, m_nameTokens);
+ search::NormalizeAndTokenizeAsUtf8(m_name, m_nameTokens);
if (m_name.empty())
++stats.m_emptyNames;
@@ -140,6 +146,7 @@ Hierarchy::Hierarchy(string const & pathToJsonHierarchy)
++stats.m_numLoaded;
if (stats.m_numLoaded % kLogBatch == 0)
LOG(LINFO, ("Read", stats.m_numLoaded, "entries"));
+
m_entriesStorage.emplace_back(move(entry));
}
@@ -160,10 +167,9 @@ Hierarchy::Hierarchy(string const & pathToJsonHierarchy)
LOG(LINFO, ("(End of stats.)"));
}
-vector<Hierarchy::Entry *> const * const Hierarchy::GetEntries(
- vector<strings::UniString> const & tokens) const
+vector<Hierarchy::Entry *> const * const Hierarchy::GetEntries(Tokens const & tokens) const
{
- auto it = m_entriesByTokens.find(tokens);
+ auto it = m_entriesByTokens.find(MakeIndexKey(tokens));
if (it == m_entriesByTokens.end())
return {};
@@ -181,7 +187,7 @@ void Hierarchy::IndexEntries()
continue;
size_t const t = static_cast<size_t>(e.m_type);
- m_entriesByTokens[e.m_address[t]].emplace_back(&e);
+ m_entriesByTokens[MakeIndexKey(e.m_address[t])].emplace_back(&e);
// Index every token but do not index prefixes.
// for (auto const & tok : entry.m_address[t])
diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp
index d5bad91d6d..5ae3b603b3 100644
--- a/geocoder/hierarchy.hpp
+++ b/geocoder/hierarchy.hpp
@@ -3,13 +3,12 @@
#include "geocoder/types.hpp"
#include "base/geo_object_id.hpp"
-#include "base/string_utils.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
-#include <map>
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -85,8 +84,7 @@ public:
// todo This method (and the whole class, in fact) is in the
// prototype stage and may be too slow. Proper indexing should
// be implemented to perform this type of queries.
- std::vector<Entry *> const * const GetEntries(
- std::vector<strings::UniString> const & tokens) const;
+ std::vector<Entry *> const * const GetEntries(Tokens const & tokens) const;
private:
// Adds address information of entries to the index.
@@ -95,7 +93,7 @@ private:
// Fills |m_buildingsOnStreet| field for all street entries.
void IndexHouses();
- std::map<Tokens, std::vector<Entry *>> m_entriesByTokens;
+ std::unordered_map<std::string, std::vector<Entry *>> m_entriesByTokens;
std::vector<Entry> m_entriesStorage;
};
diff --git a/geocoder/types.hpp b/geocoder/types.hpp
index 41977c2643..8b2f260e39 100644
--- a/geocoder/types.hpp
+++ b/geocoder/types.hpp
@@ -7,7 +7,7 @@
namespace geocoder
{
-using Tokens = std::vector<strings::UniString>;
+using Tokens = std::vector<std::string>;
enum class Type
{