Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Maxim Pimenov <m@maps.me>, 2018-07-18 23:40:19 +0300
committer: Tatiana Yan <tatiana.kondakova@gmail.com>, 2018-07-19 14:01:59 +0300
commit: 1df068457991f32bf62a05d31adc74eabe276f0c (patch)
tree: 5de610420ba28439f115c96661bec7960c98a5a5 /geocoder
parent: 46d5cd803a7896260387dc0201ef0240888e0ca9 (diff)
[geocoder] Reading the hierarchy from json.
Diffstat (limited to 'geocoder')
-rw-r--r-- geocoder/geocoder.cpp | 10
-rw-r--r-- geocoder/geocoder.hpp | 6
-rw-r--r-- geocoder/geocoder_tests/CMakeLists.txt | 3
-rw-r--r-- geocoder/geocoder_tests/geocoder_tests.cpp | 36
-rw-r--r-- geocoder/hierarchy.cpp | 126
-rw-r--r-- geocoder/hierarchy.hpp | 60
6 files changed, 237 insertions, 4 deletions
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp
index 4fdb8483ec..02ee0265b5 100644
--- a/geocoder/geocoder.cpp
+++ b/geocoder/geocoder.cpp
@@ -1,14 +1,20 @@
#include "geocoder/geocoder.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "base/assert.hpp"
#include "base/osm_id.hpp"
+#include <algorithm>
+#include <memory>
+
using namespace std;
namespace geocoder
{
Geocoder::Geocoder(string pathToJsonHierarchy) : m_hierarchy(pathToJsonHierarchy) {}
-void Geocoder::ProcessQuery(string const & query, vector<Result> & results) const
+void Geocoder::ProcessQuery(string const & query, vector<Result> & results)
{
// Only here for demonstration purposes and will be removed shortly.
results.clear();
@@ -23,4 +29,6 @@ void Geocoder::ProcessQuery(string const & query, vector<Result> & results) cons
results.emplace_back(osm::Id(0x40000000F26943B9ULL), 0.1 /* certainty */);
}
}
+
+Hierarchy const & Geocoder::GetHierarchy() const { return m_hierarchy; }
} // namespace geocoder
diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp
index eb1614900f..3614d67fc8 100644
--- a/geocoder/geocoder.hpp
+++ b/geocoder/geocoder.hpp
@@ -3,6 +3,8 @@
#include "geocoder/hierarchy.hpp"
#include "geocoder/result.hpp"
+#include "base/string_utils.hpp"
+
#include <string>
#include <vector>
@@ -30,7 +32,9 @@ class Geocoder
public:
explicit Geocoder(std::string pathToJsonHierarchy);
- void ProcessQuery(std::string const & query, std::vector<Result> & results) const;
+ void ProcessQuery(std::string const & query, std::vector<Result> & results);
+
+ Hierarchy const & GetHierarchy() const;
private:
Hierarchy m_hierarchy;
diff --git a/geocoder/geocoder_tests/CMakeLists.txt b/geocoder/geocoder_tests/CMakeLists.txt
index 7091fd3de6..cf9478c276 100644
--- a/geocoder/geocoder_tests/CMakeLists.txt
+++ b/geocoder/geocoder_tests/CMakeLists.txt
@@ -9,11 +9,14 @@ omim_add_test(${PROJECT_NAME} ${SRC})
omim_link_libraries(
${PROJECT_NAME}
+ platform_tests_support
geocoder
+ indexer
platform
coding
base
stats_client
+ jansson
${LIBZ}
)
diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp
index 934b18ae38..e2913e0983 100644
--- a/geocoder/geocoder_tests/geocoder_tests.cpp
+++ b/geocoder/geocoder_tests/geocoder_tests.cpp
@@ -2,21 +2,37 @@
#include "geocoder/geocoder.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "platform/platform_tests_support/scoped_file.hpp"
+
#include "base/math.hpp"
#include <string>
#include <vector>
+using namespace platform::tests_support;
using namespace std;
namespace
{
double const kCertaintyEps = 1e-6;
+
+string const kRegionsData = R"#(
+-4611686018421500235 {"type": "Feature", "geometry": {"type": "Point", "coordinates": [-78.9263054493181, 22.08185765]}, "properties": {"name": "Florencia", "rank": 6, "address": {"subregion": "Florencia", "region": "Ciego de Ávila", "country": "Cuba"}}}
+)#";
+
+geocoder::Tokens Split(string const & s)
+{
+ geocoder::Tokens result;
+ search::NormalizeAndTokenizeString(s, result);
+ return result;
+}
} // namespace
namespace geocoder
{
-void TestGeocoder(Geocoder const & geocoder, string const & query, vector<Result> const & expected)
+void TestGeocoder(Geocoder & geocoder, string const & query, vector<Result> const & expected)
{
vector<Result> actual;
geocoder.ProcessQuery(query, actual);
@@ -37,4 +53,22 @@ UNIT_TEST(Geocoder_Smoke)
TestGeocoder(geocoder, "b",
{{osm::Id(0x8000000014527125ULL), 0.8}, {osm::Id(0x40000000F26943B9ULL), 0.1}});
}
+
+UNIT_TEST(Geocoder_Hierarchy)
+{
+ ScopedFile const regionsJsonFile("regions.jsonl", kRegionsData);
+ Geocoder geocoder(regionsJsonFile.GetFullPath());
+
+ vector<shared_ptr<Hierarchy::Entry>> entries;
+ geocoder.GetHierarchy().GetEntries({strings::MakeUniString("florencia")}, entries);
+
+ TEST_EQUAL(entries.size(), 1, ());
+ TEST(entries[0] != nullptr, ());
+ TEST_EQUAL(entries[0]->m_address[static_cast<size_t>(Hierarchy::EntryType::Country)],
+ Split("cuba"), ());
+ TEST_EQUAL(entries[0]->m_address[static_cast<size_t>(Hierarchy::EntryType::Region)],
+ Split("ciego de avila"), ());
+ TEST_EQUAL(entries[0]->m_address[static_cast<size_t>(Hierarchy::EntryType::Subregion)],
+ Split("florencia"), ());
+}
} // namespace geocoder
diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp
index 8138cfa090..63b812eb5b 100644
--- a/geocoder/hierarchy.cpp
+++ b/geocoder/hierarchy.cpp
@@ -1,13 +1,137 @@
#include "geocoder/hierarchy.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "base/assert.hpp"
+#include "base/exception.hpp"
+#include "base/logging.hpp"
#include "base/macros.hpp"
+#include <fstream>
+#include <map>
+
using namespace std;
+namespace
+{
+using EntryType = geocoder::Hierarchy::EntryType;
+
+map<string, EntryType> const kKnownLevels = {
+ {"country", EntryType::Country},
+ {"region", EntryType::Region},
+ {"subregion", EntryType::Subregion},
+ {"locality", EntryType::Locality},
+ {"sublocality", EntryType::Sublocality},
+ {"suburb", EntryType::Suburb},
+ {"building", EntryType::Building},
+};
+} // namespace
+
namespace geocoder
{
+// Hierarchy::Entry --------------------------------------------------------------------------------
+bool Hierarchy::Entry::DeserializeFromJSON(string const & jsonStr)
+{
+ try
+ {
+ my::Json root(jsonStr.c_str());
+ DeserializeFromJSONImpl(root.get());
+ return true;
+ }
+ catch (my::Json::Exception const & e)
+ {
+ LOG(LWARNING, ("Can't parse entry:", e.Msg(), jsonStr));
+ }
+ return false;
+}
+
+// todo(@m) Factor out to geojson.hpp? Add geojson to myjansson?
+void Hierarchy::Entry::DeserializeFromJSONImpl(json_t * root)
+{
+ if (!json_is_object(root))
+ MYTHROW(my::Json::Exception, ("Not a json object."));
+
+ json_t * const properties = my::GetJSONObligatoryField(root, "properties");
+
+ FromJSONObject(properties, "name", m_name);
+ m_nameTokens.clear();
+ search::NormalizeAndTokenizeString(m_name, m_nameTokens);
+
+ json_t * const address = my::GetJSONObligatoryField(properties, "address");
+
+ for (auto const & e : kKnownLevels)
+ {
+ string const & levelKey = e.first;
+ string levelValue;
+ FromJSONObjectOptionalField(address, levelKey, levelValue);
+ if (levelValue.empty())
+ continue;
+
+ EntryType const type = e.second;
+ CHECK(m_address[static_cast<size_t>(type)].empty(), ());
+ search::NormalizeAndTokenizeString(levelValue, m_address[static_cast<size_t>(type)]);
+ }
+
+ for (size_t i = 0; i < static_cast<size_t>(Hierarchy::EntryType::Count); ++i)
+ {
+ if (!m_address[i].empty())
+ m_type = static_cast<Hierarchy::EntryType>(i);
+ }
+}
+
+// Hierarchy ---------------------------------------------------------------------------------------
Hierarchy::Hierarchy(string const & pathToJsonHierarchy)
{
- UNUSED_VALUE(pathToJsonHierarchy);
+ fstream fs(pathToJsonHierarchy);
+ string line;
+
+ while (getline(fs, line))
+ {
+ if (line.empty())
+ continue;
+
+ auto i = line.find(' ');
+ CHECK(i != string::npos, ());
+ int64_t encodedId;
+ CHECK(strings::to_any(line.substr(0, i), encodedId), ());
+ line = line.substr(i + 1);
+
+ Entry entry;
+ // todo(@m) We should really write uints as uints.
+ entry.m_osmId = osm::Id(static_cast<uint64_t>(encodedId));
+
+ CHECK(entry.DeserializeFromJSON(line), (line));
+ m_entries[entry.m_nameTokens].emplace_back(entry);
+ }
+}
+
+void Hierarchy::GetEntries(vector<strings::UniString> const & tokens,
+ vector<shared_ptr<Entry>> & entries) const
+{
+ entries.clear();
+
+ auto it = m_entries.find(tokens);
+ if (it == m_entries.end())
+ return;
+
+ for (auto const & entry : it->second)
+ entries.emplace_back(make_shared<Entry>(entry));
+}
+
+// Functions ---------------------------------------------------------------------------------------
+string DebugPrint(Hierarchy::EntryType const & type)
+{
+ switch (type)
+ {
+ case Hierarchy::EntryType::Country: return "country"; break;
+ case Hierarchy::EntryType::Region: return "region"; break;
+ case Hierarchy::EntryType::Subregion: return "subregion"; break;
+ case Hierarchy::EntryType::Locality: return "locality"; break;
+ case Hierarchy::EntryType::Sublocality: return "sublocality"; break;
+ case Hierarchy::EntryType::Suburb: return "suburb"; break;
+ case Hierarchy::EntryType::Building: return "building"; break;
+ case Hierarchy::EntryType::Count: return "count"; break;
+ }
+ CHECK_SWITCH();
}
} // namespace geocoder
diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp
index 836998bead..0454f8e5b4 100644
--- a/geocoder/hierarchy.hpp
+++ b/geocoder/hierarchy.hpp
@@ -1,12 +1,72 @@
#pragma once
+#include "base/osm_id.hpp"
+#include "base/string_utils.hpp"
+
+#include <array>
+#include <cstddef>
+#include <memory>
#include <string>
+#include <utility>
+#include <vector>
+
+#include "3party/jansson/myjansson.hpp"
namespace geocoder
{
+using Tokens = std::vector<strings::UniString>;
+
class Hierarchy
{
public:
+ enum class EntryType
+ {
+ // It is important that the types are ordered from
+ // the more general to the more specific.
+ Country,
+ Region,
+ Subregion,
+ Locality,
+ Sublocality,
+ Suburb,
+ Building,
+
+ Count
+ };
+
+ // A single entry in the hierarchy directed acyclic graph.
+ // Currently, this is more or less the "properties"-"address"
+ // part of the geojson entry.
+ struct Entry
+ {
+ bool DeserializeFromJSON(std::string const & jsonStr);
+
+ void DeserializeFromJSONImpl(json_t * root);
+
+ osm::Id m_osmId = osm::Id(osm::Id::kInvalid);
+ std::string m_name;
+ std::vector<strings::UniString> m_nameTokens;
+
+ EntryType m_type = EntryType::Count;
+
+ // The address fields of this entry, one per EntryType.
+ std::array<Tokens, static_cast<size_t>(EntryType::Count) + 1> m_address;
+ };
+
explicit Hierarchy(std::string const & pathToJsonHierarchy);
+
+ // Fills |entries| with pointers to copies of the entries whose names exactly match |tokens|
+ // (the order of the tokens matters). Any previous contents of |entries| are discarded.
+ //
+ // todo This method (and the whole class, in fact) is in the
+ // prototype stage and may be too slow. Proper indexing should
+ // be implemented to perform this type of query.
+ void GetEntries(std::vector<strings::UniString> const & tokens,
+ std::vector<std::shared_ptr<Entry>> & entries) const;
+
+private:
+ std::map<Tokens, std::vector<Entry>> m_entries;
};
+
+std::string DebugPrint(Hierarchy::EntryType const & type);
} // namespace geocoder