Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxim Pimenov <m@maps.me>2018-11-28 17:18:24 +0300
committerTatiana Yan <tatiana.kondakova@gmail.com>2018-11-29 18:46:43 +0300
commit6b26fef83a27bc6d5b7b1ae38aac17ad1d75bc98 (patch)
tree1f2336e03531edd4c4fd50520a847a7b1c366803 /geocoder
parent43fb388d7c54d71766c9d9bc99c62a996862666f (diff)
[geocoder] Do not return anything if a house number is present but we could not find it.
Diffstat (limited to 'geocoder')
-rw-r--r--geocoder/CMakeLists.txt1
-rw-r--r--geocoder/beam.cpp32
-rw-r--r--geocoder/beam.hpp31
-rw-r--r--geocoder/geocoder.cpp22
-rw-r--r--geocoder/geocoder.hpp23
-rw-r--r--geocoder/geocoder_tests/geocoder_tests.cpp41
-rw-r--r--geocoder/hierarchy.cpp2
7 files changed, 100 insertions, 52 deletions
diff --git a/geocoder/CMakeLists.txt b/geocoder/CMakeLists.txt
index 01f2cf1a35..d10f7ce489 100644
--- a/geocoder/CMakeLists.txt
+++ b/geocoder/CMakeLists.txt
@@ -4,7 +4,6 @@ include_directories(${OMIM_ROOT}/3party/jansson/src)
set(
SRC
- beam.cpp
beam.hpp
geocoder.cpp
geocoder.hpp
diff --git a/geocoder/beam.cpp b/geocoder/beam.cpp
deleted file mode 100644
index 996f0523cc..0000000000
--- a/geocoder/beam.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "geocoder/beam.hpp"
-
-#include "base/assert.hpp"
-#include "base/macros.hpp"
-
-#include <algorithm>
-
-namespace geocoder
-{
-Beam::Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
-
-void Beam::Add(Key const & key, Value value)
-{
- if (PREDICT_FALSE(m_capacity == 0))
- return;
-
- Entry const e(key, value);
- auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
-
- if (it == m_entries.end())
- {
- if (m_entries.size() < m_capacity)
- m_entries.emplace_back(e);
- return;
- }
-
- if (m_entries.size() == m_capacity)
- m_entries.pop_back();
-
- m_entries.insert(it, e);
-}
-} // namespace geocoder
diff --git a/geocoder/beam.hpp b/geocoder/beam.hpp
index 68534ce125..d0728889f1 100644
--- a/geocoder/beam.hpp
+++ b/geocoder/beam.hpp
@@ -1,6 +1,7 @@
#pragma once
#include "base/geo_object_id.hpp"
+#include "base/macros.hpp"
#include <vector>
@@ -9,27 +10,47 @@ namespace geocoder
// A data structure to perform the beam search with.
// Maintains a list of (Key, Value) pairs sorted in the decreasing
// order of Values.
+template <typename TKey, typename TValue>
class Beam
{
public:
- using Key = base::GeoObjectId;
- using Value = double;
+ using Key = TKey;
+ using Value = TValue;
struct Entry
{
Key m_key;
Value m_value;
- Entry(Key const & key, Value value) : m_key(key), m_value(value) {}
+ Entry(Key const & key, Value const & value) : m_key(key), m_value(value) {}
bool operator<(Entry const & rhs) const { return m_value > rhs.m_value; }
};
- explicit Beam(size_t capacity);
+ explicit Beam(size_t capacity) : m_capacity(capacity) { m_entries.reserve(m_capacity); }
// O(log(n) + n) for |n| entries.
// O(|m_capacity|) in the worst case.
- void Add(Key const & key, Value value);
+ void Add(Key const & key, Value value)
+ {
+ if (PREDICT_FALSE(m_capacity == 0))
+ return;
+
+ Entry const e(key, value);
+ auto it = std::lower_bound(m_entries.begin(), m_entries.end(), e);
+
+ if (it == m_entries.end())
+ {
+ if (m_entries.size() < m_capacity)
+ m_entries.emplace_back(e);
+ return;
+ }
+
+ if (m_entries.size() == m_capacity)
+ m_entries.pop_back();
+
+ m_entries.insert(it, e);
+ }
std::vector<Entry> const & GetEntries() const { return m_entries; }
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp
index 64c0cd41e3..e268eedbd9 100644
--- a/geocoder/geocoder.cpp
+++ b/geocoder/geocoder.cpp
@@ -142,9 +142,9 @@ bool Geocoder::Context::IsTokenUsed(size_t id) const
bool Geocoder::Context::AllTokensUsed() const { return m_numUsedTokens == m_tokens.size(); }
-void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty)
+void Geocoder::Context::AddResult(base::GeoObjectId const & osmId, double certainty, Type type)
{
- m_beam.Add(osmId, certainty);
+ m_beam.Add(BeamKey(osmId, type), certainty);
}
void Geocoder::Context::FillResults(vector<Result> & results) const
@@ -152,13 +152,15 @@ void Geocoder::Context::FillResults(vector<Result> & results) const
results.clear();
results.reserve(m_beam.GetEntries().size());
- set<decltype(m_beam)::Key> seen;
+ set<base::GeoObjectId> seen;
for (auto const & e : m_beam.GetEntries())
{
- if (!seen.insert(e.m_key).second)
+ if (!seen.insert(e.m_key.m_osmId).second)
continue;
- results.emplace_back(e.m_key /* osmId */, e.m_value /* certainty */);
+ if (m_surelyGotHouseNumber && e.m_key.m_type != Type::Building)
+ continue;
+ results.emplace_back(e.m_key.m_osmId, e.m_value /* certainty */);
}
if (!results.empty())
@@ -245,7 +247,7 @@ void Geocoder::Go(Context & ctx, Type type) const
for (auto const * e : curLayer.m_entries)
{
- ctx.AddResult(e->m_osmId, certainty);
+ ctx.AddResult(e->m_osmId, certainty, type);
}
ctx.GetLayers().emplace_back(move(curLayer));
@@ -258,8 +260,7 @@ void Geocoder::Go(Context & ctx, Type type) const
Go(ctx, NextType(type));
}
-void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
- Layer & curLayer) const
+void Geocoder::FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const
{
if (ctx.GetLayers().empty())
return;
@@ -272,6 +273,11 @@ void Geocoder::FillBuildingsLayer(Context const & ctx, Tokens const & subquery,
if (!search::house_numbers::LooksLikeHouseNumber(subqueryHN, false /* isPrefix */))
return;
+ // We've already filled a street layer and now see something that resembles
+ // a house number. While it still can be something else (a zip code, for example)
+ // let's stay on the safer side and set the house number bit.
+ ctx.SetHouseNumberBit();
+
for (auto const & se : layer.m_entries)
{
for (auto const & be : se->m_buildingsOnStreet)
diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp
index e9c07b8ab9..0aaa29cf4d 100644
--- a/geocoder/geocoder.hpp
+++ b/geocoder/geocoder.hpp
@@ -48,6 +48,14 @@ public:
class Context
{
public:
+ struct BeamKey
+ {
+ BeamKey(base::GeoObjectId osmId, Type type) : m_osmId(osmId), m_type(type) {}
+
+ base::GeoObjectId m_osmId;
+ Type m_type;
+ };
+
Context(std::string const & query);
void Clear();
@@ -66,7 +74,7 @@ public:
// Returns true iff all tokens are used.
bool AllTokensUsed() const;
- void AddResult(base::GeoObjectId const & osmId, double certainty);
+ void AddResult(base::GeoObjectId const & osmId, double certainty, Type type);
void FillResults(std::vector<Result> & results) const;
@@ -74,15 +82,24 @@ public:
std::vector<Layer> const & GetLayers() const;
+ void SetHouseNumberBit() { m_surelyGotHouseNumber = true; }
+
private:
Tokens m_tokens;
std::vector<Type> m_tokenTypes;
size_t m_numUsedTokens = 0;
+ // Sticky bit that records a heuristic check whether
+ // the current query contains a house number.
+ // The rationale is that we must only emit buildings in this case
+ // and implement a fallback to a more powerful geocoder if we
+ // could not find a building.
+ bool m_surelyGotHouseNumber = false;
+
// The highest value of certainty for a fixed amount of
// the most relevant retrieved osm ids.
- Beam m_beam;
+ Beam<BeamKey, double> m_beam;
std::vector<Layer> m_layers;
};
@@ -96,7 +113,7 @@ public:
private:
void Go(Context & ctx, Type type) const;
- void FillBuildingsLayer(Context const & ctx, Tokens const & subquery, Layer & curLayer) const;
+ void FillBuildingsLayer(Context & ctx, Tokens const & subquery, Layer & curLayer) const;
void FillRegularLayer(Context const & ctx, Type type, Tokens const & subquery,
Layer & curLayer) const;
diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp
index 43430b9da7..455f9c6279 100644
--- a/geocoder/geocoder_tests/geocoder_tests.cpp
+++ b/geocoder/geocoder_tests/geocoder_tests.cpp
@@ -41,13 +41,14 @@ void TestGeocoder(Geocoder & geocoder, string const & query, vector<Result> && e
{
vector<Result> actual;
geocoder.ProcessQuery(query, actual);
- TEST_EQUAL(actual.size(), expected.size(), (actual, expected));
+ TEST_EQUAL(actual.size(), expected.size(), (query, actual, expected));
sort(actual.begin(), actual.end(), base::LessBy(&Result::m_osmId));
sort(expected.begin(), expected.end(), base::LessBy(&Result::m_osmId));
for (size_t i = 0; i < actual.size(); ++i)
{
- TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0, (actual[i].m_certainty));
- TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, ());
+ TEST(actual[i].m_certainty >= 0.0 && actual[i].m_certainty <= 1.0,
+ (query, actual[i].m_certainty));
+ TEST_EQUAL(actual[i].m_osmId, expected[i].m_osmId, (query));
TEST(base::AlmostEqualAbs(actual[i].m_certainty, expected[i].m_certainty, kCertaintyEps),
(query, actual[i].m_certainty, expected[i].m_certainty));
}
@@ -81,4 +82,38 @@ UNIT_TEST(Geocoder_Hierarchy)
TEST_EQUAL((*entries)[0]->m_address[static_cast<size_t>(Type::Subregion)], Split("florencia"),
());
}
+
+UNIT_TEST(Geocoder_OnlyBuildings)
+{
+ string const kData = R"#(
+10 {"properties": {"address": {"locality": "Some Locality"}}}
+
+21 {"properties": {"address": {"street": "Good", "locality": "Some Locality"}}}
+22 {"properties": {"address": {"building": "5", "street": "Good", "locality": "Some Locality"}}}
+
+31 {"properties": {"address": {"street": "Bad", "locality": "Some Locality"}}}
+32 {"properties": {"address": {"building": "10", "street": "Bad", "locality": "Some Locality"}}}
+)#";
+
+ ScopedFile const regionsJsonFile("regions.jsonl", kData);
+ Geocoder geocoder(regionsJsonFile.GetFullPath());
+
+ base::GeoObjectId const localityId(10);
+ base::GeoObjectId const goodStreetId(21);
+ base::GeoObjectId const badStreetId(31);
+ base::GeoObjectId const building5(22);
+ base::GeoObjectId const building10(32);
+
+ TestGeocoder(geocoder, "some locality", {{localityId, 1.0}});
+ TestGeocoder(geocoder, "some locality good", {{goodStreetId, 1.0}, {localityId, 0.857143}});
+ TestGeocoder(geocoder, "some locality bad", {{badStreetId, 1.0}, {localityId, 0.857143}});
+
+ TestGeocoder(geocoder, "some locality good 5", {{building5, 1.0}});
+ TestGeocoder(geocoder, "some locality bad 10", {{building10, 1.0}});
+
+ // There is a building "10" on Bad Street but we should not return it.
+ // Another possible resolution would be to return just "Good Street" (relaxed matching)
+ // but at the time of writing the goal is to either have an exact match or no match at all.
+ TestGeocoder(geocoder, "some locality good 10", {});
+}
} // namespace geocoder
diff --git a/geocoder/hierarchy.cpp b/geocoder/hierarchy.cpp
index 24da2d3046..0a92899221 100644
--- a/geocoder/hierarchy.cpp
+++ b/geocoder/hierarchy.cpp
@@ -232,6 +232,8 @@ void Hierarchy::IndexHouses()
size_t const t = static_cast<size_t>(Type::Street);
auto const * streetCandidates = GetEntries(be.m_address[t]);
+ if (streetCandidates == nullptr)
+ continue;
for (auto & se : *streetCandidates)
{