From df0291c237cde728855caca7aeb7940c9f79bf61 Mon Sep 17 00:00:00 2001
From: Anatoly Serdtcev
Date: Wed, 14 Aug 2019 18:24:58 +0300
Subject: [geocoder] Improve result relevance: ignore numerical suburb/sublocality without locality matching

---
 base/base_tests/string_utils_test.cpp      | 18 ++++++++++++++++
 base/string_utils.cpp                      |  7 +++++++
 base/string_utils.hpp                      |  1 +
 geocoder/geocoder.cpp                      | 33 ++++++++++++++++++++++++++++++
 geocoder/geocoder.hpp                      |  3 +++
 geocoder/geocoder_tests/geocoder_tests.cpp | 26 ++++++++++++++++++-----
 6 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp
index ab4396f905..7e5eec9031 100644
--- a/base/base_tests/string_utils_test.cpp
+++ b/base/base_tests/string_utils_test.cpp
@@ -622,6 +622,24 @@ UNIT_TEST(IsUtf8Test)
   TEST(strings::IsASCIIString("Nice places in Zhodino.kml"), ());
 }
 
+UNIT_TEST(IsASCIINumericTest)
+{
+  TEST(strings::IsASCIINumeric("0"), ());
+  TEST(strings::IsASCIINumeric("1"), ());
+  TEST(strings::IsASCIINumeric("10"), ());
+  TEST(strings::IsASCIINumeric("01"), ());
+  TEST(strings::IsASCIINumeric("00"), ());
+
+  TEST(!strings::IsASCIINumeric(""), ());
+  TEST(!strings::IsASCIINumeric(" "), ());
+  TEST(!strings::IsASCIINumeric(" 9"), ());
+  TEST(!strings::IsASCIINumeric("9 "), ());
+  TEST(!strings::IsASCIINumeric("+3"), ());
+  TEST(!strings::IsASCIINumeric("-2"), ());
+  TEST(!strings::IsASCIINumeric("0x09"), ());
+  TEST(!strings::IsASCIINumeric("0.1"), ());
+}
+
 UNIT_TEST(CountNormLowerSymbols)
 {
   char const * strs[] = {"æüßs",
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 39eef9e4b1..2263af3255 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -276,6 +276,13 @@ bool IsASCIISpace(UniChar c)
   return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
 }
 
+bool IsASCIINumeric(std::string const & str)
+{
+  if (str.empty())
+    return false;
+  return std::all_of(str.begin(), str.end(), strings::IsASCIIDigit);
+}
+
 bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
 
 bool StartsWith(UniString const & s, UniString const & p)
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index 1c3173b353..1f86537a4b 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -120,6 +120,7 @@ UniString MakeUniString(std::string const & utf8s);
 std::string ToUtf8(UniString const & s);
 bool IsASCIIString(std::string const & str);
 bool IsASCIIDigit(UniChar c);
+bool IsASCIINumeric(std::string const & str);
 bool IsASCIISpace(UniChar c);
 bool IsASCIILatin(UniChar c);
 
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp
index 8a5e6fb4f0..714bf71beb 100644
--- a/geocoder/geocoder.cpp
+++ b/geocoder/geocoder.cpp
@@ -397,7 +397,12 @@ void Geocoder::FillRegularLayer(Context const & ctx, Type type, Tokens const & s
       return;
 
     if (ctx.GetLayers().empty() || HasParent(ctx.GetLayers(), d))
+    {
+      if (type > Type::Locality && !IsRelevantLocalityMember(ctx, d, subquery))
+        return;
+
       curLayer.m_entries.emplace_back(docId);
+    }
   });
 }
 
@@ -415,4 +420,32 @@ bool Geocoder::HasParent(vector<Geocoder::Layer> const & layers, Hierarchy::Entr
   }
   return false;
 }
+
+bool Geocoder::IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
+                                        Tokens const & subquery) const
+{
+  auto const isNumeric = subquery.size() == 1 && strings::IsASCIINumeric(subquery.front());
+  return !isNumeric || HasMemberLocalityInMatching(ctx, member);
+}
+
+bool Geocoder::HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const
+{
+  for (auto const & layer : ctx.GetLayers())
+  {
+    auto const layerType = layer.m_type;
+    if (layerType > Type::Locality)
+      break;
+    if (layerType != Type::Locality)
+      continue;
+
+    for (auto const docId : layer.m_entries)
+    {
+      auto const & matchedEntry = m_index.GetDoc(docId);
+      if (m_hierarchy.IsParentTo(matchedEntry, member))
+        return true;
+    }
+  }
+
+  return false;
+}
 }  // namespace geocoder
diff --git a/geocoder/geocoder.hpp b/geocoder/geocoder.hpp
index 4e9f09a15d..007246881f 100644
--- a/geocoder/geocoder.hpp
+++ b/geocoder/geocoder.hpp
@@ -148,6 +148,9 @@ private:
   // Returns whether any of the paths through |layers| can be extended
   // by appending |e|.
   bool HasParent(std::vector<Geocoder::Layer> const & layers, Hierarchy::Entry const & e) const;
+  bool IsRelevantLocalityMember(Context const & ctx, Hierarchy::Entry const & member,
+                                Tokens const & subquery) const;
+  bool HasMemberLocalityInMatching(Context const & ctx, Hierarchy::Entry const & member) const;
 
   Hierarchy m_hierarchy;
 
diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp
index 5472da3c69..863b665753 100644
--- a/geocoder/geocoder_tests/geocoder_tests.cpp
+++ b/geocoder/geocoder_tests/geocoder_tests.cpp
@@ -272,18 +272,13 @@ UNIT_TEST(Geocoder_LocalityBuilding)
 {
   string const kData = R"#(
 10 {"properties": {"locales": {"default": {"address": {"locality": "Zelenograd"}}}}}
-
 22 {"properties": {"locales": {"default": {"address": {"building": "2", "locality": "Zelenograd"}}}}}
-
 31 {"properties": {"locales": {"default": {"address": {"street": "Krymskaya", "locality": "Zelenograd"}}}}}
 32 {"properties": {"locales": {"default": {"address": {"building": "2", "street": "Krymskaya", "locality": "Zelenograd"}}}}}
 )#";
-
   ScopedFile const regionsJsonFile("regions.jsonl", kData);
   Geocoder geocoder(regionsJsonFile.GetFullPath());
-
   base::GeoObjectId const building2(0x22);
-
   TestGeocoder(geocoder, "Zelenograd 2", {{building2, 1.0}});
 }
 
@@ -305,6 +300,27 @@ UNIT_TEST(Geocoder_SubregionInLocality)
   TestGeocoder(geocoder, "Москва", {{Id{0x10}, 1.0}, {Id{0x11}, 0.6}});
 }
 
+// Geocoder_NumericalSuburb* ----------------------------------------------------------------------
+UNIT_TEST(Geocoder_NumericalSuburbRelevance)
+{
+  string const kData = R"#(
+10 {"properties": {"locales": {"default": {"address": {"region": "Metro Manila"}}}}}
+11 {"properties": {"locales": {"default": {"address": {"locality": "Caloocan", "region": "Metro Manila"}}}}}
+12 {"properties": {"locales": {"default": {"address": {"suburb": "60", "locality": "Caloocan", "region": "Metro Manila"}}}}}
+20 {"properties": {"locales": {"default": {"address": {"locality": "Белгород"}}}}}
+21 {"properties": {"locales": {"default": {"address": {"street": "Щорса", "locality": "Белгород"}}}}}
+22 {"properties": {"locales": {"default": {"address": {"building": "60", "street": "Щорса", "locality": "Белгород"}}}}}
+)#";
+
+  ScopedFile const regionsJsonFile("regions.jsonl", kData);
+  Geocoder geocoder(regionsJsonFile.GetFullPath());
+
+  TestGeocoder(geocoder, "Caloocan, 60", {{Id{0x12}, 1.0}});
+  TestGeocoder(geocoder, "60", {});
+  TestGeocoder(geocoder, "Metro Manila, 60", {{Id{0x10}, 1.0}});
+  TestGeocoder(geocoder, "Белгород, Щорса, 60", {{Id{0x22}, 1.0}});
+}
+
 //--------------------------------------------------------------------------------------------------
 UNIT_TEST(Geocoder_EmptyFileConcurrentRead)
 {
-- 
cgit v1.2.3