From c4cd806f652cc52b9a6c15ff20e021b493ba5322 Mon Sep 17 00:00:00 2001 From: vng Date: Thu, 24 Mar 2016 03:41:01 +0300 Subject: [search] Treat street synonyms as prefix in search algos. --- generator/search_index_builder.cpp | 3 +- indexer/indexer_tests/search_string_utils_test.cpp | 42 +++- indexer/search_string_utils.cpp | 240 +++++++++++++++------ indexer/search_string_utils.hpp | 77 +------ map/map_tests/address_tests.cpp | 3 +- search/house_detector.cpp | 2 +- search/reverse_geocoder.cpp | 12 +- search/reverse_geocoder.hpp | 5 +- search/search_tests/house_detector_tests.cpp | 12 +- search/v2/geocoder.cpp | 2 +- 10 files changed, 224 insertions(+), 174 deletions(-) diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index f129fe0ca5..536cb32ed4 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -334,8 +334,7 @@ void BuildAddressTable(FilesContainerR & container, Writer & writer) size_t streetIndex; bool streetMatched = false; - string street; - search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET), street); + strings::UniString const street = search::GetStreetNameAsKey(data.Get(feature::AddressData::STREET)); if (!street.empty()) { FeatureType ft; diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp index f84ff8f33b..9fa0dbca80 100644 --- a/indexer/indexer_tests/search_string_utils_test.cpp +++ b/indexer/indexer_tests/search_string_utils_test.cpp @@ -4,9 +4,12 @@ #include "base/string_utils.hpp" +using namespace search; +using namespace strings; + UNIT_TEST(FeatureTypeToString) { - TEST_EQUAL("!type:123", strings::ToUtf8(search::FeatureTypeToString(123)), ()); + TEST_EQUAL("!type:123", ToUtf8(FeatureTypeToString(123)), ()); } UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) @@ -37,18 +40,37 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines) }; for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2) - TEST_EQUAL(arr[i + 1], strings::ToUtf8(search::NormalizeAndSimplifyString(arr[i])), (i)); + TEST_EQUAL(arr[i + 1], ToUtf8(NormalizeAndSimplifyString(arr[i])), (i)); } UNIT_TEST(Contains) { constexpr char const * kTestStr = "ØøÆ挜 Ўвага!"; - TEST(search::ContainsNormalized(kTestStr, ""), ()); - TEST(!search::ContainsNormalized("", "z"), ()); - TEST(search::ContainsNormalized(kTestStr, "ooae"), ()); - TEST(search::ContainsNormalized(kTestStr, " у"), ()); - TEST(search::ContainsNormalized(kTestStr, "Ў"), ()); - TEST(search::ContainsNormalized(kTestStr, "ўв"), ()); - TEST(!search::ContainsNormalized(kTestStr, "ага! "), ()); - TEST(!search::ContainsNormalized(kTestStr, "z"), ()); + TEST(ContainsNormalized(kTestStr, ""), ()); + TEST(!ContainsNormalized("", "z"), ()); + TEST(ContainsNormalized(kTestStr, "ooae"), ()); + TEST(ContainsNormalized(kTestStr, " у"), ()); + TEST(ContainsNormalized(kTestStr, "Ў"), ()); + TEST(ContainsNormalized(kTestStr, "ўв"), ()); + TEST(!ContainsNormalized(kTestStr, "ага! "), ()); + TEST(!ContainsNormalized(kTestStr, "z"), ()); +} + +namespace +{ +bool TestPrefixMatch(char const * s) +{ + return IsStreetSynonymPrefix(MakeUniString(s)); +} +} // namespace + +UNIT_TEST(StreetPrefixMatch) +{ + TEST(TestPrefixMatch("п"), ()); + TEST(TestPrefixMatch("пр"), ()); + TEST(TestPrefixMatch("про"), ()); + TEST(TestPrefixMatch("прое"), ()); + TEST(TestPrefixMatch("проез"), ()); + TEST(TestPrefixMatch("проезд"), ()); + TEST(!TestPrefixMatch("проездд"), ()); } diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 11bf4e3e2c..25f2488a72 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -1,118 +1,224 @@ #include "search_string_utils.hpp" -#include "std/set.hpp" +#include "base/macros.hpp" +#include "base/stl_helpers.hpp" + +#include "std/algorithm.hpp" #include "std/transform_iterator.hpp" -#include "base/macros.hpp" +namespace search +{ +using namespace strings; + +UniString NormalizeAndSimplifyString(string const & s) +{ + UniString uniString = MakeUniString(s); + for (size_t i = 0; i < uniString.size(); ++i) + { + UniChar & c = uniString[i]; + switch (c) + { + // Replace "d with stroke" to simple d letter. Used in Vietnamese. + // (unicode-compliant implementation leaves it unchanged) + case 0x0110: + case 0x0111: c = 'd'; break; + // Replace small turkish dotless 'ı' with dotted 'i'. + // Our own invented hack to avoid well-known Turkish I-letter bug. + case 0x0131: c = 'i'; break; + // Replace capital turkish dotted 'İ' with dotted lowercased 'i'. + // Here we need to handle this case manually too, because default unicode-compliant implementation + // of MakeLowerCase converts 'İ' to 'i' + 0x0307. + case 0x0130: c = 'i'; break; + // Some Danish-specific hacks. + case 0x00d8: // Ø + case 0x00f8: c = 'o'; break; // ø + case 0x0152: // Œ + case 0x0153: // œ + c = 'o'; + uniString.insert(uniString.begin() + (i++) + 1, 'e'); + break; + case 0x00c6: // Æ + case 0x00e6: // æ + c = 'a'; + uniString.insert(uniString.begin() + (i++) + 1, 'e'); + break; + } + } + + MakeLowerCaseInplace(uniString); + NormalizeInplace(uniString); + + // Remove accents that can appear after NFKD normalization. + uniString.erase_if([](UniChar const & c) + { + // ̀ COMBINING GRAVE ACCENT + // ́ COMBINING ACUTE ACCENT + return (c == 0x0300 || c == 0x0301); + }); + + return uniString; + + /// @todo Restore this logic to distinguish и-й in future. + /* + // Just after lower casing is a correct place to avoid normalization for specific chars. + static auto const isSpecificChar = [](UniChar c) -> bool + { + return c == 0x0439; // й + }; + UniString result; + result.reserve(uniString.size()); + for (auto i = uniString.begin(), end = uniString.end(); i != end;) + { + auto j = find_if(i, end, isSpecificChar); + // We don't check if (j != i) because UniString and Normalize handle it correctly. + UniString normString(i, j); + NormalizeInplace(normString); + result.insert(result.end(), normString.begin(), normString.end()); + if (j == end) + break; + result.push_back(*j); + i = j + 1; + } + return result; + */ +} char const * STREET_TOKENS_SEPARATOR = "\t -,."; -strings::UniString search::FeatureTypeToString(uint32_t type) +UniString FeatureTypeToString(uint32_t type) { - string const s = "!type:" + strings::to_string(type); - return strings::UniString(s.begin(), s.end()); + string const s = "!type:" + to_string(type); + return UniString(s.begin(), s.end()); } -/// @todo Move prefixes, suffixes into separate file (autogenerated). -/// "Набережная" улица встречается в городах +namespace +{ -char const * affics[] = +class StreetsSynonymsHolder { - // Russian - "аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр", + vector m_synonyms; +public: + StreetsSynonymsHolder() + { + /// @todo Move prefixes, suffixes into separate file (autogenerated). + /// "Набережная" улица встречается в городах + + char const * affics[] = + { + // Russian + "аллея", "бульвар", "набережная", "переулок", "площадь", "проезд", "проспект", "шоссе", "тупик", "улица", "тракт", "ал", "бул", "наб", "пер", "пл", "пр", "просп", "ш", "туп", "ул", "тр", - // English - "street", "avenue", "square", "road", "boulevard", "drive", "highway", "lane", "way", "st", "av", "ave", "sq", "rd", "blvd", "dr", "hwy", "ln", + // English + "street", "avenue", "square", "road", "boulevard", "drive", "highway", "lane", "way", "circle", "st", "av", "ave", "sq", "rd", "blvd", "dr", "hwy", "ln", - // German - "strasse", "weg", "platz", + // German + "strasse", "weg", "platz", - // Lithuanian - "g", "pr", "pl", "kel", + // Lithuanian + "g", "pr", "pl", "kel", - // Български език - Bulgarian - "булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв", + // Български език - Bulgarian + "булевард", "бул", "площад", "пл", "улица", "ул", "квартал", "кв", - // Canada - Canada - "allee", "alley", "autoroute", "aut", "bypass", "byway", "carrefour", "carref", "chemin", "côte", "crossing", "cross", "expressway", "freeway", "fwy", "line", "link", "loop", "parkway", "pky", "pkwy", "path", "pathway", "ptway", "route", "rte", "trail", "walk", + // Canada - Canada + "allee", "alley", "autoroute", "aut", "bypass", "byway", "carrefour", "carref", "chemin", "cercle", "circle", "côte", "crossing", "cross", "expressway", "freeway", "fwy", "line", "link", "loop", "parkway", "pky", "pkwy", "path", "pathway", "ptway", "route", "rue", "rte", "trail", "walk", - // Cesky - Czech - "ulice", "ul", "náměstí", "nám", + // Cesky - Czech + "ulice", "ul", "náměstí", "nám", - // Deutsch - German - "allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", + // Deutsch - German + "allee", "al", "brücke", "br", "chaussee", "gasse", "gr", "pfad", "straße", "str", - // Español - Spanish - "avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "cllon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", + // Español - Spanish + "avenida", "avd", "avda", "bulevar", "bulev", "calle", "calleja", "cllja", "callejón", "callej", "cjon", "cllon", "callejuela", "cjla", "callizo", "cllzo", "calzada", "czada", "costera", "coste", "plza", "pza", "plazoleta", "pzta", "plazuela", "plzla", "tránsito", "trans", "transversal", "trval", "trasera", "tras", "travesía", "trva", - // Français - French - "rue", "avenue", "carré", "route", "boulevard", "drive", "autoroute", "lane", "chemin", + // Français - French + "rue", "avenue", "carré", "cercle", "route", "boulevard", "drive", "autoroute", "lane", "chemin", - // Nederlands - Dutch - "laan", "ln.", "straat", "steenweg", "stwg", "st", + // Nederlands - Dutch + "laan", "ln.", "straat", "steenweg", "stwg", "st", - // Norsk - Norwegian - "vei", "veien", "vn", "gaten", "gata", "gt", "plass", "plassen", "sving", "svingen", "sv", + // Norsk - Norwegian + "vei", "veien", "vn", "gaten", "gata", "gt", "plass", "plassen", "sving", "svingen", "sv", - // Polski - Polish - "aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy", + // Polski - Polish + "aleja", "aleje", "aleji", "alejach", "aleją", "plac", "placu", "placem", "ulica", "ulicy", - // Português - Portuguese - "street", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho", + // Português - Portuguese + "street", "avenida", "quadrado", "estrada", "boulevard", "carro", "auto-estrada", "lane", "caminho", - // Română - Romanian - "bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane", + // Română - Romanian + "bul", "bdul", "blv", "bulevard", "bulevardu", "calea", "cal", "piața", "pţa", "pța", "strada", "stra", "stradela", "sdla", "stradă", "unitate", "autostradă", "lane", - // Slovenščina - Slovenian - "cesta", + // Slovenščina - Slovenian + "cesta", - // Suomi - Finnish - "kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä", + // Suomi - Finnish + "kaari", "kri", "katu", "kuja", "kj", "kylä", "polku", "tie", "t", "tori", "väylä", "vlä", - // Svenska - Swedish - "väg", "vägen", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", + // Svenska - Swedish + "väg", "vägen", "gatan", "gränd", "gränden", "stig", "stigen", "plats", "platsen", - // Türkçe - Turkish - "sokak", "sk", "sok", "sokağı", "cadde", "cd", "caddesi", "bulvar", "bulvarı", + // Türkçe - Turkish + "sokak", "sk", "sok", "sokağı", "cadde", "cd", "caddesi", "bulvar", "bulvarı", - // Tiếng Việt – Vietnamese - "quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc", + // Tiếng Việt – Vietnamese + "quốc lộ", "ql", "tỉnh lộ", "tl", "Đại lộ", "Đl", "Đường", "Đ", "Đường sắt", "Đs", "Đường phố", "Đp", "vuông", "con Đường", "Đại lộ", "Đường cao tốc", - // Українська - Ukrainian - "дорога", "провулок", "площа", "шосе", "вулиция", "дор", "пров", "вул" + // Українська - Ukrainian + "дорога", "провулок", "площа", "шосе", "вулиция", "дор", "пров", "вул" + }; + + m_synonyms.assign(make_transform_iterator(affics, &NormalizeAndSimplifyString), + make_transform_iterator(affics + ARRAY_SIZE(affics), &NormalizeAndSimplifyString)); + my::SortUnique(m_synonyms); + } + + bool MatchPrefix(UniString const & prefix) const + { + auto const it = lower_bound(m_synonyms.begin(), m_synonyms.end(), prefix); + return (it != m_synonyms.end() && StartsWith(*it, prefix)); + } + + bool MatchEqual(UniString const & prefix) const + { + return binary_search(m_synonyms.begin(), m_synonyms.end(), prefix); + } }; -void search::GetStreetName(strings::SimpleTokenizer iter, string & streetName) +StreetsSynonymsHolder g_streets; + +} // namespace + +UniString GetStreetNameAsKey(string const & name) { + UniString res; + SimpleTokenizer iter(name, STREET_TOKENS_SEPARATOR); while (iter) { - string const s = strings::MakeLowerCase(*iter); + UniString const s = NormalizeAndSimplifyString(*iter); ++iter; - char const ** end = affics + ARRAY_SIZE(affics); - - if (find(affics, end, s) == end) - streetName += s; + if (!g_streets.MatchEqual(s)) + res.append(s); } + return res; } -void search::GetStreetNameAsKey(string const & name, string & res) +bool IsStreetSynonym(UniString const & s) { - strings::SimpleTokenizer iter(name, STREET_TOKENS_SEPARATOR); - GetStreetName(iter, res); + return g_streets.MatchEqual(s); } -bool search::IsStreetSynonym(strings::UniString const & s) +bool IsStreetSynonymPrefix(UniString const & s) { - static set const kSynonyms( - make_transform_iterator(affics, &search::NormalizeAndSimplifyString), - make_transform_iterator(affics + ARRAY_SIZE(affics), &search::NormalizeAndSimplifyString)); - return kSynonyms.count(s) != 0; + return g_streets.MatchPrefix(s); } -bool search::ContainsNormalized(string const & str, string const & substr) +bool ContainsNormalized(string const & str, string const & substr) { - strings::UniString const ustr = search::NormalizeAndSimplifyString(str); - strings::UniString const usubstr = search::NormalizeAndSimplifyString(substr); + UniString const ustr = NormalizeAndSimplifyString(str); + UniString const usubstr = NormalizeAndSimplifyString(substr); return std::search(ustr.begin(), ustr.end(), usubstr.begin(), usubstr.end()) != ustr.end(); } +} // namespace search diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index afa4459f7a..5d1365c084 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -8,78 +8,7 @@ namespace search // This function should be used for all search strings normalization. // It does some magic text transformation which greatly helps us to improve our search. -inline strings::UniString NormalizeAndSimplifyString(string const & s) -{ - strings::UniString uniString = strings::MakeUniString(s); - for (size_t i = 0; i < uniString.size(); ++i) - { - strings::UniChar & c = uniString[i]; - switch (c) - { - // Replace "d with stroke" to simple d letter. Used in Vietnamese. - // (unicode-compliant implementation leaves it unchanged) - case 0x0110: - case 0x0111: c = 'd'; break; - // Replace small turkish dotless 'ı' with dotted 'i'. - // Our own invented hack to avoid well-known Turkish I-letter bug. - case 0x0131: c = 'i'; break; - // Replace capital turkish dotted 'İ' with dotted lowercased 'i'. - // Here we need to handle this case manually too, because default unicode-compliant implementation - // of MakeLowerCase converts 'İ' to 'i' + 0x0307. - case 0x0130: c = 'i'; break; - // Some Danish-specific hacks. - case 0x00d8: // Ø - case 0x00f8: c = 'o'; break; // ø - case 0x0152: // Œ - case 0x0153: // œ - c = 'o'; - uniString.insert(uniString.begin() + (i++) + 1, 'e'); - break; - case 0x00c6: // Æ - case 0x00e6: // æ - c = 'a'; - uniString.insert(uniString.begin() + (i++) + 1, 'e'); - break; - } - } - - MakeLowerCaseInplace(uniString); - NormalizeInplace(uniString); - - // Remove accents that can appear after NFKD normalization. - uniString.erase_if([](strings::UniChar const & c) - { - // ̀ COMBINING GRAVE ACCENT - // ́ COMBINING ACUTE ACCENT - return (c == 0x0300 || c == 0x0301); - }); - - return uniString; - - /// @todo Restore this logic to distinguish и-й in future. - /* - // Just after lower casing is a correct place to avoid normalization for specific chars. - static auto const isSpecificChar = [](UniChar c) -> bool - { - return c == 0x0439; // й - }; - UniString result; - result.reserve(uniString.size()); - for (auto i = uniString.begin(), end = uniString.end(); i != end;) - { - auto j = find_if(i, end, isSpecificChar); - // We don't check if (j != i) because UniString and Normalize handle it correctly. - UniString normString(i, j); - NormalizeInplace(normString); - result.insert(result.end(), normString.begin(), normString.end()); - if (j == end) - break; - result.push_back(*j); - i = j + 1; - } - return result; - */ -} +strings::UniString NormalizeAndSimplifyString(string const & s); template void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims) @@ -110,10 +39,10 @@ bool TokenizeStringAndCheckIfLastTokenIsPrefix(string const & s, delimiter); } -void GetStreetName(strings::SimpleTokenizer iter, string & streetName); -void GetStreetNameAsKey(string const & name, string & res); +strings::UniString GetStreetNameAsKey(string const & name); bool IsStreetSynonym(strings::UniString const & s); +bool IsStreetSynonymPrefix(strings::UniString const & s); /// Normalizes both str and substr, and then returns true if substr is found in str. /// Used in native platform code for search in localized strings (cuisines, categories, strings etc.). diff --git a/map/map_tests/address_tests.cpp b/map/map_tests/address_tests.cpp index 6c8c651afd..d8423812db 100644 --- a/map/map_tests/address_tests.cpp +++ b/map/map_tests/address_tests.cpp @@ -19,8 +19,7 @@ void TestAddress(ReverseGeocoder & coder, ms::LatLon const & ll, ReverseGeocoder::Address addr; coder.GetNearbyAddress(MercatorBounds::FromLatLon(ll), addr); - string key; - GetStreetNameAsKey(addr.m_street.m_name, key); + string const key = strings::ToUtf8(GetStreetNameAsKey(addr.m_street.m_name)); TEST_EQUAL(stName, key, (addr)); TEST_EQUAL(hNumber, addr.m_building.m_name, (addr)); diff --git a/search/house_detector.cpp b/search/house_detector.cpp index 49072ee010..8ce620fb8f 100644 --- a/search/house_detector.cpp +++ b/search/house_detector.cpp @@ -287,7 +287,7 @@ double Street::GetPrefixLength(size_t numSegs) const void Street::SetName(string const & name) { m_name = name; - GetStreetNameAsKey(name, m_processedName); + m_processedName = strings::ToUtf8(GetStreetNameAsKey(name)); } namespace diff --git a/search/reverse_geocoder.cpp b/search/reverse_geocoder.cpp index bc30412ccc..7585521ff9 100644 --- a/search/reverse_geocoder.cpp +++ b/search/reverse_geocoder.cpp @@ -62,11 +62,9 @@ void ReverseGeocoder::GetNearbyStreets(FeatureType & ft, vector & street } // static -size_t ReverseGeocoder::GetMatchedStreetIndex(string const & keyName, +size_t ReverseGeocoder::GetMatchedStreetIndex(strings::UniString const & keyName, vector const & streets) { - strings::UniString const expected = strings::MakeUniString(keyName); - // Find the exact match or the best match in kSimilarityTresholdPercent limit. size_t const count = streets.size(); size_t result = count; @@ -74,12 +72,10 @@ size_t ReverseGeocoder::GetMatchedStreetIndex(string const & keyName, for (size_t i = 0; i < count; ++i) { - string key; - search::GetStreetNameAsKey(streets[i].m_name, key); - strings::UniString const actual = strings::MakeUniString(key); + strings::UniString const actual = search::GetStreetNameAsKey(streets[i].m_name); - size_t const editDistance = - strings::EditDistance(expected.begin(), expected.end(), actual.begin(), actual.end()); + size_t const editDistance = strings::EditDistance(keyName.begin(), keyName.end(), + actual.begin(), actual.end()); if (editDistance == 0) return i; diff --git a/search/reverse_geocoder.hpp b/search/reverse_geocoder.hpp index 3751a78f35..2984a888f6 100644 --- a/search/reverse_geocoder.hpp +++ b/search/reverse_geocoder.hpp @@ -4,6 +4,8 @@ #include "indexer/feature_decl.hpp" +#include "base/string_utils.hpp" + #include "std/string.hpp" #include "std/utility.hpp" #include "std/vector.hpp" @@ -58,7 +60,8 @@ public: } }; - static size_t GetMatchedStreetIndex(string const & keyName, vector const & streets); + static size_t GetMatchedStreetIndex(strings::UniString const & keyName, + vector const & streets); struct Address { diff --git a/search/search_tests/house_detector_tests.cpp b/search/search_tests/house_detector_tests.cpp index 38677ec996..d8e8a6e100 100644 --- a/search/search_tests/house_detector_tests.cpp +++ b/search/search_tests/house_detector_tests.cpp @@ -57,7 +57,7 @@ class CollectStreetIDs static bool GetKey(string const & name, string & key) { TEST(!name.empty(), ()); - search::GetStreetNameAsKey(name, key); + key = strings::ToUtf8(search::GetStreetNameAsKey(name)); if (key.empty()) { @@ -331,19 +331,15 @@ UNIT_TEST(HS_StreetsCompare) namespace { - string GetStreetKey(string const & name) { - string res; - search::GetStreetNameAsKey(name, res); - return res; -} - + return strings::ToUtf8(search::GetStreetNameAsKey(name)); } +} // namespace UNIT_TEST(HS_StreetKey) { - TEST_EQUAL("крупской", GetStreetKey("улица Крупской"), ()); + TEST_EQUAL("крупскои", GetStreetKey("улица Крупской"), ()); TEST_EQUAL("уручская", GetStreetKey("Уручская ул."), ()); TEST_EQUAL("газетыправда", GetStreetKey("Пр. Газеты Правда"), ()); TEST_EQUAL("якупалы", GetStreetKey("улица Я. Купалы"), ()); diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index a63bef7f04..a64c3cdb18 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -998,7 +998,7 @@ void Geocoder::GreedilyMatchStreets() for (; curToken < m_numTokens && !m_usedTokens[curToken]; ++curToken) { auto const & token = m_params.GetTokens(curToken).front(); - if (IsStreetSynonym(token)) + if (IsStreetSynonymPrefix(token)) continue; if (feature::IsHouseNumber(token)) -- cgit v1.2.3