Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mpimenov <mpimenov@users.noreply.github.com>, 2016-04-22 20:15:55 +0300
committer: mpimenov <mpimenov@users.noreply.github.com>, 2016-04-22 20:15:55 +0300
commit: b9cc722dd7c4a6d04f6cb18bd9855e22baee9ee2 (patch)
tree: ea612c12c5ff37e0e7b5e4de2cd6aa0666615cc5
parent: aa7489bd4c60c356cf1cd5901d704328535d2eb9 (diff)
parent: bdc06eada49d5be545ec5b503a1056db4473361d (diff)
Merge pull request #2961 from ygorshenin/add-postcodes-to-search-index
[search] Postcodes are added to the search index.
-rw-r--r-- base/string_utils.cpp | 4
-rw-r--r-- base/string_utils.hpp | 15
-rw-r--r-- generator/search_index_builder.cpp | 22
-rw-r--r-- indexer/search_string_utils.cpp | 6
-rw-r--r-- indexer/search_string_utils.hpp | 2
-rw-r--r-- search/feature_offset_match.hpp | 85
-rw-r--r-- search/retrieval.cpp | 209
-rw-r--r-- search/retrieval.hpp | 18
-rw-r--r-- search/search.pro | 4
-rw-r--r-- search/search_integration_tests/helpers.cpp | 9
-rw-r--r-- search/search_integration_tests/helpers.hpp | 15
-rw-r--r-- search/search_integration_tests/search_query_v2_test.cpp | 198
-rw-r--r-- search/search_query.cpp | 7
-rw-r--r-- search/search_tests/postcodes_matcher_tests.cpp | 72
-rw-r--r-- search/search_tests/ranking_tests.cpp | 3
-rw-r--r-- search/search_tests/search_tests.pro | 1
-rw-r--r-- search/search_tests_support/test_feature.hpp | 1
-rw-r--r-- search/search_trie.hpp | 1
-rw-r--r-- search/v2/geocoder.cpp | 2
-rw-r--r-- search/v2/locality_scorer.cpp | 6
-rw-r--r-- search/v2/postcodes_matcher.cpp | 168
-rw-r--r-- search/v2/postcodes_matcher.hpp | 15
-rw-r--r-- search/v2/ranking_utils.hpp | 69
-rw-r--r-- search/v2/token_slice.cpp | 27
-rw-r--r-- search/v2/token_slice.hpp | 67
25 files changed, 808 insertions, 218 deletions
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 5f26059688..da13ecb282 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -220,6 +220,10 @@ bool IsASCIIString(string const & str)
return true;
}
+bool IsASCIIDigit(UniChar c) { return c >= '0' && c <= '9'; }
+
+bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
+
bool StartsWith(UniString const & s, UniString const & p)
{
if (p.size() > s.size())
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index 1c4634646a..f7c5b775ff 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -29,6 +29,19 @@ public:
template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {}
bool IsEqualAscii(char const * s) const;
+
+ UniString & operator+=(UniString const & rhs)
+ {
+ append(rhs);
+ return *this;
+ }
+
+ UniString operator+(UniString const & rhs) const
+ {
+ UniString result(*this);
+ result += rhs;
+ return result;
+ }
};
/// Performs full case folding for string to make it search-compatible according
@@ -67,6 +80,8 @@ bool EqualNoCase(string const & s1, string const & s2);
UniString MakeUniString(string const & utf8s);
string ToUtf8(UniString const & s);
bool IsASCIIString(string const & str);
+bool IsASCIIDigit(UniChar c);
+bool IsASCIILatin(UniChar c);
inline string DebugPrint(UniString const & s)
{
diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp
index c008e0537c..e4e32ed20b 100644
--- a/generator/search_index_builder.cpp
+++ b/generator/search_index_builder.cpp
@@ -136,11 +136,11 @@ struct FeatureNameInserter
{
}
- void AddToken(signed char lang, strings::UniString const & s) const
+ void AddToken(uint8_t lang, strings::UniString const & s) const
{
strings::UniString key;
key.reserve(s.size() + 1);
- key.push_back(static_cast<uint8_t>(lang));
+ key.push_back(lang);
key.append(s.begin(), s.end());
m_keyValuePairs.emplace_back(key, m_val);
@@ -261,9 +261,11 @@ public:
void operator() (FeatureType const & f, uint32_t index) const
{
+ using namespace search;
+
feature::TypesHolder types(f);
- static search::TypesSkipper skipIndex;
+ static TypesSkipper skipIndex;
skipIndex.SkipTypes(types);
if (types.Empty())
@@ -278,6 +280,18 @@ public:
skipIndex.IsCountryOrState(types) ? m_synonyms : nullptr, m_keyValuePairs, hasStreetType);
m_valueBuilder.MakeValue(f, types, index, inserter.m_val);
+ string const postcode = f.GetMetadata().Get(feature::Metadata::FMD_POSTCODE);
+ if (!postcode.empty())
+ {
+ // See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less
+ // than two.
+ buffer_vector<strings::UniString, 2> tokens;
+ SplitUniString(NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens),
+ Delimiters());
+ for (auto const & token : tokens)
+ inserter.AddToken(kPostcodesLang, PostcodeToString(token));
+ }
+
// Skip types for features without names.
if (!f.ForEachName(inserter))
skipIndex.SkipEmptyNameTypes(types);
@@ -291,7 +305,7 @@ public:
// add names of categories of the feature
for (uint32_t t : categoryTypes)
- inserter.AddToken(search::kCategoriesLang, search::FeatureTypeToString(c.GetIndexForType(t)));
+ inserter.AddToken(kCategoriesLang, FeatureTypeToString(c.GetIndexForType(t)));
}
};
diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp
index 24aaaa3764..0283191869 100644
--- a/indexer/search_string_utils.cpp
+++ b/indexer/search_string_utils.cpp
@@ -90,6 +90,12 @@ UniString FeatureTypeToString(uint32_t type)
return UniString(s.begin(), s.end());
}
+UniString PostcodeToString(strings::UniString const & postcode)
+{
+ static UniString const kPrefix = MakeUniString("!postcode:");
+ return kPrefix + postcode;
+}
+
namespace
{
char const * kStreetTokensSeparator = "\t -,.";
diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp
index 5d1365c084..1a8c5d0ed6 100644
--- a/indexer/search_string_utils.hpp
+++ b/indexer/search_string_utils.hpp
@@ -19,6 +19,8 @@ void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims
strings::UniString FeatureTypeToString(uint32_t type);
+strings::UniString PostcodeToString(strings::UniString const & postcode);
+
template <class ContainerT, class DelimsT>
bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s,
ContainerT & tokens,
diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp
index 232bec7b7e..c6f52f89e7 100644
--- a/search/feature_offset_match.hpp
+++ b/search/feature_offset_match.hpp
@@ -3,6 +3,7 @@
#include "search/search_index_values.hpp"
#include "search/search_query.hpp"
#include "search/search_query_params.hpp"
+#include "search/v2/token_slice.hpp"
#include "indexer/trie.hpp"
@@ -103,6 +104,25 @@ bool CheckMatchString(strings::UniChar const * rootPrefix, size_t rootPrefixSize
return false;
}
+
+template <typename TValue>
+bool FindLangIndex(trie::Iterator<ValueList<TValue>> const & trieRoot, uint8_t lang, uint32_t & langIx)
+{
+ ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
+
+ uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
+ for (uint32_t i = 0; i < numLangs; ++i)
+ {
+ auto const & edge = trieRoot.m_edge[i].m_label;
+ ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
+ if (edge[0] == lang)
+ {
+ langIx = i;
+ return true;
+ }
+ }
+ return false;
+}
} // namespace
template <typename TValue, typename TF>
@@ -222,7 +242,7 @@ public:
toDo(value);
}
};
-} // namespace search::impl
+} // impl
template <typename TValue>
struct TrieRootPrefix
@@ -345,27 +365,23 @@ template <typename TValue, typename THolder>
bool MatchCategoriesInTrie(SearchQueryParams const & params,
trie::Iterator<ValueList<TValue>> const & trieRoot, THolder && holder)
{
- ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ());
- uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size());
- for (uint32_t langIx = 0; langIx < numLangs; ++langIx)
- {
- auto const & edge = trieRoot.m_edge[langIx].m_label;
- ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
- if (edge[0] == search::kCategoriesLang)
- {
- auto const catRoot = trieRoot.GoToEdge(langIx);
- MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
-
- // Last token's prefix is used as a complete token here, to
- // limit the number of features in the last bucket of a
- // holder. Probably, this is a false optimization.
- holder.Resize(params.m_tokens.size() + 1);
- holder.SwitchTo(params.m_tokens.size());
- MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
- return true;
- }
- }
- return false;
+ uint32_t langIx = 0;
+ if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx))
+ return false;
+
+ auto const & edge = trieRoot.m_edge[langIx].m_label;
+ ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ());
+
+ auto const catRoot = trieRoot.GoToEdge(langIx);
+ MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
+
+ // Last token's prefix is used as a complete token here, to limit
+ // the number of features in the last bucket of a holder. Probably,
+ // this is a false optimization.
+ holder.Resize(params.m_tokens.size() + 1);
+ holder.SwitchTo(params.m_tokens.size());
+ MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder);
+ return true;
}
// Calls toDo with trie root prefix and language code on each language
@@ -427,4 +443,29 @@ void MatchFeaturesInTrie(SearchQueryParams const & params,
intersecter.ForEachResult(forward<ToDo>(toDo));
}
+
+template <typename TValue, typename TFilter, typename ToDo>
+void MatchPostcodesInTrie(v2::TokenSlice const & slice,
+ trie::Iterator<ValueList<TValue>> const & trieRoot,
+ TFilter const & filter, ToDo && toDo)
+{
+ uint32_t langIx = 0;
+ if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx))
+ return;
+
+ auto const & edge = trieRoot.m_edge[langIx].m_label;
+ auto const postcodesRoot = trieRoot.GoToEdge(langIx);
+
+ impl::OffsetIntersecter<TFilter, TValue> intersecter(filter);
+ for (size_t i = 0; i < slice.Size(); ++i)
+ {
+ if (slice.IsPrefix(i))
+ MatchTokenPrefixInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter);
+ else
+ MatchTokenInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter);
+ intersecter.NextStep();
+ }
+
+ intersecter.ForEachResult(forward<ToDo>(toDo));
+}
} // namespace search
diff --git a/search/retrieval.cpp b/search/retrieval.cpp
index 4afdb46e2b..c4376b5996 100644
--- a/search/retrieval.cpp
+++ b/search/retrieval.cpp
@@ -7,6 +7,7 @@
#include "search_trie.hpp"
#include "v2/mwm_context.hpp"
+#include "v2/token_slice.hpp"
#include "indexer/feature.hpp"
#include "indexer/feature_algo.hpp"
@@ -29,8 +30,65 @@ using osm::Editor;
namespace search
{
+namespace v2
+{
namespace
{
+class FeaturesCollector
+{
+public:
+ FeaturesCollector(my::Cancellable const & cancellable, vector<uint64_t> & features)
+ : m_cancellable(cancellable), m_features(features), m_counter(0)
+ {
+ }
+
+ template <typename TValue>
+ void operator()(TValue const & value)
+ {
+ if ((++m_counter & 0xFF) == 0)
+ BailIfCancelled(m_cancellable);
+ m_features.push_back(value.m_featureId);
+ }
+
+ inline void operator()(uint32_t feature) { m_features.push_back(feature); }
+
+ inline void operator()(uint64_t feature) { m_features.push_back(feature); }
+
+private:
+ my::Cancellable const & m_cancellable;
+ vector<uint64_t> & m_features;
+ uint32_t m_counter;
+};
+
+class EditedFeaturesHolder
+{
+public:
+ EditedFeaturesHolder(MwmSet::MwmId const & id)
+ {
+ Editor & editor = Editor::Instance();
+ m_deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted);
+ m_modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified);
+ m_created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created);
+ }
+
+ bool ModifiedOrDeleted(uint32_t featureIndex) const
+ {
+ return binary_search(m_deleted.begin(), m_deleted.end(), featureIndex) ||
+ binary_search(m_modified.begin(), m_modified.end(), featureIndex);
+ }
+
+ template <typename TFn>
+ void ForEachModifiedOrCreated(TFn & fn)
+ {
+ for_each(m_modified.begin(), m_modified.end(), fn);
+ for_each(m_created.begin(), m_created.end(), fn);
+ }
+
+private:
+ vector<uint32_t> m_deleted;
+ vector<uint32_t> m_modified;
+ vector<uint32_t> m_created;
+};
unique_ptr<coding::CompressedBitVector> SortFeaturesAndBuildCBV(vector<uint64_t> && features)
{
@@ -89,6 +147,13 @@ bool MatchFeatureByName(FeatureType const & ft, SearchQueryParams const & params
return matched;
}
+bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice)
+{
+ string const postcode = ft.GetMetadata().Get(feature::Metadata::FMD_POSTCODE);
+ // TODO(@y): implement this.
+ return false;
+}
+
// Retrieves from the search index corresponding to |value| all
// features matching to |params|.
template <typename TValue>
@@ -96,17 +161,48 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
SearchQueryParams const & params)
{
- // Exclude from search all deleted/modified features and match all edited/created features separately.
- Editor & editor = Editor::Instance();
+ EditedFeaturesHolder holder(id);
+
+ serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams()));
+ ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
+
+ auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>(
+ SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams));
- auto const deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted);
- auto const modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified);
- auto const filter = [&](uint32_t featureIndex) -> bool
+ // TODO (@y, @m): This code may be optimized in the case where
+ // bit vectors are sorted in the search index.
+ vector<uint64_t> features;
+ FeaturesCollector collector(cancellable, features);
+
+ MatchFeaturesInTrie(params, *trieRoot, [&holder](uint32_t featureIndex)
+ {
+ return !holder.ModifiedOrDeleted(featureIndex);
+ },
+ collector);
+
+ // Match all edited/created features separately.
+ Editor & editor = Editor::Instance();
+ auto const matcher = [&](uint32_t featureIndex)
{
- return (!binary_search(deleted.begin(), deleted.end(), featureIndex) &&
- !binary_search(modified.begin(), modified.end(), featureIndex));
+ FeatureType ft;
+ VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ());
+ // TODO(AlexZ): Should we match by some feature's metafields too?
+ if (MatchFeatureByName(ft, params))
+ features.push_back(featureIndex);
};
+ holder.ForEachModifiedOrCreated(matcher);
+
+ return SortFeaturesAndBuildCBV(move(features));
+}
+
+template <typename TValue>
+unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeaturesImpl(
+ MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
+ TokenSlice const & slice)
+{
+ EditedFeaturesHolder holder(id);
+
serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams()));
ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG);
@@ -116,29 +212,26 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl(
// TODO (@y, @m): This code may be optimized in the case where
// bit vectors are sorted in the search index.
vector<uint64_t> features;
- uint32_t counter = 0;
- auto const collector = [&](TValue const & value)
- {
- if ((++counter & 0xFF) == 0)
- BailIfCancelled(cancellable);
- features.push_back(value.m_featureId);
- };
+ FeaturesCollector collector(cancellable, features);
- MatchFeaturesInTrie(params, *trieRoot, filter, collector);
+ MatchPostcodesInTrie(slice, *trieRoot, [&holder](uint32_t featureIndex)
+ {
+ return !holder.ModifiedOrDeleted(featureIndex);
+ },
+ collector);
// Match all edited/created features separately.
+ Editor & editor = Editor::Instance();
auto const matcher = [&](uint32_t featureIndex)
{
FeatureType ft;
VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ());
// TODO(AlexZ): Should we match by some feature's metafields too?
- if (MatchFeatureByName(ft, params))
+ if (MatchFeatureByPostcode(ft, slice))
features.push_back(featureIndex);
};
- for_each(modified.begin(), modified.end(), matcher);
- auto const created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created);
- for_each(created.begin(), created.end(), matcher);
+ holder.ForEachModifiedOrCreated(matcher);
return SortFeaturesAndBuildCBV(move(features));
}
@@ -149,42 +242,74 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl(
v2::MwmContext const & context, my::Cancellable const & cancellable,
covering::IntervalsT const & coverage, int scale)
{
- uint32_t counter = 0;
vector<uint64_t> features;
- context.ForEachIndex(coverage, scale, [&](uint64_t featureId)
- {
- if ((++counter & 0xFF) == 0)
- BailIfCancelled(cancellable);
- features.push_back(featureId);
- });
+ FeaturesCollector collector(cancellable, features);
+ context.ForEachIndex(coverage, scale, collector);
return SortFeaturesAndBuildCBV(move(features));
}
-} // namespace
+template <typename T>
+struct RetrieveAddressFeaturesAdaptor
+{
+ template <typename... TArgs>
+ unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
+ {
+ return RetrieveAddressFeaturesImpl<T>(forward<TArgs>(args)...);
+ }
+};
-namespace v2
+template <typename T>
+struct RetrievePostcodeFeaturesAdaptor
+{
+ template <typename... TArgs>
+ unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args)
+ {
+ return RetrievePostcodeFeaturesImpl<T>(forward<TArgs>(args)...);
+ }
+};
+
+template <template <typename> class T>
+struct Selector
{
+ template <typename... TArgs>
+ unique_ptr<coding::CompressedBitVector> operator()(MwmSet::MwmId const & id, MwmValue & value,
+ TArgs &&... args)
+ {
+ version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
+
+ if (mwmTraits.GetSearchIndexFormat() ==
+ version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
+ {
+ T<FeatureWithRankAndCenter> t;
+ return t(id, value, forward<TArgs>(args)...);
+ }
+ if (mwmTraits.GetSearchIndexFormat() ==
+ version::MwmTraits::SearchIndexFormat::CompressedBitVector)
+ {
+ T<FeatureIndexValue> t;
+ return t(id, value, forward<TArgs>(args)...);
+ }
+ return unique_ptr<coding::CompressedBitVector>();
+ }
+};
+} // namespace
+
unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
SearchQueryParams const & params)
{
- version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat());
+ Selector<RetrieveAddressFeaturesAdaptor> selector;
+ return selector(id, value, cancellable, params);
+}
- if (mwmTraits.GetSearchIndexFormat() ==
- version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter)
- {
- using TValue = FeatureWithRankAndCenter;
- return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
- }
- else if (mwmTraits.GetSearchIndexFormat() ==
- version::MwmTraits::SearchIndexFormat::CompressedBitVector)
- {
- using TValue = FeatureIndexValue;
- return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params);
- }
- return unique_ptr<coding::CompressedBitVector>();
+unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(
+ MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
+ TokenSlice const & slice)
+{
+ Selector<RetrievePostcodeFeaturesAdaptor> selector;
+ return selector(id, value, cancellable, slice);
}
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
diff --git a/search/retrieval.hpp b/search/retrieval.hpp
index 79b7fd6452..7540211db3 100644
--- a/search/retrieval.hpp
+++ b/search/retrieval.hpp
@@ -21,16 +21,24 @@ namespace search
namespace v2
{
class MwmContext;
+class TokenSlice;
// Retrieves from the search index corresponding to |value| all
// features matching to |params|.
-unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(
- MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
- SearchQueryParams const & params);
+unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId const & id,
+ MwmValue & value,
+ my::Cancellable const & cancellable,
+ SearchQueryParams const & params);
+
+// Retrieves from the search index corresponding to |value| all
+// postcodes matching to |slice|.
+unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(
+ MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable,
+ TokenSlice const & slice);
// Retrieves from the geometry index corresponding to |value| all features belonging to |rect|.
unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures(
- MwmContext const & context, my::Cancellable const & cancellable,
- m2::RectD const & rect, int scale);
+ MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect,
+ int scale);
} // namespace v2
} // namespace search
diff --git a/search/search.pro b/search/search.pro
index 13676ab757..6ba422fe32 100644
--- a/search/search.pro
+++ b/search/search.pro
@@ -54,6 +54,7 @@ HEADERS += \
v2/locality_scorer.hpp \
v2/mwm_context.hpp \
v2/nested_rects_cache.hpp \
+ v2/postcodes_matcher.hpp \
v2/pre_ranking_info.hpp \
v2/rank_table_cache.hpp \
v2/ranking_info.hpp \
@@ -62,6 +63,7 @@ HEADERS += \
v2/search_query_v2.hpp \
v2/stats_cache.hpp \
v2/street_vicinity_loader.hpp \
+ v2/token_slice.hpp \
SOURCES += \
approximate_string_match.cpp \
@@ -99,6 +101,7 @@ SOURCES += \
v2/locality_scorer.cpp \
v2/mwm_context.cpp \
v2/nested_rects_cache.cpp \
+ v2/postcodes_matcher.cpp \
v2/pre_ranking_info.cpp \
v2/rank_table_cache.cpp \
v2/ranking_info.cpp \
@@ -106,3 +109,4 @@ SOURCES += \
v2/search_model.cpp \
v2/search_query_v2.cpp \
v2/street_vicinity_loader.cpp \
+ v2/token_slice.cpp \
diff --git a/search/search_integration_tests/helpers.cpp b/search/search_integration_tests/helpers.cpp
index cf2a7bce26..d099ddf703 100644
--- a/search/search_integration_tests/helpers.cpp
+++ b/search/search_integration_tests/helpers.cpp
@@ -33,7 +33,14 @@ void SearchTest::RegisterCountry(string const & name, m2::RectD const & rect)
bool SearchTest::ResultsMatch(string const & query,
vector<shared_ptr<tests_support::MatchingRule>> const & rules)
{
- tests_support::TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport);
+ return ResultsMatch(query, "en" /* locale */, rules);
+}
+
+bool SearchTest::ResultsMatch(string const & query,
+ string const & locale,
+ vector<shared_ptr<tests_support::MatchingRule>> const & rules)
+{
+ tests_support::TestSearchRequest request(m_engine, query, locale, Mode::Everywhere, m_viewport);
request.Wait();
return MatchResults(m_engine, rules, request.Results());
}
diff --git a/search/search_integration_tests/helpers.hpp b/search/search_integration_tests/helpers.hpp
index ea7412f8d8..5e3ffc32aa 100644
--- a/search/search_integration_tests/helpers.hpp
+++ b/search/search_integration_tests/helpers.hpp
@@ -72,11 +72,26 @@ public:
return id;
}
+ template <typename TBuildFn>
+ MwmSet::MwmId BuildWorld(TBuildFn && fn)
+ {
+ return BuildMwm("testWorld", feature::DataHeader::world, forward<TBuildFn>(fn));
+ }
+
+ template <typename TBuildFn>
+ MwmSet::MwmId BuildCountry(string const & name, TBuildFn && fn)
+ {
+ return BuildMwm(name, feature::DataHeader::country, forward<TBuildFn>(fn));
+ }
+
inline void SetViewport(m2::RectD const & viewport) { m_viewport = viewport; }
bool ResultsMatch(string const & query,
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
+ bool ResultsMatch(string const & query, string const & locale,
+ vector<shared_ptr<tests_support::MatchingRule>> const & rules);
+
bool ResultsMatch(string const & query, Mode mode,
vector<shared_ptr<tests_support::MatchingRule>> const & rules);
diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp
index e0c5fe9211..f8a728cb7b 100644
--- a/search/search_integration_tests/search_query_v2_test.cpp
+++ b/search/search_integration_tests/search_query_v2_test.cpp
@@ -1,10 +1,15 @@
#include "testing/testing.hpp"
+#include "search/retrieval.hpp"
#include "search/search_integration_tests/helpers.hpp"
#include "search/search_tests_support/test_feature.hpp"
#include "search/search_tests_support/test_mwm_builder.hpp"
#include "search/search_tests_support/test_results_matching.hpp"
#include "search/search_tests_support/test_search_request.hpp"
+#include "search/v2/token_slice.hpp"
+
+#include "indexer/feature.hpp"
+#include "indexer/index.hpp"
#include "geometry/point2d.hpp"
#include "geometry/rect2d.hpp"
@@ -77,39 +82,38 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke)
TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en");
TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en");
- BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
- {
- builder.Add(wonderlandCountry);
- builder.Add(losAlamosCity);
- builder.Add(mskCity);
- });
- auto wonderlandId =
- BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
- {
- builder.Add(losAlamosCity);
- builder.Add(mskCity);
- builder.Add(longPondVillage);
-
- builder.Add(feynmanStreet);
- builder.Add(bohrStreet1);
- builder.Add(bohrStreet2);
- builder.Add(bohrStreet3);
- builder.Add(firstAprilStreet);
-
- builder.Add(feynmanHouse);
- builder.Add(bohrHouse);
- builder.Add(hilbertHouse);
- builder.Add(descartesHouse);
- builder.Add(bornHouse);
-
- builder.Add(busStop);
- builder.Add(tramStop);
- builder.Add(quantumTeleport1);
- builder.Add(quantumTeleport2);
- builder.Add(quantumCafe);
- builder.Add(lantern1);
- builder.Add(lantern2);
- });
+ BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(wonderlandCountry);
+ builder.Add(losAlamosCity);
+ builder.Add(mskCity);
+ });
+ auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
+ {
+ builder.Add(losAlamosCity);
+ builder.Add(mskCity);
+ builder.Add(longPondVillage);
+
+ builder.Add(feynmanStreet);
+ builder.Add(bohrStreet1);
+ builder.Add(bohrStreet2);
+ builder.Add(bohrStreet3);
+ builder.Add(firstAprilStreet);
+
+ builder.Add(feynmanHouse);
+ builder.Add(bohrHouse);
+ builder.Add(hilbertHouse);
+ builder.Add(descartesHouse);
+ builder.Add(bornHouse);
+
+ builder.Add(busStop);
+ builder.Add(tramStop);
+ builder.Add(quantumTeleport1);
+ builder.Add(quantumTeleport2);
+ builder.Add(quantumCafe);
+ builder.Add(lantern1);
+ builder.Add(lantern2);
+ });
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
{
@@ -182,11 +186,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld)
TestCountry wonderland(m2::PointD(0, 0), countryName, "en");
TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */);
- auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
- {
- builder.Add(wonderland);
- builder.Add(losAlamos);
- });
+ auto testWorldId = BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(wonderland);
+ builder.Add(losAlamos);
+ });
RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0)));
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5)));
@@ -213,16 +217,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName)
"Hyde Park", "en");
TestPOI cafe(m2::PointD(1.0, 1.0), "London Cafe", "en");
- auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
- {
- builder.Add(london);
- });
- auto wonderlandId =
- BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
- {
- builder.Add(hydePark);
- builder.Add(cafe);
- });
+ auto worldId = BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(london);
+ });
+ auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
+ {
+ builder.Add(hydePark);
+ builder.Add(cafe);
+ });
SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9)));
{
@@ -248,11 +251,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, DisableSuggests)
TestCity london1(m2::PointD(1, 1), "London", "en", 100 /* rank */);
TestCity london2(m2::PointD(-1, -1), "London", "en", 100 /* rank */);
- auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
- {
- builder.Add(london1);
- builder.Add(london2);
- });
+ auto worldId = BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(london1);
+ builder.Add(london2);
+ });
SetViewport(m2::RectD(m2::PointD(0.5, 0.5), m2::PointD(1.5, 1.5)));
{
@@ -299,21 +302,20 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
TestPOI cafe2(m2::PointD(-0.99, -0.99), "", "en");
cafe2.SetTypes({{"amenity", "cafe"}});
-
- auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder)
- {
- builder.Add(sanFrancisco);
- builder.Add(lermontovo);
- });
- auto wonderlandId = BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder)
- {
- builder.Add(cafe1);
- builder.Add(cafe2);
- builder.Add(goldenGateBridge);
- builder.Add(goldenGateStreet);
- builder.Add(lermontov);
- builder.Add(waterfall);
- });
+ auto worldId = BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(sanFrancisco);
+ builder.Add(lermontovo);
+ });
+ auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
+ {
+ builder.Add(cafe1);
+ builder.Add(cafe2);
+ builder.Add(goldenGateBridge);
+ builder.Add(goldenGateStreet);
+ builder.Add(lermontov);
+ builder.Add(waterfall);
+ });
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
{
@@ -359,5 +361,65 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
TEST(ResultsMatch("waterfall", rules), ());
}
}
+
+UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes)
+{
+ string const countryName = "Russia";
+
+ TestCity city(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */);
+ TestStreet street(
+ vector<m2::PointD>{m2::PointD(-0.5, 0.0), m2::PointD(0, 0), m2::PointD(0.5, 0.0)},
+ "Первомайская", "ru");
+ TestBuilding building(m2::PointD(0.0, 0.00001), "", "28 а", street, "ru");
+ building.SetPostcode("141701");
+
+ BuildWorld([&](TestMwmBuilder & builder)
+ {
+ builder.Add(city);
+ });
+ auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder)
+ {
+ builder.Add(street);
+ builder.Add(building);
+ });
+
+ // Tests that postcode is added to the search index.
+ {
+ auto handle = m_engine.GetMwmHandleById(countryId);
+ TEST(handle.IsAlive(), ());
+ my::Cancellable cancellable;
+
+ SearchQueryParams params;
+ params.m_tokens.emplace_back();
+ params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141701")));
+ auto * value = handle.GetValue<MwmValue>();
+ auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable,
+ TokenSlice(params, 0, params.m_tokens.size()));
+ TEST_EQUAL(1, features->PopCount(), ());
+
+ uint64_t index = 0;
+ while (!features->GetBit(index))
+ ++index;
+
+ Index::FeaturesLoaderGuard loader(m_engine, countryId);
+ FeatureType ft;
+ loader.GetFeatureByIndex(index, ft);
+
+ auto rule = ExactMatch(countryId, building);
+ TEST(rule->Matches(ft), ());
+ }
+ {
+ TRules rules{ExactMatch(countryId, building)};
+ TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ());
+ }
+
+ // TODO (@y): uncomment this test and add more tests when postcodes
+ // search will be implemented.
+ //
+ // {
+ // TRules rules{ExactMatch(countryId, building)};
+ // TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ());
+ // }
+}
} // namespace
} // namespace search
diff --git a/search/search_query.cpp b/search/search_query.cpp
index 2f23bf7e68..9c90c5e62f 100644
--- a/search/search_query.cpp
+++ b/search/search_query.cpp
@@ -12,6 +12,7 @@
#include "search/v2/pre_ranking_info.hpp"
#include "search/v2/ranking_info.hpp"
#include "search/v2/ranking_utils.hpp"
+#include "search/v2/token_slice.hpp"
#include "storage/country_info_getter.hpp"
#include "storage/index.hpp"
@@ -464,10 +465,8 @@ void Query::SetQuery(string const & query)
search::Delimiters delims;
SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims);
- bool checkPrefix = true;
-
// Assign prefix with last parsed token.
- if (checkPrefix && !m_tokens.empty() && !delims(strings::LastUniChar(query)))
+ if (!m_tokens.empty() && !delims(strings::LastUniChar(query)))
{
m_prefix.swap(m_tokens.back());
m_tokens.pop_back();
@@ -623,7 +622,7 @@ class PreResult2Maker
info.m_nameScore = v2::NAME_SCORE_ZERO;
- v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
+ v2::TokenSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken);
for (auto const & lang : m_params.m_langs)
{
diff --git a/search/search_tests/postcodes_matcher_tests.cpp b/search/search_tests/postcodes_matcher_tests.cpp
new file mode 100644
index 0000000000..0438b5cdbb
--- /dev/null
+++ b/search/search_tests/postcodes_matcher_tests.cpp
@@ -0,0 +1,72 @@
+#include "../../testing/testing.hpp"
+
+#include "search/search_query_params.hpp"
+#include "search/v2/postcodes_matcher.hpp"
+#include "search/v2/token_slice.hpp"
+
+#include "indexer/search_delimiters.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "base/stl_add.hpp"
+#include "base/string_utils.hpp"
+
+#include "std/string.hpp"
+#include "std/vector.hpp"
+
+using namespace strings;
+
+namespace search
+{
+namespace v2
+{
+namespace
+{
+// Test helper: splits |s| into tokens, packs them into SearchQueryParams and
+// runs the slice-based LooksLikePostcode() on all of them.
+//
+// When |checkPrefix| is true and the tokenizer reports that the last token is
+// a prefix, that token is stored in |m_prefixTokens| instead of |m_tokens|.
+bool LooksLikePostcode(string const & s, bool checkPrefix)
+{
+  vector<UniString> tokens;
+  bool const endsWithPrefix =
+      TokenizeStringAndCheckIfLastTokenIsPrefix(s, tokens, search::Delimiters());
+
+  // Remember the size before the prefix token (if any) is taken away: the
+  // slice below must still span every token of the query.
+  size_t const sliceSize = tokens.size();
+
+  SearchQueryParams params;
+  bool const moveLastToPrefix = checkPrefix && endsWithPrefix;
+  if (moveLastToPrefix)
+  {
+    params.m_prefixTokens.push_back(tokens.back());
+    tokens.pop_back();
+  }
+
+  // Every remaining token becomes a full token with itself as the only synonym.
+  for (auto const & fullToken : tokens)
+  {
+    params.m_tokens.emplace_back();
+    auto & synonyms = params.m_tokens.back();
+    synonyms.push_back(fullToken);
+  }
+
+  TokenSlice const slice(params, 0, sliceSize);
+  return LooksLikePostcode(slice);
+}
+
+// Smoke test for postcode pattern matching. Positive cases cover complete
+// postcodes as well as queries whose last token is still being typed (a
+// prefix); negative cases are ordinary address fragments.
+UNIT_TEST(PostcodesMatcher_Smoke)
+{
+  TEST(LooksLikePostcode("141701", false /* checkPrefix */), ());
+  TEST(LooksLikePostcode("141", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
+  // Same postcode, but matched as a complete string rather than as a prefix.
+  TEST(LooksLikePostcode("BA6 8JP", false /* checkPrefix */), ());
+  TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ());
+
+  TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ());
+  TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ());
+
+  TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ());
+  TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ());
+  TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ());
+  TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ());
+}
+} // namespace
+} // namespace v2
+} // namespace search
diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp
index b74e35a284..cb82359f11 100644
--- a/search/search_tests/ranking_tests.cpp
+++ b/search/search_tests/ranking_tests.cpp
@@ -2,6 +2,7 @@
#include "search/search_query_params.hpp"
#include "search/v2/ranking_utils.hpp"
+#include "search/v2/token_slice.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
@@ -32,7 +33,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken,
params.m_prefixTokens.swap(params.m_tokens.back());
params.m_tokens.pop_back();
}
- return GetNameScore(name, TokensSlice(params, startToken, endToken));
+ return GetNameScore(name, TokenSlice(params, startToken, endToken));
}
UNIT_TEST(NameTest_Smoke)
diff --git a/search/search_tests/search_tests.pro b/search/search_tests/search_tests.pro
index a0b8b8538b..92ff99f8fe 100644
--- a/search/search_tests/search_tests.pro
+++ b/search/search_tests/search_tests.pro
@@ -27,6 +27,7 @@ SOURCES += \
latlon_match_test.cpp \
locality_finder_test.cpp \
locality_scorer_test.cpp \
+ postcodes_matcher_tests.cpp \
query_saver_tests.cpp \
ranking_tests.cpp \
string_intersection_test.cpp \
diff --git a/search/search_tests_support/test_feature.hpp b/search/search_tests_support/test_feature.hpp
index 933d128609..b038998a1e 100644
--- a/search/search_tests_support/test_feature.hpp
+++ b/search/search_tests_support/test_feature.hpp
@@ -19,6 +19,7 @@ public:
bool Matches(FeatureType const & feature) const;
inline void SetPostcode(string const & postcode) { m_postcode = postcode; }
+ inline uint64_t GetId() const { return m_id; }
inline string const & GetName() const { return m_name; }
virtual void Serialize(FeatureBuilder1 & fb) const;
diff --git a/search/search_trie.hpp b/search/search_trie.hpp
index 944f5c3652..2248d1694d 100644
--- a/search/search_trie.hpp
+++ b/search/search_trie.hpp
@@ -6,6 +6,7 @@
namespace search
{
static const uint8_t kCategoriesLang = 128;
+static const uint8_t kPostcodesLang = 129;
static const uint8_t kPointCodingBits = 20;
} // namespace search
diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp
index 70631326ef..fa9edda019 100644
--- a/search/v2/geocoder.cpp
+++ b/search/v2/geocoder.cpp
@@ -979,6 +979,8 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter)
m_filter = &filter;
MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; });
+ // TODO (@y): implement postcodes matching here.
+
// The order is rather important. Match streets first, then all other stuff.
GreedilyMatchStreets();
MatchPOIsAndBuildings(0 /* curToken */);
diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp
index 49e7e8a817..5c790ab6aa 100644
--- a/search/v2/locality_scorer.cpp
+++ b/search/v2/locality_scorer.cpp
@@ -1,5 +1,7 @@
#include "search/v2/locality_scorer.hpp"
+#include "search/v2/token_slice.hpp"
+
#include "std/algorithm.hpp"
namespace search
@@ -99,8 +101,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const
auto score = NAME_SCORE_ZERO;
for (auto const & name : names)
{
- score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken,
- l.m_locality.m_endToken)));
+ score = max(score, GetNameScore(name, v2::TokenSlice(m_params, l.m_locality.m_startToken,
+ l.m_locality.m_endToken)));
}
l.m_nameScore = score;
}
diff --git a/search/v2/postcodes_matcher.cpp b/search/v2/postcodes_matcher.cpp
new file mode 100644
index 0000000000..8c28d83eb2
--- /dev/null
+++ b/search/v2/postcodes_matcher.cpp
@@ -0,0 +1,168 @@
+#include "search/v2/postcodes_matcher.hpp"
+
+#include "search/v2/token_slice.hpp"
+
+#include "indexer/search_delimiters.hpp"
+#include "indexer/search_string_utils.hpp"
+
+#include "base/logging.hpp"
+#include "base/macros.hpp"
+#include "base/stl_add.hpp"
+#include "base/string_utils.hpp"
+
+#include "std/transform_iterator.hpp"
+#include "std/unique_ptr.hpp"
+#include "std/utility.hpp"
+#include "std/vector.hpp"
+
+using namespace strings;
+
+namespace search
+{
+namespace v2
+{
+namespace
+{
+// Top patterns for postcodes: 'n' stands for an ASCII digit and 'a' for an ASCII Latin letter (see
+// SimplifyChar() below), any other character is matched as is. See
+// search/search_quality/clusterize_postcodes.lisp for details on how these patterns were built.
+char const * const g_patterns[] = {
+ "aa nnnn", "aa nnnnn", "aaa nnnn", "aan", "aan naa", "aana naa", "aann",
+ "aann naa", "aannaa", "aannnaa", "aannnn", "an naa", "ana naa", "ana nan",
+ "ananan", "ann aann", "ann naa", "annnnaaa", "nn nnn", "nnn", "nnn nn",
+ "nnn nnn", "nnn nnnn", "nnnn", "nnnn aa", "nnnn nnn", "nnnnaa", "nnnnn",
+ "nnnnn nnn", "nnnnn nnnn", "nnnnn nnnnn", "nnnnnn", "nnnnnnn", "nnnnnnnn", "〒nnn nnnn"};
+
+// Projects a character onto the alphabet used by g_patterns: characters
+// accepted by IsASCIIDigit() become 'n', characters accepted by
+// IsASCIILatin() become 'a', everything else is returned unchanged.
+UniChar SimplifyChar(UniChar const & c)
+{
+  if (IsASCIIDigit(c))
+    return 'n';
+
+  return IsASCIILatin(c) ? static_cast<UniChar>('a') : c;
+}
+
+// A node of the postcode-patterns trie. Outgoing edges live in |m_moves| as
+// (character, child) pairs and are looked up by a linear scan.
+struct Node
+{
+  Node() : m_isLeaf(false) {}
+
+  // Follows the edge labelled with |c|. Returns nullptr when this node has no
+  // such edge.
+  Node const * Move(UniChar c) const
+  {
+    for (auto const & edge : m_moves)
+    {
+      if (edge.first != c)
+        continue;
+      return edge.second.get();
+    }
+    return nullptr;
+  }
+
+  // Follows the edges labelled with the characters of [begin, end) one by
+  // one. Returns nullptr as soon as some edge is missing.
+  template <typename TIt>
+  Node const * Move(TIt begin, TIt end) const
+  {
+    Node const * node = this;
+    while (begin != end && node)
+    {
+      node = node->Move(*begin);
+      ++begin;
+    }
+    return node;
+  }
+
+  // Same as Move(c), but creates the child when the edge does not exist yet.
+  Node & MakeMove(UniChar c)
+  {
+    for (auto const & edge : m_moves)
+    {
+      if (edge.first != c)
+        continue;
+      return *edge.second;
+    }
+
+    m_moves.emplace_back(c, make_unique<Node>());
+    auto & added = m_moves.back();
+    return *added.second;
+  }
+
+  // Same as Move(begin, end), but creates all missing nodes along the path.
+  template <typename TIt>
+  Node & MakeMove(TIt begin, TIt end)
+  {
+    Node * node = this;
+    while (begin != end)
+    {
+      node = &node->MakeMove(*begin);
+      ++begin;
+    }
+    return *node;
+  }
+
+  buffer_vector<pair<UniChar, unique_ptr<Node>>, 2> m_moves;
+
+  // True when the path from the root to this node spells a complete pattern.
+  bool m_isLeaf;
+
+  DISALLOW_COPY(Node);
+};
+
+// Stores every string from g_patterns in a trie with a low branching factor
+// and matches token slices against these patterns. Tokens of a pattern are
+// joined in the trie by a single ' ' edge.
+class PostcodesMatcher
+{
+public:
+  PostcodesMatcher() : m_root(), m_maxNumTokensInPostcode(0)
+  {
+    search::Delimiters delimiters;
+    for (auto const * pattern : g_patterns)
+      AddString(MakeUniString(pattern), delimiters);
+  }
+
+  // Checks whether the tokens of |slice| match at least one of the postcode
+  // patterns. Only the first synonym of every token is examined. When the
+  // last token of |slice| is a prefix, it is enough for the tokens to match
+  // the beginning of some pattern.
+  //
+  // Complexity: O(total length of tokens in |slice|).
+  bool HasString(TokenSlice const & slice) const
+  {
+    size_t const numTokens = slice.Size();
+
+    Node const * node = &m_root;
+    for (size_t i = 0; i < numTokens && node; ++i)
+    {
+      auto const & token = slice.Get(i).front();
+      node = node->Move(make_transform_iterator(token.begin(), &SimplifyChar),
+                        make_transform_iterator(token.end(), &SimplifyChar));
+
+      // Consecutive tokens are separated by a space edge in the trie.
+      bool const isLastToken = (i + 1 == numTokens);
+      if (node && !isLastToken)
+        node = node->Move(' ');
+    }
+
+    if (!node)
+      return false;
+
+    // An unfinished last token may stop anywhere inside a pattern.
+    bool const endsWithPrefix = numTokens > 0 && slice.IsPrefix(numTokens - 1);
+    return endsWithPrefix || node->m_isLeaf;
+  }
+
+  inline size_t GetMaxNumTokensInPostcode() const { return m_maxNumTokensInPostcode; }
+
+private:
+  // Splits |s| with |delimiters| and inserts the resulting tokens into the
+  // trie, also keeping track of the largest token count seen so far.
+  void AddString(UniString const & s, search::Delimiters & delimiters)
+  {
+    vector<UniString> tokens;
+    SplitUniString(s, MakeBackInsertFunctor(tokens), delimiters);
+
+    size_t const numTokens = tokens.size();
+    m_maxNumTokensInPostcode = max(m_maxNumTokensInPostcode, numTokens);
+
+    Node * node = &m_root;
+    for (size_t i = 0; i < numTokens; ++i)
+    {
+      // Every token except the first one is preceded by a space edge.
+      if (i != 0)
+        node = &node->MakeMove(' ');
+      node = &node->MakeMove(tokens[i].begin(), tokens[i].end());
+    }
+    node->m_isLeaf = true;
+  }
+
+  Node m_root;
+
+  size_t m_maxNumTokensInPostcode;
+
+  DISALLOW_COPY(PostcodesMatcher);
+};
+
+// Returns the single shared matcher instance. It is a function-local static,
+// so the trie is built the first time this function is called.
+PostcodesMatcher const & GetPostcodesMatcher()
+{
+  static PostcodesMatcher matcher;
+  PostcodesMatcher const & shared = matcher;
+  return shared;
+}
+} // namespace
+
+// Returns true when the tokens of |slice| match one of the known postcode
+// patterns.
+bool LooksLikePostcode(TokenSlice const & slice)
+{
+  PostcodesMatcher const & matcher = GetPostcodesMatcher();
+  return matcher.HasString(slice);
+}
+
+// Returns the largest number of tokens found in a single postcode pattern.
+size_t GetMaxNumTokensInPostcode()
+{
+  PostcodesMatcher const & matcher = GetPostcodesMatcher();
+  return matcher.GetMaxNumTokensInPostcode();
+}
+} // namespace v2
+} // namespace search
diff --git a/search/v2/postcodes_matcher.hpp b/search/v2/postcodes_matcher.hpp
new file mode 100644
index 0000000000..b0e2398e8e
--- /dev/null
+++ b/search/v2/postcodes_matcher.hpp
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "std/cstdint.hpp"
+
+namespace search
+{
+namespace v2
+{
+class TokenSlice;
+
+bool LooksLikePostcode(TokenSlice const & slice);  // True when |slice| matches a postcode pattern.
+
+size_t GetMaxNumTokensInPostcode();  // Largest number of tokens in a single postcode pattern.
+} // namespace v2
+} // namespace search
diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp
index a559192930..7fca403cd1 100644
--- a/search/v2/ranking_utils.hpp
+++ b/search/v2/ranking_utils.hpp
@@ -7,7 +7,6 @@
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
-#include "base/assert.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
@@ -42,74 +41,6 @@ enum NameScore
NAME_SCORE_COUNT
};
-class TokensSlice
-{
-public:
- TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
- : m_params(params), m_offset(startToken), m_size(endToken - startToken)
- {
- ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
- }
-
- inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
- {
- ASSERT_LESS(i, Size(), ());
- return m_params.GetTokens(m_offset + i);
- }
-
- inline size_t Size() const { return m_size; }
-
- inline bool Empty() const { return Size() == 0; }
-
- inline bool IsPrefix(size_t i) const
- {
- ASSERT_LESS(i, Size(), ());
- return m_offset + i == m_params.m_tokens.size();
- }
-
-private:
- SearchQueryParams const & m_params;
- size_t const m_offset;
- size_t const m_size;
-};
-
-class TokensSliceNoCategories
-{
-public:
- TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken)
- : m_params(params)
- {
- ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
-
- m_indexes.reserve(endToken - startToken);
- for (size_t i = startToken; i < endToken; ++i)
- {
- if (!m_params.m_isCategorySynonym[i])
- m_indexes.push_back(i);
- }
- }
-
- inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const
- {
- ASSERT_LESS(i, Size(), ());
- return m_params.GetTokens(m_indexes[i]);
- }
-
- inline size_t Size() const { return m_indexes.size(); }
-
- inline bool Empty() const { return Size() == 0; }
-
- inline bool IsPrefix(size_t i) const
- {
- ASSERT_LESS(i, Size(), ());
- return m_indexes[i] == m_params.m_tokens.size();
- }
-
-private:
- SearchQueryParams const & m_params;
- vector<size_t> m_indexes;
-};
-
template <typename TSlice>
NameScore GetNameScore(string const & name, TSlice const & slice)
{
diff --git a/search/v2/token_slice.cpp b/search/v2/token_slice.cpp
new file mode 100644
index 0000000000..38556c40df
--- /dev/null
+++ b/search/v2/token_slice.cpp
@@ -0,0 +1,27 @@
+#include "search/v2/token_slice.hpp"
+
+namespace search
+{
+namespace v2
+{
+// Creates a view over the tokens of |params| with indexes in
+// [startToken, endToken). |params| is stored by reference.
+TokenSlice::TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken)
+  : m_params(params)
+  , m_offset(startToken)
+  , m_size(endToken - startToken)
+{
+  // |m_size| is an unsigned difference, so the range must not be reversed.
+  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
+}
+
+// Collects the indexes from [startToken, endToken) whose entry in
+// |m_isCategorySynonym| is not set. |params| is stored by reference.
+TokenSliceNoCategories::TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken,
+                                               size_t endToken)
+  : m_params(params)
+{
+  ASSERT_LESS_OR_EQUAL(startToken, endToken, ());
+
+  m_indexes.reserve(endToken - startToken);
+
+  size_t index = startToken;
+  while (index < endToken)
+  {
+    bool const isCategory = m_params.m_isCategorySynonym[index];
+    if (!isCategory)
+      m_indexes.push_back(index);
+    ++index;
+  }
+}
+} // namespace v2
+} // namespace search
diff --git a/search/v2/token_slice.hpp b/search/v2/token_slice.hpp
new file mode 100644
index 0000000000..7b9553e2f4
--- /dev/null
+++ b/search/v2/token_slice.hpp
@@ -0,0 +1,67 @@
+#pragma once
+
+#include "search/search_query_params.hpp"
+
+#include "base/assert.hpp"
+
+#include "std/cstdint.hpp"
+#include "std/vector.hpp"
+
+namespace search
+{
+namespace v2
+{
+// A lightweight view over a contiguous range of query tokens. The query
+// params are held by reference, so they must outlive the slice.
+class TokenSlice
+{
+public:
+  TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken);
+
+  // Returns the synonyms of the |i|-th token of the slice.
+  SearchQueryParams::TSynonymsVector const & Get(size_t i) const
+  {
+    ASSERT_LESS(i, Size(), ());
+    size_t const index = m_offset + i;
+    return m_params.GetTokens(index);
+  }
+
+  // Number of tokens in the slice.
+  size_t Size() const { return m_size; }
+
+  bool Empty() const { return m_size == 0; }
+
+  // Returns true when the |i|-th token of the slice sits right after the
+  // last full token of the query, i.e. at index |m_tokens.size()|.
+  bool IsPrefix(size_t i) const
+  {
+    ASSERT_LESS(i, Size(), ());
+    size_t const index = m_offset + i;
+    return index == m_params.m_tokens.size();
+  }
+
+private:
+  SearchQueryParams const & m_params;
+  size_t const m_offset;
+  size_t const m_size;
+};
+
+// A view over a subset of query tokens addressed through the index list
+// |m_indexes|, which is filled by the constructor. The query params are held
+// by reference, so they must outlive the slice.
+class TokenSliceNoCategories
+{
+public:
+  TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken);
+
+  // Returns the synonyms of the |i|-th token kept in the slice.
+  SearchQueryParams::TSynonymsVector const & Get(size_t i) const
+  {
+    ASSERT_LESS(i, Size(), ());
+    size_t const index = m_indexes[i];
+    return m_params.GetTokens(index);
+  }
+
+  // Number of tokens kept in the slice.
+  size_t Size() const { return m_indexes.size(); }
+
+  bool Empty() const { return m_indexes.size() == 0; }
+
+  // Returns true when the |i|-th kept token sits right after the last full
+  // token of the query, i.e. at index |m_tokens.size()|.
+  bool IsPrefix(size_t i) const
+  {
+    ASSERT_LESS(i, Size(), ());
+    size_t const index = m_indexes[i];
+    return index == m_params.m_tokens.size();
+  }
+
+private:
+  SearchQueryParams const & m_params;
+  vector<size_t> m_indexes;
+};
+} // namespace v2
+} // namespace search