diff options
25 files changed, 808 insertions, 218 deletions
diff --git a/base/string_utils.cpp b/base/string_utils.cpp index 5f26059688..da13ecb282 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -220,6 +220,10 @@ bool IsASCIIString(string const & str) return true; } +bool IsASCIIDigit(UniChar c) { return c >= '0' && c <= '9'; } + +bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } + bool StartsWith(UniString const & s, UniString const & p) { if (p.size() > s.size()) diff --git a/base/string_utils.hpp b/base/string_utils.hpp index 1c4634646a..f7c5b775ff 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -29,6 +29,19 @@ public: template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {} bool IsEqualAscii(char const * s) const; + + UniString & operator+=(UniString const & rhs) + { + append(rhs); + return *this; + } + + UniString operator+(UniString const & rhs) const + { + UniString result(*this); + result += rhs; + return result; + } }; /// Performs full case folding for string to make it search-compatible according @@ -67,6 +80,8 @@ bool EqualNoCase(string const & s1, string const & s2); UniString MakeUniString(string const & utf8s); string ToUtf8(UniString const & s); bool IsASCIIString(string const & str); +bool IsASCIIDigit(UniChar c); +bool IsASCIILatin(UniChar c); inline string DebugPrint(UniString const & s) { diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index c008e0537c..e4e32ed20b 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -136,11 +136,11 @@ struct FeatureNameInserter { } - void AddToken(signed char lang, strings::UniString const & s) const + void AddToken(uint8_t lang, strings::UniString const & s) const { strings::UniString key; key.reserve(s.size() + 1); - key.push_back(static_cast<uint8_t>(lang)); + key.push_back(lang); key.append(s.begin(), s.end()); m_keyValuePairs.emplace_back(key, m_val); @@ -261,9 +261,11 @@ public: void operator() (FeatureType const & f, uint32_t index) const { + using namespace search; + feature::TypesHolder types(f); - static search::TypesSkipper skipIndex; + static TypesSkipper skipIndex; skipIndex.SkipTypes(types); if (types.Empty()) @@ -278,6 +280,18 @@ public: skipIndex.IsCountryOrState(types) ? m_synonyms : nullptr, m_keyValuePairs, hasStreetType); m_valueBuilder.MakeValue(f, types, index, inserter.m_val); + string const postcode = f.GetMetadata().Get(feature::Metadata::FMD_POSTCODE); + if (!postcode.empty()) + { + // See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less + // than two. + buffer_vector<strings::UniString, 2> tokens; + SplitUniString(NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens), + Delimiters()); + for (auto const & token : tokens) + inserter.AddToken(kPostcodesLang, PostcodeToString(token)); + } + // Skip types for features without names. if (!f.ForEachName(inserter)) skipIndex.SkipEmptyNameTypes(types); @@ -291,7 +305,7 @@ public: // add names of categories of the feature for (uint32_t t : categoryTypes) - inserter.AddToken(search::kCategoriesLang, search::FeatureTypeToString(c.GetIndexForType(t))); + inserter.AddToken(kCategoriesLang, FeatureTypeToString(c.GetIndexForType(t))); } }; diff --git a/indexer/search_string_utils.cpp b/indexer/search_string_utils.cpp index 24aaaa3764..0283191869 100644 --- a/indexer/search_string_utils.cpp +++ b/indexer/search_string_utils.cpp @@ -90,6 +90,12 @@ UniString FeatureTypeToString(uint32_t type) return UniString(s.begin(), s.end()); } +UniString PostcodeToString(strings::UniString const & postcode) +{ + static UniString const kPrefix = MakeUniString("!postcode:"); + return kPrefix + postcode; +} + namespace { char const * kStreetTokensSeparator = "\t -,."; diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp index 5d1365c084..1a8c5d0ed6 100644 --- a/indexer/search_string_utils.hpp +++ b/indexer/search_string_utils.hpp @@ -19,6 +19,8 @@ void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims strings::UniString FeatureTypeToString(uint32_t type); +strings::UniString PostcodeToString(strings::UniString const & postcode); + template <class ContainerT, class DelimsT> bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s, ContainerT & tokens, diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 232bec7b7e..c6f52f89e7 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -3,6 +3,7 @@ #include "search/search_index_values.hpp" #include "search/search_query.hpp" #include "search/search_query_params.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/trie.hpp" @@ -103,6 +104,25 @@ bool CheckMatchString(strings::UniChar const * rootPrefix, size_t rootPrefixSize return false; } + +template <typename TValue> +bool FindLangIndex(trie::Iterator<ValueList<TValue>> const & trieRoot, uint8_t lang, uint32_t & langIx) +{ + ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ()); + + uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size()); + for (uint32_t i = 0; i < numLangs; ++i) + { + auto const & edge = trieRoot.m_edge[i].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + if (edge[0] == lang) + { + langIx = i; + return true; + } + } + return false; +} } // namespace template <typename TValue, typename TF> @@ -222,7 +242,7 @@ public: toDo(value); } }; -} // namespace search::impl +} // impl template <typename TValue> struct TrieRootPrefix @@ -345,27 +365,23 @@ template <typename TValue, typename THolder> bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::Iterator<ValueList<TValue>> const & trieRoot, THolder && holder) { - ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ()); - uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size()); - for (uint32_t langIx = 0; langIx < numLangs; ++langIx) - { - auto const & edge = trieRoot.m_edge[langIx].m_label; - ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); - if (edge[0] == search::kCategoriesLang) - { - auto const catRoot = trieRoot.GoToEdge(langIx); - MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); - - // Last token's prefix is used as a complete token here, to - // limit the number of features in the last bucket of a - // holder. Probably, this is a false optimization. - holder.Resize(params.m_tokens.size() + 1); - holder.SwitchTo(params.m_tokens.size()); - MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); - return true; - } - } - return false; + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx)) + return false; + + auto const & edge = trieRoot.m_edge[langIx].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + + auto const catRoot = trieRoot.GoToEdge(langIx); + MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); + + // Last token's prefix is used as a complete token here, to limit + // the number of features in the last bucket of a holder. Probably, + // this is a false optimization. + holder.Resize(params.m_tokens.size() + 1); + holder.SwitchTo(params.m_tokens.size()); + MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); + return true; } // Calls toDo with trie root prefix and language code on each language @@ -427,4 +443,29 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, intersecter.ForEachResult(forward<ToDo>(toDo)); } + +template <typename TValue, typename TFilter, typename ToDo> +void MatchPostcodesInTrie(v2::TokenSlice const & slice, + trie::Iterator<ValueList<TValue>> const & trieRoot, + TFilter const & filter, ToDo && toDo) +{ + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx)) + return; + + auto const & edge = trieRoot.m_edge[langIx].m_label; + auto const postcodesRoot = trieRoot.GoToEdge(langIx); + + impl::OffsetIntersecter<TFilter, TValue> intersecter(filter); + for (size_t i = 0; i < slice.Size(); ++i) + { + if (slice.IsPrefix(i)) + MatchTokenPrefixInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter); + else + MatchTokenInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter); + intersecter.NextStep(); + } + + intersecter.ForEachResult(forward<ToDo>(toDo)); +} } // namespace search diff --git a/search/retrieval.cpp b/search/retrieval.cpp index 4afdb46e2b..c4376b5996 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -7,6 +7,7 @@ #include "search_trie.hpp" #include "v2/mwm_context.hpp" +#include "v2/token_slice.hpp" #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" @@ -29,8 +30,65 @@ using osm::Editor; namespace search { +namespace v2 +{ namespace { +class FeaturesCollector +{ +public: + FeaturesCollector(my::Cancellable const & cancellable, vector<uint64_t> & features) + : m_cancellable(cancellable), m_features(features), m_counter(0) + { + } + + template <typename TValue> + void operator()(TValue const & value) + { + if ((++m_counter & 0xFF) == 0) + BailIfCancelled(m_cancellable); + m_features.push_back(value.m_featureId); + } + + inline void operator()(uint32_t feature) { m_features.push_back(feature); } + + inline void operator()(uint64_t feature) { m_features.push_back(feature); } + +private: + my::Cancellable const & m_cancellable; + vector<uint64_t> & m_features; + uint32_t m_counter; +}; + +class EditedFeaturesHolder +{ +public: + EditedFeaturesHolder(MwmSet::MwmId const & id) + { + Editor & editor = Editor::Instance(); + m_deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); + m_modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); + m_created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); + } + + bool ModifiedOrDeleted(uint32_t featureIndex) const + { + return binary_search(m_deleted.begin(), m_deleted.end(), featureIndex) || + binary_search(m_modified.begin(), m_modified.end(), featureIndex); + } + + template <typename TFn> + void ForEachModifiedOrCreated(TFn & fn) + { + for_each(m_modified.begin(), m_modified.end(), fn); + for_each(m_created.begin(), m_created.end(), fn); + } + +private: + vector<uint32_t> m_deleted; + vector<uint32_t> m_modified; + vector<uint32_t> m_created; +}; unique_ptr<coding::CompressedBitVector> SortFeaturesAndBuildCBV(vector<uint64_t> && features) { @@ -89,6 +147,13 @@ bool MatchFeatureByName(FeatureType const & ft, SearchQueryParams const & params return matched; } +bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice) +{ + string const postcode = ft.GetMetadata().Get(feature::Metadata::FMD_POSTCODE); + // TODO(@y): implement this. + return false; +} + // Retrieves from the search index corresponding to |value| all // features matching to |params|. template <typename TValue> @@ -96,17 +161,48 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl( MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - // Exclude from search all deleted/modified features and match all edited/created features separately. - Editor & editor = Editor::Instance(); + EditedFeaturesHolder holder(id); + + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); + ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); + + auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>( + SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams)); - auto const deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); - auto const modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); - auto const filter = [&](uint32_t featureIndex) -> bool + // TODO (@y, @m): This code may be optimized in the case where + // bit vectors are sorted in the search index. + vector<uint64_t> features; + FeaturesCollector collector(cancellable, features); + + MatchFeaturesInTrie(params, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); + + // Match all edited/created features separately. + Editor & editor = Editor::Instance(); + auto const matcher = [&](uint32_t featureIndex) { - return (!binary_search(deleted.begin(), deleted.end(), featureIndex) && - !binary_search(modified.begin(), modified.end(), featureIndex)); + FeatureType ft; + VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ()); + // TODO(AlexZ): Should we match by some feature's metafields too? + if (MatchFeatureByName(ft, params)) + features.push_back(featureIndex); }; + holder.ForEachModifiedOrCreated(matcher); + + return SortFeaturesAndBuildCBV(move(features)); +} + +template <typename TValue> +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeaturesImpl( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice) +{ + EditedFeaturesHolder holder(id); + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); @@ -116,29 +212,26 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl( // TODO (@y, @m): This code may be optimized in the case where // bit vectors are sorted in the search index. vector<uint64_t> features; - uint32_t counter = 0; - auto const collector = [&](TValue const & value) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(value.m_featureId); - }; + FeaturesCollector collector(cancellable, features); - MatchFeaturesInTrie(params, *trieRoot, filter, collector); + MatchPostcodesInTrie(slice, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); // Match all edited/created features separately. + Editor & editor = Editor::Instance(); auto const matcher = [&](uint32_t featureIndex) { FeatureType ft; VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ()); // TODO(AlexZ): Should we match by some feature's metafields too? - if (MatchFeatureByName(ft, params)) + if (MatchFeatureByPostcode(ft, slice)) features.push_back(featureIndex); }; - for_each(modified.begin(), modified.end(), matcher); - auto const created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); - for_each(created.begin(), created.end(), matcher); + holder.ForEachModifiedOrCreated(matcher); return SortFeaturesAndBuildCBV(move(features)); } @@ -149,42 +242,74 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl( v2::MwmContext const & context, my::Cancellable const & cancellable, covering::IntervalsT const & coverage, int scale) { - uint32_t counter = 0; vector<uint64_t> features; - context.ForEachIndex(coverage, scale, [&](uint64_t featureId) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(featureId); - }); + FeaturesCollector collector(cancellable, features); + context.ForEachIndex(coverage, scale, collector); return SortFeaturesAndBuildCBV(move(features)); } -} // namespace +template <typename T> +struct RetrieveAddressFeaturesAdaptor +{ + template <typename... TArgs> + unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args) + { + return RetrieveAddressFeaturesImpl<T>(forward<TArgs>(args)...); + } +}; -namespace v2 +template <typename T> +struct RetrievePostcodeFeaturesAdaptor +{ + template <typename... TArgs> + unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args) + { + return RetrievePostcodeFeaturesImpl<T>(forward<TArgs>(args)...); + } +}; + +template <template <typename> class T> +struct Selector { + template <typename... TArgs> + unique_ptr<coding::CompressedBitVector> operator()(MwmSet::MwmId const & id, MwmValue & value, + TArgs &&... args) + { + version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat()); + + if (mwmTraits.GetSearchIndexFormat() == + version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter) + { + T<FeatureWithRankAndCenter> t; + return t(id, value, forward<TArgs>(args)...); + } + if (mwmTraits.GetSearchIndexFormat() == + version::MwmTraits::SearchIndexFormat::CompressedBitVector) + { + T<FeatureIndexValue> t; + return t(id, value, forward<TArgs>(args)...); + } + return unique_ptr<coding::CompressedBitVector>(); + } +}; +} // namespace + unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures( MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - version::MwmTraits mwmTraits(value.GetMwmVersion().GetFormat()); + Selector<RetrieveAddressFeaturesAdaptor> selector; + return selector(id, value, cancellable, params); +} - if (mwmTraits.GetSearchIndexFormat() == - version::MwmTraits::SearchIndexFormat::FeaturesWithRankAndCenter) - { - using TValue = FeatureWithRankAndCenter; - return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params); - } - else if (mwmTraits.GetSearchIndexFormat() == - version::MwmTraits::SearchIndexFormat::CompressedBitVector) - { - using TValue = FeatureIndexValue; - return RetrieveAddressFeaturesImpl<TValue>(id, value, cancellable, params); - } - return unique_ptr<coding::CompressedBitVector>(); +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice) +{ + Selector<RetrievePostcodeFeaturesAdaptor> selector; + return selector(id, value, cancellable, slice); } unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures( diff --git a/search/retrieval.hpp b/search/retrieval.hpp index 79b7fd6452..7540211db3 100644 --- a/search/retrieval.hpp +++ b/search/retrieval.hpp @@ -21,16 +21,24 @@ namespace search namespace v2 { class MwmContext; +class TokenSlice; // Retrieves from the search index corresponding to |value| all // features matching to |params|. -unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures( - MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, - SearchQueryParams const & params); +unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId const & id, + MwmValue & value, + my::Cancellable const & cancellable, + SearchQueryParams const & params); + +// Retrieves from the search index corresponding to |value| all +// postcodes matching to |slice|. +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice); // Retrieves from the geometry index corresponding to |value| all features belonging to |rect|. unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures( - MwmContext const & context, my::Cancellable const & cancellable, - m2::RectD const & rect, int scale); + MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect, + int scale); } // namespace v2 } // namespace search diff --git a/search/search.pro b/search/search.pro index 13676ab757..6ba422fe32 100644 --- a/search/search.pro +++ b/search/search.pro @@ -54,6 +54,7 @@ HEADERS += \ v2/locality_scorer.hpp \ v2/mwm_context.hpp \ v2/nested_rects_cache.hpp \ + v2/postcodes_matcher.hpp \ v2/pre_ranking_info.hpp \ v2/rank_table_cache.hpp \ v2/ranking_info.hpp \ @@ -62,6 +63,7 @@ HEADERS += \ v2/search_query_v2.hpp \ v2/stats_cache.hpp \ v2/street_vicinity_loader.hpp \ + v2/token_slice.hpp \ SOURCES += \ approximate_string_match.cpp \ @@ -99,6 +101,7 @@ SOURCES += \ v2/locality_scorer.cpp \ v2/mwm_context.cpp \ v2/nested_rects_cache.cpp \ + v2/postcodes_matcher.cpp \ v2/pre_ranking_info.cpp \ v2/rank_table_cache.cpp \ v2/ranking_info.cpp \ @@ -106,3 +109,4 @@ SOURCES += \ v2/search_model.cpp \ v2/search_query_v2.cpp \ v2/street_vicinity_loader.cpp \ + v2/token_slice.cpp \ diff --git a/search/search_integration_tests/helpers.cpp b/search/search_integration_tests/helpers.cpp index cf2a7bce26..d099ddf703 100644 --- a/search/search_integration_tests/helpers.cpp +++ b/search/search_integration_tests/helpers.cpp @@ -33,7 +33,14 @@ void SearchTest::RegisterCountry(string const & name, m2::RectD const & rect) bool SearchTest::ResultsMatch(string const & query, vector<shared_ptr<tests_support::MatchingRule>> const & rules) { - tests_support::TestSearchRequest request(m_engine, query, "en", Mode::Everywhere, m_viewport); + return ResultsMatch(query, "en" /* locale */, rules); +} + +bool SearchTest::ResultsMatch(string const & query, + string const & locale, + vector<shared_ptr<tests_support::MatchingRule>> const & rules) +{ + tests_support::TestSearchRequest request(m_engine, query, locale, Mode::Everywhere, m_viewport); request.Wait(); return MatchResults(m_engine, rules, request.Results()); } diff --git a/search/search_integration_tests/helpers.hpp b/search/search_integration_tests/helpers.hpp index ea7412f8d8..5e3ffc32aa 100644 --- a/search/search_integration_tests/helpers.hpp +++ b/search/search_integration_tests/helpers.hpp @@ -72,11 +72,26 @@ public: return id; } + template <typename TBuildFn> + MwmSet::MwmId BuildWorld(TBuildFn && fn) + { + return BuildMwm("testWorld", feature::DataHeader::world, forward<TBuildFn>(fn)); + } + + template <typename TBuildFn> + MwmSet::MwmId BuildCountry(string const & name, TBuildFn && fn) + { + return BuildMwm(name, feature::DataHeader::country, forward<TBuildFn>(fn)); + } + inline void SetViewport(m2::RectD const & viewport) { m_viewport = viewport; } bool ResultsMatch(string const & query, vector<shared_ptr<tests_support::MatchingRule>> const & rules); + bool ResultsMatch(string const & query, string const & locale, + vector<shared_ptr<tests_support::MatchingRule>> const & rules); + bool ResultsMatch(string const & query, Mode mode, vector<shared_ptr<tests_support::MatchingRule>> const & rules); diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp index e0c5fe9211..f8a728cb7b 100644 --- a/search/search_integration_tests/search_query_v2_test.cpp +++ b/search/search_integration_tests/search_query_v2_test.cpp @@ -1,10 +1,15 @@ #include "testing/testing.hpp" +#include "search/retrieval.hpp" #include "search/search_integration_tests/helpers.hpp" #include "search/search_tests_support/test_feature.hpp" #include "search/search_tests_support/test_mwm_builder.hpp" #include "search/search_tests_support/test_results_matching.hpp" #include "search/search_tests_support/test_search_request.hpp" +#include "search/v2/token_slice.hpp" + +#include "indexer/feature.hpp" +#include "indexer/index.hpp" #include "geometry/point2d.hpp" #include "geometry/rect2d.hpp" @@ -77,39 +82,38 @@ UNIT_CLASS_TEST(SearchQueryV2Test, Smoke) TestPOI lantern1(m2::PointD(10.0005, 10.0005), "lantern 1", "en"); TestPOI lantern2(m2::PointD(10.0006, 10.0005), "lantern 2", "en"); - BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) - { - builder.Add(wonderlandCountry); - builder.Add(losAlamosCity); - builder.Add(mskCity); - }); - auto wonderlandId = - BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder) - { - builder.Add(losAlamosCity); - builder.Add(mskCity); - builder.Add(longPondVillage); - - builder.Add(feynmanStreet); - builder.Add(bohrStreet1); - builder.Add(bohrStreet2); - builder.Add(bohrStreet3); - builder.Add(firstAprilStreet); - - builder.Add(feynmanHouse); - builder.Add(bohrHouse); - builder.Add(hilbertHouse); - builder.Add(descartesHouse); - builder.Add(bornHouse); - - builder.Add(busStop); - builder.Add(tramStop); - builder.Add(quantumTeleport1); - builder.Add(quantumTeleport2); - builder.Add(quantumCafe); - builder.Add(lantern1); - builder.Add(lantern2); - }); + BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(wonderlandCountry); + builder.Add(losAlamosCity); + builder.Add(mskCity); + }); + auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) + { + builder.Add(losAlamosCity); + builder.Add(mskCity); + builder.Add(longPondVillage); + + builder.Add(feynmanStreet); + builder.Add(bohrStreet1); + builder.Add(bohrStreet2); + builder.Add(bohrStreet3); + builder.Add(firstAprilStreet); + + builder.Add(feynmanHouse); + builder.Add(bohrHouse); + builder.Add(hilbertHouse); + builder.Add(descartesHouse); + builder.Add(bornHouse); + + builder.Add(busStop); + builder.Add(tramStop); + builder.Add(quantumTeleport1); + builder.Add(quantumTeleport2); + builder.Add(quantumCafe); + builder.Add(lantern1); + builder.Add(lantern2); + }); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); { @@ -182,11 +186,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchInWorld) TestCountry wonderland(m2::PointD(0, 0), countryName, "en"); TestCity losAlamos(m2::PointD(0, 0), "Los Alamos", "en", 100 /* rank */); - auto testWorldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) - { - builder.Add(wonderland); - builder.Add(losAlamos); - }); + auto testWorldId = BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(wonderland); + builder.Add(losAlamos); + }); RegisterCountry(countryName, m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(1.0, 1.0))); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.5, -0.5))); @@ -213,16 +217,15 @@ UNIT_CLASS_TEST(SearchQueryV2Test, SearchByName) "Hyde Park", "en"); TestPOI cafe(m2::PointD(1.0, 1.0), "London Cafe", "en"); - auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) - { - builder.Add(london); - }); - auto wonderlandId = - BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder) - { - builder.Add(hydePark); - builder.Add(cafe); - }); + auto worldId = BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(london); + }); + auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) + { + builder.Add(hydePark); + builder.Add(cafe); + }); SetViewport(m2::RectD(m2::PointD(-1.0, -1.0), m2::PointD(-0.9, -0.9))); { @@ -248,11 +251,11 @@ UNIT_CLASS_TEST(SearchQueryV2Test, DisableSuggests) TestCity london1(m2::PointD(1, 1), "London", "en", 100 /* rank */); TestCity london2(m2::PointD(-1, -1), "London", "en", 100 /* rank */); - auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) - { - builder.Add(london1); - builder.Add(london2); - }); + auto worldId = BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(london1); + builder.Add(london2); + }); SetViewport(m2::RectD(m2::PointD(0.5, 0.5), m2::PointD(1.5, 1.5))); { @@ -299,21 +302,20 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo) TestPOI cafe2(m2::PointD(-0.99, -0.99), "", "en"); cafe2.SetTypes({{"amenity", "cafe"}}); - - auto worldId = BuildMwm("testWorld", feature::DataHeader::world, [&](TestMwmBuilder & builder) - { - builder.Add(sanFrancisco); - builder.Add(lermontovo); - }); - auto wonderlandId = BuildMwm(countryName, feature::DataHeader::country, [&](TestMwmBuilder & builder) - { - builder.Add(cafe1); - builder.Add(cafe2); - builder.Add(goldenGateBridge); - builder.Add(goldenGateStreet); - builder.Add(lermontov); - builder.Add(waterfall); - }); + auto worldId = BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(sanFrancisco); + builder.Add(lermontovo); + }); + auto wonderlandId = BuildCountry(countryName, [&](TestMwmBuilder & builder) + { + builder.Add(cafe1); + builder.Add(cafe2); + builder.Add(goldenGateBridge); + builder.Add(goldenGateStreet); + builder.Add(lermontov); + builder.Add(waterfall); + }); SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5))); { @@ -359,5 +361,65 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo) TEST(ResultsMatch("waterfall", rules), ()); } } + +UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) +{ + string const countryName = "Russia"; + + TestCity city(m2::PointD(0, 0), "Долгопрудный", "ru", 100 /* rank */); + TestStreet street( + vector<m2::PointD>{m2::PointD(-0.5, 0.0), m2::PointD(0, 0), m2::PointD(0.5, 0.0)}, + "Первомайская", "ru"); + TestBuilding building(m2::PointD(0.0, 0.00001), "", "28 а", street, "ru"); + building.SetPostcode("141701"); + + BuildWorld([&](TestMwmBuilder & builder) + { + builder.Add(city); + }); + auto countryId = BuildCountry(countryName, [&](TestMwmBuilder & builder) + { + builder.Add(street); + builder.Add(building); + }); + + // Tests that postcode is added to the search index. + { + auto handle = m_engine.GetMwmHandleById(countryId); + TEST(handle.IsAlive(), ()); + my::Cancellable cancellable; + + SearchQueryParams params; + params.m_tokens.emplace_back(); + params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141701"))); + auto * value = handle.GetValue<MwmValue>(); + auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable, + TokenSlice(params, 0, params.m_tokens.size())); + TEST_EQUAL(1, features->PopCount(), ()); + + uint64_t index = 0; + while (!features->GetBit(index)) + ++index; + + Index::FeaturesLoaderGuard loader(m_engine, countryId); + FeatureType ft; + loader.GetFeatureByIndex(index, ft); + + auto rule = ExactMatch(countryId, building); + TEST(rule->Matches(ft), ()); + } + { + TRules rules{ExactMatch(countryId, building)}; + TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ()); + } + + // TODO (@y): uncomment this test and add more tests when postcodes + // search will be implemented. + // + // { + // TRules rules{ExactMatch(countryId, building)}; + // TEST(ResultsMatch("Долгопрудный первомайская 28а, 141701", "ru" /* locale */, rules), ()); + // } +} } // namespace } // namespace search diff --git a/search/search_query.cpp b/search/search_query.cpp index 2f23bf7e68..9c90c5e62f 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -12,6 +12,7 @@ #include "search/v2/pre_ranking_info.hpp" #include "search/v2/ranking_info.hpp" #include "search/v2/ranking_utils.hpp" +#include "search/v2/token_slice.hpp" #include "storage/country_info_getter.hpp" #include "storage/index.hpp" @@ -464,10 +465,8 @@ void Query::SetQuery(string const & query) search::Delimiters delims; SplitUniString(NormalizeAndSimplifyString(query), MakeBackInsertFunctor(m_tokens), delims); - bool checkPrefix = true; - // Assign prefix with last parsed token. - if (checkPrefix && !m_tokens.empty() && !delims(strings::LastUniChar(query))) + if (!m_tokens.empty() && !delims(strings::LastUniChar(query))) { m_prefix.swap(m_tokens.back()); m_tokens.pop_back(); @@ -623,7 +622,7 @@ class PreResult2Maker info.m_nameScore = v2::NAME_SCORE_ZERO; - v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken); + v2::TokenSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken); for (auto const & lang : m_params.m_langs) { diff --git a/search/search_tests/postcodes_matcher_tests.cpp b/search/search_tests/postcodes_matcher_tests.cpp new file mode 100644 index 0000000000..0438b5cdbb --- /dev/null +++ b/search/search_tests/postcodes_matcher_tests.cpp @@ -0,0 +1,72 @@ +#include "../../testing/testing.hpp" + +#include "search/search_query_params.hpp" +#include "search/v2/postcodes_matcher.hpp" +#include "search/v2/token_slice.hpp" + +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "base/stl_add.hpp" +#include "base/string_utils.hpp" + +#include "std/string.hpp" +#include "std/vector.hpp" + +using namespace strings; + +namespace search +{ +namespace v2 +{ +namespace +{ +bool LooksLikePostcode(string const & s, bool checkPrefix) +{ + vector<UniString> tokens; + bool const lastTokenIsPrefix = + TokenizeStringAndCheckIfLastTokenIsPrefix(s, tokens, search::Delimiters()); + + size_t const numTokens = tokens.size(); + + SearchQueryParams params; + if (checkPrefix && lastTokenIsPrefix) + { + params.m_prefixTokens.push_back(tokens.back()); + tokens.pop_back(); + } + + for (auto const & token : tokens) + { + params.m_tokens.emplace_back(); + params.m_tokens.back().push_back(token); + } + + return LooksLikePostcode(TokenSlice(params, 0, numTokens)); +} + +UNIT_TEST(PostcodesMatcher_Smoke) +{ + TEST(LooksLikePostcode("141701", false /* checkPrefix */), ()); + TEST(LooksLikePostcode("141", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ()); + + TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ()); + TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ()); + + TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ()); + TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ()); + TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ()); + TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ()); +} +} // namespace +} // namespace v2 +} // namespace search diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index b74e35a284..cb82359f11 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -2,6 +2,7 @@ #include "search/search_query_params.hpp" #include "search/v2/ranking_utils.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -32,7 +33,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken, params.m_prefixTokens.swap(params.m_tokens.back()); params.m_tokens.pop_back(); } - return GetNameScore(name, TokensSlice(params, startToken, endToken)); + return GetNameScore(name, TokenSlice(params, startToken, endToken)); } UNIT_TEST(NameTest_Smoke) diff --git a/search/search_tests/search_tests.pro b/search/search_tests/search_tests.pro index a0b8b8538b..92ff99f8fe 100644 --- a/search/search_tests/search_tests.pro +++ b/search/search_tests/search_tests.pro @@ -27,6 +27,7 @@ SOURCES += \ latlon_match_test.cpp \ locality_finder_test.cpp \ locality_scorer_test.cpp \ + postcodes_matcher_tests.cpp \ query_saver_tests.cpp \ ranking_tests.cpp \ string_intersection_test.cpp \ diff --git a/search/search_tests_support/test_feature.hpp b/search/search_tests_support/test_feature.hpp index 933d128609..b038998a1e 100644 --- a/search/search_tests_support/test_feature.hpp +++ b/search/search_tests_support/test_feature.hpp @@ -19,6 +19,7 @@ public: bool Matches(FeatureType const & feature) const; inline void SetPostcode(string const & postcode) { m_postcode = postcode; } + inline uint64_t GetId() const { return m_id; } inline string const & GetName() const { return m_name; } virtual void Serialize(FeatureBuilder1 & fb) const; diff --git a/search/search_trie.hpp b/search/search_trie.hpp index 944f5c3652..2248d1694d 100644 --- a/search/search_trie.hpp +++ b/search/search_trie.hpp @@ -6,6 +6,7 @@ namespace search { static const uint8_t kCategoriesLang = 128; +static const uint8_t kPostcodesLang = 129; static const uint8_t kPointCodingBits = 20; } // namespace search diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp index 70631326ef..fa9edda019 100644 --- a/search/v2/geocoder.cpp +++ b/search/v2/geocoder.cpp @@ -979,6 +979,8 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter) m_filter = &filter; MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; }); + // TODO (@y): implement postcodes matching here. + // The order is rather important. Match streets first, then all other stuff. GreedilyMatchStreets(); MatchPOIsAndBuildings(0 /* curToken */); diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp index 49e7e8a817..5c790ab6aa 100644 --- a/search/v2/locality_scorer.cpp +++ b/search/v2/locality_scorer.cpp @@ -1,5 +1,7 @@ #include "search/v2/locality_scorer.hpp" +#include "search/v2/token_slice.hpp" + #include "std/algorithm.hpp" namespace search @@ -99,8 +101,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const auto score = NAME_SCORE_ZERO; for (auto const & name : names) { - score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken, - l.m_locality.m_endToken))); + score = max(score, GetNameScore(name, v2::TokenSlice(m_params, l.m_locality.m_startToken, + l.m_locality.m_endToken))); } l.m_nameScore = score; } diff --git a/search/v2/postcodes_matcher.cpp b/search/v2/postcodes_matcher.cpp new file mode 100644 index 0000000000..8c28d83eb2 --- /dev/null +++ b/search/v2/postcodes_matcher.cpp @@ -0,0 +1,168 @@ +#include "search/v2/postcodes_matcher.hpp" + +#include "search/v2/token_slice.hpp" + +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "base/logging.hpp" +#include "base/macros.hpp" +#include "base/stl_add.hpp" +#include "base/string_utils.hpp" + +#include "std/transform_iterator.hpp" +#include "std/unique_ptr.hpp" +#include "std/utility.hpp" +#include "std/vector.hpp" + +using namespace strings; + +namespace search +{ +namespace v2 +{ +namespace +{ +// Top patterns for postcodes. See +// search/search_quality/clusterize_postcodes.lisp for details how +// these patterns were constructed. +char const * const g_patterns[] = { + "aa nnnn", "aa nnnnn", "aaa nnnn", "aan", "aan naa", "aana naa", "aann", + "aann naa", "aannaa", "aannnaa", "aannnn", "an naa", "ana naa", "ana nan", + "ananan", "ann aann", "ann naa", "annnnaaa", "nn nnn", "nnn", "nnn nn", + "nnn nnn", "nnn nnnn", "nnnn", "nnnn aa", "nnnn nnn", "nnnnaa", "nnnnn", + "nnnnn nnn", "nnnnn nnnn", "nnnnn nnnnn", "nnnnnn", "nnnnnnn", "nnnnnnnn", "〒nnn nnnn"}; + +UniChar SimplifyChar(UniChar const & c) +{ + if (IsASCIIDigit(c)) + return 'n'; + if (IsASCIILatin(c)) + return 'a'; + return c; +} + +struct Node +{ + Node() : m_isLeaf(false) {} + + Node const * Move(UniChar c) const + { + for (auto const & p : m_moves) + { + if (p.first == c) + return p.second.get(); + } + return nullptr; + } + + template <typename TIt> + Node const * Move(TIt begin, TIt end) const + { + Node const * cur = this; + for (; begin != end && cur; ++begin) + cur = cur->Move(*begin); + return cur; + } + + Node & MakeMove(UniChar c) + { + for (auto const & p : m_moves) + { + if (p.first == c) + return *p.second; + } + m_moves.emplace_back(c, make_unique<Node>()); + return *m_moves.back().second; + } + + template <typename TIt> + Node & MakeMove(TIt begin, TIt end) + { + Node * cur = this; + for (; begin != end; ++begin) + cur = &cur->MakeMove(*begin); + return *cur; + } + + buffer_vector<pair<UniChar, unique_ptr<Node>>, 2> m_moves; + bool m_isLeaf; + + DISALLOW_COPY(Node); +}; + +// This class puts all strings from g_patterns to a trie with a low +// branching factor and matches queries against these patterns. +class PostcodesMatcher +{ +public: + PostcodesMatcher() : m_root(), m_maxNumTokensInPostcode(0) + { + search::Delimiters delimiters; + for (auto const * pattern : g_patterns) + AddString(MakeUniString(pattern), delimiters); + } + + // Checks that given tokens match to at least one of postcodes + // patterns. + // + // Complexity: O(total length of tokens in |slice|). + bool HasString(TokenSlice const & slice) const + { + Node const * cur = &m_root; + for (size_t i = 0; i < slice.Size() && cur; ++i) + { + auto const & s = slice.Get(i).front(); + cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar), + make_transform_iterator(s.end(), &SimplifyChar)); + if (cur && i + 1 < slice.Size()) + cur = cur->Move(' '); + } + + if (!cur) + return false; + + if (slice.Size() > 0 && slice.IsPrefix(slice.Size() - 1)) + return true; + + return cur->m_isLeaf; + } + + inline size_t GetMaxNumTokensInPostcode() const { return m_maxNumTokensInPostcode; } + +private: + void AddString(UniString const & s, search::Delimiters & delimiters) + { + vector<UniString> tokens; + SplitUniString(s, MakeBackInsertFunctor(tokens), delimiters); + m_maxNumTokensInPostcode = max(m_maxNumTokensInPostcode, tokens.size()); + + Node * cur = &m_root; + for (size_t i = 0; i < tokens.size(); ++i) + { + cur = &cur->MakeMove(tokens[i].begin(), tokens[i].end()); + if (i + 1 != tokens.size()) + cur = &cur->MakeMove(' '); + } + cur->m_isLeaf = true; + } + + Node m_root; + + size_t m_maxNumTokensInPostcode; + + DISALLOW_COPY(PostcodesMatcher); +}; + +PostcodesMatcher const & GetPostcodesMatcher() +{ + static PostcodesMatcher kMatcher; + return kMatcher; +} +} // namespace + +bool LooksLikePostcode(TokenSlice const & slice) { return GetPostcodesMatcher().HasString(slice); } + +size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); } +} // namespace v2 +} // namespace search diff --git a/search/v2/postcodes_matcher.hpp b/search/v2/postcodes_matcher.hpp new file mode 100644 index 0000000000..b0e2398e8e --- /dev/null +++ b/search/v2/postcodes_matcher.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include "std/cstdint.hpp" + +namespace search +{ +namespace v2 +{ +class TokenSlice; + +bool LooksLikePostcode(TokenSlice const & slice); + +size_t GetMaxNumTokensInPostcode(); +} // namespace v2 +} // namespace search diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp index a559192930..7fca403cd1 100644 --- a/search/v2/ranking_utils.hpp +++ b/search/v2/ranking_utils.hpp @@ -7,7 +7,6 @@ #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" -#include "base/assert.hpp" #include "base/stl_add.hpp" #include "base/string_utils.hpp" @@ -42,74 +41,6 @@ enum NameScore NAME_SCORE_COUNT }; -class TokensSlice -{ -public: - TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken) - : m_params(params), m_offset(startToken), m_size(endToken - startToken) - { - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); - } - - inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_params.GetTokens(m_offset + i); - } - - inline size_t Size() const { return m_size; } - - inline bool Empty() const { return Size() == 0; } - - inline bool IsPrefix(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_offset + i == m_params.m_tokens.size(); - } - -private: - SearchQueryParams const & m_params; - size_t const m_offset; - size_t const m_size; -}; - -class TokensSliceNoCategories -{ -public: - TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken) - : m_params(params) - { - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); - - m_indexes.reserve(endToken - startToken); - for (size_t i = startToken; i < endToken; ++i) - { - if (!m_params.m_isCategorySynonym[i]) - m_indexes.push_back(i); - } - } - - inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_params.GetTokens(m_indexes[i]); - } - - inline size_t Size() const { return m_indexes.size(); } - - inline bool Empty() const { return Size() == 0; } - - inline bool IsPrefix(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_indexes[i] == m_params.m_tokens.size(); - } - -private: - SearchQueryParams const & m_params; - vector<size_t> m_indexes; -}; - template <typename TSlice> NameScore GetNameScore(string const & name, TSlice const & slice) { diff --git a/search/v2/token_slice.cpp b/search/v2/token_slice.cpp new file mode 100644 index 0000000000..38556c40df --- /dev/null +++ b/search/v2/token_slice.cpp @@ -0,0 +1,27 @@ +#include "search/v2/token_slice.hpp" + +namespace search +{ +namespace v2 +{ +TokenSlice::TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken) + : m_params(params), m_offset(startToken), m_size(endToken - startToken) +{ + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); +} + +TokenSliceNoCategories::TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, + size_t endToken) + : m_params(params) +{ + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); + + m_indexes.reserve(endToken - startToken); + for (size_t i = startToken; i < endToken; ++i) + { + if (!m_params.m_isCategorySynonym[i]) + m_indexes.push_back(i); + } +} +} // namespace v2 +} // namespace search diff --git a/search/v2/token_slice.hpp b/search/v2/token_slice.hpp new file mode 100644 index 0000000000..7b9553e2f4 --- /dev/null +++ b/search/v2/token_slice.hpp @@ -0,0 +1,67 @@ +#pragma once + +#include "search/search_query_params.hpp" + +#include "base/assert.hpp" + +#include "std/cstdint.hpp" +#include "std/vector.hpp" + +namespace search +{ +namespace v2 +{ +class TokenSlice +{ +public: + TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken); + + inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_offset + i); + } + + inline size_t Size() const { return m_size; } + + inline bool Empty() const { return Size() == 0; } + + inline bool IsPrefix(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_offset + i == m_params.m_tokens.size(); + } + +private: + SearchQueryParams const & m_params; + size_t const m_offset; + size_t const m_size; +}; + +class TokenSliceNoCategories +{ +public: + TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken); + + inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_indexes[i]); + } + + inline size_t Size() const { return m_indexes.size(); } + + inline bool Empty() const { return Size() == 0; } + + inline bool IsPrefix(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_indexes[i] == m_params.m_tokens.size(); + } + +private: + SearchQueryParams const & m_params; + vector<size_t> m_indexes; +}; +} // namespace v2 +} // namespace search |