diff options
author | Yuri Gorshenin <y@maps.me> | 2016-04-22 18:34:40 +0300 |
---|---|---|
committer | Yuri Gorshenin <y@maps.me> | 2016-04-22 20:11:05 +0300 |
commit | bdc06eada49d5be545ec5b503a1056db4473361d (patch) | |
tree | dfc6f843079767162bfb0e057df69dcedc7840ec | |
parent | f48c3e6a6a55501908279b73c39089ca0e04f01e (diff) |
Review fixes.
-rw-r--r-- | generator/search_index_builder.cpp | 12 | ||||
-rw-r--r-- | search/feature_offset_match.hpp | 85 | ||||
-rw-r--r-- | search/retrieval.cpp | 160 | ||||
-rw-r--r-- | search/retrieval.hpp | 7 | ||||
-rw-r--r-- | search/search.pro | 4 | ||||
-rw-r--r-- | search/search_integration_tests/search_query_v2_test.cpp | 31 | ||||
-rw-r--r-- | search/search_query.cpp | 4 | ||||
-rw-r--r-- | search/search_tests/postcodes_matcher_tests.cpp | 4 | ||||
-rw-r--r-- | search/search_tests/ranking_tests.cpp | 4 | ||||
-rw-r--r-- | search/search_tests_support/test_feature.hpp | 1 | ||||
-rw-r--r-- | search/search_trie.hpp | 2 | ||||
-rw-r--r-- | search/v2/locality_scorer.cpp | 6 | ||||
-rw-r--r-- | search/v2/postcodes_matcher.cpp | 15 | ||||
-rw-r--r-- | search/v2/postcodes_matcher.hpp | 4 | ||||
-rw-r--r-- | search/v2/token_slice.cpp (renamed from search/v2/tokens_slice.cpp) | 8 | ||||
-rw-r--r-- | search/v2/token_slice.hpp (renamed from search/v2/tokens_slice.hpp) | 8 |
16 files changed, 274 insertions, 81 deletions
diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index 9ba6112884..e4e32ed20b 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -261,9 +261,11 @@ public: void operator() (FeatureType const & f, uint32_t index) const { + using namespace search; + feature::TypesHolder types(f); - static search::TypesSkipper skipIndex; + static TypesSkipper skipIndex; skipIndex.SkipTypes(types); if (types.Empty()) @@ -284,10 +286,10 @@ public: // See OSM TagInfo or Wiki about modern postcodes format. The average number of tokens is less // than two. buffer_vector<strings::UniString, 2> tokens; - SplitUniString(search::NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens), - search::Delimiters()); + SplitUniString(NormalizeAndSimplifyString(postcode), MakeBackInsertFunctor(tokens), + Delimiters()); for (auto const & token : tokens) - inserter.AddToken(search::kCategoriesLang, search::PostcodeToString(token)); + inserter.AddToken(kPostcodesLang, PostcodeToString(token)); } // Skip types for features without names. @@ -303,7 +305,7 @@ public: // add names of categories of the feature for (uint32_t t : categoryTypes) - inserter.AddToken(search::kCategoriesLang, search::FeatureTypeToString(c.GetIndexForType(t))); + inserter.AddToken(kCategoriesLang, FeatureTypeToString(c.GetIndexForType(t))); } }; diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 232bec7b7e..c6f52f89e7 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -3,6 +3,7 @@ #include "search/search_index_values.hpp" #include "search/search_query.hpp" #include "search/search_query_params.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/trie.hpp" @@ -103,6 +104,25 @@ bool CheckMatchString(strings::UniChar const * rootPrefix, size_t rootPrefixSize return false; } + +template <typename TValue> +bool FindLangIndex(trie::Iterator<ValueList<TValue>> const & trieRoot, uint8_t lang, uint32_t & langIx) +{ + ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ()); + + uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size()); + for (uint32_t i = 0; i < numLangs; ++i) + { + auto const & edge = trieRoot.m_edge[i].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + if (edge[0] == lang) + { + langIx = i; + return true; + } + } + return false; +} } // namespace template <typename TValue, typename TF> @@ -222,7 +242,7 @@ public: toDo(value); } }; -} // namespace search::impl +} // impl template <typename TValue> struct TrieRootPrefix @@ -345,27 +365,23 @@ template <typename TValue, typename THolder> bool MatchCategoriesInTrie(SearchQueryParams const & params, trie::Iterator<ValueList<TValue>> const & trieRoot, THolder && holder) { - ASSERT_LESS(trieRoot.m_edge.size(), numeric_limits<uint32_t>::max(), ()); - uint32_t const numLangs = static_cast<uint32_t>(trieRoot.m_edge.size()); - for (uint32_t langIx = 0; langIx < numLangs; ++langIx) - { - auto const & edge = trieRoot.m_edge[langIx].m_label; - ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); - if (edge[0] == search::kCategoriesLang) - { - auto const catRoot = trieRoot.GoToEdge(langIx); - MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); - - // Last token's prefix is used as a complete token here, to - // limit the number of features in the last bucket of a - // holder. Probably, this is a false optimization. - holder.Resize(params.m_tokens.size() + 1); - holder.SwitchTo(params.m_tokens.size()); - MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); - return true; - } - } - return false; + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kCategoriesLang, langIx)) + return false; + + auto const & edge = trieRoot.m_edge[langIx].m_label; + ASSERT_GREATER_OR_EQUAL(edge.size(), 1, ()); + + auto const catRoot = trieRoot.GoToEdge(langIx); + MatchTokensInTrie(params.m_tokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); + + // Last token's prefix is used as a complete token here, to limit + // the number of features in the last bucket of a holder. Probably, + // this is a false optimization. + holder.Resize(params.m_tokens.size() + 1); + holder.SwitchTo(params.m_tokens.size()); + MatchTokenInTrie(params.m_prefixTokens, TrieRootPrefix<TValue>(*catRoot, edge), holder); + return true; } // Calls toDo with trie root prefix and language code on each language @@ -427,4 +443,29 @@ void MatchFeaturesInTrie(SearchQueryParams const & params, intersecter.ForEachResult(forward<ToDo>(toDo)); } + +template <typename TValue, typename TFilter, typename ToDo> +void MatchPostcodesInTrie(v2::TokenSlice const & slice, + trie::Iterator<ValueList<TValue>> const & trieRoot, + TFilter const & filter, ToDo && toDo) +{ + uint32_t langIx = 0; + if (!impl::FindLangIndex(trieRoot, search::kPostcodesLang, langIx)) + return; + + auto const & edge = trieRoot.m_edge[langIx].m_label; + auto const postcodesRoot = trieRoot.GoToEdge(langIx); + + impl::OffsetIntersecter<TFilter, TValue> intersecter(filter); + for (size_t i = 0; i < slice.Size(); ++i) + { + if (slice.IsPrefix(i)) + MatchTokenPrefixInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter); + else + MatchTokenInTrie(slice.Get(i), TrieRootPrefix<TValue>(*postcodesRoot, edge), intersecter); + intersecter.NextStep(); + } + + intersecter.ForEachResult(forward<ToDo>(toDo)); +} } // namespace search diff --git a/search/retrieval.cpp b/search/retrieval.cpp index 3d4ac39a62..c4376b5996 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -7,6 +7,7 @@ #include "search_trie.hpp" #include "v2/mwm_context.hpp" +#include "v2/token_slice.hpp" #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" @@ -29,8 +30,65 @@ using osm::Editor; namespace search { +namespace v2 +{ namespace { +class FeaturesCollector +{ +public: + FeaturesCollector(my::Cancellable const & cancellable, vector<uint64_t> & features) + : m_cancellable(cancellable), m_features(features), m_counter(0) + { + } + + template <typename TValue> + void operator()(TValue const & value) + { + if ((++m_counter & 0xFF) == 0) + BailIfCancelled(m_cancellable); + m_features.push_back(value.m_featureId); + } + + inline void operator()(uint32_t feature) { m_features.push_back(feature); } + + inline void operator()(uint64_t feature) { m_features.push_back(feature); } + +private: + my::Cancellable const & m_cancellable; + vector<uint64_t> & m_features; + uint32_t m_counter; +}; + +class EditedFeaturesHolder +{ +public: + EditedFeaturesHolder(MwmSet::MwmId const & id) + { + Editor & editor = Editor::Instance(); + m_deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); + m_modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); + m_created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); + } + + bool ModifiedOrDeleted(uint32_t featureIndex) const + { + return binary_search(m_deleted.begin(), m_deleted.end(), featureIndex) || + binary_search(m_modified.begin(), m_modified.end(), featureIndex); + } + + template <typename TFn> + void ForEachModifiedOrCreated(TFn & fn) + { + for_each(m_modified.begin(), m_modified.end(), fn); + for_each(m_created.begin(), m_created.end(), fn); + } + +private: + vector<uint32_t> m_deleted; + vector<uint32_t> m_modified; + vector<uint32_t> m_created; +}; unique_ptr<coding::CompressedBitVector> SortFeaturesAndBuildCBV(vector<uint64_t> && features) { @@ -89,6 +147,13 @@ bool MatchFeatureByName(FeatureType const & ft, SearchQueryParams const & params return matched; } +bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice) +{ + string const postcode = ft.GetMetadata().Get(feature::Metadata::FMD_POSTCODE); + // TODO(@y): implement this. + return false; +} + // Retrieves from the search index corresponding to |value| all // features matching to |params|. template <typename TValue> @@ -96,17 +161,48 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl( MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) { - // Exclude from search all deleted/modified features and match all edited/created features separately. - Editor & editor = Editor::Instance(); + EditedFeaturesHolder holder(id); + + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); + ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); + + auto const trieRoot = trie::ReadTrie<SubReaderWrapper<Reader>, ValueList<TValue>>( + SubReaderWrapper<Reader>(searchReader.GetPtr()), SingleValueSerializer<TValue>(codingParams)); + + // TODO (@y, @m): This code may be optimized in the case where + // bit vectors are sorted in the search index. + vector<uint64_t> features; + FeaturesCollector collector(cancellable, features); + + MatchFeaturesInTrie(params, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); - auto const deleted = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Deleted); - auto const modified = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Modified); - auto const filter = [&](uint32_t featureIndex) -> bool + // Match all edited/created features separately. + Editor & editor = Editor::Instance(); + auto const matcher = [&](uint32_t featureIndex) { - return (!binary_search(deleted.begin(), deleted.end(), featureIndex) && - !binary_search(modified.begin(), modified.end(), featureIndex)); + FeatureType ft; + VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ()); + // TODO(AlexZ): Should we match by some feature's metafields too? + if (MatchFeatureByName(ft, params)) + features.push_back(featureIndex); }; + holder.ForEachModifiedOrCreated(matcher); + + return SortFeaturesAndBuildCBV(move(features)); +} + +template <typename TValue> +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeaturesImpl( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice) +{ + EditedFeaturesHolder holder(id); + serial::CodingParams codingParams(trie::GetCodingParams(value.GetHeader().GetDefCodingParams())); ModelReaderPtr searchReader = value.m_cont.GetReader(SEARCH_INDEX_FILE_TAG); @@ -116,29 +212,26 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeaturesImpl( // TODO (@y, @m): This code may be optimized in the case where // bit vectors are sorted in the search index. vector<uint64_t> features; - uint32_t counter = 0; - auto const collector = [&](TValue const & value) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(value.m_featureId); - }; + FeaturesCollector collector(cancellable, features); - MatchFeaturesInTrie(params, *trieRoot, filter, collector); + MatchPostcodesInTrie(slice, *trieRoot, [&holder](uint32_t featureIndex) + { + return !holder.ModifiedOrDeleted(featureIndex); + }, + collector); // Match all edited/created features separately. + Editor & editor = Editor::Instance(); auto const matcher = [&](uint32_t featureIndex) { FeatureType ft; VERIFY(editor.GetEditedFeature(id, featureIndex, ft), ()); // TODO(AlexZ): Should we match by some feature's metafields too? - if (MatchFeatureByName(ft, params)) + if (MatchFeatureByPostcode(ft, slice)) features.push_back(featureIndex); }; - for_each(modified.begin(), modified.end(), matcher); - auto const created = editor.GetFeaturesByStatus(id, Editor::FeatureStatus::Created); - for_each(created.begin(), created.end(), matcher); + holder.ForEachModifiedOrCreated(matcher); return SortFeaturesAndBuildCBV(move(features)); } @@ -149,16 +242,11 @@ unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeaturesImpl( v2::MwmContext const & context, my::Cancellable const & cancellable, covering::IntervalsT const & coverage, int scale) { - uint32_t counter = 0; vector<uint64_t> features; - context.ForEachIndex(coverage, scale, [&](uint64_t featureId) - { - if ((++counter & 0xFF) == 0) - BailIfCancelled(cancellable); - features.push_back(featureId); - }); + FeaturesCollector collector(cancellable, features); + context.ForEachIndex(coverage, scale, collector); return SortFeaturesAndBuildCBV(move(features)); } @@ -172,6 +260,16 @@ struct RetrieveAddressFeaturesAdaptor } }; +template <typename T> +struct RetrievePostcodeFeaturesAdaptor +{ + template <typename... TArgs> + unique_ptr<coding::CompressedBitVector> operator()(TArgs &&... args) + { + return RetrievePostcodeFeaturesImpl<T>(forward<TArgs>(args)...); + } +}; + template <template <typename> class T> struct Selector { @@ -198,8 +296,6 @@ struct Selector }; } // namespace -namespace v2 -{ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures( MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, SearchQueryParams const & params) @@ -208,6 +304,14 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures( return selector(id, value, cancellable, params); } +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice) +{ + Selector<RetrievePostcodeFeaturesAdaptor> selector; + return selector(id, value, cancellable, slice); +} + unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures( MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect, int scale) diff --git a/search/retrieval.hpp b/search/retrieval.hpp index e0c55d6cf5..7540211db3 100644 --- a/search/retrieval.hpp +++ b/search/retrieval.hpp @@ -21,6 +21,7 @@ namespace search namespace v2 { class MwmContext; +class TokenSlice; // Retrieves from the search index corresponding to |value| all // features matching to |params|. @@ -29,6 +30,12 @@ unique_ptr<coding::CompressedBitVector> RetrieveAddressFeatures(MwmSet::MwmId co my::Cancellable const & cancellable, SearchQueryParams const & params); +// Retrieves from the search index corresponding to |value| all +// postcodes matching to |slice|. +unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures( + MwmSet::MwmId const & id, MwmValue & value, my::Cancellable const & cancellable, + TokenSlice const & slice); + // Retrieves from the geometry index corresponding to |value| all features belonging to |rect|. unique_ptr<coding::CompressedBitVector> RetrieveGeometryFeatures( MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect, diff --git a/search/search.pro b/search/search.pro index 0b4a76bfa0..6ba422fe32 100644 --- a/search/search.pro +++ b/search/search.pro @@ -63,7 +63,7 @@ HEADERS += \ v2/search_query_v2.hpp \ v2/stats_cache.hpp \ v2/street_vicinity_loader.hpp \ - v2/tokens_slice.hpp \ + v2/token_slice.hpp \ SOURCES += \ approximate_string_match.cpp \ @@ -109,4 +109,4 @@ SOURCES += \ v2/search_model.cpp \ v2/search_query_v2.cpp \ v2/street_vicinity_loader.cpp \ - v2/tokens_slice.cpp \ + v2/token_slice.cpp \ diff --git a/search/search_integration_tests/search_query_v2_test.cpp b/search/search_integration_tests/search_query_v2_test.cpp index 7ed19b2c58..f8a728cb7b 100644 --- a/search/search_integration_tests/search_query_v2_test.cpp +++ b/search/search_integration_tests/search_query_v2_test.cpp @@ -1,10 +1,15 @@ #include "testing/testing.hpp" +#include "search/retrieval.hpp" #include "search/search_integration_tests/helpers.hpp" #include "search/search_tests_support/test_feature.hpp" #include "search/search_tests_support/test_mwm_builder.hpp" #include "search/search_tests_support/test_results_matching.hpp" #include "search/search_tests_support/test_search_request.hpp" +#include "search/v2/token_slice.hpp" + +#include "indexer/feature.hpp" +#include "indexer/index.hpp" #include "geometry/point2d.hpp" #include "geometry/rect2d.hpp" @@ -377,6 +382,32 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestPostcodes) builder.Add(street); builder.Add(building); }); + + // Tests that postcode is added to the search index. + { + auto handle = m_engine.GetMwmHandleById(countryId); + TEST(handle.IsAlive(), ()); + my::Cancellable cancellable; + + SearchQueryParams params; + params.m_tokens.emplace_back(); + params.m_tokens.back().push_back(PostcodeToString(strings::MakeUniString("141701"))); + auto * value = handle.GetValue<MwmValue>(); + auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable, + TokenSlice(params, 0, params.m_tokens.size())); + TEST_EQUAL(1, features->PopCount(), ()); + + uint64_t index = 0; + while (!features->GetBit(index)) + ++index; + + Index::FeaturesLoaderGuard loader(m_engine, countryId); + FeatureType ft; + loader.GetFeatureByIndex(index, ft); + + auto rule = ExactMatch(countryId, building); + TEST(rule->Matches(ft), ()); + } { TRules rules{ExactMatch(countryId, building)}; TEST(ResultsMatch("Долгопрудный первомайская 28а", "ru" /* locale */, rules), ()); diff --git a/search/search_query.cpp b/search/search_query.cpp index d60c3f7120..9c90c5e62f 100644 --- a/search/search_query.cpp +++ b/search/search_query.cpp @@ -12,7 +12,7 @@ #include "search/v2/pre_ranking_info.hpp" #include "search/v2/ranking_info.hpp" #include "search/v2/ranking_utils.hpp" -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" #include "storage/country_info_getter.hpp" #include "storage/index.hpp" @@ -622,7 +622,7 @@ class PreResult2Maker info.m_nameScore = v2::NAME_SCORE_ZERO; - v2::TokensSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken); + v2::TokenSliceNoCategories slice(m_params, preInfo.m_startToken, preInfo.m_endToken); for (auto const & lang : m_params.m_langs) { diff --git a/search/search_tests/postcodes_matcher_tests.cpp b/search/search_tests/postcodes_matcher_tests.cpp index 79013bc289..0438b5cdbb 100644 --- a/search/search_tests/postcodes_matcher_tests.cpp +++ b/search/search_tests/postcodes_matcher_tests.cpp @@ -2,7 +2,7 @@ #include "search/search_query_params.hpp" #include "search/v2/postcodes_matcher.hpp" -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -42,7 +42,7 @@ bool LooksLikePostcode(string const & s, bool checkPrefix) params.m_tokens.back().push_back(token); } - return LooksLikePostcode(TokensSlice(params, 0, numTokens)); + return LooksLikePostcode(TokenSlice(params, 0, numTokens)); } UNIT_TEST(PostcodesMatcher_Smoke) diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index 37a2b15da0..cb82359f11 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -2,7 +2,7 @@ #include "search/search_query_params.hpp" #include "search/v2/ranking_utils.hpp" -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -33,7 +33,7 @@ NameScore GetScore(string const & name, string const & query, size_t startToken, params.m_prefixTokens.swap(params.m_tokens.back()); params.m_tokens.pop_back(); } - return GetNameScore(name, TokensSlice(params, startToken, endToken)); + return GetNameScore(name, TokenSlice(params, startToken, endToken)); } UNIT_TEST(NameTest_Smoke) diff --git a/search/search_tests_support/test_feature.hpp b/search/search_tests_support/test_feature.hpp index 933d128609..b038998a1e 100644 --- a/search/search_tests_support/test_feature.hpp +++ b/search/search_tests_support/test_feature.hpp @@ -19,6 +19,7 @@ public: bool Matches(FeatureType const & feature) const; inline void SetPostcode(string const & postcode) { m_postcode = postcode; } + inline uint64_t GetId() const { return m_id; } inline string const & GetName() const { return m_name; } virtual void Serialize(FeatureBuilder1 & fb) const; diff --git a/search/search_trie.hpp b/search/search_trie.hpp index 513f50007d..2248d1694d 100644 --- a/search/search_trie.hpp +++ b/search/search_trie.hpp @@ -5,8 +5,8 @@ namespace search { -static const uint8_t kPostcodeLang = 127; static const uint8_t kCategoriesLang = 128; +static const uint8_t kPostcodesLang = 129; static const uint8_t kPointCodingBits = 20; } // namespace search diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp index 053d57a595..5c790ab6aa 100644 --- a/search/v2/locality_scorer.cpp +++ b/search/v2/locality_scorer.cpp @@ -1,6 +1,6 @@ #include "search/v2/locality_scorer.hpp" -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" #include "std/algorithm.hpp" @@ -101,8 +101,8 @@ void LocalityScorer::SortByName(vector<ExLocality> & ls) const auto score = NAME_SCORE_ZERO; for (auto const & name : names) { - score = max(score, GetNameScore(name, v2::TokensSlice(m_params, l.m_locality.m_startToken, - l.m_locality.m_endToken))); + score = max(score, GetNameScore(name, v2::TokenSlice(m_params, l.m_locality.m_startToken, + l.m_locality.m_endToken))); } l.m_nameScore = score; } diff --git a/search/v2/postcodes_matcher.cpp b/search/v2/postcodes_matcher.cpp index 84bfa1fcae..8c28d83eb2 100644 --- a/search/v2/postcodes_matcher.cpp +++ b/search/v2/postcodes_matcher.cpp @@ -1,6 +1,6 @@ #include "search/v2/postcodes_matcher.hpp" -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -24,7 +24,8 @@ namespace v2 namespace { // Top patterns for postcodes. See -// search/search_quality/clusterize_postcodes.lisp for details. +// search/search_quality/clusterize_postcodes.lisp for details how +// these patterns were constructed. char const * const g_patterns[] = { "aa nnnn", "aa nnnnn", "aaa nnnn", "aan", "aan naa", "aana naa", "aann", "aann naa", "aannaa", "aannnaa", "aannnn", "an naa", "ana naa", "ana nan", @@ -90,6 +91,8 @@ struct Node DISALLOW_COPY(Node); }; +// This class puts all strings from g_patterns to a trie with a low +// branching factor and matches queries against these patterns. class PostcodesMatcher { public: @@ -100,7 +103,11 @@ public: AddString(MakeUniString(pattern), delimiters); } - bool HasString(TokensSlice const & slice) const + // Checks that given tokens match to at least one of postcodes + // patterns. + // + // Complexity: O(total length of tokens in |slice|). + bool HasString(TokenSlice const & slice) const { Node const * cur = &m_root; for (size_t i = 0; i < slice.Size() && cur; ++i) @@ -154,7 +161,7 @@ PostcodesMatcher const & GetPostcodesMatcher() } } // namespace -bool LooksLikePostcode(TokensSlice const & slice) { return GetPostcodesMatcher().HasString(slice); } +bool LooksLikePostcode(TokenSlice const & slice) { return GetPostcodesMatcher().HasString(slice); } size_t GetMaxNumTokensInPostcode() { return GetPostcodesMatcher().GetMaxNumTokensInPostcode(); } } // namespace v2 diff --git a/search/v2/postcodes_matcher.hpp b/search/v2/postcodes_matcher.hpp index 266e63b334..b0e2398e8e 100644 --- a/search/v2/postcodes_matcher.hpp +++ b/search/v2/postcodes_matcher.hpp @@ -6,9 +6,9 @@ namespace search { namespace v2 { -class TokensSlice; +class TokenSlice; -bool LooksLikePostcode(TokensSlice const & slice); +bool LooksLikePostcode(TokenSlice const & slice); size_t GetMaxNumTokensInPostcode(); } // namespace v2 diff --git a/search/v2/tokens_slice.cpp b/search/v2/token_slice.cpp index 9a523dcc02..38556c40df 100644 --- a/search/v2/tokens_slice.cpp +++ b/search/v2/token_slice.cpp @@ -1,17 +1,17 @@ -#include "search/v2/tokens_slice.hpp" +#include "search/v2/token_slice.hpp" namespace search { namespace v2 { -TokensSlice::TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken) +TokenSlice::TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken) : m_params(params), m_offset(startToken), m_size(endToken - startToken) { ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); } -TokensSliceNoCategories::TokensSliceNoCategories(SearchQueryParams const & params, - size_t startToken, size_t endToken) +TokenSliceNoCategories::TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, + size_t endToken) : m_params(params) { ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); diff --git a/search/v2/tokens_slice.hpp b/search/v2/token_slice.hpp index 70173fffdd..7b9553e2f4 100644 --- a/search/v2/tokens_slice.hpp +++ b/search/v2/token_slice.hpp @@ -11,10 +11,10 @@ namespace search { namespace v2 { -class TokensSlice +class TokenSlice { public: - TokensSlice(SearchQueryParams const & params, size_t startToken, size_t endToken); + TokenSlice(SearchQueryParams const & params, size_t startToken, size_t endToken); inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const { @@ -38,10 +38,10 @@ private: size_t const m_size; }; -class TokensSliceNoCategories +class TokenSliceNoCategories { public: - TokensSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken); + TokenSliceNoCategories(SearchQueryParams const & params, size_t startToken, size_t endToken); inline SearchQueryParams::TSynonymsVector const & Get(size_t i) const { |