From f672cb0b8227ddafcd6b271724dda3e05df9bfef Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Fri, 27 May 2016 19:40:55 +0300 Subject: [search] Got rid of the v2 directory and namespace. --- search/cbv_ptr.cpp | 67 + search/cbv_ptr.hpp | 58 + search/feature_offset_match.hpp | 4 +- search/features_filter.cpp | 47 + search/features_filter.hpp | 59 + search/features_layer.cpp | 33 + search/features_layer.hpp | 43 + search/features_layer_matcher.cpp | 143 ++ search/features_layer_matcher.hpp | 371 +++++ search/features_layer_path_finder.cpp | 196 +++ search/features_layer_path_finder.hpp | 85 ++ search/geocoder.cpp | 1601 +++++++++++++++++++ search/geocoder.hpp | 390 +++++ search/geometry_cache.cpp | 79 + search/geometry_cache.hpp | 107 ++ search/house_numbers_matcher.cpp | 320 ++++ search/house_numbers_matcher.hpp | 64 + search/house_to_street_table.cpp | 69 + search/house_to_street_table.hpp | 26 + search/intermediate_result.cpp | 3 +- search/intermediate_result.hpp | 18 +- search/intersection_result.cpp | 64 + search/intersection_result.hpp | 37 + search/locality_finder.cpp | 5 +- search/locality_scorer.cpp | 137 ++ search/locality_scorer.hpp | 55 + search/mwm_context.cpp | 46 + search/mwm_context.hpp | 94 ++ search/nested_rects_cache.cpp | 102 ++ search/nested_rects_cache.hpp | 46 + search/pre_ranker.cpp | 2 +- search/pre_ranking_info.cpp | 20 + search/pre_ranking_info.hpp | 31 + search/processor.cpp | 58 +- search/processor.hpp | 17 +- search/rank_table_cache.cpp | 27 + search/rank_table_cache.hpp | 46 + search/ranking_info.cpp | 93 ++ search/ranking_info.hpp | 49 + search/ranking_utils.cpp | 41 + search/ranking_utils.hpp | 90 ++ search/result.hpp | 6 +- search/retrieval.cpp | 24 +- search/retrieval.hpp | 4 +- search/reverse_geocoder.cpp | 6 +- search/reverse_geocoder.hpp | 4 +- search/search.pro | 82 +- search/search_integration_tests/processor_test.cpp | 7 +- search/search_model.cpp | 138 ++ search/search_model.hpp | 62 + .../features_collector_tool.cpp | 4 
+- .../search_quality_tool/search_quality_tool.cpp | 4 +- search/search_tests/house_numbers_matcher_test.cpp | 6 +- search/search_tests/locality_scorer_test.cpp | 3 +- search/search_tests/ranking_tests.cpp | 5 +- search/stats_cache.hpp | 63 + search/street_vicinity_loader.cpp | 67 + search/street_vicinity_loader.hpp | 95 ++ search/token_slice.cpp | 66 + search/token_slice.hpp | 110 ++ search/types_skipper.hpp | 4 +- search/v2/cbv_ptr.cpp | 77 - search/v2/cbv_ptr.hpp | 61 - search/v2/features_filter.cpp | 49 - search/v2/features_filter.hpp | 61 - search/v2/features_layer.cpp | 35 - search/v2/features_layer.hpp | 45 - search/v2/features_layer_matcher.cpp | 148 -- search/v2/features_layer_matcher.hpp | 372 ----- search/v2/features_layer_path_finder.cpp | 198 --- search/v2/features_layer_path_finder.hpp | 87 -- search/v2/geocoder.cpp | 1607 -------------------- search/v2/geocoder.hpp | 392 ----- search/v2/geometry_cache.cpp | 79 - search/v2/geometry_cache.hpp | 109 -- search/v2/house_numbers_matcher.cpp | 322 ---- search/v2/house_numbers_matcher.hpp | 66 - search/v2/house_to_street_table.cpp | 71 - search/v2/house_to_street_table.hpp | 28 - search/v2/intersection_result.cpp | 66 - search/v2/intersection_result.hpp | 39 - search/v2/locality_scorer.cpp | 139 -- search/v2/locality_scorer.hpp | 57 - search/v2/mwm_context.cpp | 52 - search/v2/mwm_context.hpp | 96 -- search/v2/nested_rects_cache.cpp | 104 -- search/v2/nested_rects_cache.hpp | 48 - search/v2/pre_ranking_info.cpp | 22 - search/v2/pre_ranking_info.hpp | 33 - search/v2/rank_table_cache.cpp | 37 - search/v2/rank_table_cache.hpp | 57 - search/v2/ranking_info.cpp | 103 -- search/v2/ranking_info.hpp | 51 - search/v2/ranking_utils.cpp | 43 - search/v2/ranking_utils.hpp | 92 -- search/v2/search_model.cpp | 149 -- search/v2/search_model.hpp | 65 - search/v2/stats_cache.hpp | 68 - search/v2/street_vicinity_loader.cpp | 72 - search/v2/street_vicinity_loader.hpp | 98 -- search/v2/token_slice.cpp | 71 - 
search/v2/token_slice.hpp | 112 -- 102 files changed, 5464 insertions(+), 5620 deletions(-) create mode 100644 search/cbv_ptr.cpp create mode 100644 search/cbv_ptr.hpp create mode 100644 search/features_filter.cpp create mode 100644 search/features_filter.hpp create mode 100644 search/features_layer.cpp create mode 100644 search/features_layer.hpp create mode 100644 search/features_layer_matcher.cpp create mode 100644 search/features_layer_matcher.hpp create mode 100644 search/features_layer_path_finder.cpp create mode 100644 search/features_layer_path_finder.hpp create mode 100644 search/geocoder.cpp create mode 100644 search/geocoder.hpp create mode 100644 search/geometry_cache.cpp create mode 100644 search/geometry_cache.hpp create mode 100644 search/house_numbers_matcher.cpp create mode 100644 search/house_numbers_matcher.hpp create mode 100644 search/house_to_street_table.cpp create mode 100644 search/house_to_street_table.hpp create mode 100644 search/intersection_result.cpp create mode 100644 search/intersection_result.hpp create mode 100644 search/locality_scorer.cpp create mode 100644 search/locality_scorer.hpp create mode 100644 search/mwm_context.cpp create mode 100644 search/mwm_context.hpp create mode 100644 search/nested_rects_cache.cpp create mode 100644 search/nested_rects_cache.hpp create mode 100644 search/pre_ranking_info.cpp create mode 100644 search/pre_ranking_info.hpp create mode 100644 search/rank_table_cache.cpp create mode 100644 search/rank_table_cache.hpp create mode 100644 search/ranking_info.cpp create mode 100644 search/ranking_info.hpp create mode 100644 search/ranking_utils.cpp create mode 100644 search/ranking_utils.hpp create mode 100644 search/search_model.cpp create mode 100644 search/search_model.hpp create mode 100644 search/stats_cache.hpp create mode 100644 search/street_vicinity_loader.cpp create mode 100644 search/street_vicinity_loader.hpp create mode 100644 search/token_slice.cpp create mode 100644 search/token_slice.hpp 
delete mode 100644 search/v2/cbv_ptr.cpp delete mode 100644 search/v2/cbv_ptr.hpp delete mode 100644 search/v2/features_filter.cpp delete mode 100644 search/v2/features_filter.hpp delete mode 100644 search/v2/features_layer.cpp delete mode 100644 search/v2/features_layer.hpp delete mode 100644 search/v2/features_layer_matcher.cpp delete mode 100644 search/v2/features_layer_matcher.hpp delete mode 100644 search/v2/features_layer_path_finder.cpp delete mode 100644 search/v2/features_layer_path_finder.hpp delete mode 100644 search/v2/geocoder.cpp delete mode 100644 search/v2/geocoder.hpp delete mode 100644 search/v2/geometry_cache.cpp delete mode 100644 search/v2/geometry_cache.hpp delete mode 100644 search/v2/house_numbers_matcher.cpp delete mode 100644 search/v2/house_numbers_matcher.hpp delete mode 100644 search/v2/house_to_street_table.cpp delete mode 100644 search/v2/house_to_street_table.hpp delete mode 100644 search/v2/intersection_result.cpp delete mode 100644 search/v2/intersection_result.hpp delete mode 100644 search/v2/locality_scorer.cpp delete mode 100644 search/v2/locality_scorer.hpp delete mode 100644 search/v2/mwm_context.cpp delete mode 100644 search/v2/mwm_context.hpp delete mode 100644 search/v2/nested_rects_cache.cpp delete mode 100644 search/v2/nested_rects_cache.hpp delete mode 100644 search/v2/pre_ranking_info.cpp delete mode 100644 search/v2/pre_ranking_info.hpp delete mode 100644 search/v2/rank_table_cache.cpp delete mode 100644 search/v2/rank_table_cache.hpp delete mode 100644 search/v2/ranking_info.cpp delete mode 100644 search/v2/ranking_info.hpp delete mode 100644 search/v2/ranking_utils.cpp delete mode 100644 search/v2/ranking_utils.hpp delete mode 100644 search/v2/search_model.cpp delete mode 100644 search/v2/search_model.hpp delete mode 100644 search/v2/stats_cache.hpp delete mode 100644 search/v2/street_vicinity_loader.cpp delete mode 100644 search/v2/street_vicinity_loader.hpp delete mode 100644 search/v2/token_slice.cpp delete mode 
100644 search/v2/token_slice.hpp (limited to 'search') diff --git a/search/cbv_ptr.cpp b/search/cbv_ptr.cpp new file mode 100644 index 0000000000..7bdf63e64a --- /dev/null +++ b/search/cbv_ptr.cpp @@ -0,0 +1,67 @@ +#include "search/cbv_ptr.hpp" + +namespace search +{ +CBVPtr::CBVPtr(coding::CompressedBitVector const * p, bool isOwner) { Set(p, isOwner); } + +void CBVPtr::Release() +{ + if (m_isOwner) + delete m_ptr; + + m_ptr = nullptr; + m_isOwner = false; + m_isFull = false; +} + +void CBVPtr::Set(coding::CompressedBitVector const * p, bool isOwner /* = false*/) +{ + Release(); + + m_ptr = p; + m_isOwner = p && isOwner; +} + +void CBVPtr::Set(unique_ptr p) +{ + Set(p.release(), true /* isOwner */); +} + +void CBVPtr::Union(coding::CompressedBitVector const * p) +{ + if (!p || m_isFull) + return; + + if (!m_ptr) + { + m_ptr = p; + m_isFull = false; + } + else + { + Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true); + } +} + +void CBVPtr::Intersect(coding::CompressedBitVector const * p) +{ + if (!p) + { + Release(); + return; + } + + if (m_ptr) + { + Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true); + } + else if (m_isFull) + { + m_ptr = p; + m_isFull = false; + } +} + +bool CBVPtr::IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); } + +} // namespace search diff --git a/search/cbv_ptr.hpp b/search/cbv_ptr.hpp new file mode 100644 index 0000000000..3c9cd68fd8 --- /dev/null +++ b/search/cbv_ptr.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include "coding/compressed_bit_vector.hpp" + +#include "base/assert.hpp" +#include "base/macros.hpp" + +#include "std/function.hpp" +#include "std/utility.hpp" + +namespace search +{ +/// CompressedBitVector pointer class that incapsulates +/// binary operators logic and takes ownership if needed. 
+class CBVPtr +{ + DISALLOW_COPY_AND_MOVE(CBVPtr); + + coding::CompressedBitVector const * m_ptr = nullptr; + bool m_isOwner = false; + bool m_isFull = false; ///< True iff all bits are set to one. + + void Release(); + +public: + CBVPtr() = default; + CBVPtr(coding::CompressedBitVector const * p, bool isOwner); + ~CBVPtr() { Release(); } + + inline void SetFull() + { + Release(); + m_isFull = true; + } + + void Set(coding::CompressedBitVector const * p, bool isOwner = false); + void Set(unique_ptr p); + + inline coding::CompressedBitVector const * Get() const { return m_ptr; } + + coding::CompressedBitVector const & operator*() const { return *m_ptr; } + coding::CompressedBitVector const * operator->() const { return m_ptr; } + + bool IsEmpty() const; + + void Union(coding::CompressedBitVector const * p); + void Intersect(coding::CompressedBitVector const * p); + + template + void ForEach(TFn && fn) const + { + ASSERT(!m_isFull, ()); + if (!IsEmpty()) + coding::CompressedBitVectorEnumerator::ForEach(*m_ptr, forward(fn)); + } +}; + +} // namespace search diff --git a/search/feature_offset_match.hpp b/search/feature_offset_match.hpp index 075c51a37c..9ce4023339 100644 --- a/search/feature_offset_match.hpp +++ b/search/feature_offset_match.hpp @@ -3,7 +3,7 @@ #include "search/query_params.hpp" #include "search/search_common.hpp" #include "search/search_index_values.hpp" -#include "search/v2/token_slice.hpp" +#include "search/token_slice.hpp" #include "indexer/trie.hpp" @@ -445,7 +445,7 @@ void MatchFeaturesInTrie(QueryParams const & params, } template -void MatchPostcodesInTrie(v2::TokenSlice const & slice, +void MatchPostcodesInTrie(TokenSlice const & slice, trie::Iterator> const & trieRoot, TFilter const & filter, ToDo && toDo) { diff --git a/search/features_filter.cpp b/search/features_filter.cpp new file mode 100644 index 0000000000..0f9912ad07 --- /dev/null +++ b/search/features_filter.cpp @@ -0,0 +1,47 @@ +#include "search/features_filter.hpp" + +#include 
"coding/compressed_bit_vector.hpp" + +#include "std/algorithm.hpp" + +namespace search +{ +// FeaturesFilter ---------------------------------------------------------------------------------- +FeaturesFilter::FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold) + : m_filter(filter), m_threshold(threshold) +{ +} + +bool FeaturesFilter::NeedToFilter(coding::CompressedBitVector const & cbv) const +{ + return cbv.PopCount() > m_threshold; +} + +// LocalityFilter ---------------------------------------------------------------------------------- +LocalityFilter::LocalityFilter(coding::CompressedBitVector const & filter) + : FeaturesFilter(filter, 0 /* threshold */) +{ +} + +unique_ptr LocalityFilter::Filter( + coding::CompressedBitVector const & cbv) const +{ + return coding::CompressedBitVector::Intersect(m_filter, cbv); +} + +// ViewportFilter ---------------------------------------------------------------------------------- +ViewportFilter::ViewportFilter(coding::CompressedBitVector const & filter, uint32_t threshold) + : FeaturesFilter(filter, threshold) +{ +} + +unique_ptr ViewportFilter::Filter( + coding::CompressedBitVector const & cbv) const +{ + auto result = coding::CompressedBitVector::Intersect(m_filter, cbv); + if (!coding::CompressedBitVector::IsEmpty(result)) + return result; + return cbv.LeaveFirstSetNBits(m_threshold); +} + +} // namespace search diff --git a/search/features_filter.hpp b/search/features_filter.hpp new file mode 100644 index 0000000000..fb5f425c3c --- /dev/null +++ b/search/features_filter.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include "std/unique_ptr.hpp" + +namespace coding +{ +class CompressedBitVector; +} + +namespace search +{ +// A lightweight filter of features. +// +// NOTE: this class and its subclasses *ARE* thread-safe. 
+class FeaturesFilter +{ +public: + FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold); + + virtual ~FeaturesFilter() = default; + + bool NeedToFilter(coding::CompressedBitVector const & features) const; + + virtual unique_ptr Filter( + coding::CompressedBitVector const & cbv) const = 0; + +protected: + coding::CompressedBitVector const & m_filter; + uint32_t const m_threshold; +}; + +// Exact filter - leaves only features belonging to the set it was +// constructed from. +class LocalityFilter : public FeaturesFilter +{ +public: + LocalityFilter(coding::CompressedBitVector const & filter); + + // FeaturesFilter overrides: + unique_ptr Filter( + coding::CompressedBitVector const & cbv) const override; +}; + +// Fuzzy filter - tries to leave only features belonging to the set it +// was constructed from, but if the result is empty, leaves at most +// first |threshold| features instead. This property is quite useful +// when there are no matching features in viewport but it's ok to +// process a limited number of features outside the viewport. 
+class ViewportFilter : public FeaturesFilter +{ +public: + ViewportFilter(coding::CompressedBitVector const & filter, uint32_t threshold); + + // FeaturesFilter overrides: + unique_ptr Filter( + coding::CompressedBitVector const & cbv) const override; +}; + +} // namespace search diff --git a/search/features_layer.cpp b/search/features_layer.cpp new file mode 100644 index 0000000000..e3c077db77 --- /dev/null +++ b/search/features_layer.cpp @@ -0,0 +1,33 @@ +#include "search/features_layer.hpp" + +#include "base/internal/message.hpp" + +#include "std/sstream.hpp" + +namespace search +{ +FeaturesLayer::FeaturesLayer() { Clear(); } + +void FeaturesLayer::Clear() +{ + m_sortedFeatures = nullptr; + m_subQuery.clear(); + m_startToken = 0; + m_endToken = 0; + m_type = SearchModel::SEARCH_TYPE_COUNT; + m_hasDelayedFeatures = false; + m_lastTokenIsPrefix = false; +} + +string DebugPrint(FeaturesLayer const & layer) +{ + ostringstream os; + os << "FeaturesLayer [ size of m_sortedFeatures: " + << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0) + << ", m_subQuery: " << DebugPrint(layer.m_subQuery) << ", m_startToken: " << layer.m_startToken + << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) + << ", m_lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << " ]"; + return os.str(); +} + +} // namespace search diff --git a/search/features_layer.hpp b/search/features_layer.hpp new file mode 100644 index 0000000000..b3ec6bb467 --- /dev/null +++ b/search/features_layer.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include "search/search_model.hpp" + +#include "base/string_utils.hpp" + +#include "std/vector.hpp" + +namespace search +{ +// This structure represents a part of search query interpretation - +// when to a substring of tokens [m_startToken, m_endToken) is matched +// with a set of m_features of the same m_type. +struct FeaturesLayer +{ + FeaturesLayer(); + + void Clear(); + + // Non-owning ptr to a sorted vector of features. 
+ vector const * m_sortedFeatures; + + strings::UniString m_subQuery; + + size_t m_startToken; + size_t m_endToken; + SearchModel::SearchType m_type; + + // *NOTE* This field is meaningful only when m_type equals to + // SEARCH_TYPE_BUILDING. + // + // When true, m_sortedFeatures contains only features retrieved from + // search index by m_subQuery, and it's necessary for Geocoder to + // perform additional work to retrieve features matching by house + // number. + bool m_hasDelayedFeatures; + + bool m_lastTokenIsPrefix; +}; + +string DebugPrint(FeaturesLayer const & layer); + +} // namespace search diff --git a/search/features_layer_matcher.cpp b/search/features_layer_matcher.cpp new file mode 100644 index 0000000000..1f98b22ffb --- /dev/null +++ b/search/features_layer_matcher.cpp @@ -0,0 +1,143 @@ +#include "search/features_layer_matcher.hpp" + +#include "search/house_to_street_table.hpp" +#include "search/reverse_geocoder.hpp" + +#include "indexer/scales.hpp" + +#include "base/assert.hpp" + +namespace search +{ +/// Max distance from house to street where we do search matching +/// even if there is no exact street written for this house. 
+int constexpr kMaxApproxStreetDistanceM = 100; + +FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable) + : m_context(nullptr) + , m_postcodes(nullptr) + , m_reverseGeocoder(index) + , m_nearbyStreetsCache("FeatureToNearbyStreets") + , m_matchingStreetsCache("BuildingToStreet") + , m_loader(scales::GetUpperScale(), ReverseGeocoder::kLookupRadiusM) + , m_cancellable(cancellable) +{ +} + +void FeaturesLayerMatcher::SetContext(MwmContext * context) +{ + ASSERT(context, ()); + if (m_context == context) + return; + + m_context = context; + m_loader.SetContext(context); +} + +void FeaturesLayerMatcher::SetPostcodes(coding::CompressedBitVector const * postcodes) +{ + m_postcodes = postcodes; +} + +void FeaturesLayerMatcher::OnQueryFinished() +{ + m_nearbyStreetsCache.ClearIfNeeded(); + m_matchingStreetsCache.ClearIfNeeded(); + m_loader.OnQueryFinished(); +} + +uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId) +{ + FeatureType feature; + return GetMatchingStreetImpl(houseId, feature); +} + +uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature) +{ + return GetMatchingStreetImpl(houseId, houseFeature); +} + +FeaturesLayerMatcher::TStreets const & FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId) +{ + FeatureType feature; + return GetNearbyStreetsImpl(featureId, feature); +} + +FeaturesLayerMatcher::TStreets const & FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId, + FeatureType & feature) +{ + return GetNearbyStreetsImpl(featureId, feature); +} + +FeaturesLayerMatcher::TStreets const & FeaturesLayerMatcher::GetNearbyStreetsImpl( + uint32_t featureId, FeatureType & feature) +{ + auto entry = m_nearbyStreetsCache.Get(featureId); + if (!entry.second) + return entry.first; + + if (!feature.GetID().IsValid()) + GetByIndex(featureId, feature); + + auto & streets = entry.first; + m_reverseGeocoder.GetNearbyStreets(feature, streets); + for (size_t i = 0; i < 
streets.size(); ++i) + { + if (streets[i].m_distanceMeters > ReverseGeocoder::kLookupRadiusM) + { + streets.resize(i); + break; + } + } + + return streets; +} + +uint32_t FeaturesLayerMatcher::GetMatchingStreetImpl(uint32_t houseId, FeatureType & houseFeature) +{ + // Check if this feature is modified - the logic will be different. + string streetName; + bool const edited = + osm::Editor::Instance().GetEditedFeatureStreet(houseFeature.GetID(), streetName); + + // Check the cached result value. + auto entry = m_matchingStreetsCache.Get(houseId); + if (!edited && !entry.second) + return entry.first; + + // Load feature if needed. + if (!houseFeature.GetID().IsValid()) + GetByIndex(houseId, houseFeature); + + // Get nearby streets and calculate the resulting index. + auto const & streets = GetNearbyStreets(houseId, houseFeature); + uint32_t & result = entry.first; + result = kInvalidId; + + if (edited) + { + auto const ret = find_if(streets.begin(), streets.end(), [&streetName](TStreet const & st) + { + return st.m_name == streetName; + }); + if (ret != streets.end()) + result = ret->m_id.m_index; + } + else + { + uint32_t index; + if (m_context->GetStreetIndex(houseId, index) && index < streets.size()) + result = streets[index].m_id.m_index; + } + + // If there is no saved street for feature, assume that it's a nearest street if it's too close. 
+ if (result == kInvalidId && !streets.empty() && + streets[0].m_distanceMeters < kMaxApproxStreetDistanceM) + { + result = streets[0].m_id.m_index; + } + + return result; +} + +} // namespace search diff --git a/search/features_layer_matcher.hpp b/search/features_layer_matcher.hpp new file mode 100644 index 0000000000..8b18e1484c --- /dev/null +++ b/search/features_layer_matcher.hpp @@ -0,0 +1,371 @@ +#pragma once + +#include "search/cancel_exception.hpp" +#include "search/features_layer.hpp" +#include "search/house_numbers_matcher.hpp" +#include "search/mwm_context.hpp" +#include "search/reverse_geocoder.hpp" +#include "search/search_model.hpp" +#include "search/street_vicinity_loader.hpp" + +#include "indexer/feature.hpp" +#include "indexer/feature_algo.hpp" +#include "indexer/feature_impl.hpp" +#include "indexer/features_vector.hpp" +#include "indexer/ftypes_matcher.hpp" +#include "indexer/mwm_set.hpp" + +#include "geometry/mercator.hpp" +#include "geometry/point2d.hpp" +#include "geometry/rect2d.hpp" + +#include "coding/compressed_bit_vector.hpp" + +#include "base/cancellable.hpp" +#include "base/logging.hpp" +#include "base/macros.hpp" +#include "base/stl_helpers.hpp" +#include "base/string_utils.hpp" + +#include "std/algorithm.hpp" +#include "std/bind.hpp" +#include "std/limits.hpp" +#include "std/unordered_map.hpp" +#include "std/vector.hpp" + +class Index; + +namespace search +{ +// This class performs pairwise intersection between two layers of +// features, where the first (child) layer is geographically smaller +// than the second (parent) one. It emits all pairs +// (feature-from-child-layer, feature-from-parent-layer) of matching +// features, where feature-from-child-layer belongs-to +// feature-from-parent-layer. 
Belongs-to is a partial relation on +// features, and has different meaning for different search classes: +// +// * BUILDING belongs-to STREET iff the building is located on the street; +// * BUILDING belongs-to CITY iff the building is located in the city; +// * POI belongs-to BUILDING iff the poi is (roughly) located near or inside the building; +// * STREET belongs-to CITY iff the street is (roughly) located in the city; +// * etc. +// +// NOTE: this class *IS NOT* thread-safe. +class FeaturesLayerMatcher +{ +public: + static uint32_t const kInvalidId = numeric_limits::max(); + static int constexpr kBuildingRadiusMeters = 50; + static int constexpr kStreetRadiusMeters = 100; + + FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable); + void SetContext(MwmContext * context); + void SetPostcodes(coding::CompressedBitVector const * postcodes); + + template + void Match(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) + { + if (child.m_type >= parent.m_type) + return; + switch (parent.m_type) + { + case SearchModel::SEARCH_TYPE_POI: + case SearchModel::SEARCH_TYPE_CITY: + case SearchModel::SEARCH_TYPE_VILLAGE: + case SearchModel::SEARCH_TYPE_STATE: + case SearchModel::SEARCH_TYPE_COUNTRY: + case SearchModel::SEARCH_TYPE_UNCLASSIFIED: + case SearchModel::SEARCH_TYPE_COUNT: + ASSERT(false, ("Invalid parent layer type:", parent.m_type)); + break; + case SearchModel::SEARCH_TYPE_BUILDING: + ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); + MatchPOIsWithBuildings(child, parent, forward(fn)); + break; + case SearchModel::SEARCH_TYPE_STREET: + ASSERT(child.m_type == SearchModel::SEARCH_TYPE_POI || + child.m_type == SearchModel::SEARCH_TYPE_BUILDING, + ("Invalid child layer type:", child.m_type)); + if (child.m_type == SearchModel::SEARCH_TYPE_POI) + MatchPOIsWithStreets(child, parent, forward(fn)); + else + MatchBuildingsWithStreets(child, parent, forward(fn)); + break; + } + } + + void OnQueryFinished(); + +private: + 
template + void MatchPOIsWithBuildings(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) + { + // Following code initially loads centers of POIs and then, for + // each building, tries to find all POIs located at distance less + // than kBuildingRadiusMeters. + + ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); + ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_BUILDING, ()); + + auto const & pois = *child.m_sortedFeatures; + auto const & buildings = *parent.m_sortedFeatures; + + BailIfCancelled(m_cancellable); + + vector poiCenters(pois.size()); + + size_t const numPOIs = pois.size(); + vector isPOIProcessed(numPOIs); + size_t processedPOIs = 0; + + for (size_t i = 0; i < pois.size(); ++i) + { + FeatureType poiFt; + GetByIndex(pois[i], poiFt); + poiCenters[i] = feature::GetCenter(poiFt, FeatureType::WORST_GEOMETRY); + } + + for (size_t i = 0; i < buildings.size() && processedPOIs != numPOIs; ++i) + { + BailIfCancelled(m_cancellable); + + FeatureType buildingFt; + GetByIndex(buildings[i], buildingFt); + + for (size_t j = 0; j < pois.size(); ++j) + { + if (isPOIProcessed[j]) + continue; + + double const distMeters = feature::GetMinDistanceMeters(buildingFt, poiCenters[j]); + if (distMeters <= kBuildingRadiusMeters) + { + fn(pois[j], buildings[i]); + isPOIProcessed[j] = true; + ++processedPOIs; + } + } + } + + if (!parent.m_hasDelayedFeatures) + return; + + // |buildings| doesn't contain buildings matching by house number, + // so following code reads buildings in POIs vicinities and checks + // house numbers. 
+ vector queryParses; + ParseQuery(parent.m_subQuery, parent.m_lastTokenIsPrefix, queryParses); + if (queryParses.empty()) + return; + + for (size_t i = 0; i < pois.size(); ++i) + { + m_context->ForEachFeature( + MercatorBounds::RectByCenterXYAndSizeInMeters(poiCenters[i], kBuildingRadiusMeters), + [&](FeatureType & ft) + { + if (m_postcodes && !m_postcodes->GetBit(ft.GetID().m_index)) + return; + if (HouseNumbersMatch(strings::MakeUniString(ft.GetHouseNumber()), queryParses)) + { + double const distanceM = + MercatorBounds::DistanceOnEarth(feature::GetCenter(ft), poiCenters[i]); + if (distanceM < kBuildingRadiusMeters) + fn(pois[i], ft.GetID().m_index); + } + }); + } + } + + template + void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) + { + ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); + ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); + + auto const & pois = *child.m_sortedFeatures; + auto const & streets = *parent.m_sortedFeatures; + + // When the number of POIs is less than the number of STREETs, + // it's faster to check nearby streets for POIs. 
+ if (pois.size() < streets.size()) + { + for (uint32_t poiId : pois) + { + for (auto const & street : GetNearbyStreets(poiId)) + { + if (street.m_distanceMeters > kStreetRadiusMeters) + break; + + uint32_t const streetId = street.m_id.m_index; + if (binary_search(streets.begin(), streets.end(), streetId)) + fn(poiId, streetId); + } + } + return; + } + + for (uint32_t streetId : streets) + { + BailIfCancelled(m_cancellable); + m_loader.ForEachInVicinity(streetId, pois, kStreetRadiusMeters, bind(fn, _1, streetId)); + } + } + + template + void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, + TFn && fn) + { + ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_BUILDING, ()); + ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); + + auto const & buildings = *child.m_sortedFeatures; + auto const & streets = *parent.m_sortedFeatures; + + // When all buildings are in |buildings| and the number of + // buildings less than the number of streets, it's probably faster + // to check nearby streets for each building instead of street + // vicinities loading. + if (!child.m_hasDelayedFeatures && buildings.size() < streets.size()) + { + for (uint32_t const houseId : buildings) + { + uint32_t const streetId = GetMatchingStreet(houseId); + if (binary_search(streets.begin(), streets.end(), streetId)) + fn(houseId, streetId); + } + return; + } + + vector queryParses; + ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParses); + + uint32_t numFilterInvocations = 0; + auto houseNumberFilter = [&](uint32_t id, FeatureType & feature, bool & loaded) -> bool + { + ++numFilterInvocations; + if ((numFilterInvocations & 0xFF) == 0) + BailIfCancelled(m_cancellable); + + if (binary_search(buildings.begin(), buildings.end(), id)) + return true; + + if (m_postcodes && !m_postcodes->GetBit(id)) + return false; + + // HouseNumbersMatch() calls are expensive, so following code + // tries to reduce the number of calls. 
The most important + // optimization: as first tokens from the house-number part of + // the query and feature's house numbers must be numbers, their + // first symbols must be the same. + + if (!loaded) + { + GetByIndex(id, feature); + loaded = true; + } + + if (!child.m_hasDelayedFeatures) + return false; + + strings::UniString const houseNumber(strings::MakeUniString(feature.GetHouseNumber())); + if (!feature::IsHouseNumber(houseNumber)) + return false; + return HouseNumbersMatch(houseNumber, queryParses); + }; + + unordered_map cache; + auto cachingHouseNumberFilter = [&](uint32_t id, FeatureType & feature, bool & loaded) -> bool + { + auto const it = cache.find(id); + if (it != cache.cend()) + return it->second; + bool const result = houseNumberFilter(id, feature, loaded); + cache[id] = result; + return result; + }; + + ProjectionOnStreet proj; + for (uint32_t streetId : streets) + { + BailIfCancelled(m_cancellable); + StreetVicinityLoader::Street const & street = m_loader.GetStreet(streetId); + if (street.IsEmpty()) + continue; + + auto const & calculator = *street.m_calculator; + + for (uint32_t houseId : street.m_features) + { + FeatureType feature; + bool loaded = false; + if (!cachingHouseNumberFilter(houseId, feature, loaded)) + continue; + + if (!loaded) + GetByIndex(houseId, feature); + + // Best geometry is used here as feature::GetCenter(feature) + // actually modifies internal state of a |feature| by caching + // it's geometry. So, when GetMatchingStreet(houseId, feature) + // is called, high precision geometry is used again to compute + // |feature|'s center, and this is a right behavior as + // house-to-street table was generated by using high-precision + // centers of features. 
+ m2::PointD const center = feature::GetCenter(feature); + if (calculator.GetProjection(center, proj) && + proj.m_distMeters <= ReverseGeocoder::kLookupRadiusM && + GetMatchingStreet(houseId, feature) == streetId) + { + fn(houseId, streetId); + } + } + } + } + + // Returns id of a street feature corresponding to a |houseId|, or + // kInvalidId if there're not such street. + uint32_t GetMatchingStreet(uint32_t houseId); + uint32_t GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature); + uint32_t GetMatchingStreetImpl(uint32_t houseId, FeatureType & houseFeature); + + using TStreet = ReverseGeocoder::Street; + using TStreets = vector; + + TStreets const & GetNearbyStreets(uint32_t featureId); + TStreets const & GetNearbyStreets(uint32_t featureId, FeatureType & feature); + TStreets const & GetNearbyStreetsImpl(uint32_t featureId, FeatureType & feature); + + inline void GetByIndex(uint32_t id, FeatureType & ft) const + { + /// @todo Add Cache for feature id -> (point, name / house number). + /// TODO(vng): GetFeature below can return false if feature was deleted by user in the Editor. + /// This code should be fixed to take that into an account. + /// Until we don't show "Delete" button to our users, this code will work correctly. + /// Correct fix would be injection into ForEachInIntervalAndScale, so deleted features will + /// never + /// be emitted and used in other code. + UNUSED_VALUE(m_context->GetFeature(id, ft)); + } + + MwmContext * m_context; + + coding::CompressedBitVector const * m_postcodes; + + ReverseGeocoder m_reverseGeocoder; + + // Cache of streets in a feature's vicinity. All lists in the cache + // are ordered by distance from the corresponding feature. + Cache m_nearbyStreetsCache; + + // Cache of correct streets for buildings. Current search algorithm + // supports only one street for a building, whereas buildings can be + // located on multiple streets. 
+ Cache m_matchingStreetsCache; + + StreetVicinityLoader m_loader; + my::Cancellable const & m_cancellable; +}; + +} // namespace search diff --git a/search/features_layer_path_finder.cpp b/search/features_layer_path_finder.cpp new file mode 100644 index 0000000000..91db4ef27f --- /dev/null +++ b/search/features_layer_path_finder.cpp @@ -0,0 +1,196 @@ +#include "search/features_layer_path_finder.hpp" + +#include "search/cancel_exception.hpp" +#include "search/features_layer_matcher.hpp" +#include "search/house_numbers_matcher.hpp" + +#include "indexer/features_vector.hpp" + +#include "base/cancellable.hpp" + +namespace search +{ +namespace +{ +using TParentGraph = unordered_map; + +// This function tries to estimate amount of work needed to perform an +// intersection pass on a sequence of layers. +template +uint64_t CalcPassCost(TIt begin, TIt end) +{ + uint64_t cost = 0; + + if (begin == end) + return cost; + + uint64_t reachable = max((*begin)->m_sortedFeatures->size(), size_t(1)); + for (++begin; begin != end; ++begin) + { + uint64_t const layer = max((*begin)->m_sortedFeatures->size(), size_t(1)); + cost += layer * reachable; + reachable = min(reachable, layer); + } + return cost; +} + +uint64_t CalcTopDownPassCost(vector const & layers) +{ + return CalcPassCost(layers.rbegin(), layers.rend()); +} + +uint64_t CalcBottomUpPassCost(vector const & layers) +{ + return CalcPassCost(layers.begin(), layers.end()); +} + +bool LooksLikeHouseNumber(strings::UniString const & query, bool queryIsPrefix) +{ + vector parses; + ParseQuery(query, queryIsPrefix, parses); + for (auto const & parse : parses) + { + if (parse.IsEmpty()) + continue; + if (feature::IsHouseNumber(parse.m_parts.front())) + return true; + } + return false; +} + +bool GetPath(uint32_t id, vector const & layers, TParentGraph const & parent, + IntersectionResult & result) +{ + result.Clear(); + + size_t level = 0; + TParentGraph::const_iterator it; + do + { + result.Set(layers[level]->m_type, id); + 
++level; + it = parent.find(id); + if (it != parent.cend()) + id = it->second; + } while (level < layers.size() && it != parent.cend()); + return level == layers.size(); +} +} // namespace + +FeaturesLayerPathFinder::FeaturesLayerPathFinder(my::Cancellable const & cancellable) + : m_cancellable(cancellable) +{ +} + +void FeaturesLayerPathFinder::FindReachableVertices(FeaturesLayerMatcher & matcher, + vector const & layers, + vector & results) +{ + if (layers.empty()) + return; + + uint64_t const topDownCost = CalcTopDownPassCost(layers); + uint64_t const bottomUpCost = CalcBottomUpPassCost(layers); + + if (bottomUpCost < topDownCost) + FindReachableVerticesBottomUp(matcher, layers, results); + else + FindReachableVerticesTopDown(matcher, layers, results); +} + +void FeaturesLayerPathFinder::FindReachableVerticesTopDown( + FeaturesLayerMatcher & matcher, vector const & layers, + vector & results) +{ + ASSERT(!layers.empty(), ()); + + vector reachable = *(layers.back()->m_sortedFeatures); + vector buffer; + + TParentGraph parent; + + auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) + { + parent[childFeature] = parentFeature; + buffer.push_back(childFeature); + }; + + for (size_t i = layers.size() - 1; i != 0; --i) + { + BailIfCancelled(m_cancellable); + + if (reachable.empty()) + return; + + FeaturesLayer parent(*layers[i]); + if (i != layers.size() - 1) + my::SortUnique(reachable); + parent.m_sortedFeatures = &reachable; + parent.m_hasDelayedFeatures = false; + + FeaturesLayer child(*layers[i - 1]); + child.m_hasDelayedFeatures = child.m_type == SearchModel::SEARCH_TYPE_BUILDING && + LooksLikeHouseNumber(child.m_subQuery, child.m_lastTokenIsPrefix); + + buffer.clear(); + matcher.Match(child, parent, addEdge); + reachable.swap(buffer); + } + + IntersectionResult result; + for (auto const & id : reachable) + { + if (GetPath(id, layers, parent, result)) + results.push_back(result); + } +} + +void FeaturesLayerPathFinder::FindReachableVerticesBottomUp( 
+ FeaturesLayerMatcher & matcher, vector const & layers, + vector & results) +{ + ASSERT(!layers.empty(), ()); + + vector reachable = *(layers.front()->m_sortedFeatures); + vector buffer; + + TParentGraph parent; + + auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) + { + parent[childFeature] = parentFeature; + buffer.push_back(parentFeature); + }; + + for (size_t i = 0; i + 1 != layers.size(); ++i) + { + BailIfCancelled(m_cancellable); + + if (reachable.empty()) + return; + + FeaturesLayer child(*layers[i]); + if (i != 0) + my::SortUnique(reachable); + child.m_sortedFeatures = &reachable; + child.m_hasDelayedFeatures = false; + + FeaturesLayer parent(*layers[i + 1]); + parent.m_hasDelayedFeatures = + parent.m_type == SearchModel::SEARCH_TYPE_BUILDING && + LooksLikeHouseNumber(parent.m_subQuery, parent.m_lastTokenIsPrefix); + + buffer.clear(); + matcher.Match(child, parent, addEdge); + reachable.swap(buffer); + } + + IntersectionResult result; + for (auto const & id : *(layers.front()->m_sortedFeatures)) + { + if (GetPath(id, layers, parent, result)) + results.push_back(result); + } +} + +} // namespace search diff --git a/search/features_layer_path_finder.hpp b/search/features_layer_path_finder.hpp new file mode 100644 index 0000000000..ce58dc1e73 --- /dev/null +++ b/search/features_layer_path_finder.hpp @@ -0,0 +1,85 @@ +#pragma once + +#include "search/features_layer.hpp" +#include "search/intersection_result.hpp" + +#include "std/vector.hpp" + +#if defined(DEBUG) +#include "base/logging.hpp" +#include "base/timer.hpp" +#endif // defined(DEBUG) + +class FeaturesVector; +class MwmValue; + +namespace my +{ +class Cancellable; +} + +namespace search +{ +class FeaturesLayerMatcher; + +// This class is able to find all paths through a layered graph, with +// vertices as features, and edges as pairs of vertices satisfying +// belongs-to relation. For more details on belongs-to relation see +// documentation for FeaturesLayerMatcher. 
+// +// In short, this class is able to find all features matching to a +// given interpretation of a search query. +// +// NOTE: this class *IS* thread-safe. +class FeaturesLayerPathFinder +{ +public: + FeaturesLayerPathFinder(my::Cancellable const & cancellable); + + template + void ForEachReachableVertex(FeaturesLayerMatcher & matcher, + vector const & layers, TFn && fn) + { + if (layers.empty()) + return; + +// TODO (@y): remove following code as soon as +// FindReachableVertices() will work fast for most cases +// (significantly less than 1 second). +#if defined(DEBUG) + for (auto const * layer : layers) + LOG(LINFO, (DebugPrint(*layer))); + my::Timer timer; +#endif // defined(DEBUG) + + vector results; + FindReachableVertices(matcher, layers, results); + +#if defined(DEBUG) + LOG(LINFO, ("Found:", results.size(), "elapsed:", timer.ElapsedSeconds(), "seconds")); +#endif // defined(DEBUG) + + for_each(results.begin(), results.end(), forward(fn)); + } + +private: + void FindReachableVertices(FeaturesLayerMatcher & matcher, + vector const & layers, + vector & results); + + // Tries to find all |reachable| features from the lowest layer in a + // high level -> low level pass. + void FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher, + vector const & layers, + vector & results); + + // Tries to find all |reachable| features from the lowest layer in a + // low level -> high level pass. 
+ void FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher, + vector const & layers, + vector & results); + + my::Cancellable const & m_cancellable; +}; + +} // namespace search diff --git a/search/geocoder.cpp b/search/geocoder.cpp new file mode 100644 index 0000000000..0c50de6cc4 --- /dev/null +++ b/search/geocoder.cpp @@ -0,0 +1,1601 @@ +#include "search/geocoder.hpp" + +#include "search/cbv_ptr.hpp" +#include "search/dummy_rank_table.hpp" +#include "search/features_filter.hpp" +#include "search/features_layer_matcher.hpp" +#include "search/locality_scorer.hpp" +#include "search/processor.hpp" +#include "search/retrieval.hpp" +#include "search/token_slice.hpp" + +#include "indexer/classificator.hpp" +#include "indexer/feature_decl.hpp" +#include "indexer/feature_impl.hpp" +#include "indexer/ftypes_matcher.hpp" +#include "indexer/index.hpp" +#include "indexer/postcodes_matcher.hpp" +#include "indexer/rank_table.hpp" +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "storage/country_info_getter.hpp" + +#include "coding/multilang_utf8_string.hpp" + +#include "platform/preferred_languages.hpp" + +#include "geometry/mercator.hpp" + +#include "base/assert.hpp" +#include "base/logging.hpp" +#include "base/macros.hpp" +#include "base/scope_guard.hpp" +#include "base/stl_add.hpp" +#include "base/stl_helpers.hpp" + +#include "std/algorithm.hpp" +#include "std/bind.hpp" +#include "std/iterator.hpp" +#include "std/sstream.hpp" +#include "std/target_os.hpp" +#include "std/transform_iterator.hpp" + +#include "defines.hpp" + +#if defined(DEBUG) +#include "base/timer.hpp" +#endif + +#if defined(USE_GOOGLE_PROFILER) && defined(OMIM_OS_LINUX) +#include +#endif + +namespace search +{ +namespace +{ +size_t constexpr kMaxNumCities = 5; +size_t constexpr kMaxNumStates = 5; +size_t constexpr kMaxNumVillages = 5; +size_t constexpr kMaxNumCountries = 5; + +// This constant limits number of localities that will be extracted +// 
from World map. Villages are not counted here as they're not +// included into World map. +// @vng Set this value to possible maximum. +size_t const kMaxNumLocalities = LocalityScorer::kDefaultReadLimit; + +size_t constexpr kPivotRectsCacheSize = 10; +size_t constexpr kLocalityRectsCacheSize = 10; + +strings::UniString const kUniSpace(strings::MakeUniString(" ")); + +struct ScopedMarkTokens +{ + ScopedMarkTokens(vector & usedTokens, size_t from, size_t to) + : m_usedTokens(usedTokens), m_from(from), m_to(to) + { + ASSERT_LESS_OR_EQUAL(m_from, m_to, ()); + ASSERT_LESS_OR_EQUAL(m_to, m_usedTokens.size(), ()); +#if defined(DEBUG) + for (size_t i = m_from; i != m_to; ++i) + ASSERT(!m_usedTokens[i], (i)); +#endif + fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, true /* used */); + } + + ~ScopedMarkTokens() + { + fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, false /* used */); + } + + vector & m_usedTokens; + size_t const m_from; + size_t const m_to; +}; + +class LazyRankTable : public RankTable +{ +public: + LazyRankTable(MwmValue const & value) : m_value(value) {} + + uint8_t Get(uint64_t i) const override + { + EnsureTableLoaded(); + return m_table->Get(i); + } + + uint64_t Size() const override + { + EnsureTableLoaded(); + return m_table->Size(); + } + + RankTable::Version GetVersion() const override + { + EnsureTableLoaded(); + return m_table->GetVersion(); + } + + void Serialize(Writer & writer, bool preserveHostEndiannes) override + { + EnsureTableLoaded(); + m_table->Serialize(writer, preserveHostEndiannes); + } + +private: + void EnsureTableLoaded() const + { + if (m_table) + return; + m_table = search::RankTable::Load(m_value.m_cont); + if (!m_table) + m_table = make_unique(); + } + + MwmValue const & m_value; + mutable unique_ptr m_table; +}; + +class LocalityScorerDelegate : public LocalityScorer::Delegate +{ +public: + LocalityScorerDelegate(MwmContext const & context, Geocoder::Params const & params) + : 
m_context(context), m_params(params), m_ranks(m_context.m_value) + { + } + + // LocalityScorer::Delegate overrides: + void GetNames(uint32_t featureId, vector & names) const override + { + FeatureType ft; + if (!m_context.GetFeature(featureId, ft)) + return; + for (auto const & lang : m_params.m_langs) + { + string name; + if (ft.GetName(lang, name)) + names.push_back(name); + } + } + + uint8_t GetRank(uint32_t featureId) const override { return m_ranks.Get(featureId); } + +private: + MwmContext const & m_context; + Geocoder::Params const & m_params; + LazyRankTable m_ranks; +}; + +class StreetCategories +{ +public: + static StreetCategories const & Instance() + { + static StreetCategories const instance; + return instance; + } + + template + void ForEach(TFn && fn) const + { + for_each(m_categories.cbegin(), m_categories.cend(), forward(fn)); + } + + bool Contains(strings::UniString const & category) const + { + return binary_search(m_categories.cbegin(), m_categories.cend(), category); + } + + vector const & GetCategories() const { return m_categories; } + +private: + StreetCategories() + { + auto const & classificator = classif(); + auto addCategory = [&](uint32_t type) + { + uint32_t const index = classificator.GetIndexForType(type); + m_categories.push_back(FeatureTypeToString(index)); + }; + ftypes::IsStreetChecker::Instance().ForEachType(addCategory); + sort(m_categories.begin(), m_categories.end()); + } + + vector m_categories; + + DISALLOW_COPY_AND_MOVE(StreetCategories); +}; + +void JoinQueryTokens(QueryParams const & params, size_t curToken, size_t endToken, + strings::UniString const & sep, strings::UniString & res) +{ + ASSERT_LESS_OR_EQUAL(curToken, endToken, ()); + for (size_t i = curToken; i < endToken; ++i) + { + if (i < params.m_tokens.size()) + { + res.append(params.m_tokens[i].front()); + } + else + { + CHECK_EQUAL(i, params.m_tokens.size(), ()); + CHECK(!params.m_prefixTokens.empty(), ()); + res.append(params.m_prefixTokens.front()); + } + + if 
(i + 1 != endToken) + res.append(sep); + } +} + +void GetAffiliationName(FeatureType const & ft, string & name) +{ + VERIFY(ft.GetName(StringUtf8Multilang::kDefaultCode, name), ()); + ASSERT(!name.empty(), ()); +} + +// todo(@m) Refactor at least here, or even at indexer/ftypes_matcher.hpp. +vector GetVillageCategories() +{ + vector categories; + + auto const & classificator = classif(); + auto addCategory = [&](uint32_t type) + { + uint32_t const index = classificator.GetIndexForType(type); + categories.push_back(FeatureTypeToString(index)); + }; + ftypes::IsVillageChecker::Instance().ForEachType(addCategory); + + return categories; +} + +bool HasSearchIndex(MwmValue const & value) { return value.m_cont.IsExist(SEARCH_INDEX_FILE_TAG); } + +bool HasGeometryIndex(MwmValue & value) { return value.m_cont.IsExist(INDEX_FILE_TAG); } + +MwmSet::MwmHandle FindWorld(Index & index, vector> const & infos) +{ + MwmSet::MwmHandle handle; + for (auto const & info : infos) + { + if (info->GetType() == MwmInfo::WORLD) + { + handle = index.GetMwmHandleById(MwmSet::MwmId(info)); + break; + } + } + return handle; +} + +strings::UniString AsciiToUniString(char const * s) { return strings::UniString(s, s + strlen(s)); } + +bool IsStopWord(strings::UniString const & s) +{ + /// @todo Get all common used stop words and factor out this array into + /// search_string_utils.cpp module for example. + static char const * arr[] = {"a", "de", "da", "la"}; + + static set const kStopWords( + make_transform_iterator(arr, &AsciiToUniString), + make_transform_iterator(arr + ARRAY_SIZE(arr), &AsciiToUniString)); + + return kStopWords.count(s) > 0; +} + +double Area(m2::RectD const & rect) { return rect.IsValid() ? rect.SizeX() * rect.SizeY() : 0; } + +// Computes an average similaty between |rect| and |pivot|. By +// similarity between two rects we mean a fraction of the area of +// rects intersection to the area of the smallest rect. 
+double GetSimilarity(m2::RectD const & pivot, m2::RectD const & rect) +{ + double const area = min(Area(pivot), Area(rect)); + if (area == 0.0) + return 0.0; + m2::RectD p = pivot; + if (!p.Intersect(rect)) + return 0.0; + return Area(p) / area; +} + +// Returns shortest distance from the |pivot| to the |rect|. +// +// *NOTE* calculations below are incorrect, because shortest distance +// on the Mercator's plane is not the same as shortest distance on the +// Earth. But we assume that it is not an issue here. +double GetDistanceMeters(m2::PointD const & pivot, m2::RectD const & rect) +{ + if (rect.IsPointInside(pivot)) + return 0.0; + + double distance = numeric_limits::max(); + m2::ProjectionToSection proj; + + proj.SetBounds(rect.LeftTop(), rect.RightTop()); + distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); + + proj.SetBounds(rect.LeftBottom(), rect.RightBottom()); + distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); + + proj.SetBounds(rect.LeftTop(), rect.LeftBottom()); + distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); + + proj.SetBounds(rect.RightTop(), rect.RightBottom()); + distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); + + return distance; +} + +struct KeyedMwmInfo +{ + KeyedMwmInfo(shared_ptr const & info, m2::RectD const & pivot) : m_info(info) + { + auto const & rect = m_info->m_limitRect; + m_similarity = GetSimilarity(pivot, rect); + m_distance = GetDistanceMeters(pivot.Center(), rect); + } + + bool operator<(KeyedMwmInfo const & rhs) const + { + if (m_distance == 0.0 && rhs.m_distance == 0.0) + return m_similarity > rhs.m_similarity; + return m_distance < rhs.m_distance; + } + + shared_ptr m_info; + double m_similarity; + double m_distance; +}; + +// Reorders maps in a way that prefix consists of maps intersecting +// with pivot, suffix consists of all other maps ordered by minimum +// distance from pivot. 
Returns number of maps in prefix. +size_t OrderCountries(m2::RectD const & pivot, vector> & infos) +{ + // TODO (@y): remove this if crashes in this function + // disappear. Otherwise, remove null infos and re-check MwmSet + // again. + for (auto const & info : infos) + { + CHECK(info.get(), + ("MwmSet invariant violated. Please, contact @y if you know how to reproduce this.")); + } + + vector keyedInfos; + keyedInfos.reserve(infos.size()); + for (auto const & info : infos) + keyedInfos.emplace_back(info, pivot); + sort(keyedInfos.begin(), keyedInfos.end()); + + infos.clear(); + for (auto const & info : keyedInfos) + infos.emplace_back(info.m_info); + + auto intersects = [&](shared_ptr const & info) -> bool + { + return pivot.IsIntersect(info->m_limitRect); + }; + + auto const sep = stable_partition(infos.begin(), infos.end(), intersects); + return distance(infos.begin(), sep); +} + +// Performs pairwise union of adjacent bit vectors +// until at most one bit vector is left. +void UniteCBVs(vector> & cbvs) +{ + while (cbvs.size() > 1) + { + size_t i = 0; + size_t j = 0; + for (; j + 1 < cbvs.size(); j += 2) + cbvs[i++] = coding::CompressedBitVector::Union(*cbvs[j], *cbvs[j + 1]); + for (; j < cbvs.size(); ++j) + cbvs[i++] = move(cbvs[j]); + cbvs.resize(i); + } +} +} // namespace + +// Geocoder::Params -------------------------------------------------------------------------------- +Geocoder::Params::Params() : m_mode(Mode::Everywhere), m_accuratePivotCenter(0, 0) {} + +// Geocoder::Geocoder ------------------------------------------------------------------------------ +Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter) + : m_index(index) + , m_infoGetter(infoGetter) + , m_numTokens(0) + , m_model(SearchModel::Instance()) + , m_pivotRectsCache(kPivotRectsCacheSize, static_cast(*this), + Processor::kMaxViewportRadiusM) + , m_localityRectsCache(kLocalityRectsCacheSize, static_cast(*this)) + , m_pivotFeatures(index) + , m_villages(nullptr) 
+ , m_filter(nullptr) + , m_matcher(nullptr) + , m_finder(static_cast(*this)) + , m_lastMatchedRegion(nullptr) + , m_preRanker(nullptr) +{ +} + +Geocoder::~Geocoder() {} + +void Geocoder::SetParams(Params const & params) +{ + m_params = params; + + // Filter stop words. + if (m_params.m_tokens.size() > 1) + { + for (auto & v : m_params.m_tokens) + my::EraseIf(v, &IsStopWord); + + auto & v = m_params.m_tokens; + my::EraseIf(v, mem_fn(&Params::TSynonymsVector::empty)); + + // If all tokens are stop words - give up. + if (m_params.m_tokens.empty()) + m_params = params; + } + + m_retrievalParams = m_params; + m_numTokens = m_params.m_tokens.size(); + if (!m_params.m_prefixTokens.empty()) + ++m_numTokens; + + // Remove all category synonyms for streets, as they're extracted + // individually via LoadStreets. + for (size_t i = 0; i < m_numTokens; ++i) + { + auto & synonyms = m_params.GetTokens(i); + ASSERT(!synonyms.empty(), ()); + + if (IsStreetSynonym(synonyms.front())) + { + auto b = synonyms.begin(); + auto e = synonyms.end(); + auto const & categories = StreetCategories::Instance(); + synonyms.erase(remove_if(b + 1, e, bind(&StreetCategories::Contains, cref(categories), _1)), + e); + } + } + + LOG(LDEBUG, ("Languages =", m_params.m_langs)); +} + +void Geocoder::GoEverywhere(PreRanker & preRanker) +{ +// TODO (@y): remove following code as soon as Geocoder::Go() will +// work fast for most cases (significantly less than 1 second). 
+#if defined(DEBUG) + my::Timer timer; + MY_SCOPE_GUARD(printDuration, [&timer]() + { + LOG(LINFO, ("Total geocoding time:", timer.ElapsedSeconds(), "seconds")); + }); +#endif +#if defined(USE_GOOGLE_PROFILER) && defined(OMIM_OS_LINUX) + ProfilerStart("/tmp/geocoder.prof"); + MY_SCOPE_GUARD(stopProfiler, &ProfilerStop); +#endif + + if (m_numTokens == 0) + return; + + vector> infos; + m_index.GetMwmsInfo(infos); + + GoImpl(preRanker, infos, false /* inViewport */); +} + +void Geocoder::GoInViewport(PreRanker & preRanker) +{ + if (m_numTokens == 0) + return; + + vector> infos; + m_index.GetMwmsInfo(infos); + + my::EraseIf(infos, [this](shared_ptr const & info) + { + return !m_params.m_pivot.IsIntersect(info->m_limitRect); + }); + + GoImpl(preRanker, infos, true /* inViewport */); +} + +void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos, bool inViewport) +{ + m_preRanker = &preRanker; + + try + { + // Tries to find world and fill localities table. + { + m_cities.clear(); + for (auto & regions : m_regions) + regions.clear(); + MwmSet::MwmHandle handle = FindWorld(m_index, infos); + if (handle.IsAlive()) + { + auto & value = *handle.GetValue(); + + // All MwmIds are unique during the application lifetime, so + // it's ok to save MwmId. + m_worldId = handle.GetId(); + m_context = make_unique(move(handle)); + if (HasSearchIndex(value)) + { + PrepareAddressFeatures(); + FillLocalitiesTable(); + } + m_context.reset(); + } + } + + // Orders countries by distance from viewport center and position. + // This order is used during MatchAroundPivot() stage - we try to + // match as many features as possible without trying to match + // locality (COUNTRY or CITY), and only when there are too many + // features, viewport and position vicinity filter is used. 
To + // prevent full search in all mwms, we need to limit somehow a set + // of mwms for MatchAroundPivot(), so, we always call + // MatchAroundPivot() on maps intersecting with pivot rect, other + // maps are ordered by distance from pivot, and we stop to call + // MatchAroundPivot() on them as soon as at least one feature is + // found. + size_t const numIntersectingMaps = OrderCountries(m_params.m_pivot, infos); + + // MatchAroundPivot() should always be matched in mwms + // intersecting with position and viewport. + auto const & cancellable = static_cast(*this); + auto processCountry = [&](size_t index, unique_ptr context) + { + ASSERT(context, ()); + m_context = move(context); + MY_SCOPE_GUARD(cleanup, [&]() + { + LOG(LDEBUG, (m_context->GetName(), "geocoding complete.")); + m_matcher->OnQueryFinished(); + m_matcher = nullptr; + m_context.reset(); + m_addressFeatures.clear(); + m_streets = nullptr; + m_villages = nullptr; + }); + + auto it = m_matchersCache.find(m_context->GetId()); + if (it == m_matchersCache.end()) + { + it = m_matchersCache.insert(make_pair(m_context->GetId(), make_unique( + m_index, cancellable))) + .first; + } + m_matcher = it->second.get(); + m_matcher->SetContext(m_context.get()); + + PrepareAddressFeatures(); + + coding::CompressedBitVector const * viewportCBV = nullptr; + if (inViewport) + viewportCBV = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); + + if (viewportCBV) + { + for (size_t i = 0; i < m_numTokens; ++i) + { + m_addressFeatures[i] = + coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV); + } + } + + // |m_streets| will be initialized in LimitedSearch() and its + // callees, if needed. 
+ m_streets = nullptr; + + m_villages = LoadVillages(*m_context); + + auto citiesFromWorld = m_cities; + FillVillageLocalities(); + MY_SCOPE_GUARD(remove_villages, [&]() + { + m_cities = citiesFromWorld; + }); + + m_usedTokens.assign(m_numTokens, false); + + m_lastMatchedRegion = nullptr; + MatchRegions(REGION_TYPE_COUNTRY); + + if (index < numIntersectingMaps || m_preRanker->IsEmpty()) + MatchAroundPivot(); + }; + + // Iterates through all alive mwms and performs geocoding. + ForEachCountry(infos, processCountry); + } + catch (CancelException & e) + { + } + + // Fill results ranks, as they were missed. + FillMissingFieldsInResults(); +} + +void Geocoder::ClearCaches() +{ + m_pivotRectsCache.Clear(); + m_localityRectsCache.Clear(); + m_pivotFeatures.Clear(); + + m_addressFeatures.clear(); + m_matchersCache.clear(); + m_streetsCache.clear(); + m_villages.reset(); + m_postcodes.Clear(); +} + +void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) +{ + ASSERT_LESS(curToken, endToken, ()); + ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ()); + + m_retrievalParams.m_tokens.clear(); + m_retrievalParams.m_prefixTokens.clear(); + + // TODO (@y): possibly it's not cheap to copy vectors of strings. + // Profile it, and in case of serious performance loss, refactor + // QueryParams to support subsets of tokens. 
+ for (size_t i = curToken; i < endToken; ++i) + { + if (i < m_params.m_tokens.size()) + m_retrievalParams.m_tokens.push_back(m_params.m_tokens[i]); + else + m_retrievalParams.m_prefixTokens = m_params.m_prefixTokens; + } +} + +void Geocoder::PrepareAddressFeatures() +{ + m_addressFeatures.resize(m_numTokens); + for (size_t i = 0; i < m_numTokens; ++i) + { + PrepareRetrievalParams(i, i + 1); + m_addressFeatures[i] = + RetrieveAddressFeatures(m_context->GetId(), m_context->m_value, + static_cast(*this), m_retrievalParams); + ASSERT(m_addressFeatures[i], ()); + } +} + +void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, + FeaturesLayer & layer) +{ + layer.Clear(); + layer.m_type = type; + layer.m_startToken = startToken; + layer.m_endToken = endToken; + + JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, + layer.m_subQuery); + layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); +} + +void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter, + size_t const maxNumLocalities, + vector & preLocalities) +{ + preLocalities.clear(); + + for (size_t startToken = 0; startToken < m_numTokens; ++startToken) + { + CBVPtr intersection; + intersection.SetFull(); + if (filter) + intersection.Intersect(filter); + intersection.Intersect(m_addressFeatures[startToken].get()); + if (intersection.IsEmpty()) + continue; + + for (size_t endToken = startToken + 1; endToken <= m_numTokens; ++endToken) + { + // Skip locality candidates that match only numbers. 
+ if (!m_params.IsNumberTokens(startToken, endToken)) + { + intersection.ForEach([&](uint32_t featureId) + { + Locality l; + l.m_countryId = m_context->GetId(); + l.m_featureId = featureId; + l.m_startToken = startToken; + l.m_endToken = endToken; + preLocalities.push_back(l); + }); + } + + if (endToken < m_numTokens) + { + intersection.Intersect(m_addressFeatures[endToken].get()); + if (intersection.IsEmpty()) + break; + } + } + } + + LocalityScorerDelegate delegate(*m_context, m_params); + LocalityScorer scorer(m_params, delegate); + scorer.GetTopLocalities(maxNumLocalities, preLocalities); +} + +void Geocoder::FillLocalitiesTable() +{ + vector preLocalities; + FillLocalityCandidates(nullptr, kMaxNumLocalities, preLocalities); + + size_t numCities = 0; + size_t numStates = 0; + size_t numCountries = 0; + for (auto & l : preLocalities) + { + FeatureType ft; + m_context->GetFeature(l.m_featureId, ft); + + auto addRegionMaps = [&](size_t & count, size_t maxCount, RegionType type) + { + if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT) + { + Region region(l, type); + region.m_center = ft.GetCenter(); + + string name; + GetAffiliationName(ft, region.m_enName); + LOG(LDEBUG, ("Region =", region.m_enName)); + + m_infoGetter.GetMatchedRegions(region.m_enName, region.m_ids); + if (region.m_ids.empty()) + LOG(LWARNING, ("Maps not found for region", region.m_enName)); + + ++count; + m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region); + } + }; + + switch (m_model.GetSearchType(ft)) + { + case SearchModel::SEARCH_TYPE_CITY: + { + if (numCities < kMaxNumCities && ft.GetFeatureType() == feature::GEOM_POINT) + { + ++numCities; + + auto const center = feature::GetCenter(ft); + auto const population = ft.GetPopulation(); + auto const radius = ftypes::GetRadiusByPopulation(population); + + City city(l, SearchModel::SEARCH_TYPE_CITY); + city.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius); + +#if defined(DEBUG) + 
ft.GetName(StringUtf8Multilang::kDefaultCode, city.m_defaultName); + LOG(LDEBUG, ("City =", city.m_defaultName, radius)); +#endif + + m_cities[{l.m_startToken, l.m_endToken}].push_back(city); + } + break; + } + case SearchModel::SEARCH_TYPE_STATE: + { + addRegionMaps(numStates, kMaxNumStates, REGION_TYPE_STATE); + break; + } + case SearchModel::SEARCH_TYPE_COUNTRY: + { + addRegionMaps(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY); + break; + } + default: break; + } + } +} + +void Geocoder::FillVillageLocalities() +{ + vector preLocalities; + FillLocalityCandidates(m_villages.get(), kMaxNumVillages, preLocalities); + + size_t numVillages = 0; + + for (auto & l : preLocalities) + { + FeatureType ft; + m_context->GetFeature(l.m_featureId, ft); + + if (m_model.GetSearchType(ft) != SearchModel::SEARCH_TYPE_VILLAGE) + continue; + + // We accept lines and areas as village features. + auto const center = feature::GetCenter(ft); + ++numVillages; + City village(l, SearchModel::SEARCH_TYPE_VILLAGE); + + auto const population = ft.GetPopulation(); + double const radius = ftypes::GetRadiusByPopulation(population); + village.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius); + +#if defined(DEBUG) + ft.GetName(StringUtf8Multilang::kDefaultCode, village.m_defaultName); + LOG(LDEBUG, ("Village =", village.m_defaultName)); +#endif + + m_cities[{l.m_startToken, l.m_endToken}].push_back(village); + if (numVillages >= kMaxNumVillages) + break; + } +} + +template +void Geocoder::ForEachCountry(vector> const & infos, TFn && fn) +{ + for (size_t i = 0; i < infos.size(); ++i) + { + auto const & info = infos[i]; + if (info->GetType() != MwmInfo::COUNTRY && info->GetType() != MwmInfo::WORLD) + continue; + if (info->GetType() == MwmInfo::COUNTRY && m_params.m_mode == Mode::World) + continue; + + auto handle = m_index.GetMwmHandleById(MwmSet::MwmId(info)); + if (!handle.IsAlive()) + continue; + auto & value = *handle.GetValue(); + if (!HasSearchIndex(value) || 
!HasGeometryIndex(value)) + continue; + fn(i, make_unique(move(handle))); + } +} + +void Geocoder::MatchRegions(RegionType type) +{ + switch (type) + { + case REGION_TYPE_STATE: + // Tries to skip state matching and go to cities matching. + // Then, performs states matching. + MatchCities(); + break; + case REGION_TYPE_COUNTRY: + // Tries to skip country matching and go to states matching. + // Then, performs countries matching. + MatchRegions(REGION_TYPE_STATE); + break; + case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return; + } + + auto const & regions = m_regions[type]; + + auto const & fileName = m_context->GetName(); + bool const isWorld = m_context->GetInfo()->GetType() == MwmInfo::WORLD; + + // Try to match regions. + for (auto const & p : regions) + { + BailIfCancelled(); + + size_t const startToken = p.first.first; + size_t const endToken = p.first.second; + if (HasUsedTokensInRange(startToken, endToken)) + continue; + + for (auto const & region : p.second) + { + bool matches = false; + + // On the World.mwm we need to check that CITY - STATE - COUNTRY + // form a nested sequence. Otherwise, as mwm borders do not + // intersect state or country boundaries, it's enough to check + // mwm that is currently being processed belongs to region. + if (isWorld) + { + matches = m_lastMatchedRegion == nullptr || + m_infoGetter.IsBelongToRegions(region.m_center, m_lastMatchedRegion->m_ids); + } + else + { + matches = m_infoGetter.IsBelongToRegions(fileName, region.m_ids); + } + + if (!matches) + continue; + + ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + if (AllTokensUsed()) + { + // Region matches to search query, we need to emit it as is. 
+ EmitResult(region, startToken, endToken); + continue; + } + + m_lastMatchedRegion = &region; + MY_SCOPE_GUARD(cleanup, [this]() + { + m_lastMatchedRegion = nullptr; + }); + switch (type) + { + case REGION_TYPE_STATE: MatchCities(); break; + case REGION_TYPE_COUNTRY: MatchRegions(REGION_TYPE_STATE); break; + case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break; + } + } + } +} + +void Geocoder::MatchCities() +{ + // Localities are ordered by (m_startToken, m_endToken) pairs. + for (auto const & p : m_cities) + { + size_t const startToken = p.first.first; + size_t const endToken = p.first.second; + if (HasUsedTokensInRange(startToken, endToken)) + continue; + + for (auto const & city : p.second) + { + BailIfCancelled(); + + if (m_lastMatchedRegion && + !m_infoGetter.IsBelongToRegions(city.m_rect.Center(), m_lastMatchedRegion->m_ids)) + { + continue; + } + + ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + if (AllTokensUsed()) + { + // City matches to search query, we need to emit it as is. + EmitResult(city, startToken, endToken); + continue; + } + + // No need to search features in the World map. 
+ if (m_context->GetInfo()->GetType() == MwmInfo::WORLD) + continue; + + auto const * cityFeatures = + RetrieveGeometryFeatures(*m_context, city.m_rect, RECT_ID_LOCALITY); + + if (coding::CompressedBitVector::IsEmpty(cityFeatures)) + continue; + + LocalityFilter filter(*cityFeatures); + LimitedSearch(filter); + } + } +} + +void Geocoder::MatchAroundPivot() +{ + auto const * features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); + + if (!features) + return; + + ViewportFilter filter(*features, m_preRanker->Limit() /* threshold */); + LimitedSearch(filter); +} + +void Geocoder::LimitedSearch(FeaturesFilter const & filter) +{ + m_filter = &filter; + MY_SCOPE_GUARD(resetFilter, [&]() + { + m_filter = nullptr; + }); + + if (!m_streets) + m_streets = LoadStreets(*m_context); + + MatchUnclassified(0 /* curToken */); + + auto search = [this]() + { + GreedilyMatchStreets(); + MatchPOIsAndBuildings(0 /* curToken */); + }; + + WithPostcodes(search); + search(); +} + +template +void Geocoder::WithPostcodes(TFn && fn) +{ + size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode(); + + for (size_t startToken = 0; startToken != m_numTokens; ++startToken) + { + size_t endToken = startToken; + for (size_t n = 1; startToken + n <= m_numTokens && n <= maxPostcodeTokens; ++n) + { + if (m_usedTokens[startToken + n - 1]) + break; + + TokenSlice slice(m_params, startToken, startToken + n); + auto const isPrefix = startToken + n == m_numTokens; + if (LooksLikePostcode(QuerySlice(slice), isPrefix)) + endToken = startToken + n; + } + if (startToken == endToken) + continue; + + auto postcodes = + RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, startToken, endToken)); + MY_SCOPE_GUARD(cleanup, [&]() + { + m_postcodes.Clear(); + }); + + if (!coding::CompressedBitVector::IsEmpty(postcodes)) + { + ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + + m_postcodes.Clear(); + m_postcodes.m_startToken = startToken; + m_postcodes.m_endToken = endToken; 
+ m_postcodes.m_features = move(postcodes); + + fn(); + } + } +} + +void Geocoder::GreedilyMatchStreets() +{ + for (size_t startToken = 0; startToken < m_numTokens; ++startToken) + { + if (m_usedTokens[startToken]) + continue; + + // Here we try to match as many tokens as possible while + // intersection is a non-empty bit vector of streets. All tokens + // that are synonyms to streets are ignored. Moreover, each time + // a token that looks like a beginning of a house number is met, + // we try to use current intersection of tokens as a street layer + // and try to match buildings or pois. + unique_ptr allFeatures; + + size_t curToken = startToken; + + // This variable is used for prevention of duplicate calls to + // CreateStreetsLayerAndMatchLowerLayers() with the same + // arguments. + size_t lastStopToken = curToken; + + for (; curToken < m_numTokens && !m_usedTokens[curToken]; ++curToken) + { + auto const & token = m_params.GetTokens(curToken).front(); + if (IsStreetSynonymPrefix(token)) + continue; + + if (feature::IsHouseNumber(token)) + { + CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures); + lastStopToken = curToken; + } + + unique_ptr buffer; + if (startToken == curToken || coding::CompressedBitVector::IsEmpty(allFeatures)) + buffer = coding::CompressedBitVector::Intersect(*m_streets, *m_addressFeatures[curToken]); + else + buffer = coding::CompressedBitVector::Intersect(*allFeatures, *m_addressFeatures[curToken]); + + if (coding::CompressedBitVector::IsEmpty(buffer)) + break; + + allFeatures.swap(buffer); + } + + if (curToken != lastStopToken) + CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures); + } +} + +void Geocoder::CreateStreetsLayerAndMatchLowerLayers( + size_t startToken, size_t endToken, unique_ptr const & features) +{ + ASSERT(m_layers.empty(), ()); + + if (coding::CompressedBitVector::IsEmpty(features)) + return; + + CBVPtr filtered(features.get(), false /* isOwner */); + if 
(m_filter->NeedToFilter(*features)) + filtered.Set(m_filter->Filter(*features).release(), true /* isOwner */); + + m_layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + + auto & layer = m_layers.back(); + InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer); + + vector sortedFeatures; + sortedFeatures.reserve(features->PopCount()); + filtered.ForEach(MakeBackInsertFunctor(sortedFeatures)); + layer.m_sortedFeatures = &sortedFeatures; + + ScopedMarkTokens mark(m_usedTokens, startToken, endToken); + MatchPOIsAndBuildings(0 /* curToken */); +} + +void Geocoder::MatchPOIsAndBuildings(size_t curToken) +{ + BailIfCancelled(); + + curToken = SkipUsedTokens(curToken); + if (curToken == m_numTokens) + { + // All tokens were consumed, find paths through layers, emit + // features. + if (m_postcodes.IsEmpty()) + return FindPaths(); + + // When there are no layers but user entered a postcode, we have + // to emit all features matching to the postcode. + if (m_layers.size() == 0) + { + CBVPtr filtered; + if (m_filter->NeedToFilter(*m_postcodes.m_features)) + filtered.Set(m_filter->Filter(*m_postcodes.m_features)); + else + filtered.Set(m_postcodes.m_features.get(), false /* isOwner */); + filtered.ForEach([&](uint32_t id) + { + EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(id), + m_postcodes.m_startToken, m_postcodes.m_endToken); + }); + return; + } + + if (!(m_layers.size() == 1 && m_layers[0].m_type == SearchModel::SEARCH_TYPE_STREET)) + return FindPaths(); + + // If there're only one street layer but user also entered a + // postcode, we need to emit all features matching to postcode on + // the given street. 
+ m_layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + + auto & layer = m_layers.back(); + InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_startToken, m_postcodes.m_endToken, + layer); + + vector features; + coding::CompressedBitVectorEnumerator::ForEach(*m_postcodes.m_features, + MakeBackInsertFunctor(features)); + layer.m_sortedFeatures = &features; + return FindPaths(); + } + + m_layers.emplace_back(); + MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); + + // Clusters of features by search type. Each cluster is a sorted + // list of ids. + size_t const kNumClusters = SearchModel::SEARCH_TYPE_BUILDING + 1; + vector clusters[kNumClusters]; + + // Appends |featureId| to the end of the corresponding cluster, if + // any. + auto clusterize = [&](uint32_t featureId) + { + auto const searchType = GetSearchTypeInGeocoding(featureId); + + // All SEARCH_TYPE_CITY features were filtered in + // MatchCities(). All SEARCH_TYPE_STREET features were + // filtered in GreedilyMatchStreets(). + if (searchType < kNumClusters) + { + if (m_postcodes.IsEmpty() || m_postcodes.m_features->GetBit(featureId)) + clusters[searchType].push_back(featureId); + } + }; + + CBVPtr features; + features.SetFull(); + + // Try to consume [curToken, m_numTokens) tokens range. + for (size_t n = 1; curToken + n <= m_numTokens && !m_usedTokens[curToken + n - 1]; ++n) + { + // At this point |features| is the intersection of + // m_addressFeatures[curToken], m_addressFeatures[curToken + 1], + // ..., m_addressFeatures[curToken + n - 2]. 
+ + BailIfCancelled(); + + { + auto & layer = m_layers.back(); + InitLayer(layer.m_type, curToken, curToken + n, layer); + } + + features.Intersect(m_addressFeatures[curToken + n - 1].get()); + ASSERT(features.Get(), ()); + + CBVPtr filtered; + if (m_filter->NeedToFilter(*features)) + filtered.Set(m_filter->Filter(*features)); + else + filtered.Set(features.Get(), false /* isOwner */); + ASSERT(filtered.Get(), ()); + + bool const looksLikeHouseNumber = feature::IsHouseNumber(m_layers.back().m_subQuery); + + if (filtered.IsEmpty() && !looksLikeHouseNumber) + break; + + if (n == 1) + { + filtered.ForEach(clusterize); + } + else + { + auto noFeature = [&filtered](uint32_t featureId) -> bool + { + return !filtered->GetBit(featureId); + }; + for (auto & cluster : clusters) + my::EraseIf(cluster, noFeature); + + size_t curs[kNumClusters] = {}; + size_t ends[kNumClusters]; + for (size_t i = 0; i < kNumClusters; ++i) + ends[i] = clusters[i].size(); + filtered.ForEach([&](uint32_t featureId) + { + bool found = false; + for (size_t i = 0; i < kNumClusters && !found; ++i) + { + size_t & cur = curs[i]; + size_t const end = ends[i]; + while (cur != end && clusters[i][cur] < featureId) + ++cur; + if (cur != end && clusters[i][cur] == featureId) + found = true; + } + if (!found) + clusterize(featureId); + }); + for (size_t i = 0; i < kNumClusters; ++i) + inplace_merge(clusters[i].begin(), clusters[i].begin() + ends[i], clusters[i].end()); + } + + for (size_t i = 0; i < kNumClusters; ++i) + { + // ATTENTION: DO NOT USE layer after recursive calls to + // MatchPOIsAndBuildings(). This may lead to use-after-free. 
+ auto & layer = m_layers.back(); + layer.m_sortedFeatures = &clusters[i]; + + if (i == SearchModel::SEARCH_TYPE_BUILDING) + { + if (layer.m_sortedFeatures->empty() && !looksLikeHouseNumber) + continue; + } + else if (layer.m_sortedFeatures->empty()) + { + continue; + } + + layer.m_type = static_cast(i); + if (IsLayerSequenceSane()) + MatchPOIsAndBuildings(curToken + n); + } + } +} + +bool Geocoder::IsLayerSequenceSane() const +{ + ASSERT(!m_layers.empty(), ()); + static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32, + "Select a wider type to represent search types mask."); + uint32_t mask = 0; + size_t buildingIndex = m_layers.size(); + size_t streetIndex = m_layers.size(); + + // Following loop returns false iff there're two different layers + // of the same search type. + for (size_t i = 0; i < m_layers.size(); ++i) + { + auto const & layer = m_layers[i]; + ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ()); + + // TODO (@y): probably it's worth to check belongs-to-locality here. + uint32_t bit = 1U << layer.m_type; + if (mask & bit) + return false; + mask |= bit; + + if (layer.m_type == SearchModel::SEARCH_TYPE_BUILDING) + buildingIndex = i; + else if (layer.m_type == SearchModel::SEARCH_TYPE_STREET) + streetIndex = i; + } + + bool const hasBuildings = buildingIndex != m_layers.size(); + bool const hasStreets = streetIndex != m_layers.size(); + + // Checks that building and street layers are neighbours. + if (hasBuildings && hasStreets) + { + auto const & buildings = m_layers[buildingIndex]; + auto const & streets = m_layers[streetIndex]; + if (buildings.m_startToken != streets.m_endToken && + buildings.m_endToken != streets.m_startToken) + { + return false; + } + } + + return true; +} + +void Geocoder::FindPaths() +{ + if (m_layers.empty()) + return; + + // Layers ordered by search type. 
+ vector sortedLayers; + sortedLayers.reserve(m_layers.size()); + for (auto & layer : m_layers) + sortedLayers.push_back(&layer); + sort(sortedLayers.begin(), sortedLayers.end(), my::LessBy(&FeaturesLayer::m_type)); + + auto const & innermostLayer = *sortedLayers.front(); + + m_matcher->SetPostcodes(m_postcodes.m_features.get()); + m_finder.ForEachReachableVertex( + *m_matcher, sortedLayers, [this, &innermostLayer](IntersectionResult const & result) + { + ASSERT(result.IsValid(), ()); + // TODO(@y, @m, @vng): use rest fields of IntersectionResult for + // better scoring. + EmitResult(m_context->GetId(), result.InnermostResult(), innermostLayer.m_type, + innermostLayer.m_startToken, innermostLayer.m_endToken); + }); +} + +void Geocoder::EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type, + size_t startToken, size_t endToken) +{ + FeatureID id(mwmId, ftId); + + // Distance and rank will be filled at the end, for all results at once. + // + // TODO (@y, @m): need to skip zero rank features that are too + // distant from the pivot when there're enough results close to the + // pivot. 
+ PreRankingInfo info; + info.m_searchType = type; + info.m_startToken = startToken; + info.m_endToken = endToken; + + m_preRanker->Emplace(id, info); +} + +void Geocoder::EmitResult(Region const & region, size_t startToken, size_t endToken) +{ + SearchModel::SearchType type; + switch (region.m_type) + { + case REGION_TYPE_STATE: type = SearchModel::SEARCH_TYPE_STATE; break; + case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break; + case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break; + } + EmitResult(m_worldId, region.m_featureId, type, startToken, endToken); +} + +void Geocoder::EmitResult(City const & city, size_t startToken, size_t endToken) +{ + EmitResult(city.m_countryId, city.m_featureId, city.m_type, startToken, endToken); +} + +void Geocoder::FillMissingFieldsInResults() +{ + MwmSet::MwmId mwmId; + MwmSet::MwmHandle mwmHandle; + unique_ptr rankTable; + + m_preRanker->ForEachInfo([&](FeatureID const & id, PreRankingInfo & info) + { + if (id.m_mwmId != mwmId) + { + mwmId = id.m_mwmId; + mwmHandle = m_index.GetMwmHandleById(mwmId); + if (mwmHandle.IsAlive()) + rankTable = + RankTable::Load(mwmHandle.GetValue()->m_cont); + else + rankTable = make_unique(); + } + + info.m_rank = rankTable->Get(id.m_index); + }); + + if (m_preRanker->Size() > m_preRanker->Limit()) + { + m_pivotFeatures.SetPosition(m_params.m_accuratePivotCenter, m_params.m_scale); + m_preRanker->ForEachInfo([&](FeatureID const & id, PreRankingInfo & info) + { + info.m_distanceToPivot = + m_pivotFeatures.GetDistanceToFeatureMeters(id); + }); + } +} + +void Geocoder::MatchUnclassified(size_t curToken) +{ + ASSERT(m_layers.empty(), ()); + + // We need to match all unused tokens to UNCLASSIFIED features, + // therefore unused tokens must be adjacent to each other. 
For + // example, as parks are UNCLASSIFIED now, it's ok to match "London + // Hyde Park", because London will be matched as a city and rest + // adjacent tokens will be matched to "Hyde Park", whereas it's not + // ok to match something to "Park London Hyde", because tokens + // "Park" and "Hyde" are not adjacent. + if (NumUnusedTokensGroups() != 1) + return; + + CBVPtr allFeatures; + allFeatures.SetFull(); + + auto startToken = curToken; + for (curToken = SkipUsedTokens(curToken); curToken < m_numTokens && !m_usedTokens[curToken]; + ++curToken) + { + allFeatures.Intersect(m_addressFeatures[curToken].get()); + } + + if (m_filter->NeedToFilter(*allFeatures)) + allFeatures.Set(m_filter->Filter(*allFeatures).release(), true /* isOwner */); + + if (allFeatures.IsEmpty()) + return; + + auto emitUnclassified = [&](uint32_t featureId) + { + auto type = GetSearchTypeInGeocoding(featureId); + if (type == SearchModel::SEARCH_TYPE_UNCLASSIFIED) + EmitResult(m_context->GetId(), featureId, type, startToken, curToken); + }; + allFeatures.ForEach(emitUnclassified); +} + +unique_ptr Geocoder::LoadCategories( + MwmContext & context, vector const & categories) +{ + ASSERT(context.m_handle.IsAlive(), ()); + ASSERT(HasSearchIndex(context.m_value), ()); + + m_retrievalParams.m_tokens.resize(1); + m_retrievalParams.m_tokens[0].resize(1); + m_retrievalParams.m_prefixTokens.clear(); + + vector> cbvs; + + for_each(categories.begin(), categories.end(), [&](strings::UniString const & category) + { + m_retrievalParams.m_tokens[0][0] = category; + auto cbv = RetrieveAddressFeatures(context.GetId(), context.m_value, + static_cast(*this), + m_retrievalParams); + if (!coding::CompressedBitVector::IsEmpty(cbv)) + cbvs.push_back(move(cbv)); + }); + + UniteCBVs(cbvs); + if (cbvs.empty()) + cbvs.push_back(make_unique()); + + return move(cbvs[0]); +} + +coding::CompressedBitVector const * Geocoder::LoadStreets(MwmContext & context) +{ + if (!context.m_handle.IsAlive() || 
!HasSearchIndex(context.m_value)) + return nullptr; + + auto mwmId = context.m_handle.GetId(); + auto const it = m_streetsCache.find(mwmId); + if (it != m_streetsCache.cend()) + return it->second.get(); + + auto streets = LoadCategories(context, StreetCategories::Instance().GetCategories()); + + auto const * result = streets.get(); + m_streetsCache[mwmId] = move(streets); + return result; +} + +unique_ptr Geocoder::LoadVillages(MwmContext & context) +{ + if (!context.m_handle.IsAlive() || !HasSearchIndex(context.m_value)) + return make_unique(); + + return LoadCategories(context, GetVillageCategories()); +} + +unique_ptr Geocoder::RetrievePostcodeFeatures( + MwmContext const & context, TokenSlice const & slice) +{ + return ::search::RetrievePostcodeFeatures(context.GetId(), context.m_value, + static_cast(*this), slice); +} + +coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures(MwmContext const & context, + m2::RectD const & rect, + RectId id) +{ + switch (id) + { + case RECT_ID_PIVOT: return m_pivotRectsCache.Get(context, rect, m_params.m_scale); + case RECT_ID_LOCALITY: return m_localityRectsCache.Get(context, rect, m_params.m_scale); + case RECT_ID_COUNT: ASSERT(false, ("Invalid RectId.")); return nullptr; + } +} + +SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId) +{ + if (m_streets->GetBit(featureId)) + return SearchModel::SEARCH_TYPE_STREET; + if (m_villages->GetBit(featureId)) + return SearchModel::SEARCH_TYPE_VILLAGE; + + FeatureType feature; + m_context->GetFeature(featureId, feature); + return m_model.GetSearchType(feature); +} + +bool Geocoder::AllTokensUsed() const +{ + return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); +} + +bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const +{ + return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor()); +} + +size_t Geocoder::NumUnusedTokensGroups() const +{ + size_t numGroups = 0; + for (size_t i = 0; i < 
m_usedTokens.size(); ++i) + { + if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1])) + ++numGroups; + } + return numGroups; +} + +size_t Geocoder::SkipUsedTokens(size_t curToken) const +{ + while (curToken != m_usedTokens.size() && m_usedTokens[curToken]) + ++curToken; + return curToken; +} + +string DebugPrint(Geocoder::Locality const & locality) +{ + ostringstream os; + os << "Locality [" << DebugPrint(locality.m_countryId) << ", featureId=" << locality.m_featureId + << ", startToken=" << locality.m_startToken << ", endToken=" << locality.m_endToken << "]"; + return os.str(); +} + +} // namespace search diff --git a/search/geocoder.hpp b/search/geocoder.hpp new file mode 100644 index 0000000000..13810f27f4 --- /dev/null +++ b/search/geocoder.hpp @@ -0,0 +1,390 @@ +#pragma once + +#include "search/cancel_exception.hpp" +#include "search/features_layer.hpp" +#include "search/features_layer_path_finder.hpp" +#include "search/geometry_cache.hpp" +#include "search/mode.hpp" +#include "search/mwm_context.hpp" +#include "search/nested_rects_cache.hpp" +#include "search/pre_ranking_info.hpp" +#include "search/query_params.hpp" +#include "search/ranking_utils.hpp" +#include "search/search_model.hpp" + +#include "indexer/index.hpp" +#include "indexer/mwm_set.hpp" + +#include "storage/country_info_getter.hpp" + +#include "coding/compressed_bit_vector.hpp" + +#include "geometry/rect2d.hpp" + +#include "base/buffer_vector.hpp" +#include "base/cancellable.hpp" +#include "base/macros.hpp" +#include "base/string_utils.hpp" + +#include "std/limits.hpp" +#include "std/set.hpp" +#include "std/string.hpp" +#include "std/unique_ptr.hpp" +#include "std/unordered_map.hpp" +#include "std/vector.hpp" + +class MwmInfo; +class MwmValue; + +namespace coding +{ +class CompressedBitVector; +} + +namespace storage +{ +class CountryInfoGetter; +} // namespace storage + +namespace search +{ +class PreRanker; + +class FeaturesFilter; +class FeaturesLayerMatcher; +class SearchModel; +class 
TokenSlice; + +// This class is used to retrieve all features corresponding to a +// search query. Search query is represented as a sequence of tokens +// (including synonyms for these tokens), and Geocoder tries to build +// all possible partitions (or layers) of the search query, where each +// layer is a set of features corresponding to some search class +// (e.g. POI, BUILDING, STREET, etc., see search_model.hpp). +// Then, Geocoder builds a layered graph, with edges between features +// on adjacent layers (e.g. between BUILDING and STREET, STREET and +// CITY, etc.). Usually an edge between two features means that a +// feature from the lowest layer geometrically belongs to a feature +// from the highest layer (BUILDING is located on STREET, STREET is +// located inside CITY, CITY is located inside STATE, etc.). Final +// part is to find all paths through this layered graph and report all +// features from the lowest layer, that are reachable from the +// highest layer. +class Geocoder : public my::Cancellable +{ +public: + struct Params : public QueryParams + { + Params(); + + Mode m_mode; + + // We need to pass both pivot and pivot center because pivot is + // usually a rectangle created by radius and center, and due to + // precision loss, |m_pivot|.Center() may differ from + // |m_accuratePivotCenter|. Therefore |m_pivot| should be used for + // fast filtering of features outside of the rectangle, while + // |m_accuratePivotCenter| should be used when it's needed to + // compute a distance from a feature to the pivot. 
+ m2::RectD m_pivot; + m2::PointD m_accuratePivotCenter; + }; + + enum RegionType + { + REGION_TYPE_STATE, + REGION_TYPE_COUNTRY, + REGION_TYPE_COUNT + }; + + struct Locality + { + Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {} + + Locality(uint32_t featureId, size_t startToken, size_t endToken) + : m_featureId(featureId), m_startToken(startToken), m_endToken(endToken) + { + } + + MwmSet::MwmId m_countryId; + uint32_t m_featureId; + size_t m_startToken; + size_t m_endToken; + }; + + // This struct represents a country or US- or Canadian- state. It + // is used to filter maps before search. + struct Region : public Locality + { + Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {} + + storage::CountryInfoGetter::TRegionIdSet m_ids; + string m_enName; + m2::PointD m_center; + RegionType m_type; + }; + + // This struct represents a city or a village. It is used to filter features + // during search. + // todo(@m) It works well as is, but consider a new naming scheme + // when counties etc. are added. E.g., Region for countries and + // states and Locality for smaller settlements. + struct City : public Locality + { + City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {} + + m2::RectD m_rect; + SearchModel::SearchType m_type; +#if defined(DEBUG) + string m_defaultName; +#endif + }; + + Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter); + + ~Geocoder() override; + + // Sets search query params. + void SetParams(Params const & params); + + // Starts geocoding, retrieved features will be appended to + // |results|. 
+ void GoEverywhere(PreRanker & preRanker); + void GoInViewport(PreRanker & preRanker); + + void ClearCaches(); + +private: + enum RectId + { + RECT_ID_PIVOT, + RECT_ID_LOCALITY, + RECT_ID_COUNT + }; + + struct Postcodes + { + void Clear() + { + m_startToken = 0; + m_endToken = 0; + m_features.reset(); + } + + inline bool IsEmpty() const { return coding::CompressedBitVector::IsEmpty(m_features); } + + size_t m_startToken = 0; + size_t m_endToken = 0; + unique_ptr m_features; + }; + + void GoImpl(PreRanker & preRanker, vector> & infos, bool inViewport); + + template + using TLocalitiesCache = map, vector>; + + QueryParams::TSynonymsVector const & GetTokens(size_t i) const; + + // Fills |m_retrievalParams| with [curToken, endToken) subsequence + // of search query tokens. + void PrepareRetrievalParams(size_t curToken, size_t endToken); + + // Creates a cache of posting lists corresponding to features in m_context + // for each token and saves it to m_addressFeatures. + void PrepareAddressFeatures(); + + void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, + FeaturesLayer & layer); + + void FillLocalityCandidates(coding::CompressedBitVector const * filter, + size_t const maxNumLocalities, vector & preLocalities); + + void FillLocalitiesTable(); + + void FillVillageLocalities(); + + template + void ForEachCountry(vector> const & infos, TFn && fn); + + // Throws CancelException if cancelled. + inline void BailIfCancelled() + { + ::search::BailIfCancelled(static_cast(*this)); + } + + // Tries to find all countries and states in a search query and then + // performs matching of cities in found maps. + void MatchRegions(RegionType type); + + // Tries to find all cities in a search query and then performs + // matching of streets in found cities. + void MatchCities(); + + // Tries to do geocoding without localities, ie. find POIs, + // BUILDINGs and STREETs without knowledge about country, state, + // city or village. 
If during the geocoding too many features are + // retrieved, viewport is used to throw away excess features. + void MatchAroundPivot(); + + // Tries to do geocoding in a limited scope, assuming that knowledge + // about high-level features, like cities or countries, is + // incorporated into |filter|. + void LimitedSearch(FeaturesFilter const & filter); + + template + void WithPostcodes(TFn && fn); + + // Tries to match some adjacent tokens in the query as streets and + // then performs geocoding in street vicinities. + void GreedilyMatchStreets(); + + void CreateStreetsLayerAndMatchLowerLayers( + size_t startToken, size_t endToken, unique_ptr const & features); + + // Tries to find all paths in a search tree, where each edge is + // marked with some substring of the query tokens. These paths are + // called "layer sequence" and current path is stored in |m_layers|. + void MatchPOIsAndBuildings(size_t curToken); + + // Returns true if current path in the search tree (see comment for + // MatchPOIsAndBuildings()) looks sane. This method is used as a fast + // pre-check to cut off unnecessary work. + bool IsLayerSequenceSane() const; + + // Finds all paths through layers and emits reachable features from + // the lowest layer. + void FindPaths(); + + // Forms result and feeds it to |m_preRanker|. + void EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type, + size_t startToken, size_t endToken); + void EmitResult(Region const & region, size_t startToken, size_t endToken); + void EmitResult(City const & city, size_t startToken, size_t endToken); + + // Computes missing fields for all results in |m_preRanker|. + void FillMissingFieldsInResults(); + + // Tries to match unclassified objects from lower layers, like + // parks, forests, lakes, rivers, etc. This method finds all + // UNCLASSIFIED objects that match to all currently unused tokens. 
+ void MatchUnclassified(size_t curToken); + + unique_ptr LoadCategories( + MwmContext & context, vector const & categories); + + coding::CompressedBitVector const * LoadStreets(MwmContext & context); + + unique_ptr LoadVillages(MwmContext & context); + + // A wrapper around RetrievePostcodeFeatures. + unique_ptr RetrievePostcodeFeatures(MwmContext const & context, + TokenSlice const & slice); + + // A caching wrapper around Retrieval::RetrieveGeometryFeatures. + coding::CompressedBitVector const * RetrieveGeometryFeatures(MwmContext const & context, + m2::RectD const & rect, RectId id); + + // This is a faster wrapper around SearchModel::GetSearchType(), as + // it uses pre-loaded lists of streets and villages. + SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId); + + // Returns true iff all tokens are used. + bool AllTokensUsed() const; + + // Returns true if there exists at least one used token in [from, + // to). + bool HasUsedTokensInRange(size_t from, size_t to) const; + + // Counts number of groups of consecutive unused tokens. + size_t NumUnusedTokensGroups() const; + + // Advances |curToken| to the nearest unused token, or to the end of + // |m_usedTokens| if there are no unused tokens. + size_t SkipUsedTokens(size_t curToken) const; + + Index & m_index; + + storage::CountryInfoGetter const & m_infoGetter; + + // Geocoder params. + Params m_params; + + // Total number of search query tokens. + size_t m_numTokens; + + // This field is used to map features to a limited number of search + // classes. + SearchModel const & m_model; + + // Following fields are set up by Search() method and can be + // modified and used only from Search() or its callees. + + MwmSet::MwmId m_worldId; + + // Context of the currently processed mwm. + unique_ptr m_context; + + // m_cities stores both big cities that are visible at World.mwm + // and small villages and hamlets that are not. 
+ TLocalitiesCache m_cities; + TLocalitiesCache m_regions[REGION_TYPE_COUNT]; + + // Caches of features in rects. These caches are separated from + // TLocalitiesCache because the latter are quite lightweight and not + // all of them are needed. + PivotRectsCache m_pivotRectsCache; + LocalityRectsCache m_localityRectsCache; + + // Cache of nested rects used to estimate distance from a feature to the pivot. + NestedRectsCache m_pivotFeatures; + + // Cache of posting lists for each token in the query. TODO (@y, + // @m, @vng): consider to update this cache lazily, as user inputs + // tokens one-by-one. + vector> m_addressFeatures; + + // Cache of street ids in mwms. + map> m_streetsCache; + + // Street features in the mwm that is currently being processed. + // The initialization of m_streets is postponed in order to gain + // some speed. Therefore m_streets may be used only in + // LimitedSearch() and in all its callees. + coding::CompressedBitVector const * m_streets; + + // Village features in the mwm that is currently being processed. + unique_ptr m_villages; + + // Postcodes features in the mwm that is currently being processed. + Postcodes m_postcodes; + + // This vector is used to indicate what tokens were matched by + // locality and can't be re-used during the geocoding process. + vector m_usedTokens; + + // This filter is used to throw away excess features. + FeaturesFilter const * m_filter; + + // Features matcher for layers intersection. + map> m_matchersCache; + FeaturesLayerMatcher * m_matcher; + + // Path finder for interpretations. + FeaturesLayerPathFinder m_finder; + + // Search query params prepared for retrieval. + QueryParams m_retrievalParams; + + // Pointer to the most nested region filled during geocoding. + Region const * m_lastMatchedRegion; + + // Stack of layers filled during geocoding. + vector m_layers; + + // Non-owning. 
+ PreRanker * m_preRanker; +}; + +string DebugPrint(Geocoder::Locality const & locality); + +} // namespace search diff --git a/search/geometry_cache.cpp b/search/geometry_cache.cpp new file mode 100644 index 0000000000..275c2ff875 --- /dev/null +++ b/search/geometry_cache.cpp @@ -0,0 +1,79 @@ +#include "search/geometry_cache.hpp" + +#include "search/geometry_utils.hpp" +#include "search/mwm_context.hpp" +#include "search/retrieval.hpp" + +#include "geometry/mercator.hpp" + +namespace search +{ +namespace +{ +double constexpr kCellEps = MercatorBounds::GetCellID2PointAbsEpsilon(); +} // namespace + +// GeometryCache ----------------------------------------------------------------------------------- +GeometryCache::GeometryCache(size_t maxNumEntries, my::Cancellable const & cancellable) + : m_maxNumEntries(maxNumEntries), m_cancellable(cancellable) +{ + CHECK_GREATER(m_maxNumEntries, 0, ()); +} + +void GeometryCache::InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, + Entry & entry) +{ + entry.m_rect = rect; + entry.m_cbv = RetrieveGeometryFeatures(context, m_cancellable, rect, scale); + entry.m_scale = scale; +} + +// PivotRectsCache --------------------------------------------------------------------------------- +PivotRectsCache::PivotRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable, + double maxRadiusMeters) + : GeometryCache(maxNumEntries, cancellable), m_maxRadiusMeters(maxRadiusMeters) +{ +} + +coding::CompressedBitVector const * PivotRectsCache::Get(MwmContext const & context, + m2::RectD const & rect, int scale) +{ + auto p = FindOrCreateEntry( + context.GetId(), [&rect, &scale](Entry const & entry) + { + return scale == entry.m_scale && + (entry.m_rect.IsRectInside(rect) || IsEqualMercator(rect, entry.m_rect, kCellEps)); + }); + auto & entry = p.first; + if (p.second) + { + m2::RectD normRect = + MercatorBounds::RectByCenterXYAndSizeInMeters(rect.Center(), m_maxRadiusMeters); + if (!normRect.IsRectInside(rect)) + 
normRect = rect; + InitEntry(context, normRect, scale, entry); + } + return entry.m_cbv.get(); +} + +// LocalityRectsCache ------------------------------------------------------------------------------ +LocalityRectsCache::LocalityRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable) + : GeometryCache(maxNumEntries, cancellable) +{ +} + +coding::CompressedBitVector const * LocalityRectsCache::Get(MwmContext const & context, + m2::RectD const & rect, int scale) +{ + auto p = FindOrCreateEntry(context.GetId(), [&rect, &scale](Entry const & entry) + { + return scale == entry.m_scale && + IsEqualMercator(rect, entry.m_rect, kCellEps); + }); + auto & entry = p.first; + if (p.second) + InitEntry(context, rect, scale, entry); + return entry.m_cbv.get(); +} + +} // namespace search diff --git a/search/geometry_cache.hpp b/search/geometry_cache.hpp new file mode 100644 index 0000000000..d5d305301b --- /dev/null +++ b/search/geometry_cache.hpp @@ -0,0 +1,107 @@ +#pragma once + +#include "indexer/mwm_set.hpp" + +#include "coding/compressed_bit_vector.hpp" + +#include "geometry/rect2d.hpp" + +#include "base/assert.hpp" + +#include "std/algorithm.hpp" +#include "std/cstdint.hpp" +#include "std/deque.hpp" +#include "std/map.hpp" +#include "std/unique_ptr.hpp" +#include "std/utility.hpp" + +namespace my +{ +class Cancellable; +}; + +namespace search +{ +class MwmContext; + +// This class represents a simple cache of features in rects for all mwms. +// +// *NOTE* This class is not thread-safe. +class GeometryCache +{ +public: + virtual ~GeometryCache() = default; + + // Returns (hopefully, cached) list of features in a given + // rect. Note that return value may be invalidated on next calls to + // this method. 
+ virtual coding::CompressedBitVector const * Get(MwmContext const & context, + m2::RectD const & rect, int scale) = 0; + + inline void Clear() { m_entries.clear(); } + +protected: + struct Entry + { + m2::RectD m_rect; + unique_ptr m_cbv; + int m_scale = 0; + }; + + // |maxNumEntries| denotes the maximum number of rectangles that + // will be cached for each mwm individually. + GeometryCache(size_t maxNumEntries, my::Cancellable const & cancellable); + + template + pair FindOrCreateEntry(MwmSet::MwmId const & id, TPred && pred) + { + auto & entries = m_entries[id]; + auto it = find_if(entries.begin(), entries.end(), forward(pred)); + if (it != entries.end()) + { + if (it != entries.begin()) + iter_swap(entries.begin(), it); + return pair(entries.front(), false); + } + + entries.emplace_front(); + if (entries.size() == m_maxNumEntries + 1) + entries.pop_back(); + + ASSERT_LESS_OR_EQUAL(entries.size(), m_maxNumEntries, ()); + ASSERT(!entries.empty(), ()); + return pair(entries.front(), true); + } + + void InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry); + + map> m_entries; + size_t const m_maxNumEntries; + my::Cancellable const & m_cancellable; +}; + +class PivotRectsCache : public GeometryCache +{ +public: + PivotRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable, + double maxRadiusMeters); + + // GeometryCache overrides: + coding::CompressedBitVector const * Get(MwmContext const & context, m2::RectD const & rect, + int scale) override; + +private: + double const m_maxRadiusMeters; +}; + +class LocalityRectsCache : public GeometryCache +{ +public: + LocalityRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable); + + // GeometryCache overrides: + coding::CompressedBitVector const * Get(MwmContext const & context, m2::RectD const & rect, + int scale) override; +}; + +} // namespace search diff --git a/search/house_numbers_matcher.cpp b/search/house_numbers_matcher.cpp new file mode 100644 index 
0000000000..504b75d859 --- /dev/null +++ b/search/house_numbers_matcher.cpp @@ -0,0 +1,320 @@ +#include "search/house_numbers_matcher.hpp" + +#include "std/algorithm.hpp" +#include "std/iterator.hpp" +#include "std/limits.hpp" +#include "std/sstream.hpp" + +#include "base/logging.hpp" + +using namespace strings; + +namespace search +{ +namespace +{ +size_t constexpr kInvalidNum = numeric_limits::max(); + +HouseNumberTokenizer::CharClass GetCharClass(UniChar c) +{ + static UniString const kSeps = MakeUniString("\"\\/(),. \t№#-"); + if (c >= '0' && c <= '9') + return HouseNumberTokenizer::CharClass::Digit; + if (find(kSeps.begin(), kSeps.end(), c) != kSeps.end()) + return HouseNumberTokenizer::CharClass::Separator; + return HouseNumberTokenizer::CharClass::Other; +} + +bool IsShortWord(HouseNumberTokenizer::Token const & t) +{ + return t.m_klass == HouseNumberTokenizer::CharClass::Other && t.m_token.size() <= 3; +} + +bool IsNumber(HouseNumberTokenizer::Token const & t) +{ + return t.m_klass == HouseNumberTokenizer::CharClass::Digit; +} + +bool IsNumberOrShortWord(HouseNumberTokenizer::Token const & t) +{ + return IsNumber(t) || IsShortWord(t); +} + +bool IsBuildingSynonymPrefix(UniString const & p) +{ + static UniString kSynonyms[] = { + MakeUniString("building"), MakeUniString("bld"), MakeUniString("unit"), + MakeUniString("block"), MakeUniString("blk"), MakeUniString("корпус"), + MakeUniString("литер"), MakeUniString("строение"), MakeUniString("блок")}; + + for (UniString const & s : kSynonyms) + { + if (StartsWith(s, p)) + return true; + } + return false; +} + +size_t GetNumTokensForBuildingPart(vector const & ts, size_t i, + vector & memory); + +size_t GetNumTokensForBuildingPartImpl(vector const & ts, size_t i, + vector & memory) +{ + ASSERT_LESS(i, ts.size(), ()); + + auto const & token = ts[i]; + if (token.m_klass != HouseNumberTokenizer::CharClass::Other) + return 0; + + if (!IsBuildingSynonymPrefix(token.m_token)) + return 0; + + // No sense in single 
"корпус" or "литер". + if (i + 1 >= ts.size()) + return 0; + + if (!IsNumberOrShortWord(ts[i + 1])) + return 0; + + // No sense in "корпус корпус" or "литер литер". + if (ts[i + 1].m_token == token.m_token) + return 0; + + // Consume next token, either number or short word. + size_t j = i + 2; + + // Consume one more number of short word, if possible. + if (j < ts.size() && IsNumberOrShortWord(ts[j]) && ts[j].m_klass != ts[j - 1].m_klass && + GetNumTokensForBuildingPart(ts, j, memory) == 0) + { + ++j; + } + + return j - i; +} + +// Returns number of tokens starting at position |i|, where the first +// token is some way of writing of "корпус", or "building", second +// token is a number or a letter, and (possibly) third token which can +// be a letter when second token is a number. |memory| is used here to +// store results of previous calls and prevents degradation to +// non-linear time. +// +// TODO (@y, @m): the parser is quite complex now. Consider to just +// throw out all prefixes of "building" or "литер" and sort rest +// tokens. Number of false positives will be higher but the parser +// will be more robust, simple and faster. +size_t GetNumTokensForBuildingPart(vector const & ts, size_t i, + vector & memory) +{ + if (i >= ts.size()) + return 0; + if (memory[i] == kInvalidNum) + memory[i] = GetNumTokensForBuildingPartImpl(ts, i, memory); + return memory[i]; +} + +void MergeTokens(vector const & ts, vector & rs) +{ + vector memory(ts.size(), kInvalidNum); + + size_t i = 0; + while (i < ts.size()) + { + switch (ts[i].m_klass) + { + case HouseNumberTokenizer::CharClass::Digit: + { + UniString token = ts[i].m_token; + ++i; + // Process cases like "123 б" or "9PQ". 
+ if (i < ts.size() && IsShortWord(ts[i]) && GetNumTokensForBuildingPart(ts, i, memory) == 0) + { + token.append(ts[i].m_token.begin(), ts[i].m_token.end()); + ++i; + } + rs.push_back(move(token)); + break; + } + case HouseNumberTokenizer::CharClass::Separator: + { + ASSERT(false, ("Seps can't be merged.")); + ++i; + break; + } + case HouseNumberTokenizer::CharClass::Other: + { + if (size_t numTokens = GetNumTokensForBuildingPart(ts, i, memory)) + { + UniString token; + ++i; + for (size_t j = 1; j < numTokens; ++j, ++i) + token.append(ts[i].m_token.begin(), ts[i].m_token.end()); + rs.push_back(move(token)); + break; + } + + rs.push_back(ts[i].m_token); + ++i; + break; + } + } + } + + if (!rs.empty()) + sort(rs.begin() + 1, rs.end()); +} + +bool ParsesMatch(Parse const & houseNumberParse, Parse const & queryParse) +{ + if (houseNumberParse.IsEmpty() || queryParse.IsEmpty()) + return false; + + auto const & h = houseNumberParse.m_parts; + auto const & q = queryParse.m_parts; + + // Check first tokens, hope, house numbers. + if (h[0] != q[0]) + return false; + + size_t i = 1, j = 1; + while (i != h.size() && j != q.size()) + { + while (i != h.size() && h[i] < q[j]) + ++i; + if (i == h.size() || h[i] != q[j]) + return false; + ++i; + ++j; + } + + if (queryParse.m_hasTrailingBuildingPrefixSynonym) + { + // In this case, at least one more unmatched part must be in a + // house number. 
+ return j == q.size() && h.size() > q.size(); + } + + return j == q.size(); +} +} // namespace + +// static +void HouseNumberTokenizer::Tokenize(UniString const & s, vector & ts) +{ + size_t i = 0; + while (i < s.size()) + { + CharClass klass = GetCharClass(s[i]); + + size_t j = i; + while (j < s.size() && GetCharClass(s[j]) == klass) + ++j; + + if (klass != CharClass::Separator) + { + UniString token(s.begin() + i, s.begin() + j); + ts.emplace_back(move(token), klass); + } + + i = j; + } +} + +void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps) +{ + vector tokens; + HouseNumberTokenizer::Tokenize(MakeLowerCase(query), tokens); + + { + ps.emplace_back(); + Parse & p = ps.back(); + MergeTokens(tokens, p.m_parts); + } + + // *NOTE* |tokens| is modified in the following block. + if (queryIsPrefix && !tokens.empty() && + tokens.back().m_klass == HouseNumberTokenizer::CharClass::Other && + IsBuildingSynonymPrefix(tokens.back().m_token)) + { + tokens.pop_back(); + ps.emplace_back(); + Parse & p = ps.back(); + MergeTokens(tokens, p.m_parts); + p.m_hasTrailingBuildingPrefixSynonym = true; + } +} + +bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query, + bool queryIsPrefix) +{ + if (houseNumber == query) + return true; + + vector queryParses; + ParseQuery(query, queryIsPrefix, queryParses); + + return HouseNumbersMatch(houseNumber, queryParses); +} + +bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses) +{ + if (houseNumber.empty() || queryParses.empty()) + return false; + + // Fast pre-check, helps to early exit without complex house number + // parsing. 
+ bool good = false; + for (auto const & queryParse : queryParses) + { + if (!queryParse.IsEmpty() && houseNumber[0] == queryParse.m_parts.front()[0]) + { + good = true; + break; + } + } + if (!good) + return false; + + Parse houseNumberParse; + { + vector tokens; + HouseNumberTokenizer::Tokenize(MakeLowerCase(houseNumber), tokens); + MergeTokens(tokens, houseNumberParse.m_parts); + } + + for (auto const & queryParse : queryParses) + { + if (ParsesMatch(houseNumberParse, queryParse)) + return true; + } + return false; +} + +string DebugPrint(HouseNumberTokenizer::CharClass charClass) +{ + switch (charClass) + { + case HouseNumberTokenizer::CharClass::Separator: return "Separator"; + case HouseNumberTokenizer::CharClass::Digit: return "Digit"; + case HouseNumberTokenizer::CharClass::Other: return "Other"; + } + return "Unknown"; +} + +string DebugPrint(HouseNumberTokenizer::Token const & token) +{ + ostringstream os; + os << "Token [" << DebugPrint(token.m_token) << ", " << DebugPrint(token.m_klass) << "]"; + return os.str(); +} + +string DebugPrint(Parse const & parse) +{ + ostringstream os; + os << "Parse [" << DebugPrint(parse.m_parts) << "]"; + return os.str(); +} + +} // namespace search diff --git a/search/house_numbers_matcher.hpp b/search/house_numbers_matcher.hpp new file mode 100644 index 0000000000..72299a1777 --- /dev/null +++ b/search/house_numbers_matcher.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "base/string_utils.hpp" + +#include "std/string.hpp" +#include "std/vector.hpp" + +namespace search +{ +// This class splits a string representing a house number to groups of +// symbols from the same class (separators, digits or other symbols, +// hope, letters). 
+class HouseNumberTokenizer
+{
+public:
+  // Classes of characters distinguished by the tokenizer.
+  enum class CharClass
+  {
+    Separator,
+    Digit,
+    Other,
+  };
+
+  // A run of characters that all belong to the same class.
+  struct Token
+  {
+    // Creates an empty token of the Separator class.
+    Token() : m_klass(CharClass::Separator) {}
+    Token(strings::UniString const & token, CharClass klass) : m_token(token), m_klass(klass) {}
+    Token(strings::UniString && token, CharClass klass) : m_token(move(token)), m_klass(klass) {}
+
+    strings::UniString m_token;
+    CharClass m_klass;
+  };
+
+  // Performs greedy split of |s| by character classes. Note that this
+  // function never emits Tokens corresponding to Separator classes.
+  // Tokens are appended to |ts|, previous contents of |ts| are kept.
+  static void Tokenize(strings::UniString const & s, vector<Token> & ts);
+};
+
+// Result of parsing a house number (or a house number query).
+struct Parse
+{
+  inline bool IsEmpty() const { return m_parts.empty(); }
+
+  // Merged tokens. The first part keeps its position (it is expected
+  // to be the house number itself), the rest of parts are sorted.
+  vector<strings::UniString> m_parts;
+
+  // True when the last token of a prefix query looked like a
+  // beginning of a building synonym and was dropped from |m_parts|.
+  // Such a parse matches only house numbers having at least one more
+  // part than the query.
+  bool m_hasTrailingBuildingPrefixSynonym = false;
+};
+
+// Parses query for later faster processing, when multiple buildings
+// are matched against the query. All found parses are appended to
+// |ps|.
+void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector<Parse> & ps);
+
+// Returns true when |query| matches to |houseNumber|.
+bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query,
+                       bool queryIsPrefix);
+
+// Returns true when at least one parse of the query matches to
+// |houseNumber|.
+bool HouseNumbersMatch(strings::UniString const & houseNumber, vector<Parse> const & queryParses);
+
+string DebugPrint(HouseNumberTokenizer::CharClass charClass);
+
+string DebugPrint(HouseNumberTokenizer::Token const & token);
+
+string DebugPrint(Parse const & parse);
+
+}  // namespace search
diff --git a/search/house_to_street_table.cpp b/search/house_to_street_table.cpp
new file mode 100644
index 0000000000..6321b0c636
--- /dev/null
+++ b/search/house_to_street_table.cpp
@@ -0,0 +1,69 @@
+#include "search/house_to_street_table.hpp"
+
+#include "indexer/index.hpp"
+
+#include "platform/mwm_traits.hpp"
+
+#include "coding/fixed_bits_ddvector.hpp"
+#include "coding/reader.hpp"
+
+#include "base/assert.hpp"
+#include "base/logging.hpp"
+
+#include "defines.hpp"
+
+namespace search
+{
+namespace
+{
+// House-to-street table backed by a FixedBitsDDVector<3, ...> that is
+// read from the SEARCH_ADDRESS_FILE_TAG section of the mwm container.
+class Fixed3BitsTable : public HouseToStreetTable
+{
+public:
+  using TVector = FixedBitsDDVector<3, ModelReaderPtr>;
+
+  // Opens the address section of |value|. Errors raised while
+  // getting the reader are not handled here, see
+  // HouseToStreetTable::Load().
+  explicit Fixed3BitsTable(MwmValue & value)
+    : m_vector(TVector::Create(value.m_cont.GetReader(SEARCH_ADDRESS_FILE_TAG)))
+  {
+    ASSERT(m_vector.get(), ("Can't instantiate FixedBitsDDVector."));
+  }
+
+  // HouseToStreetTable overrides:
+  bool Get(uint32_t houseId, uint32_t & streetIndex) const override
+  {
+    return m_vector->Get(houseId, streetIndex);
+  }
+
+private:
+  unique_ptr<TVector> m_vector;
+};
+
+// Fallback table that does not know a street for any house.
+class DummyTable : public HouseToStreetTable
+{
+public:
+  // HouseToStreetTable overrides:
+  bool Get(uint32_t /* houseId */, uint32_t & /* streetIndex */) const override { return false; }
+};
+}  // namespace
+
+// Creates a table for |value|. Never returns null: when the mwm does
+// not use the Fixed3BitsDDVector format, or when opening of the
+// address section throws Reader::OpenException (which is logged as a
+// warning), a DummyTable is returned instead.
+unique_ptr<HouseToStreetTable> HouseToStreetTable::Load(MwmValue & value)
+{
+  version::MwmTraits traits(value.GetMwmVersion().GetFormat());
+  auto const format = traits.GetHouseToStreetTableFormat();
+
+  unique_ptr<HouseToStreetTable> result;
+
+  try
+  {
+    if (format == version::MwmTraits::HouseToStreetTableFormat::Fixed3BitsDDVector)
+      result.reset(new Fixed3BitsTable(value));
+  }
+  catch (Reader::OpenException const & ex)
+  {
+    LOG(LWARNING, (ex.Msg()));
+  }
+
+  if (!result)
+    result.reset(new DummyTable());
+  return result;
+}
+
+}  // namespace search
diff --git a/search/house_to_street_table.hpp b/search/house_to_street_table.hpp
new file mode 100644
index 0000000000..f395e635ba
--- /dev/null
+++ b/search/house_to_street_table.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "std/cstdint.hpp"
+#include "std/limits.hpp"
+#include "std/unique_ptr.hpp"
+
+class MwmValue;
+
+namespace search
+{
+// Interface of a table that maps a house to the index of its street.
+class HouseToStreetTable
+{
+public:
+  virtual ~HouseToStreetTable() = default;
+
+  /// @todo Actually, value may be nullptr in the very common case.
+  /// It's better to construct a table from MwmHandle.
+  static unique_ptr<HouseToStreetTable> Load(MwmValue & value);
+
+  // Returns true and stores to |streetIndex| the index number of the
+  // correct street corresponding to the house in the list of streets
+  // generated by ReverseGeocoder. Returns false if there is no such
+  // street.
+  virtual bool Get(uint32_t houseId, uint32_t & streetIndex) const = 0;
+};
+
+}  // namespace search
diff --git a/search/intermediate_result.cpp b/search/intermediate_result.cpp
index 3fd67841fb..5bf687a96a 100644
--- a/search/intermediate_result.cpp
+++ b/search/intermediate_result.cpp
@@ -60,8 +60,7 @@ void ProcessMetadata(FeatureType const & ft, Result::Metadata & meta)
 
 namespace impl
 {
-PreResult1::PreResult1(FeatureID const & fID, v2::PreRankingInfo const & info)
-  : m_id(fID), m_info(info)
+PreResult1::PreResult1(FeatureID const & fID, PreRankingInfo const & info) : m_id(fID), m_info(info)
 {
   ASSERT(m_id.IsValid(), ());
 }
diff --git a/search/intermediate_result.hpp b/search/intermediate_result.hpp
index 840b4db8ac..259abfbb67 100644
--- a/search/intermediate_result.hpp
+++ b/search/intermediate_result.hpp
@@ -1,9 +1,9 @@
 #pragma once
 
+#include "search/pre_ranking_info.hpp"
+#include "search/ranking_info.hpp"
+#include "search/ranking_utils.hpp"
 #include "search/result.hpp"
-#include "search/v2/pre_ranking_info.hpp"
-#include "search/v2/ranking_info.hpp"
-#include "search/v2/ranking_utils.hpp"
 
 #include "indexer/feature_data.hpp"
 
@@ -27,7 +27,7 @@ namespace impl
 class PreResult1
 {
 public:
-  PreResult1(FeatureID const & fID, v2::PreRankingInfo const & info);
+  PreResult1(FeatureID const & fID, PreRankingInfo const & info);
 
   static bool LessRank(PreResult1 const & r1, PreResult1 const & r2);
   static bool LessDistance(PreResult1 const & r1, PreResult1 const & r2);
@@ -35,14 +35,14 @@ public:
   inline FeatureID GetId() const { return m_id; }
   inline double GetDistance() const { return m_info.m_distanceToPivot; }
   inline uint8_t GetRank() const { return m_info.m_rank; }
-  inline v2::PreRankingInfo & GetInfo() { return m_info; }
-  inline v2::PreRankingInfo const & GetInfo() const { return m_info; }
+  inline PreRankingInfo & GetInfo() { return m_info; }
+  inline PreRankingInfo const & GetInfo() const { return m_info; }
 
 private:
   friend class PreResult2;
 
   FeatureID m_id;
-  v2::PreRankingInfo m_info;
+  PreRankingInfo m_info;
 };
 
 /// Second result class. Objects are creating during reading of features.
@@ -66,7 +66,7 @@ public:
   /// For RESULT_LATLON.
   PreResult2(double lat, double lon);
 
-  inline search::v2::RankingInfo const & GetRankingInfo() const { return m_info; }
+  inline search::RankingInfo const & GetRankingInfo() const { return m_info; }
 
   template <typename TInfo>
   inline void SetRankingInfo(TInfo && info)
@@ -143,7 +143,7 @@ private:
   double m_distance;
   ResultType m_resultType;
 
-  v2::RankingInfo m_info;
+  RankingInfo m_info;
   feature::EGeomType m_geomType;
   Result::Metadata m_metadata;
 
diff --git a/search/intersection_result.cpp b/search/intersection_result.cpp
new file mode 100644
index 0000000000..72dd5bdff6
--- /dev/null
+++ b/search/intersection_result.cpp
@@ -0,0 +1,64 @@
+#include "search/intersection_result.hpp"
+
+#include "base/assert.hpp"
+
+#include "std/limits.hpp"
+#include "std/sstream.hpp"
+
+namespace search
+{
+// static
+uint32_t const IntersectionResult::kInvalidId = numeric_limits<uint32_t>::max();
+
+// Creates a result where all features are invalid.
+IntersectionResult::IntersectionResult()
+  : m_poi(kInvalidId), m_building(kInvalidId), m_street(kInvalidId)
+{
+}
+
+// Stores |id| to the field corresponding to |type|. Only POI,
+// BUILDING and STREET types are supported; any other type triggers an
+// assertion and leaves the result unchanged.
+void IntersectionResult::Set(SearchModel::SearchType type, uint32_t id)
+{
+  switch (type)
+  {
+  case SearchModel::SEARCH_TYPE_POI: m_poi = id; break;
+  case SearchModel::SEARCH_TYPE_BUILDING: m_building = id; break;
+  case SearchModel::SEARCH_TYPE_STREET: m_street = id; break;
+  case SearchModel::SEARCH_TYPE_CITY:
+  case SearchModel::SEARCH_TYPE_VILLAGE:
+  case SearchModel::SEARCH_TYPE_STATE:
+  case SearchModel::SEARCH_TYPE_COUNTRY:
+  case SearchModel::SEARCH_TYPE_UNCLASSIFIED:
+  case SearchModel::SEARCH_TYPE_COUNT: ASSERT(false, ("Unsupported type.")); break;
+  }
+}
+
+// Checks fields in the order POI, BUILDING, STREET and returns the
+// first valid one, or kInvalidId when all of them are invalid.
+uint32_t IntersectionResult::InnermostResult() const
+{
+  if (m_poi != kInvalidId)
+    return m_poi;
+  if (m_building != kInvalidId)
+    return m_building;
+  if (m_street != kInvalidId)
+    return m_street;
+  return kInvalidId;
+}
+
+void IntersectionResult::Clear()
+{
+  m_poi = kInvalidId;
+  m_building = kInvalidId;
+  m_street = kInvalidId;
+}
+
+// Prints only valid fields of |result|.
+string DebugPrint(IntersectionResult const & result)
+{
+  ostringstream os;
+  os << "IntersectionResult [ ";
+  if (result.m_poi != IntersectionResult::kInvalidId)
+    os << "POI:" << result.m_poi << " ";
+  if (result.m_building != IntersectionResult::kInvalidId)
+    os << "BUILDING:" << result.m_building << " ";
+  if (result.m_street != IntersectionResult::kInvalidId)
+    os << "STREET:" << result.m_street << " ";
+  os << "]";
+  return os.str();
+}
+
+}  // namespace search
diff --git a/search/intersection_result.hpp b/search/intersection_result.hpp
new file mode 100644
index 0000000000..9285cbe5cd
--- /dev/null
+++ b/search/intersection_result.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "search/search_model.hpp"
+
+#include "std/cstdint.hpp"
+#include "std/string.hpp"
+
+namespace search
+{
+// This class holds higher-level features for an intersection result,
+// i.e. BUILDING and STREET for POI or STREET for BUILDING.
+struct IntersectionResult
+{
+  // Id denoting an absent feature.
+  static uint32_t const kInvalidId;
+
+  IntersectionResult();
+
+  // Stores |id| as the feature of the given |type|.
+  void Set(SearchModel::SearchType type, uint32_t id);
+
+  // Returns the first valid feature among the [POI, BUILDING,
+  // STREET].
+  uint32_t InnermostResult() const;
+
+  // Returns true when at least one valid feature exists.
+  inline bool IsValid() const { return InnermostResult() != kInvalidId; }
+
+  // Clears all fields to an invalid state.
+  void Clear();
+
+  // Feature ids, kInvalidId stands for an absent feature.
+  uint32_t m_poi;
+  uint32_t m_building;
+  uint32_t m_street;
+};
+
+string DebugPrint(IntersectionResult const & result);
+
+}  // namespace search
diff --git a/search/locality_finder.cpp b/search/locality_finder.cpp
index 11c479a63c..4df74233d2 100644
--- a/search/locality_finder.cpp
+++ b/search/locality_finder.cpp
@@ -1,5 +1,6 @@
 #include "search/locality_finder.hpp"
-#include "search/v2/mwm_context.hpp"
+
+#include "search/mwm_context.hpp"
 
 #include "indexer/ftypes_matcher.hpp"
 
@@ -126,7 +127,7 @@ void LocalityFinder::UpdateCache(Cache & cache, m2::PointD const & pt) const
     if (handle.IsAlive() && value->GetHeader().GetType() == feature::DataHeader::world)
     {
       cache.m_rect = rect;
-      v2::MwmContext(move(handle)).ForEachFeature(rect, DoLoader(*this, cache));
+      MwmContext(move(handle)).ForEachFeature(rect, DoLoader(*this, cache));
       break;
     }
   }
diff --git a/search/locality_scorer.cpp b/search/locality_scorer.cpp
new file mode 100644
index 0000000000..48eff3e136
--- /dev/null
+++ b/search/locality_scorer.cpp
@@ -0,0 +1,137 @@
+#include "search/locality_scorer.hpp"
+
+#include "search/token_slice.hpp"
+
+#include "std/algorithm.hpp"
+
+namespace search
+{
+// static
+size_t const LocalityScorer::kDefaultReadLimit = 100;
+
+namespace
+{
+// Returns true when |score| is either a full match or a full match
+// of a prefix.
+bool IsAlmostFullMatch(NameScore score)
+{
+  return score == NAME_SCORE_FULL_MATCH_PREFIX || score == NAME_SCORE_FULL_MATCH;
+}
+}  // namespace
+
+// LocalityScorer::ExLocality ----------------------------------------------------------------------
+LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScore(NAME_SCORE_ZERO)
+{
+}
+
+// Wraps |locality|. Rank and name score are zero until they are
+// computed by LeaveTopByRank() and SortByName().
+LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality)
+  : m_locality(locality)
+  , m_numTokens(locality.m_endToken - locality.m_startToken)
+  , m_rank(0)
+  , m_nameScore(NAME_SCORE_ZERO)
+{
+}
+
+// LocalityScorer ----------------------------------------------------------------------------------
+LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate)
+  : m_params(params), m_delegate(delegate)
+{
+}
+
+// Replaces |localities| by at most |limit| best of them: duplicates
+// (by feature id) are removed, then at most
+// max(|limit|, kDefaultReadLimit) localities with highest ranks are
+// left, then they are sorted by name score and truncated to |limit|.
+void LocalityScorer::GetTopLocalities(size_t limit, vector<Geocoder::Locality> & localities) const
+{
+  vector<ExLocality> ls;
+  ls.reserve(localities.size());
+  for (auto const & locality : localities)
+    ls.emplace_back(locality);
+
+  RemoveDuplicates(ls);
+  // NOTE(review): LeaveTopByRank() loads ranks only when it actually
+  // truncates the list, so for short lists m_rank stays zero and the
+  // final tie-break by rank in SortByName() has no effect. Confirm
+  // that this is intended.
+  LeaveTopByRank(std::max(limit, kDefaultReadLimit), ls);
+  SortByName(ls);
+  if (ls.size() > limit)
+    ls.resize(limit);
+
+  localities.clear();
+  localities.reserve(ls.size());
+  for (auto const & l : ls)
+    localities.push_back(l.m_locality);
+}
+
+// Leaves a single locality for each feature id: the one with the
+// largest number of matched tokens.
+void LocalityScorer::RemoveDuplicates(vector<ExLocality> & ls) const
+{
+  // Localities with the same id become adjacent, the one with the
+  // largest number of tokens goes first and survives unique() below.
+  sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+       {
+         if (lhs.GetId() != rhs.GetId())
+           return lhs.GetId() < rhs.GetId();
+         return lhs.m_numTokens > rhs.m_numTokens;
+       });
+  ls.erase(unique(ls.begin(), ls.end(),
+                  [](ExLocality const & lhs, ExLocality const & rhs)
+                  {
+                    return lhs.GetId() == rhs.GetId();
+                  }),
+           ls.end());
+}
+
+// When |ls| has more than |limit| elements, loads ranks from the
+// delegate and leaves |limit| localities with highest ranks (ties are
+// broken by the number of matched tokens). Otherwise does nothing, in
+// particular, ranks are not loaded.
+void LocalityScorer::LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const
+{
+  if (ls.size() <= limit)
+    return;
+
+  for (auto & l : ls)
+    l.m_rank = m_delegate.GetRank(l.GetId());
+
+  sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+       {
+         if (lhs.m_rank != rhs.m_rank)
+           return lhs.m_rank > rhs.m_rank;
+         return lhs.m_numTokens > rhs.m_numTokens;
+       });
+  ls.resize(limit);
+}
+
+// Computes for each locality the best name score among all its names
+// against the query tokens [m_startToken, m_endToken) and sorts |ls|
+// so that best matches go first.
+void LocalityScorer::SortByName(vector<ExLocality> & ls) const
+{
+  vector<string> names;
+  for (auto & l : ls)
+  {
+    names.clear();
+    m_delegate.GetNames(l.GetId(), names);
+
+    auto score = NAME_SCORE_ZERO;
+    for (auto const & name : names)
+    {
+      score = max(score, GetNameScore(name, TokenSlice(m_params, l.m_locality.m_startToken,
+                                                       l.m_locality.m_endToken)));
+    }
+    l.m_nameScore = score;
+  }
+
+  sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs)
+       {
+         if (IsAlmostFullMatch(lhs.m_nameScore) && IsAlmostFullMatch(rhs.m_nameScore))
+         {
+           // When both localities match well, e.g. full or full prefix
+           // match, the one with larger number of tokens is selected. In
+           // case of tie, the one with better score is selected.
+           if (lhs.m_numTokens != rhs.m_numTokens)
+             return lhs.m_numTokens > rhs.m_numTokens;
+           if (lhs.m_nameScore != rhs.m_nameScore)
+             return lhs.m_nameScore > rhs.m_nameScore;
+         }
+         else
+         {
+           // When name scores differ, the one with better name score is
+           // selected. In case of tie, the one with larger number of
+           // matched tokens is selected.
+           if (lhs.m_nameScore != rhs.m_nameScore)
+             return lhs.m_nameScore > rhs.m_nameScore;
+           if (lhs.m_numTokens != rhs.m_numTokens)
+             return lhs.m_numTokens > rhs.m_numTokens;
+         }
+
+         // Okay, in case of tie we select the one with better rank. This
+         // is a quite arbitrary decision and definitely may be improved.
+         return lhs.m_rank > rhs.m_rank;
+       });
+}
+
+}  // namespace search
diff --git a/search/locality_scorer.hpp b/search/locality_scorer.hpp
new file mode 100644
index 0000000000..102350d0ee
--- /dev/null
+++ b/search/locality_scorer.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include "search/geocoder.hpp"
+#include "search/ranking_utils.hpp"
+
+#include "std/string.hpp"
+#include "std/vector.hpp"
+
+namespace search
+{
+struct QueryParams;
+
+// Selects best localities among candidates found by the geocoder.
+class LocalityScorer
+{
+public:
+  // Lower bound for the number of localities that are left after the
+  // selection by rank, see GetTopLocalities().
+  static size_t const kDefaultReadLimit;
+
+  // Source of names and ranks of features.
+  class Delegate
+  {
+  public:
+    virtual ~Delegate() = default;
+
+    virtual void GetNames(uint32_t featureId, vector<string> & names) const = 0;
+    virtual uint8_t GetRank(uint32_t featureId) const = 0;
+  };
+
+  // *NOTE* |params| and |delegate| are stored by reference.
+  LocalityScorer(QueryParams const & params, Delegate const & delegate);
+
+  // Leaves at most |limit| elements of |localities|, ordered by some
+  // combination of ranks and number of matched tokens.
+  void GetTopLocalities(size_t limit, vector<Geocoder::Locality> & localities) const;
+
+private:
+  // Locality extended with values used for scoring.
+  struct ExLocality
+  {
+    ExLocality();
+    explicit ExLocality(Geocoder::Locality const & locality);
+
+    inline uint32_t GetId() const { return m_locality.m_featureId; }
+
+    Geocoder::Locality m_locality;
+    size_t m_numTokens;
+    uint8_t m_rank;
+    NameScore m_nameScore;
+  };
+
+  void RemoveDuplicates(vector<ExLocality> & ls) const;
+  void LeaveTopByRank(size_t limit, vector<ExLocality> & ls) const;
+  void SortByName(vector<ExLocality> & ls) const;
+
+  QueryParams const & m_params;
+  Delegate const & m_delegate;
+};
+
+}  // namespace search
diff --git a/search/mwm_context.cpp b/search/mwm_context.cpp
new file mode 100644
index 0000000000..b89575d420
--- /dev/null
+++ b/search/mwm_context.cpp
@@ -0,0 +1,46 @@
+#include "search/mwm_context.hpp"
+
+namespace search
+{
+// Appends to |result| intervals of the covering of |rect| for the
+// given |scale|.
+void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result)
+{
+  covering::CoveringGetter covering(rect, covering::ViewportWithLowLevels);
+  auto const & intervals = covering.Get(scale);
+  result.insert(result.end(), intervals.begin(), intervals.end());
+}
+
+// Takes ownership of |handle| and initializes readers of features
+// and of the index from the value held by the handle.
+MwmContext::MwmContext(MwmSet::MwmHandle handle)
+  : m_handle(move(handle))
+  , m_value(*m_handle.GetValue<MwmValue>())
+  , m_vector(m_value.m_cont, m_value.GetHeader(), m_value.m_table)
+  , m_index(m_value.m_cont.GetReader(INDEX_FILE_TAG), m_value.m_factory)
+{
+}
+
+// Loads the feature with the given |index| to |ft|, taking the
+// version from the editor for modified and created features.
+// Returns false for features deleted in the editor.
+bool MwmContext::GetFeature(uint32_t index, FeatureType & ft) const
+{
+  switch (GetEditedStatus(index))
+  {
+  case osm::Editor::FeatureStatus::Deleted: return false;
+  case osm::Editor::FeatureStatus::Modified:
+  case osm::Editor::FeatureStatus::Created:
+    VERIFY(osm::Editor::Instance().GetEditedFeature(GetId(), index, ft), ());
+    return true;
+  case osm::Editor::FeatureStatus::Untouched:
+    m_vector.GetByIndex(index, ft);
+    ft.SetID(FeatureID(GetId(), index));
+    return true;
+  }
+
+  // The switch above returns for every status it handles. Do not
+  // flow off the end of a non-void function when the editor reports
+  // an unexpected status: treat such feature as a missing one.
+  ASSERT(false, ("Unexpected feature status."));
+  return false;
+}
+
+// Forwards the request to the house-to-street table, which is loaded
+// on the first call.
+bool MwmContext::GetStreetIndex(uint32_t houseId, uint32_t & streetId)
+{
+  if (!m_houseToStreetTable)
+  {
+    m_houseToStreetTable = HouseToStreetTable::Load(m_value);
+    ASSERT(m_houseToStreetTable, ());
+  }
+  return m_houseToStreetTable->Get(houseId, streetId);
+}
+
+}  // namespace search
diff --git a/search/mwm_context.hpp b/search/mwm_context.hpp
new file mode 100644
index 0000000000..200f5cd053
--- /dev/null
+++ b/search/mwm_context.hpp
@@ -0,0 +1,94 @@
+#pragma once
+
+#include "search/house_to_street_table.hpp"
+
+#include "indexer/features_vector.hpp"
+#include "indexer/index.hpp"
+#include "indexer/scale_index.hpp"
+
+#include "base/macros.hpp"
+
+#include "std/unique_ptr.hpp"
+
+class MwmValue;
+
+namespace search
+{
+// Appends to |result| intervals of the covering of |rect| for the
+// given |scale|.
+void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result);
+
+/// @todo Move this class into "index" library and make it more generic.
+/// Now it duplicates "Index" functionality.
+class MwmContext +{ +public: + MwmSet::MwmHandle m_handle; + MwmValue & m_value; + +private: + FeaturesVector m_vector; + ScaleIndex m_index; + unique_ptr m_houseToStreetTable; + +public: + explicit MwmContext(MwmSet::MwmHandle handle); + + inline MwmSet::MwmId const & GetId() const { return m_handle.GetId(); } + inline string const & GetName() const { return GetInfo()->GetCountryName(); } + inline shared_ptr const & GetInfo() const { return GetId().GetInfo(); } + + template + void ForEachIndex(covering::IntervalsT const & intervals, uint32_t scale, TFn && fn) const + { + ForEachIndexImpl(intervals, scale, [&](uint32_t index) + { + // TODO: Optimize deleted checks by getting vector of deleted indexes from + // the Editor. + if (GetEditedStatus(index) != osm::Editor::FeatureStatus::Deleted) + fn(index); + }); + } + + template + void ForEachFeature(m2::RectD const & rect, TFn && fn) const + { + uint32_t const scale = m_value.GetHeader().GetLastScale(); + covering::IntervalsT intervals; + CoverRect(rect, scale, intervals); + + ForEachIndexImpl(intervals, scale, [&](uint32_t index) + { + FeatureType ft; + if (GetFeature(index, ft)) + fn(ft); + }); + } + + // @returns false if feature was deleted by user. 
+ bool GetFeature(uint32_t index, FeatureType & ft) const; + + bool GetStreetIndex(uint32_t houseId, uint32_t & streetId); + +private: + osm::Editor::FeatureStatus GetEditedStatus(uint32_t index) const + { + return osm::Editor::Instance().GetFeatureStatus(GetId(), index); + } + + template + void ForEachIndexImpl(covering::IntervalsT const & intervals, uint32_t scale, TFn && fn) const + { + CheckUniqueIndexes checkUnique(m_value.GetHeader().GetFormat() >= version::Format::v5); + for (auto const & i : intervals) + m_index.ForEachInIntervalAndScale( + [&](uint32_t index) + { + if (checkUnique(index)) + fn(index); + }, + i.first, i.second, scale); + } + + DISALLOW_COPY_AND_MOVE(MwmContext); +}; + +} // namespace search diff --git a/search/nested_rects_cache.cpp b/search/nested_rects_cache.cpp new file mode 100644 index 0000000000..07c5ae68ce --- /dev/null +++ b/search/nested_rects_cache.cpp @@ -0,0 +1,102 @@ +#include "search/nested_rects_cache.hpp" + +#include "search/ranking_info.hpp" + +#include "indexer/index.hpp" + +#include "geometry/mercator.hpp" +#include "geometry/rect2d.hpp" + +#include "base/assert.hpp" +#include "base/stl_add.hpp" + +#include "std/algorithm.hpp" + +namespace search +{ +namespace +{ +double const kPositionToleranceMeters = 15.0; +} // namespace + +NestedRectsCache::NestedRectsCache(Index & index) + : m_index(index), m_scale(0), m_position(0, 0), m_valid(false) +{ +} + +void NestedRectsCache::SetPosition(m2::PointD const & position, int scale) +{ + double distance = MercatorBounds::DistanceOnEarth(position, m_position); + if (distance < kPositionToleranceMeters && scale == m_scale && m_valid) + return; + m_position = position; + m_scale = scale; + Update(); +} + +double NestedRectsCache::GetDistanceToFeatureMeters(FeatureID const & id) const +{ + if (!m_valid) + return RankingInfo::kMaxDistMeters; + + size_t bucket = 0; + for (; bucket != RECT_SCALE_COUNT; ++bucket) + { + if (binary_search(m_features[bucket].begin(), m_features[bucket].end(), 
id)) + break; + } + auto const scale = static_cast(bucket); + + if (scale != RECT_SCALE_COUNT) + return GetRadiusMeters(scale); + + if (auto const & info = id.m_mwmId.GetInfo()) + { + auto const & rect = info->m_limitRect; + return max(MercatorBounds::DistanceOnEarth(rect.Center(), m_position), GetRadiusMeters(scale)); + } + + return RankingInfo::kMaxDistMeters; +} + +void NestedRectsCache::Clear() +{ + for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale) + { + m_features[scale].clear(); + m_features[scale].shrink_to_fit(); + } + m_valid = false; +} + +// static +double NestedRectsCache::GetRadiusMeters(RectScale scale) +{ + switch (scale) + { + case RECT_SCALE_TINY: return 100.0; + case RECT_SCALE_SMALL: return 300.0; + case RECT_SCALE_MEDIUM: return 1000.0; + case RECT_SCALE_LARGE: return 2500.0; + case RECT_SCALE_COUNT: return 5000.0; + } +} + +void NestedRectsCache::Update() +{ + for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale) + { + auto & features = m_features[scale]; + + features.clear(); + m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters( + m_position, GetRadiusMeters(static_cast(scale))); + auto addId = MakeBackInsertFunctor(features); + m_index.ForEachFeatureIDInRect(addId, rect, m_scale); + sort(features.begin(), features.end()); + } + + m_valid = true; +} + +} // namespace search diff --git a/search/nested_rects_cache.hpp b/search/nested_rects_cache.hpp new file mode 100644 index 0000000000..919ed29a74 --- /dev/null +++ b/search/nested_rects_cache.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "indexer/feature_decl.hpp" + +#include "geometry/point2d.hpp" + +class Index; + +namespace search +{ +class NestedRectsCache +{ +public: + explicit NestedRectsCache(Index & index); + + void SetPosition(m2::PointD const & position, int scale); + + double GetDistanceToFeatureMeters(FeatureID const & id) const; + + void Clear(); + +private: + enum RectScale + { + RECT_SCALE_TINY, + RECT_SCALE_SMALL, + RECT_SCALE_MEDIUM, + 
RECT_SCALE_LARGE, + + RECT_SCALE_COUNT + }; + + static double GetRadiusMeters(RectScale scale); + + void Update(); + + Index & m_index; + int m_scale; + m2::PointD m_position; + bool m_valid; + + // Sorted lists of features. + vector m_features[RECT_SCALE_COUNT]; +}; + +} // namespace search diff --git a/search/pre_ranker.cpp b/search/pre_ranker.cpp index 2d99624053..54e6f628fc 100644 --- a/search/pre_ranker.cpp +++ b/search/pre_ranker.cpp @@ -1,6 +1,6 @@ #include "search/pre_ranker.hpp" -#include "search/v2/pre_ranking_info.hpp" +#include "search/pre_ranking_info.hpp" #include "base/stl_helpers.hpp" diff --git a/search/pre_ranking_info.cpp b/search/pre_ranking_info.cpp new file mode 100644 index 0000000000..8de435ac55 --- /dev/null +++ b/search/pre_ranking_info.cpp @@ -0,0 +1,20 @@ +#include "search/ranking_info.hpp" + +#include "std/sstream.hpp" + +namespace search +{ +string DebugPrint(PreRankingInfo const & info) +{ + ostringstream os; + os << "PreRankingInfo ["; + os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; + os << "m_startToken:" << info.m_startToken << ","; + os << "m_endToken:" << info.m_endToken << ","; + os << "m_rank:" << info.m_rank << ","; + os << "m_searchType:" << info.m_searchType; + os << "]"; + return os.str(); +} + +} // namespace search diff --git a/search/pre_ranking_info.hpp b/search/pre_ranking_info.hpp new file mode 100644 index 0000000000..3b0cd80c7b --- /dev/null +++ b/search/pre_ranking_info.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include "search/search_model.hpp" + +#include "std/cstdint.hpp" + +namespace search +{ +struct PreRankingInfo +{ + inline size_t GetNumTokens() const { return m_endToken - m_startToken; } + + // An abstract distance from the feature to the pivot. Measurement + // units do not matter here. + double m_distanceToPivot = 0; + + // Tokens [m_startToken, m_endToken) match to the feature name or + // house number. + size_t m_startToken = 0; + size_t m_endToken = 0; + + // Rank of the feature. 
+ uint8_t m_rank = 0; + + // Search type for the feature. + SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT; +}; + +string DebugPrint(PreRankingInfo const & info); + +} // namespace search diff --git a/search/processor.cpp b/search/processor.cpp index d3a4727094..60385621e2 100644 --- a/search/processor.cpp +++ b/search/processor.cpp @@ -5,14 +5,14 @@ #include "search/intermediate_result.hpp" #include "search/latlon_match.hpp" #include "search/locality.hpp" +#include "search/pre_ranking_info.hpp" #include "search/query_params.hpp" +#include "search/ranking_info.hpp" +#include "search/ranking_utils.hpp" #include "search/region.hpp" #include "search/search_common.hpp" #include "search/search_index_values.hpp" #include "search/search_string_intersection.hpp" -#include "search/v2/pre_ranking_info.hpp" -#include "search/v2/ranking_info.hpp" -#include "search/v2/ranking_utils.hpp" #include "storage/country_info_getter.hpp" #include "storage/index.hpp" @@ -181,18 +181,18 @@ m2::RectD GetRectAroundPosition(m2::PointD const & position) } template -void UpdateNameScore(string const & name, TSlice const & slice, v2::NameScore & bestScore) +void UpdateNameScore(string const & name, TSlice const & slice, NameScore & bestScore) { - auto const score = v2::GetNameScore(name, slice); + auto const score = GetNameScore(name, slice); if (score > bestScore) bestScore = score; } template void UpdateNameScore(vector const & tokens, TSlice const & slice, - v2::NameScore & bestScore) + NameScore & bestScore) { - auto const score = v2::GetNameScore(tokens, slice); + auto const score = GetNameScore(tokens, slice); if (score > bestScore) bestScore = score; } @@ -358,7 +358,7 @@ void Processor::SetQuery(string const & query) // get preffered types to show in results m_prefferedTypes.clear(); - ForEachCategoryTypes(v2::QuerySliceOnRawStrings(m_tokens, m_prefix), + ForEachCategoryTypes(QuerySliceOnRawStrings(m_tokens, m_prefix), [&](size_t, uint32_t t) { 
m_prefferedTypes.insert(t); @@ -497,10 +497,10 @@ void Processor::Search(Results & results, size_t limit) if (m_tokens.empty()) SuggestStrings(results); - v2::Geocoder::Params params; + Geocoder::Params params; + InitParams(params); params.m_mode = m_mode; - params.m_pivot = GetPivotRect(); params.m_accuratePivotCenter = GetPivotPoint(); m_geocoder.SetParams(params); @@ -512,7 +512,8 @@ void Processor::Search(Results & results, size_t limit) void Processor::SearchViewportPoints(Results & results) { - v2::Geocoder::Params params; + Geocoder::Params params; + InitParams(params); params.m_pivot = m_viewport[CURRENT_V]; params.m_accuratePivotCenter = params.m_pivot.Center(); @@ -551,7 +552,7 @@ namespace impl class PreResult2Maker { Processor & m_processor; - v2::Geocoder::Params const & m_params; + Geocoder::Params const & m_params; unique_ptr m_pFV; @@ -577,7 +578,7 @@ class PreResult2Maker } void InitRankingInfo(FeatureType const & ft, m2::PointD const & center, - impl::PreResult1 const & res, search::v2::RankingInfo & info) + impl::PreResult1 const & res, search::RankingInfo & info) { auto const & preInfo = res.GetInfo(); @@ -586,11 +587,10 @@ class PreResult2Maker info.m_distanceToPivot = MercatorBounds::DistanceOnEarth(center, pivot); info.m_rank = preInfo.m_rank; info.m_searchType = preInfo.m_searchType; - info.m_nameScore = v2::NAME_SCORE_ZERO; + info.m_nameScore = NAME_SCORE_ZERO; - v2::TokenSlice slice(m_params, preInfo.m_startToken, preInfo.m_endToken); - v2::TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_startToken, - preInfo.m_endToken); + TokenSlice slice(m_params, preInfo.m_startToken, preInfo.m_endToken); + TokenSliceNoCategories sliceNoCategories(m_params, preInfo.m_startToken, preInfo.m_endToken); for (auto const & lang : m_params.m_langs) { @@ -604,12 +604,12 @@ class PreResult2Maker UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore); } - if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING) + if (info.m_searchType 
== SearchModel::SEARCH_TYPE_BUILDING) UpdateNameScore(ft.GetHouseNumber(), sliceNoCategories, info.m_nameScore); feature::TypesHolder holder(ft); vector> matched(slice.Size()); - m_processor.ForEachCategoryTypes(v2::QuerySlice(slice), [&](size_t i, uint32_t t) + m_processor.ForEachCategoryTypes(QuerySlice(slice), [&](size_t i, uint32_t t) { ++matched[i].second; if (holder.Has(t)) @@ -626,13 +626,13 @@ class PreResult2Maker }); } - uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center, + uint8_t NormalizeRank(uint8_t rank, SearchModel::SearchType type, m2::PointD const & center, string const & country) { switch (type) { - case v2::SearchModel::SEARCH_TYPE_VILLAGE: return rank /= 1.5; - case v2::SearchModel::SEARCH_TYPE_CITY: + case SearchModel::SEARCH_TYPE_VILLAGE: return rank /= 1.5; + case SearchModel::SEARCH_TYPE_CITY: { if (m_processor.GetViewport(Processor::CURRENT_V).IsPointInside(center)) return rank * 2; @@ -645,7 +645,7 @@ class PreResult2Maker if (info.IsNotEmpty() && info.m_name == m_processor.GetPivotRegion()) return rank *= 1.7; } - case v2::SearchModel::SEARCH_TYPE_COUNTRY: + case SearchModel::SEARCH_TYPE_COUNTRY: return rank /= 1.5; // For all other search types, rank should be zero for now. 
@@ -654,7 +654,7 @@ class PreResult2Maker } public: - explicit PreResult2Maker(Processor & q, v2::Geocoder::Params const & params) + explicit PreResult2Maker(Processor & q, Geocoder::Params const & params) : m_processor(q), m_params(params) { } @@ -671,7 +671,7 @@ public: auto res2 = make_unique(ft, &res1, center, m_processor.GetPosition() /* pivot */, name, country); - search::v2::RankingInfo info; + search::RankingInfo info; InitRankingInfo(ft, center, res1, info); info.m_rank = NormalizeRank(info.m_rank, info.m_searchType, center, country); res2->SetRankingInfo(move(info)); @@ -682,7 +682,7 @@ public: } // namespace impl template -void Processor::MakePreResult2(v2::Geocoder::Params const & params, vector & cont, +void Processor::MakePreResult2(Geocoder::Params const & params, vector & cont, vector & streets) { m_preRanker.Filter(m_viewportSearch); @@ -707,7 +707,7 @@ void Processor::MakePreResult2(v2::Geocoder::Params const & params, vector & }); } -void Processor::FlushResults(v2::Geocoder::Params const & params, Results & res, size_t resCount) +void Processor::FlushResults(Geocoder::Params const & params, Results & res, size_t resCount) { vector indV; vector streets; @@ -736,7 +736,7 @@ void Processor::FlushResults(v2::Geocoder::Params const & params, Results & res, } } -void Processor::FlushViewportResults(v2::Geocoder::Params const & params, Results & res) +void Processor::FlushViewportResults(Geocoder::Params const & params, Results & res) { vector indV; vector streets; @@ -1155,7 +1155,7 @@ void Processor::InitParams(QueryParams & params) } } }; - ForEachCategoryTypes(v2::QuerySliceOnRawStrings(m_tokens, m_prefix), addSyms); + ForEachCategoryTypes(QuerySliceOnRawStrings(m_tokens, m_prefix), addSyms); for (auto & tokens : params.m_tokens) { diff --git a/search/processor.hpp b/search/processor.hpp index 514b2e1994..c91a574e4d 100644 --- a/search/processor.hpp +++ b/search/processor.hpp @@ -1,13 +1,13 @@ #pragma once +#include "search/geocoder.hpp" #include 
"search/keyword_lang_matcher.hpp" #include "search/mode.hpp" #include "search/pre_ranker.hpp" +#include "search/rank_table_cache.hpp" #include "search/reverse_geocoder.hpp" #include "search/search_trie.hpp" #include "search/suggest.hpp" -#include "search/v2/geocoder.hpp" -#include "search/v2/rank_table_cache.hpp" -#include "search/v2/token_slice.hpp" +#include "search/token_slice.hpp" #include "indexer/ftypes_matcher.hpp" #include "indexer/index.hpp" @@ -52,10 +52,7 @@ struct Region; struct QueryParams; class ReverseGeocoder; -namespace v2 -{ class Geocoder; -} // namespace search::v2 namespace impl { @@ -159,11 +156,11 @@ protected: void ClearCache(size_t ind); template - void MakePreResult2(v2::Geocoder::Params const & params, vector & cont, + void MakePreResult2(Geocoder::Params const & params, vector & cont, vector & streets); - void FlushResults(v2::Geocoder::Params const & params, Results & res, size_t resCount); - void FlushViewportResults(v2::Geocoder::Params const & params, Results & res); + void FlushResults(Geocoder::Params const & params, Results & res, size_t resCount); + void FlushViewportResults(Geocoder::Params const & params, Results & res); void RemoveStringPrefix(string const & str, string & res) const; void GetSuggestion(string const & name, string & suggest) const; @@ -219,7 +216,7 @@ protected: bool m_keepHouseNumberInQuery; PreRanker m_preRanker; - v2::Geocoder m_geocoder; + Geocoder m_geocoder; ReverseGeocoder const m_reverseGeocoder; }; } // namespace search diff --git a/search/rank_table_cache.cpp b/search/rank_table_cache.cpp new file mode 100644 index 0000000000..83b94f9b94 --- /dev/null +++ b/search/rank_table_cache.cpp @@ -0,0 +1,27 @@ +#include "search/rank_table_cache.hpp" + +#include "search/dummy_rank_table.hpp" + +#include "indexer/rank_table.hpp" + +namespace search +{ +RankTable const & RankTableCache::Get(Index & index, TId const & mwmId) +{ + auto const it = m_ranks.find(TKey(mwmId)); + if (it != m_ranks.end()) + return 
*it->second; + + TKey handle(index.GetMwmHandleById(mwmId)); + auto table = RankTable::Load(handle.GetValue()->m_cont); + if (!table) + table.reset(new DummyRankTable()); + + return *(m_ranks.emplace(move(handle), move(table)).first->second.get()); +} + +void RankTableCache::Remove(TId const & id) { m_ranks.erase(TKey(id)); } + +void RankTableCache::Clear() { m_ranks.clear(); } + +} // namespace search diff --git a/search/rank_table_cache.hpp b/search/rank_table_cache.hpp new file mode 100644 index 0000000000..2332a3a3c1 --- /dev/null +++ b/search/rank_table_cache.hpp @@ -0,0 +1,46 @@ +#pragma once + +#include "indexer/index.hpp" + +#include "base/macros.hpp" + +#include "std/map.hpp" +#include "std/unique_ptr.hpp" + +namespace search +{ +class RankTable; + +class RankTableCache +{ + using TId = MwmSet::MwmId; + + struct TKey : public MwmSet::MwmHandle + { + TKey() = default; + TKey(TKey &&) = default; + + explicit TKey(TId const & id) { this->m_mwmId = id; } + explicit TKey(MwmSet::MwmHandle && handle) : MwmSet::MwmHandle(move(handle)) {} + }; + +public: + RankTableCache() = default; + + RankTable const & Get(Index & index, TId const & mwmId); + + void Remove(TId const & id); + void Clear(); + +private: + struct Compare + { + bool operator()(TKey const & r1, TKey const & r2) const { return (r1.GetId() < r2.GetId()); } + }; + + map, Compare> m_ranks; + + DISALLOW_COPY_AND_MOVE(RankTableCache); +}; + +} // namespace search diff --git a/search/ranking_info.cpp b/search/ranking_info.cpp new file mode 100644 index 0000000000..163208b652 --- /dev/null +++ b/search/ranking_info.cpp @@ -0,0 +1,93 @@ +#include "search/ranking_info.hpp" + +#include "std/cmath.hpp" +#include "std/iomanip.hpp" +#include "std/limits.hpp" +#include "std/sstream.hpp" + +namespace search +{ +namespace +{ +// See search/search_quality/scoring_model.py for details. In short, +// these coeffs correspond to coeffs in a linear model. 
+double const kDistanceToPivot = -0.7579760; +double const kRank = 0.9185310; +double const kFalseCats = -0.7996119; +double const kNameScore[NameScore::NAME_SCORE_COUNT] = { + -1.0000000 /* Zero */, 0.3585180 /* Substring Prefix */, 0.3198080 /* Substring */, + 0.3216740 /* Full Match Prefix */, 0.3216740 /* Full Match */ +}; +double const kSearchType[SearchModel::SEARCH_TYPE_COUNT] = { + -0.1419479 /* POI */, -0.1419479 /* Building */, -0.1462099 /* Street */, + -0.1509122 /* Unclassified */, -0.2591458 /* Village */, -0.0451342 /* City */, + 0.2515398 /* State */, 0.4918102 /* Country */ +}; + +double TransformDistance(double distance) +{ + return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters; +} +} // namespace + +// static +double const RankingInfo::kMaxDistMeters = 2e6; + +// static +void RankingInfo::PrintCSVHeader(ostream & os) +{ + os << "DistanceToPivot" + << ",Rank" + << ",NameScore" + << ",SearchType" + << ",PureCats" + << ",FalseCats"; +} + +string DebugPrint(RankingInfo const & info) +{ + ostringstream os; + os << "RankingInfo ["; + os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; + os << "m_rank:" << static_cast(info.m_rank) << ","; + os << "m_nameScore:" << DebugPrint(info.m_nameScore) << ","; + os << "m_searchType:" << DebugPrint(info.m_searchType) << ","; + os << "m_pureCats:" << info.m_pureCats << ","; + os << "m_falseCats:" << info.m_falseCats; + os << "]"; + return os.str(); +} + +void RankingInfo::ToCSV(ostream & os) const +{ + os << fixed; + os << m_distanceToPivot << "," << static_cast(m_rank) << "," << DebugPrint(m_nameScore) + << "," << DebugPrint(m_searchType) << "," << m_pureCats << "," << m_falseCats; +} + +double RankingInfo::GetLinearModelRank() const +{ + // NOTE: this code must be consistent with scoring_model.py. Keep + // this in mind when you're going to change scoring_model.py or this + // code. We're working on automatic rank calculation code generator + // integrated in the build system. 
+ double const distanceToPivot = TransformDistance(m_distanceToPivot); + double const rank = static_cast(m_rank) / numeric_limits::max(); + + auto nameScore = m_nameScore; + if (m_pureCats || m_falseCats) + { + // If the feature was matched only by categorial tokens, it's + // better for ranking to set name score to zero. For example, + // when we're looking for a "cafe", cafes "Cafe Pushkin" and + // "Lermontov" both match to the request, but must be ranked in + // accordance to their distances to the user position or viewport, + // in spite of "Cafe Pushkin" has a non-zero name rank. + nameScore = NAME_SCORE_ZERO; + } + + return kDistanceToPivot * distanceToPivot + kRank * rank + kNameScore[nameScore] + + kSearchType[m_searchType] + m_falseCats * kFalseCats; +} + +} // namespace search diff --git a/search/ranking_info.hpp b/search/ranking_info.hpp new file mode 100644 index 0000000000..d5ee955af4 --- /dev/null +++ b/search/ranking_info.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include "search/pre_ranking_info.hpp" +#include "search/ranking_utils.hpp" +#include "search/search_model.hpp" + +#include "std/iostream.hpp" + +class FeatureType; + +namespace search +{ +struct RankingInfo +{ + static double const kMaxDistMeters; + + // Distance from the feature to the pivot point. + double m_distanceToPivot = kMaxDistMeters; + + // Rank of the feature. + uint8_t m_rank = 0; + + // Score for the feature's name. + NameScore m_nameScore = NAME_SCORE_ZERO; + + // Search type for the feature. + SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT; + + // True if all of the tokens that the feature was matched by + // correspond to this feature's categories. + bool m_pureCats = false; + + // True if none of the tokens that the feature was matched by + // corresponds to this feature's categories although all of the + // tokens are categorial ones. 
+ bool m_falseCats = false; + + static void PrintCSVHeader(ostream & os); + + void ToCSV(ostream & os) const; + + // Returns rank calculated by a linear model. Large values + // correspond to important features. + double GetLinearModelRank() const; +}; + +string DebugPrint(RankingInfo const & info); + +} // namespace search diff --git a/search/ranking_utils.cpp b/search/ranking_utils.cpp new file mode 100644 index 0000000000..39dcf2fe7f --- /dev/null +++ b/search/ranking_utils.cpp @@ -0,0 +1,41 @@ +#include "search/ranking_utils.hpp" + +#include "std/algorithm.hpp" + +using namespace strings; + +namespace search +{ +namespace impl +{ +bool Match(vector const & tokens, UniString const & token) +{ + return find(tokens.begin(), tokens.end(), token) != tokens.end(); +} + +bool PrefixMatch(vector const & prefixes, UniString const & token) +{ + for (auto const & prefix : prefixes) + { + if (StartsWith(token, prefix)) + return true; + } + return false; +} +} // namespace impl + +string DebugPrint(NameScore score) +{ + switch (score) + { + case NAME_SCORE_ZERO: return "Zero"; + case NAME_SCORE_SUBSTRING_PREFIX: return "Substring Prefix"; + case NAME_SCORE_SUBSTRING: return "Substring"; + case NAME_SCORE_FULL_MATCH_PREFIX: return "Full Match Prefix"; + case NAME_SCORE_FULL_MATCH: return "Full Match"; + case NAME_SCORE_COUNT: return "Count"; + } + return "Unknown"; +} + +} // namespace search diff --git a/search/ranking_utils.hpp b/search/ranking_utils.hpp new file mode 100644 index 0000000000..75b94e4c50 --- /dev/null +++ b/search/ranking_utils.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include "search/query_params.hpp" +#include "search/search_model.hpp" + +#include "indexer/search_delimiters.hpp" +#include "indexer/search_string_utils.hpp" + +#include "base/stl_add.hpp" +#include "base/string_utils.hpp" + +#include "std/cstdint.hpp" +#include "std/limits.hpp" +#include "std/string.hpp" +#include "std/vector.hpp" + +namespace search +{ +struct QueryParams; + +namespace impl +{ 
+bool Match(vector const & tokens, strings::UniString const & token); + +bool PrefixMatch(vector const & prefixes, strings::UniString const & token); +} // namespace impl + +// The order and numeric values are important here. Please, check all +// use-cases before changing this enum. +enum NameScore +{ + NAME_SCORE_ZERO = 0, + NAME_SCORE_SUBSTRING_PREFIX = 1, + NAME_SCORE_SUBSTRING = 2, + NAME_SCORE_FULL_MATCH_PREFIX = 3, + NAME_SCORE_FULL_MATCH = 4, + + NAME_SCORE_COUNT +}; + +template +NameScore GetNameScore(string const & name, TSlice const & slice) +{ + if (slice.Empty()) + return NAME_SCORE_ZERO; + + vector tokens; + SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters()); + return GetNameScore(tokens, slice); +} + +template +NameScore GetNameScore(vector const & tokens, TSlice const & slice) +{ + if (slice.Empty()) + return NAME_SCORE_ZERO; + + size_t const n = tokens.size(); + size_t const m = slice.Size(); + + bool const lastTokenIsPrefix = slice.IsPrefix(m - 1); + + NameScore score = NAME_SCORE_ZERO; + for (int offset = 0; offset + m <= n; ++offset) + { + bool match = true; + for (int i = 0; i < m - 1 && match; ++i) + match = match && impl::Match(slice.Get(i), tokens[offset + i]); + if (!match) + continue; + + if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1])) + { + if (m == n) + return NAME_SCORE_FULL_MATCH; + score = max(score, NAME_SCORE_SUBSTRING); + } + if (lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1])) + { + if (m == n) + return NAME_SCORE_FULL_MATCH_PREFIX; + score = max(score, NAME_SCORE_SUBSTRING_PREFIX); + } + } + return score; +} + +string DebugPrint(NameScore score); + +} // namespace search diff --git a/search/result.hpp b/search/result.hpp index 896aa2c181..19d1576411 100644 --- a/search/result.hpp +++ b/search/result.hpp @@ -1,5 +1,5 @@ #pragma once -#include "search/v2/ranking_info.hpp" +#include "search/ranking_info.hpp" #include "indexer/feature_decl.hpp" @@ 
-94,7 +94,7 @@ public: int32_t GetPositionInResults() const { return m_positionInResults; } void SetPositionInResults(int32_t pos) { m_positionInResults = pos; } - inline v2::RankingInfo const & GetRankingInfo() const { return m_info; } + inline RankingInfo const & GetRankingInfo() const { return m_info; } template inline void SetRankingInfo(TInfo && info) @@ -115,7 +115,7 @@ private: string m_suggestionStr; buffer_vector, 4> m_hightlightRanges; - v2::RankingInfo m_info; + RankingInfo m_info; // The position that this result occupied in the vector returned // by a search query. -1 if undefined. diff --git a/search/retrieval.cpp b/search/retrieval.cpp index a2ecb76677..8a312d8f6b 100644 --- a/search/retrieval.cpp +++ b/search/retrieval.cpp @@ -1,13 +1,12 @@ #include "retrieval.hpp" -#include "cancel_exception.hpp" -#include "feature_offset_match.hpp" -#include "interval_set.hpp" -#include "search_index_values.hpp" -#include "search_trie.hpp" - -#include "v2/mwm_context.hpp" -#include "v2/token_slice.hpp" +#include "search/cancel_exception.hpp" +#include "search/feature_offset_match.hpp" +#include "search/interval_set.hpp" +#include "search/mwm_context.hpp" +#include "search/search_index_values.hpp" +#include "search/search_trie.hpp" +#include "search/token_slice.hpp" #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" @@ -30,8 +29,6 @@ using osm::Editor; namespace search { -namespace v2 -{ namespace { class FeaturesCollector @@ -160,7 +157,7 @@ bool MatchFeatureByName(FeatureType const & ft, QueryParams const & params) return matched; } -bool MatchFeatureByPostcode(FeatureType const & ft, v2::TokenSlice const & slice) +bool MatchFeatureByPostcode(FeatureType const & ft, TokenSlice const & slice) { string const postcode = ft.GetMetadata().Get(feature::Metadata::FMD_POSTCODE); vector tokens; @@ -252,7 +249,7 @@ unique_ptr RetrievePostcodeFeaturesImpl( // Retrieves from the geometry index corresponding to handle all // features from |coverage|. 
unique_ptr RetrieveGeometryFeaturesImpl( - v2::MwmContext const & context, my::Cancellable const & cancellable, + MwmContext const & context, my::Cancellable const & cancellable, covering::IntervalsT const & coverage, int scale) { vector features; @@ -331,9 +328,8 @@ unique_ptr RetrieveGeometryFeatures( m2::RectD const & rect, int scale) { covering::IntervalsT coverage; - v2::CoverRect(rect, scale, coverage); + CoverRect(rect, scale, coverage); return RetrieveGeometryFeaturesImpl(context, cancellable, coverage, scale); } -} // namespace v2 } // namespace search diff --git a/search/retrieval.hpp b/search/retrieval.hpp index a4e85c6095..032636bf7c 100644 --- a/search/retrieval.hpp +++ b/search/retrieval.hpp @@ -18,8 +18,6 @@ class CompressedBitVector; namespace search { -namespace v2 -{ class MwmContext; class TokenSlice; @@ -40,5 +38,5 @@ unique_ptr RetrievePostcodeFeatures( unique_ptr RetrieveGeometryFeatures( MwmContext const & context, my::Cancellable const & cancellable, m2::RectD const & rect, int scale); -} // namespace v2 + } // namespace search diff --git a/search/reverse_geocoder.cpp b/search/reverse_geocoder.cpp index d91089c491..c2796138dc 100644 --- a/search/reverse_geocoder.cpp +++ b/search/reverse_geocoder.cpp @@ -1,6 +1,6 @@ #include "reverse_geocoder.hpp" -#include "search/v2/mwm_context.hpp" +#include "search/mwm_context.hpp" #include "indexer/feature.hpp" #include "indexer/feature_algo.hpp" @@ -50,7 +50,7 @@ void ReverseGeocoder::GetNearbyStreets(MwmSet::MwmId const & id, m2::PointD cons MwmSet::MwmHandle mwmHandle = m_index.GetMwmHandleById(id); if (mwmHandle.IsAlive()) { - search::v2::MwmContext(move(mwmHandle)).ForEachFeature(rect, addStreet); + search::MwmContext(move(mwmHandle)).ForEachFeature(rect, addStreet); sort(streets.begin(), streets.end(), my::LessBy(&Street::m_distanceMeters)); } } @@ -199,7 +199,7 @@ bool ReverseGeocoder::HouseTable::Get(FeatureID const & fid, uint32_t & streetIn LOG(LWARNING, ("MWM", fid, "is dead")); return false; 
} - m_table = search::v2::HouseToStreetTable::Load(*m_handle.GetValue()); + m_table = search::HouseToStreetTable::Load(*m_handle.GetValue()); } return m_table->Get(fid.m_index, streetIndex); diff --git a/search/reverse_geocoder.hpp b/search/reverse_geocoder.hpp index 3aa3dd4328..097cbe752d 100644 --- a/search/reverse_geocoder.hpp +++ b/search/reverse_geocoder.hpp @@ -1,6 +1,6 @@ #pragma once -#include "search/v2/house_to_street_table.hpp" +#include "search/house_to_street_table.hpp" #include "indexer/feature_decl.hpp" @@ -99,7 +99,7 @@ private: class HouseTable { Index const & m_index; - unique_ptr m_table; + unique_ptr m_table; MwmSet::MwmHandle m_handle; public: explicit HouseTable(Index const & index) : m_index(index) {} diff --git a/search/search.pro b/search/search.pro index 504169295d..894529fb5b 100644 --- a/search/search.pro +++ b/search/search.pro @@ -12,25 +12,42 @@ HEADERS += \ algos.hpp \ approximate_string_match.hpp \ cancel_exception.hpp \ + cbv_ptr.hpp \ dummy_rank_table.hpp \ feature_offset_match.hpp \ + features_filter.hpp \ + features_layer.hpp \ + features_layer_matcher.hpp \ + features_layer_path_finder.hpp \ + geocoder.hpp \ + geometry_cache.hpp \ geometry_utils.hpp \ house_detector.hpp \ + house_numbers_matcher.hpp \ + house_to_street_table.hpp \ intermediate_result.hpp \ + intersection_result.hpp \ interval_set.hpp \ keyword_lang_matcher.hpp \ keyword_matcher.hpp \ latlon_match.hpp \ locality.hpp \ locality_finder.hpp \ + locality_scorer.hpp \ mode.hpp \ + mwm_context.hpp \ + nested_rects_cache.hpp \ params.hpp \ pre_ranker.hpp \ + pre_ranking_info.hpp \ processor.hpp \ processor_factory.hpp \ projection_on_street.hpp \ query_params.hpp \ query_saver.hpp \ + rank_table_cache.hpp \ + ranking_info.hpp \ + ranking_utils.hpp \ region.hpp \ result.hpp \ retrieval.hpp \ @@ -38,73 +55,56 @@ HEADERS += \ search_common.hpp \ search_engine.hpp \ search_index_values.hpp \ + search_model.hpp \ search_string_intersection.hpp \ search_trie.hpp \ + 
stats_cache.hpp \ + street_vicinity_loader.hpp \ suggest.hpp \ + token_slice.hpp \ types_skipper.hpp \ - v2/cbv_ptr.hpp \ - v2/features_filter.hpp \ - v2/features_layer.hpp \ - v2/features_layer_matcher.hpp \ - v2/features_layer_path_finder.hpp \ - v2/geocoder.hpp \ - v2/geometry_cache.hpp \ - v2/house_numbers_matcher.hpp \ - v2/house_to_street_table.hpp \ - v2/intersection_result.hpp \ - v2/locality_scorer.hpp \ - v2/mwm_context.hpp \ - v2/nested_rects_cache.hpp \ - v2/pre_ranking_info.hpp \ - v2/rank_table_cache.hpp \ - v2/ranking_info.hpp \ - v2/ranking_utils.hpp \ - v2/search_model.hpp \ - v2/stats_cache.hpp \ - v2/street_vicinity_loader.hpp \ - v2/token_slice.hpp \ SOURCES += \ approximate_string_match.cpp \ + cbv_ptr.cpp \ dummy_rank_table.cpp \ + features_filter.cpp \ + features_layer.cpp \ + features_layer_matcher.cpp \ + features_layer_path_finder.cpp \ + geocoder.cpp \ + geometry_cache.cpp \ geometry_utils.cpp \ house_detector.cpp \ + house_numbers_matcher.cpp \ + house_to_street_table.cpp \ intermediate_result.cpp \ + intersection_result.cpp \ keyword_lang_matcher.cpp \ keyword_matcher.cpp \ latlon_match.cpp \ locality.cpp \ locality_finder.cpp \ + locality_scorer.cpp \ mode.cpp \ + mwm_context.cpp \ + nested_rects_cache.cpp \ params.cpp \ pre_ranker.cpp \ + pre_ranking_info.cpp \ processor.cpp \ projection_on_street.cpp \ query_params.cpp \ query_saver.cpp \ + rank_table_cache.cpp \ + ranking_info.cpp \ + ranking_utils.cpp \ region.cpp \ result.cpp \ retrieval.cpp \ reverse_geocoder.cpp \ search_engine.cpp \ + search_model.cpp \ + street_vicinity_loader.cpp \ + token_slice.cpp \ types_skipper.cpp \ - v2/cbv_ptr.cpp \ - v2/features_filter.cpp \ - v2/features_layer.cpp \ - v2/features_layer_matcher.cpp \ - v2/features_layer_path_finder.cpp \ - v2/geocoder.cpp \ - v2/geometry_cache.cpp \ - v2/house_numbers_matcher.cpp \ - v2/house_to_street_table.cpp \ - v2/intersection_result.cpp \ - v2/locality_scorer.cpp \ - v2/mwm_context.cpp \ - 
v2/nested_rects_cache.cpp \ - v2/pre_ranking_info.cpp \ - v2/rank_table_cache.cpp \ - v2/ranking_info.cpp \ - v2/ranking_utils.cpp \ - v2/search_model.cpp \ - v2/street_vicinity_loader.cpp \ - v2/token_slice.cpp \ diff --git a/search/search_integration_tests/processor_test.cpp b/search/search_integration_tests/processor_test.cpp index 4d81c982d9..6676b73a89 100644 --- a/search/search_integration_tests/processor_test.cpp +++ b/search/search_integration_tests/processor_test.cpp @@ -4,7 +4,7 @@ #include "search/search_integration_tests/helpers.hpp" #include "search/search_tests_support/test_results_matching.hpp" #include "search/search_tests_support/test_search_request.hpp" -#include "search/v2/token_slice.hpp" +#include "search/token_slice.hpp" #include "generator/generator_tests_support/test_feature.hpp" #include "generator/generator_tests_support/test_mwm_builder.hpp" @@ -22,7 +22,6 @@ using namespace generator::tests_support; using namespace search::tests_support; -using namespace search::v2; using TRules = vector>; @@ -414,8 +413,8 @@ UNIT_CLASS_TEST(ProcessorTest, TestPostcodes) params.m_tokens.emplace_back(); params.m_tokens.back().push_back(strings::MakeUniString("141702")); auto * value = handle.GetValue(); - auto features = v2::RetrievePostcodeFeatures(countryId, *value, cancellable, - TokenSlice(params, 0, params.m_tokens.size())); + auto features = RetrievePostcodeFeatures(countryId, *value, cancellable, + TokenSlice(params, 0, params.m_tokens.size())); TEST_EQUAL(1, features->PopCount(), ()); uint64_t index = 0; diff --git a/search/search_model.cpp b/search/search_model.cpp new file mode 100644 index 0000000000..4b45a728a2 --- /dev/null +++ b/search/search_model.cpp @@ -0,0 +1,138 @@ +#include "search/search_model.hpp" + +#include "indexer/classificator.hpp" +#include "indexer/feature.hpp" + +#include "base/macros.hpp" + +using namespace ftypes; + +namespace search +{ +TwoLevelPOIChecker::TwoLevelPOIChecker() : ftypes::BaseChecker(2 /* level */) +{ + 
Classificator const & c = classif(); + StringIL arr[] = {{"highway", "bus_stop"}, + {"highway", "speed_camera"}, + {"waterway", "waterfall"}, + {"natural", "volcano"}, + {"natural", "cave_entrance"}, + {"natural", "beach"}}; + + for (size_t i = 0; i < ARRAY_SIZE(arr); ++i) + m_types.push_back(c.GetTypeByPath(arr[i])); +} + +namespace +{ +/// Should be similar with ftypes::IsAddressObjectChecker object classes. +class OneLevelPOIChecker : public ftypes::BaseChecker +{ +public: + OneLevelPOIChecker() : ftypes::BaseChecker(1 /* level */) + { + Classificator const & c = classif(); + + auto paths = {"amenity", "historic", "office", "railway", "shop", "sport", "tourism", "craft"}; + for (auto const & path : paths) + m_types.push_back(c.GetTypeByPath({path})); + } +}; + +class IsPoiChecker +{ +public: + IsPoiChecker() {} + + static IsPoiChecker const & Instance() + { + static const IsPoiChecker inst; + return inst; + } + + bool operator()(FeatureType const & ft) const { return m_oneLevel(ft) || m_twoLevel(ft); } + +private: + OneLevelPOIChecker const m_oneLevel; + TwoLevelPOIChecker const m_twoLevel; +}; + +class CustomIsBuildingChecker +{ +public: + static CustomIsBuildingChecker const & Instance() + { + static const CustomIsBuildingChecker inst; + return inst; + } + + bool operator()(FeatureType const & ft) const + { + return !ft.GetHouseNumber().empty() || IsBuildingChecker::Instance()(ft); + } + +private: + CustomIsBuildingChecker() {} +}; +} // namespace + +// static +SearchModel const & SearchModel::Instance() +{ + static SearchModel model; + return model; +} + +SearchModel::SearchType SearchModel::GetSearchType(FeatureType const & feature) const +{ + static auto const & buildingChecker = CustomIsBuildingChecker::Instance(); + static auto const & streetChecker = IsStreetChecker::Instance(); + static auto const & localityChecker = IsLocalityChecker::Instance(); + static auto const & poiChecker = IsPoiChecker::Instance(); + + if (buildingChecker(feature)) + return 
SEARCH_TYPE_BUILDING; + + if (streetChecker(feature)) + return SEARCH_TYPE_STREET; + + if (localityChecker(feature)) + { + Type type = localityChecker.GetType(feature); + switch (type) + { + case NONE: ASSERT(false, ("Unknown locality.")); return SEARCH_TYPE_UNCLASSIFIED; + case STATE: return SEARCH_TYPE_STATE; + case COUNTRY: return SEARCH_TYPE_COUNTRY; + case CITY: + case TOWN: return SEARCH_TYPE_CITY; + case VILLAGE: return SEARCH_TYPE_VILLAGE; + case LOCALITY_COUNT: return SEARCH_TYPE_UNCLASSIFIED; + } + } + + if (poiChecker(feature)) + return SEARCH_TYPE_POI; + + return SEARCH_TYPE_UNCLASSIFIED; +} + +string DebugPrint(SearchModel::SearchType type) +{ + switch (type) + { + case SearchModel::SEARCH_TYPE_POI: return "POI"; + case SearchModel::SEARCH_TYPE_BUILDING: return "Building"; + case SearchModel::SEARCH_TYPE_STREET: return "Street"; + case SearchModel::SEARCH_TYPE_CITY: return "City"; + case SearchModel::SEARCH_TYPE_VILLAGE: return "Village"; + case SearchModel::SEARCH_TYPE_STATE: return "State"; + case SearchModel::SEARCH_TYPE_COUNTRY: return "Country"; + case SearchModel::SEARCH_TYPE_UNCLASSIFIED: return "Unclassified"; + case SearchModel::SEARCH_TYPE_COUNT: return "Count"; + } + ASSERT(false, ("Unknown search type:", static_cast(type))); + return string(); +} + +} // namespace search diff --git a/search/search_model.hpp b/search/search_model.hpp new file mode 100644 index 0000000000..99e09d66c8 --- /dev/null +++ b/search/search_model.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include "indexer/ftypes_matcher.hpp" + +#include "std/string.hpp" +#include "std/vector.hpp" + +#include "base/macros.hpp" + +class FeatureType; + +namespace search +{ +/// Describes 2-level POI-exception types that don't belong to any POI-common classes +/// (amenity, shop, tourism, ...). Used in search algo and search categories index generation. 
+class TwoLevelPOIChecker : public ftypes::BaseChecker +{ +public: + TwoLevelPOIChecker(); +}; + +// This class is used to map feature types to a restricted set of +// different search classes (do not confuse these classes with search +// categories - they are completely different things). +class SearchModel +{ +public: + enum SearchType + { + // Low-level features such as amenities, offices, shops, buildings + // without house number, etc. + SEARCH_TYPE_POI, + + // All features with set house number. + SEARCH_TYPE_BUILDING, + + SEARCH_TYPE_STREET, + + // All low-level features except POI, BUILDING and STREET. + SEARCH_TYPE_UNCLASSIFIED, + + SEARCH_TYPE_VILLAGE, + SEARCH_TYPE_CITY, + SEARCH_TYPE_STATE, // US or Canadian states + SEARCH_TYPE_COUNTRY, + + SEARCH_TYPE_COUNT + }; + + static SearchModel const & Instance(); + + SearchType GetSearchType(FeatureType const & feature) const; + +private: + SearchModel() = default; + + DISALLOW_COPY_AND_MOVE(SearchModel); +}; + +string DebugPrint(SearchModel::SearchType type); + +} // namespace search diff --git a/search/search_quality/features_collector_tool/features_collector_tool.cpp b/search/search_quality/features_collector_tool/features_collector_tool.cpp index 59cabc2c3d..e7047da993 100644 --- a/search/search_quality/features_collector_tool/features_collector_tool.cpp +++ b/search/search_quality/features_collector_tool/features_collector_tool.cpp @@ -1,9 +1,9 @@ +#include "search/ranking_info.hpp" #include "search/result.hpp" #include "search/search_quality/helpers.hpp" #include "search/search_quality/sample.hpp" #include "search/search_tests_support/test_search_engine.hpp" #include "search/search_tests_support/test_search_request.hpp" -#include "search/v2/ranking_info.hpp" #include "indexer/classificator_loader.hpp" #include "indexer/feature_algo.hpp" @@ -232,7 +232,7 @@ int main(int argc, char * argv[]) Context context(engine); cout << "SampleId,"; - v2::RankingInfo::PrintCSVHeader(cout); + 
RankingInfo::PrintCSVHeader(cout); cout << ",Relevance" << endl; for (size_t i = 0; i < samples.size(); ++i) diff --git a/search/search_quality/search_quality_tool/search_quality_tool.cpp b/search/search_quality/search_quality_tool/search_quality_tool.cpp index a49384af61..031d0ccb5c 100644 --- a/search/search_quality/search_quality_tool/search_quality_tool.cpp +++ b/search/search_quality/search_quality_tool/search_quality_tool.cpp @@ -12,11 +12,11 @@ #include "geometry/point2d.hpp" #include "search/processor_factory.hpp" +#include "search/ranking_info.hpp" #include "search/result.hpp" #include "search/search_quality/helpers.hpp" #include "search/search_tests_support/test_search_engine.hpp" #include "search/search_tests_support/test_search_request.hpp" -#include "search/v2/ranking_info.hpp" #include "platform/country_file.hpp" #include "platform/local_country_file.hpp" @@ -455,7 +455,7 @@ int main(int argc, char * argv[]) if (dumpCSV) { - v2::RankingInfo::PrintCSVHeader(csv); + RankingInfo::PrintCSVHeader(csv); csv << endl; } diff --git a/search/search_tests/house_numbers_matcher_test.cpp b/search/search_tests/house_numbers_matcher_test.cpp index 6a4573f0bf..90485b0755 100644 --- a/search/search_tests/house_numbers_matcher_test.cpp +++ b/search/search_tests/house_numbers_matcher_test.cpp @@ -1,13 +1,13 @@ #include "testing/testing.hpp" -#include "search/v2/house_numbers_matcher.hpp" +#include "search/house_numbers_matcher.hpp" #include "std/vector.hpp" #include "base/string_utils.hpp" using namespace strings; -using namespace search::v2; +using namespace search; namespace { @@ -28,7 +28,7 @@ bool HouseNumbersMatch(string const & houseNumber, string const & query, bool qu { vector queryParses; ParseQuery(MakeUniString(query), queryIsPrefix, queryParses); - return search::v2::HouseNumbersMatch(MakeUniString(houseNumber), queryParses); + return search::HouseNumbersMatch(MakeUniString(houseNumber), queryParses); } bool CheckTokenizer(string const & utf8s, vector const 
& expected) diff --git a/search/search_tests/locality_scorer_test.cpp b/search/search_tests/locality_scorer_test.cpp index 5021e24af7..7223c73431 100644 --- a/search/search_tests/locality_scorer_test.cpp +++ b/search/search_tests/locality_scorer_test.cpp @@ -1,6 +1,6 @@ #include "testing/testing.hpp" -#include "search/v2/locality_scorer.hpp" +#include "search/locality_scorer.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -15,7 +15,6 @@ #include "std/unordered_map.hpp" #include "std/vector.hpp" -using namespace search::v2; using namespace search; using namespace strings; diff --git a/search/search_tests/ranking_tests.cpp b/search/search_tests/ranking_tests.cpp index bc92c2a4b9..7c1df97352 100644 --- a/search/search_tests/ranking_tests.cpp +++ b/search/search_tests/ranking_tests.cpp @@ -1,8 +1,8 @@ #include "testing/testing.hpp" #include "search/query_params.hpp" -#include "search/v2/ranking_utils.hpp" -#include "search/v2/token_slice.hpp" +#include "search/ranking_utils.hpp" +#include "search/token_slice.hpp" #include "indexer/search_delimiters.hpp" #include "indexer/search_string_utils.hpp" @@ -13,7 +13,6 @@ #include "std/string.hpp" using namespace search; -using namespace search::v2; using namespace strings; namespace diff --git a/search/stats_cache.hpp b/search/stats_cache.hpp new file mode 100644 index 0000000000..946e12a7c7 --- /dev/null +++ b/search/stats_cache.hpp @@ -0,0 +1,63 @@ +#pragma once +#include "base/logging.hpp" + +#include "std/unordered_map.hpp" +#include "std/utility.hpp" + +namespace search +{ +template +class Cache +{ + unordered_map m_map; + + /// query statistics + size_t m_accesses; + size_t m_misses; + + size_t m_emptyQueriesCount; /// empty queries count at a row + string m_name; /// cache name for print functions + +public: + explicit Cache(string const & name) + : m_accesses(0), m_misses(0), m_emptyQueriesCount(0), m_name(name) + { + } + + pair Get(TKey const & key) + { + auto r = 
m_map.insert(make_pair(key, TValue())); + + ++m_accesses; + if (r.second) + ++m_misses; + + return pair(r.first->second, r.second); + } + + void Clear() + { + m_map.clear(); + m_accesses = m_misses = 0; + m_emptyQueriesCount = 0; + } + + /// Called at the end of every search query. + void ClearIfNeeded() + { + if (m_accesses != 0) + { + LOG(LDEBUG, ("Cache", m_name, "Queries =", m_accesses, "From cache =", m_accesses - m_misses, + "Added =", m_misses)); + m_accesses = m_misses = 0; + m_emptyQueriesCount = 0; + } + else if (++m_emptyQueriesCount > 5) + { + LOG(LDEBUG, ("Clearing cache", m_name)); + Clear(); + } + } +}; + +} // namespace search diff --git a/search/street_vicinity_loader.cpp b/search/street_vicinity_loader.cpp new file mode 100644 index 0000000000..28989355b3 --- /dev/null +++ b/search/street_vicinity_loader.cpp @@ -0,0 +1,67 @@ +#include "search/street_vicinity_loader.hpp" + +#include "indexer/feature_covering.hpp" +#include "indexer/feature_decl.hpp" +#include "indexer/index.hpp" + +#include "geometry/mercator.hpp" + +#include "geometry/point2d.hpp" + +#include "base/math.hpp" +#include "base/stl_add.hpp" + +namespace search +{ +StreetVicinityLoader::StreetVicinityLoader(int scale, double offsetMeters) + : m_context(nullptr), m_scale(scale), m_offsetMeters(offsetMeters), m_cache("Streets") +{ +} + +void StreetVicinityLoader::SetContext(MwmContext * context) +{ + ASSERT(context, ()); + if (m_context == context) + return; + + m_context = context; + auto const scaleRange = m_context->m_value.GetHeader().GetScaleRange(); + m_scale = my::clamp(m_scale, scaleRange.first, scaleRange.second); +} + +void StreetVicinityLoader::OnQueryFinished() { m_cache.ClearIfNeeded(); } + +StreetVicinityLoader::Street const & StreetVicinityLoader::GetStreet(uint32_t featureId) +{ + auto r = m_cache.Get(featureId); + if (!r.second) + return r.first; + + LoadStreet(featureId, r.first); + return r.first; +} + +void StreetVicinityLoader::LoadStreet(uint32_t featureId, Street & 
street) +{ + FeatureType feature; + if (!m_context->GetFeature(featureId, feature)) + return; + + if (feature.GetFeatureType() != feature::GEOM_LINE) + return; + + vector points; + feature.ForEachPoint(MakeBackInsertFunctor(points), FeatureType::BEST_GEOMETRY); + ASSERT(!points.empty(), ()); + + for (auto const & point : points) + street.m_rect.Add(MercatorBounds::RectByCenterXYAndSizeInMeters(point, m_offsetMeters)); + + covering::CoveringGetter coveringGetter(street.m_rect, covering::ViewportWithLowLevels); + auto const & intervals = coveringGetter.Get(m_scale); + m_context->ForEachIndex(intervals, m_scale, MakeBackInsertFunctor(street.m_features)); + + street.m_calculator = make_unique(points); +} + +} // namespace search diff --git a/search/street_vicinity_loader.hpp b/search/street_vicinity_loader.hpp new file mode 100644 index 0000000000..3f3598054d --- /dev/null +++ b/search/street_vicinity_loader.hpp @@ -0,0 +1,95 @@ +#pragma once + +#include "search/mwm_context.hpp" +#include "search/projection_on_street.hpp" +#include "search/stats_cache.hpp" + +#include "indexer/feature.hpp" +#include "indexer/feature_algo.hpp" + +#include "geometry/rect2d.hpp" + +#include "base/macros.hpp" + +#include "std/unordered_map.hpp" + +namespace search +{ +class MwmContext; + +// This class is able to load features in a street's vicinity. +// +// NOTE: this class *IS NOT* thread-safe. +class StreetVicinityLoader +{ +public: + struct Street + { + Street() = default; + Street(Street && street) = default; + + inline bool IsEmpty() const { return !m_calculator || m_rect.IsEmptyInterior(); } + + vector m_features; + m2::RectD m_rect; + unique_ptr m_calculator; + + /// @todo Cache GetProjection results for features here, because + /// feature::GetCenter and ProjectionOnStreetCalculator::GetProjection are not so fast. 
+ + DISALLOW_COPY(Street); + }; + + StreetVicinityLoader(int scale, double offsetMeters); + void SetContext(MwmContext * context); + + // Calls |fn| on each index in |sortedIds| where sortedIds[index] + // belongs to the street's vicinity. + template + void ForEachInVicinity(uint32_t streetId, vector const & sortedIds, double offsetMeters, + TFn const & fn) + { + // Passed offset param should be less than the cached one, or the cache is invalid otherwise. + ASSERT_LESS_OR_EQUAL(offsetMeters, m_offsetMeters, ()); + + Street const & street = GetStreet(streetId); + if (street.IsEmpty()) + return; + + ProjectionOnStreetCalculator const & calculator = *street.m_calculator; + ProjectionOnStreet proj; + for (uint32_t id : street.m_features) + { + // Load center and check projection only when |id| is in |sortedIds|. + if (!binary_search(sortedIds.begin(), sortedIds.end(), id)) + continue; + + FeatureType ft; + if (!m_context->GetFeature(id, ft)) + continue; // Feature was deleted. + + if (calculator.GetProjection(feature::GetCenter(ft, FeatureType::WORST_GEOMETRY), proj) && + proj.m_distMeters <= offsetMeters) + { + fn(id); + } + } + } + + void OnQueryFinished(); + + Street const & GetStreet(uint32_t featureId); + +private: + void LoadStreet(uint32_t featureId, Street & street); + + MwmContext * m_context; + int m_scale; + double const m_offsetMeters; + + Cache m_cache; + + DISALLOW_COPY_AND_MOVE(StreetVicinityLoader); +}; + +} // namespace search diff --git a/search/token_slice.cpp b/search/token_slice.cpp new file mode 100644 index 0000000000..6705b884fe --- /dev/null +++ b/search/token_slice.cpp @@ -0,0 +1,66 @@ +#include "search/token_slice.hpp" + +#include "std/sstream.hpp" + +namespace search +{ +namespace +{ +template +string SliceToString(string const & name, TSlice const & slice) +{ + ostringstream os; + os << name << " ["; + for (size_t i = 0; i < slice.Size(); ++i) + { + os << DebugPrint(slice.Get(i)); + if (i + 1 != slice.Size()) + os << ", "; + } + os << "]"; 
+ return os.str(); +} +} // namespace + +TokenSlice::TokenSlice(QueryParams const & params, size_t startToken, size_t endToken) + : m_params(params), m_offset(startToken), m_size(endToken - startToken) +{ + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); +} + +bool TokenSlice::IsPrefix(size_t i) const +{ + ASSERT_LESS(i, Size(), ()); + return m_offset + i == m_params.m_tokens.size(); +} + +bool TokenSlice::IsLast(size_t i) const +{ + ASSERT_LESS(i, Size(), ()); + if (m_params.m_prefixTokens.empty()) + return m_offset + i + 1 == m_params.m_tokens.size(); + return m_offset + i == m_params.m_tokens.size(); +} + +TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, size_t startToken, + size_t endToken) + : m_params(params) +{ + ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); + + m_indexes.reserve(endToken - startToken); + for (size_t i = startToken; i < endToken; ++i) + { + if (!m_params.m_isCategorySynonym[i]) + m_indexes.push_back(i); + } +} + +string DebugPrint(TokenSlice const & slice) { return SliceToString("TokenSlice", slice); } + +string DebugPrint(TokenSliceNoCategories const & slice) +{ + return SliceToString("TokenSliceNoCategories", slice); +} + +} // namespace search diff --git a/search/token_slice.hpp b/search/token_slice.hpp new file mode 100644 index 0000000000..87da032ce9 --- /dev/null +++ b/search/token_slice.hpp @@ -0,0 +1,110 @@ +#pragma once + +#include "search/query_params.hpp" + +#include "indexer/string_slice.hpp" + +#include "base/assert.hpp" + +#include "std/cstdint.hpp" +#include "std/string.hpp" +#include "std/vector.hpp" + +namespace search +{ +class TokenSlice +{ +public: + TokenSlice(QueryParams const & params, size_t startToken, size_t endToken); + + inline QueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_offset + i); + } + + inline size_t Size() const { return m_size; } + + inline bool Empty() const { return Size() == 0; } + + // Returns true 
if the |i|-th token in the slice is the incomplete + // (prefix) token. + bool IsPrefix(size_t i) const; + + // Returns true if the |i|-th token in the slice is the last + // (regardless - full or not) token in the query. + bool IsLast(size_t i) const; + +private: + QueryParams const & m_params; + size_t const m_offset; + size_t const m_size; +}; + +class TokenSliceNoCategories +{ +public: + TokenSliceNoCategories(QueryParams const & params, size_t startToken, size_t endToken); + + inline QueryParams::TSynonymsVector const & Get(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_params.GetTokens(m_indexes[i]); + } + + inline size_t Size() const { return m_indexes.size(); } + + inline bool Empty() const { return Size() == 0; } + + inline bool IsPrefix(size_t i) const + { + ASSERT_LESS(i, Size(), ()); + return m_indexes[i] == m_params.m_tokens.size(); + } + +private: + QueryParams const & m_params; + vector m_indexes; +}; + +class QuerySlice : public StringSliceBase +{ +public: + QuerySlice(TokenSlice const & slice) : m_slice(slice) {} + + // QuerySlice overrides: + QueryParams::TString const & Get(size_t i) const override { return m_slice.Get(i).front(); } + size_t Size() const override { return m_slice.Size(); } + +private: + TokenSlice const m_slice; +}; + +template +class QuerySliceOnRawStrings : public StringSliceBase +{ +public: + QuerySliceOnRawStrings(TCont const & tokens, TString const & prefix) + : m_tokens(tokens), m_prefix(prefix) + { + } + + // QuerySlice overrides: + QueryParams::TString const & Get(size_t i) const override + { + ASSERT_LESS(i, Size(), ()); + return i == m_tokens.size() ? m_prefix : m_tokens[i]; + } + + size_t Size() const override { return m_tokens.size() + (m_prefix.empty() ? 
0 : 1); } + +private: + TCont const & m_tokens; + TString const & m_prefix; +}; + +string DebugPrint(TokenSlice const & slice); + +string DebugPrint(TokenSliceNoCategories const & slice); + +} // namespace search diff --git a/search/types_skipper.hpp b/search/types_skipper.hpp index 5dd51cf630..0938aba0c8 100644 --- a/search/types_skipper.hpp +++ b/search/types_skipper.hpp @@ -1,6 +1,6 @@ #pragma once -#include "v2/search_model.hpp" +#include "search_model.hpp" #include "base/buffer_vector.hpp" @@ -31,7 +31,7 @@ private: // m_skipIfEmptyName and m_dontSkipIfEmptyName are used in the case 2 described above. TCont m_skipIfEmptyName[2]; - v2::TwoLevelPOIChecker m_dontSkipIfEmptyName; + TwoLevelPOIChecker m_dontSkipIfEmptyName; uint32_t m_country, m_state; }; diff --git a/search/v2/cbv_ptr.cpp b/search/v2/cbv_ptr.cpp deleted file mode 100644 index 405db9c5b3..0000000000 --- a/search/v2/cbv_ptr.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "search/v2/cbv_ptr.hpp" - - -namespace search -{ -namespace v2 -{ -CBVPtr::CBVPtr(coding::CompressedBitVector const * p, bool isOwner) -{ - Set(p, isOwner); -} - -void CBVPtr::Release() -{ - if (m_isOwner) - delete m_ptr; - - m_ptr = nullptr; - m_isOwner = false; - m_isFull = false; -} - -void CBVPtr::Set(coding::CompressedBitVector const * p, bool isOwner/* = false*/) -{ - Release(); - - m_ptr = p; - m_isOwner = p && isOwner; -} - -void CBVPtr::Set(unique_ptr p) -{ - Set(p.release(), true /* isOwner */); -} - -void CBVPtr::Union(coding::CompressedBitVector const * p) -{ - if (!p || m_isFull) - return; - - if (!m_ptr) - { - m_ptr = p; - m_isFull = false; - } - else - { - Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true); - } -} - -void CBVPtr::Intersect(coding::CompressedBitVector const * p) -{ - if (!p) - { - Release(); - return; - } - - if (m_ptr) - { - Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true); - } - else if (m_isFull) - { - m_ptr = p; - m_isFull = false; - } -} - -bool 
CBVPtr::IsEmpty() const -{ - return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); -} - -} // namespace v2 -} // namespace search diff --git a/search/v2/cbv_ptr.hpp b/search/v2/cbv_ptr.hpp deleted file mode 100644 index 55e7da2734..0000000000 --- a/search/v2/cbv_ptr.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include "coding/compressed_bit_vector.hpp" - -#include "base/assert.hpp" -#include "base/macros.hpp" - -#include "std/function.hpp" -#include "std/utility.hpp" - - -namespace search -{ -namespace v2 -{ -/// CompressedBitVector pointer class that incapsulates -/// binary operators logic and takes ownership if needed. -class CBVPtr -{ - DISALLOW_COPY_AND_MOVE(CBVPtr); - - coding::CompressedBitVector const * m_ptr = nullptr; - bool m_isOwner = false; - bool m_isFull = false; ///< True iff all bits are set to one. - - void Release(); - -public: - CBVPtr() = default; - CBVPtr(coding::CompressedBitVector const * p, bool isOwner); - ~CBVPtr() { Release(); } - - inline void SetFull() - { - Release(); - m_isFull = true; - } - - void Set(coding::CompressedBitVector const * p, bool isOwner = false); - void Set(unique_ptr p); - - inline coding::CompressedBitVector const * Get() const { return m_ptr; } - - coding::CompressedBitVector const & operator*() const { return *m_ptr; } - coding::CompressedBitVector const * operator->() const { return m_ptr; } - - bool IsEmpty() const; - - void Union(coding::CompressedBitVector const * p); - void Intersect(coding::CompressedBitVector const * p); - - template - void ForEach(TFn && fn) const - { - ASSERT(!m_isFull, ()); - if (!IsEmpty()) - coding::CompressedBitVectorEnumerator::ForEach(*m_ptr, forward(fn)); - } -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/features_filter.cpp b/search/v2/features_filter.cpp deleted file mode 100644 index ffe474494e..0000000000 --- a/search/v2/features_filter.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "search/v2/features_filter.hpp" - -#include 
"coding/compressed_bit_vector.hpp" - -#include "std/algorithm.hpp" - -namespace search -{ -namespace v2 -{ -// FeaturesFilter ---------------------------------------------------------------------------------- -FeaturesFilter::FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold) - : m_filter(filter), m_threshold(threshold) -{ -} - -bool FeaturesFilter::NeedToFilter(coding::CompressedBitVector const & cbv) const -{ - return cbv.PopCount() > m_threshold; -} - -// LocalityFilter ---------------------------------------------------------------------------------- -LocalityFilter::LocalityFilter(coding::CompressedBitVector const & filter) - : FeaturesFilter(filter, 0 /* threshold */) -{ -} - -unique_ptr LocalityFilter::Filter( - coding::CompressedBitVector const & cbv) const -{ - return coding::CompressedBitVector::Intersect(m_filter, cbv); -} - -// ViewportFilter ---------------------------------------------------------------------------------- -ViewportFilter::ViewportFilter(coding::CompressedBitVector const & filter, uint32_t threshold) - : FeaturesFilter(filter, threshold) -{ -} - -unique_ptr ViewportFilter::Filter( - coding::CompressedBitVector const & cbv) const -{ - auto result = coding::CompressedBitVector::Intersect(m_filter, cbv); - if (!coding::CompressedBitVector::IsEmpty(result)) - return result; - return cbv.LeaveFirstSetNBits(m_threshold); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/features_filter.hpp b/search/v2/features_filter.hpp deleted file mode 100644 index f84ca3a7bf..0000000000 --- a/search/v2/features_filter.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include "std/unique_ptr.hpp" - -namespace coding -{ -class CompressedBitVector; -} - -namespace search -{ -namespace v2 -{ -// A lightweight filter of features. -// -// NOTE: this class and its subclasses *ARE* thread-safe. 
-class FeaturesFilter -{ -public: - FeaturesFilter(coding::CompressedBitVector const & filter, uint32_t threshold); - - virtual ~FeaturesFilter() = default; - - bool NeedToFilter(coding::CompressedBitVector const & features) const; - - virtual unique_ptr Filter( - coding::CompressedBitVector const & cbv) const = 0; - -protected: - coding::CompressedBitVector const & m_filter; - uint32_t const m_threshold; -}; - -// Exact filter - leaves only features belonging to the set it was -// constructed from. -class LocalityFilter : public FeaturesFilter -{ -public: - LocalityFilter(coding::CompressedBitVector const & filter); - - // FeaturesFilter overrides: - unique_ptr Filter( - coding::CompressedBitVector const & cbv) const override; -}; - -// Fuzzy filter - tries to leave only features belonging to the set it -// was constructed from, but if the result is empty, leaves at most -// first |threshold| features instead. This property is quite useful -// when there are no matching features in viewport but it's ok to -// process a limited number of features outside the viewport. 
-class ViewportFilter : public FeaturesFilter -{ -public: - ViewportFilter(coding::CompressedBitVector const & filter, uint32_t threshold); - - // FeaturesFilter overrides: - unique_ptr Filter( - coding::CompressedBitVector const & cbv) const override; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer.cpp b/search/v2/features_layer.cpp deleted file mode 100644 index 80ceabe2cc..0000000000 --- a/search/v2/features_layer.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "search/v2/features_layer.hpp" - -#include "base/internal/message.hpp" - -#include "std/sstream.hpp" - -namespace search -{ -namespace v2 -{ -FeaturesLayer::FeaturesLayer() { Clear(); } - -void FeaturesLayer::Clear() -{ - m_sortedFeatures = nullptr; - m_subQuery.clear(); - m_startToken = 0; - m_endToken = 0; - m_type = SearchModel::SEARCH_TYPE_COUNT; - m_hasDelayedFeatures = false; - m_lastTokenIsPrefix = false; -} - -string DebugPrint(FeaturesLayer const & layer) -{ - ostringstream os; - os << "FeaturesLayer [ size of m_sortedFeatures: " - << (layer.m_sortedFeatures ? layer.m_sortedFeatures->size() : 0) - << ", m_subQuery: " << DebugPrint(layer.m_subQuery) << ", m_startToken: " << layer.m_startToken - << ", m_endToken: " << layer.m_endToken << ", m_type: " << DebugPrint(layer.m_type) - << ", m_lastTokenIsPrefix: " << layer.m_lastTokenIsPrefix << " ]"; - return os.str(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer.hpp b/search/v2/features_layer.hpp deleted file mode 100644 index 78a48ae927..0000000000 --- a/search/v2/features_layer.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include "search/v2/search_model.hpp" - -#include "base/string_utils.hpp" - -#include "std/vector.hpp" - -namespace search -{ -namespace v2 -{ -// This structure represents a part of search query interpretation - -// when to a substring of tokens [m_startToken, m_endToken) is matched -// with a set of m_features of the same m_type. 
-struct FeaturesLayer -{ - FeaturesLayer(); - - void Clear(); - - // Non-owning ptr to a sorted vector of features. - vector const * m_sortedFeatures; - - strings::UniString m_subQuery; - - size_t m_startToken; - size_t m_endToken; - SearchModel::SearchType m_type; - - // *NOTE* This field is meaningful only when m_type equals to - // SEARCH_TYPE_BUILDING. - // - // When true, m_sortedFeatures contains only features retrieved from - // search index by m_subQuery, and it's necessary for Geocoder to - // perform additional work to retrieve features matching by house - // number. - bool m_hasDelayedFeatures; - - bool m_lastTokenIsPrefix; -}; - -string DebugPrint(FeaturesLayer const & layer); -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer_matcher.cpp b/search/v2/features_layer_matcher.cpp deleted file mode 100644 index e3134e838f..0000000000 --- a/search/v2/features_layer_matcher.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "search/v2/features_layer_matcher.hpp" - -#include "search/reverse_geocoder.hpp" -#include "search/v2/house_to_street_table.hpp" - -#include "indexer/scales.hpp" - -#include "base/assert.hpp" - -namespace search -{ -namespace v2 -{ - -/// Max distance from house to street where we do search matching -/// even if there is no exact street written for this house. 
-int constexpr kMaxApproxStreetDistanceM = 100; - -FeaturesLayerMatcher::FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable) - : m_context(nullptr) - , m_postcodes(nullptr) - , m_reverseGeocoder(index) - , m_nearbyStreetsCache("FeatureToNearbyStreets") - , m_matchingStreetsCache("BuildingToStreet") - , m_loader(scales::GetUpperScale(), ReverseGeocoder::kLookupRadiusM) - , m_cancellable(cancellable) -{ -} - -void FeaturesLayerMatcher::SetContext(MwmContext * context) -{ - ASSERT(context, ()); - if (m_context == context) - return; - - m_context = context; - m_loader.SetContext(context); -} - -void FeaturesLayerMatcher::SetPostcodes(coding::CompressedBitVector const * postcodes) -{ - m_postcodes = postcodes; -} - -void FeaturesLayerMatcher::OnQueryFinished() -{ - m_nearbyStreetsCache.ClearIfNeeded(); - m_matchingStreetsCache.ClearIfNeeded(); - m_loader.OnQueryFinished(); -} - -uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId) -{ - FeatureType feature; - return GetMatchingStreetImpl(houseId, feature); -} - -uint32_t FeaturesLayerMatcher::GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature) -{ - return GetMatchingStreetImpl(houseId, houseFeature); -} - -FeaturesLayerMatcher::TStreets const & -FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId) -{ - FeatureType feature; - return GetNearbyStreetsImpl(featureId, feature); -} - -FeaturesLayerMatcher::TStreets const & -FeaturesLayerMatcher::GetNearbyStreets(uint32_t featureId, FeatureType & feature) -{ - return GetNearbyStreetsImpl(featureId, feature); -} - -FeaturesLayerMatcher::TStreets const & -FeaturesLayerMatcher::GetNearbyStreetsImpl(uint32_t featureId, FeatureType & feature) -{ - auto entry = m_nearbyStreetsCache.Get(featureId); - if (!entry.second) - return entry.first; - - if (!feature.GetID().IsValid()) - GetByIndex(featureId, feature); - - auto & streets = entry.first; - m_reverseGeocoder.GetNearbyStreets(feature, streets); - for (size_t i = 0; i < 
streets.size(); ++i) - { - if (streets[i].m_distanceMeters > ReverseGeocoder::kLookupRadiusM) - { - streets.resize(i); - break; - } - } - - return streets; -} - -uint32_t FeaturesLayerMatcher::GetMatchingStreetImpl(uint32_t houseId, FeatureType & houseFeature) -{ - // Check if this feature is modified - the logic will be different. - string streetName; - bool const edited = osm::Editor::Instance().GetEditedFeatureStreet(houseFeature.GetID(), streetName); - - // Check the cached result value. - auto entry = m_matchingStreetsCache.Get(houseId); - if (!edited && !entry.second) - return entry.first; - - // Load feature if needed. - if (!houseFeature.GetID().IsValid()) - GetByIndex(houseId, houseFeature); - - // Get nearby streets and calculate the resulting index. - auto const & streets = GetNearbyStreets(houseId, houseFeature); - uint32_t & result = entry.first; - result = kInvalidId; - - if (edited) - { - auto const ret = find_if(streets.begin(), streets.end(), [&streetName](TStreet const & st) - { - return st.m_name == streetName; - }); - if (ret != streets.end()) - result = ret->m_id.m_index; - } - else - { - uint32_t index; - if (m_context->GetStreetIndex(houseId, index) && index < streets.size()) - result = streets[index].m_id.m_index; - } - - // If there is no saved street for feature, assume that it's a nearest street if it's too close. 
- if (result == kInvalidId && - !streets.empty() && - streets[0].m_distanceMeters < kMaxApproxStreetDistanceM) - { - result = streets[0].m_id.m_index; - } - - return result; -} - -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer_matcher.hpp b/search/v2/features_layer_matcher.hpp deleted file mode 100644 index 39e505a7ce..0000000000 --- a/search/v2/features_layer_matcher.hpp +++ /dev/null @@ -1,372 +0,0 @@ -#pragma once - -#include "search/cancel_exception.hpp" -#include "search/reverse_geocoder.hpp" -#include "search/v2/features_layer.hpp" -#include "search/v2/house_numbers_matcher.hpp" -#include "search/v2/mwm_context.hpp" -#include "search/v2/search_model.hpp" -#include "search/v2/street_vicinity_loader.hpp" - -#include "indexer/feature.hpp" -#include "indexer/feature_algo.hpp" -#include "indexer/feature_impl.hpp" -#include "indexer/features_vector.hpp" -#include "indexer/ftypes_matcher.hpp" -#include "indexer/mwm_set.hpp" - -#include "geometry/mercator.hpp" -#include "geometry/point2d.hpp" -#include "geometry/rect2d.hpp" - -#include "coding/compressed_bit_vector.hpp" - -#include "base/cancellable.hpp" -#include "base/logging.hpp" -#include "base/macros.hpp" -#include "base/stl_helpers.hpp" -#include "base/string_utils.hpp" - -#include "std/algorithm.hpp" -#include "std/bind.hpp" -#include "std/limits.hpp" -#include "std/unordered_map.hpp" -#include "std/vector.hpp" - -class Index; - -namespace search -{ -namespace v2 -{ -// This class performs pairwise intersection between two layers of -// features, where the first (child) layer is geographically smaller -// than the second (parent) one. It emits all pairs -// (feature-from-child-layer, feature-from-parent-layer) of matching -// features, where feature-from-child-layer belongs-to -// feature-from-parent-layer. 
Belongs-to is a partial relation on -// features, and has different meaning for different search classes: -// -// * BUILDING belongs-to STREET iff the building is located on the street; -// * BUILDING belongs-to CITY iff the building is located in the city; -// * POI belongs-to BUILDING iff the poi is (roughly) located near or inside the building; -// * STREET belongs-to CITY iff the street is (roughly) located in the city; -// * etc. -// -// NOTE: this class *IS NOT* thread-safe. -class FeaturesLayerMatcher -{ -public: - static uint32_t const kInvalidId = numeric_limits::max(); - static int constexpr kBuildingRadiusMeters = 50; - static int constexpr kStreetRadiusMeters = 100; - - FeaturesLayerMatcher(Index & index, my::Cancellable const & cancellable); - void SetContext(MwmContext * context); - void SetPostcodes(coding::CompressedBitVector const * postcodes); - - template - void Match(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) - { - if (child.m_type >= parent.m_type) - return; - switch (parent.m_type) - { - case SearchModel::SEARCH_TYPE_POI: - case SearchModel::SEARCH_TYPE_CITY: - case SearchModel::SEARCH_TYPE_VILLAGE: - case SearchModel::SEARCH_TYPE_STATE: - case SearchModel::SEARCH_TYPE_COUNTRY: - case SearchModel::SEARCH_TYPE_UNCLASSIFIED: - case SearchModel::SEARCH_TYPE_COUNT: - ASSERT(false, ("Invalid parent layer type:", parent.m_type)); - break; - case SearchModel::SEARCH_TYPE_BUILDING: - ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); - MatchPOIsWithBuildings(child, parent, forward(fn)); - break; - case SearchModel::SEARCH_TYPE_STREET: - ASSERT(child.m_type == SearchModel::SEARCH_TYPE_POI || - child.m_type == SearchModel::SEARCH_TYPE_BUILDING, - ("Invalid child layer type:", child.m_type)); - if (child.m_type == SearchModel::SEARCH_TYPE_POI) - MatchPOIsWithStreets(child, parent, forward(fn)); - else - MatchBuildingsWithStreets(child, parent, forward(fn)); - break; - } - } - - void OnQueryFinished(); - -private: - 
template - void MatchPOIsWithBuildings(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) - { - // Following code initially loads centers of POIs and then, for - // each building, tries to find all POIs located at distance less - // than kBuildingRadiusMeters. - - ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); - ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_BUILDING, ()); - - auto const & pois = *child.m_sortedFeatures; - auto const & buildings = *parent.m_sortedFeatures; - - BailIfCancelled(m_cancellable); - - vector poiCenters(pois.size()); - - size_t const numPOIs = pois.size(); - vector isPOIProcessed(numPOIs); - size_t processedPOIs = 0; - - for (size_t i = 0; i < pois.size(); ++i) - { - FeatureType poiFt; - GetByIndex(pois[i], poiFt); - poiCenters[i] = feature::GetCenter(poiFt, FeatureType::WORST_GEOMETRY); - } - - for (size_t i = 0; i < buildings.size() && processedPOIs != numPOIs; ++i) - { - BailIfCancelled(m_cancellable); - - FeatureType buildingFt; - GetByIndex(buildings[i], buildingFt); - - for (size_t j = 0; j < pois.size(); ++j) - { - if (isPOIProcessed[j]) - continue; - - double const distMeters = feature::GetMinDistanceMeters(buildingFt, poiCenters[j]); - if (distMeters <= kBuildingRadiusMeters) - { - fn(pois[j], buildings[i]); - isPOIProcessed[j] = true; - ++processedPOIs; - } - } - } - - if (!parent.m_hasDelayedFeatures) - return; - - // |buildings| doesn't contain buildings matching by house number, - // so following code reads buildings in POIs vicinities and checks - // house numbers. 
- vector queryParses; - ParseQuery(parent.m_subQuery, parent.m_lastTokenIsPrefix, queryParses); - if (queryParses.empty()) - return; - - for (size_t i = 0; i < pois.size(); ++i) - { - m_context->ForEachFeature( - MercatorBounds::RectByCenterXYAndSizeInMeters(poiCenters[i], kBuildingRadiusMeters), - [&](FeatureType & ft) - { - if (m_postcodes && !m_postcodes->GetBit(ft.GetID().m_index)) - return; - if (HouseNumbersMatch(strings::MakeUniString(ft.GetHouseNumber()), queryParses)) - { - double const distanceM = MercatorBounds::DistanceOnEarth(feature::GetCenter(ft), poiCenters[i]); - if (distanceM < kBuildingRadiusMeters) - fn(pois[i], ft.GetID().m_index); - } - }); - } - } - - template - void MatchPOIsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, TFn && fn) - { - ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_POI, ()); - ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); - - auto const & pois = *child.m_sortedFeatures; - auto const & streets = *parent.m_sortedFeatures; - - // When the number of POIs is less than the number of STREETs, - // it's faster to check nearby streets for POIs. 
- if (pois.size() < streets.size()) - { - for (uint32_t poiId : pois) - { - for (auto const & street : GetNearbyStreets(poiId)) - { - if (street.m_distanceMeters > kStreetRadiusMeters) - break; - - uint32_t const streetId = street.m_id.m_index; - if (binary_search(streets.begin(), streets.end(), streetId)) - fn(poiId, streetId); - } - } - return; - } - - for (uint32_t streetId : streets) - { - BailIfCancelled(m_cancellable); - m_loader.ForEachInVicinity(streetId, pois, kStreetRadiusMeters, - bind(fn, _1, streetId)); - } - } - - template - void MatchBuildingsWithStreets(FeaturesLayer const & child, FeaturesLayer const & parent, - TFn && fn) - { - ASSERT_EQUAL(child.m_type, SearchModel::SEARCH_TYPE_BUILDING, ()); - ASSERT_EQUAL(parent.m_type, SearchModel::SEARCH_TYPE_STREET, ()); - - auto const & buildings = *child.m_sortedFeatures; - auto const & streets = *parent.m_sortedFeatures; - - // When all buildings are in |buildings| and the number of - // buildings less than the number of streets, it's probably faster - // to check nearby streets for each building instead of street - // vicinities loading. - if (!child.m_hasDelayedFeatures && buildings.size() < streets.size()) - { - for (uint32_t const houseId : buildings) - { - uint32_t const streetId = GetMatchingStreet(houseId); - if (binary_search(streets.begin(), streets.end(), streetId)) - fn(houseId, streetId); - } - return; - } - - vector queryParses; - ParseQuery(child.m_subQuery, child.m_lastTokenIsPrefix, queryParses); - - uint32_t numFilterInvocations = 0; - auto houseNumberFilter = [&](uint32_t id, FeatureType & feature, bool & loaded) -> bool - { - ++numFilterInvocations; - if ((numFilterInvocations & 0xFF) == 0) - BailIfCancelled(m_cancellable); - - if (binary_search(buildings.begin(), buildings.end(), id)) - return true; - - if (m_postcodes && !m_postcodes->GetBit(id)) - return false; - - // HouseNumbersMatch() calls are expensive, so following code - // tries to reduce the number of calls. 
The most important - // optimization: as first tokens from the house-number part of - // the query and feature's house numbers must be numbers, their - // first symbols must be the same. - - if (!loaded) - { - GetByIndex(id, feature); - loaded = true; - } - - if (!child.m_hasDelayedFeatures) - return false; - - strings::UniString const houseNumber(strings::MakeUniString(feature.GetHouseNumber())); - if (!feature::IsHouseNumber(houseNumber)) - return false; - return HouseNumbersMatch(houseNumber, queryParses); - }; - - unordered_map cache; - auto cachingHouseNumberFilter = [&](uint32_t id, FeatureType & feature, bool & loaded) -> bool - { - auto const it = cache.find(id); - if (it != cache.cend()) - return it->second; - bool const result = houseNumberFilter(id, feature, loaded); - cache[id] = result; - return result; - }; - - ProjectionOnStreet proj; - for (uint32_t streetId : streets) - { - BailIfCancelled(m_cancellable); - StreetVicinityLoader::Street const & street = m_loader.GetStreet(streetId); - if (street.IsEmpty()) - continue; - - auto const & calculator = *street.m_calculator; - - for (uint32_t houseId : street.m_features) - { - FeatureType feature; - bool loaded = false; - if (!cachingHouseNumberFilter(houseId, feature, loaded)) - continue; - - if (!loaded) - GetByIndex(houseId, feature); - - // Best geometry is used here as feature::GetCenter(feature) - // actually modifies internal state of a |feature| by caching - // it's geometry. So, when GetMatchingStreet(houseId, feature) - // is called, high precision geometry is used again to compute - // |feature|'s center, and this is a right behavior as - // house-to-street table was generated by using high-precision - // centers of features. 
- m2::PointD const center = feature::GetCenter(feature); - if (calculator.GetProjection(center, proj) && - proj.m_distMeters <= ReverseGeocoder::kLookupRadiusM && - GetMatchingStreet(houseId, feature) == streetId) - { - fn(houseId, streetId); - } - } - } - } - - // Returns id of a street feature corresponding to a |houseId|, or - // kInvalidId if there're not such street. - uint32_t GetMatchingStreet(uint32_t houseId); - uint32_t GetMatchingStreet(uint32_t houseId, FeatureType & houseFeature); - uint32_t GetMatchingStreetImpl(uint32_t houseId, FeatureType & houseFeature); - - using TStreet = ReverseGeocoder::Street; - using TStreets = vector; - - TStreets const & GetNearbyStreets(uint32_t featureId); - TStreets const & GetNearbyStreets(uint32_t featureId, FeatureType & feature); - TStreets const & GetNearbyStreetsImpl(uint32_t featureId, FeatureType & feature); - - inline void GetByIndex(uint32_t id, FeatureType & ft) const - { - /// @todo Add Cache for feature id -> (point, name / house number). - /// TODO(vng): GetFeature below can return false if feature was deleted by user in the Editor. - /// This code should be fixed to take that into an account. - /// Until we don't show "Delete" button to our users, this code will work correctly. - /// Correct fix would be injection into ForEachInIntervalAndScale, so deleted features will never - /// be emitted and used in other code. - UNUSED_VALUE(m_context->GetFeature(id, ft)); - } - - MwmContext * m_context; - - coding::CompressedBitVector const * m_postcodes; - - ReverseGeocoder m_reverseGeocoder; - - // Cache of streets in a feature's vicinity. All lists in the cache - // are ordered by distance from the corresponding feature. - Cache m_nearbyStreetsCache; - - // Cache of correct streets for buildings. Current search algorithm - // supports only one street for a building, whereas buildings can be - // located on multiple streets. 
- Cache m_matchingStreetsCache; - - StreetVicinityLoader m_loader; - my::Cancellable const & m_cancellable; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer_path_finder.cpp b/search/v2/features_layer_path_finder.cpp deleted file mode 100644 index 0f5ca4e91b..0000000000 --- a/search/v2/features_layer_path_finder.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "search/v2/features_layer_path_finder.hpp" - -#include "search/cancel_exception.hpp" -#include "search/v2/features_layer_matcher.hpp" -#include "search/v2/house_numbers_matcher.hpp" - -#include "indexer/features_vector.hpp" - -#include "base/cancellable.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -using TParentGraph = unordered_map; - -// This function tries to estimate amount of work needed to perform an -// intersection pass on a sequence of layers. -template -uint64_t CalcPassCost(TIt begin, TIt end) -{ - uint64_t cost = 0; - - if (begin == end) - return cost; - - uint64_t reachable = max((*begin)->m_sortedFeatures->size(), size_t(1)); - for (++begin; begin != end; ++begin) - { - uint64_t const layer = max((*begin)->m_sortedFeatures->size(), size_t(1)); - cost += layer * reachable; - reachable = min(reachable, layer); - } - return cost; -} - -uint64_t CalcTopDownPassCost(vector const & layers) -{ - return CalcPassCost(layers.rbegin(), layers.rend()); -} - -uint64_t CalcBottomUpPassCost(vector const & layers) -{ - return CalcPassCost(layers.begin(), layers.end()); -} - -bool LooksLikeHouseNumber(strings::UniString const & query, bool queryIsPrefix) -{ - vector parses; - ParseQuery(query, queryIsPrefix, parses); - for (auto const & parse : parses) - { - if (parse.IsEmpty()) - continue; - if (feature::IsHouseNumber(parse.m_parts.front())) - return true; - } - return false; -} - -bool GetPath(uint32_t id, vector const & layers, TParentGraph const & parent, - IntersectionResult & result) -{ - result.Clear(); - - size_t level = 0; - TParentGraph::const_iterator it; - 
do - { - result.Set(layers[level]->m_type, id); - ++level; - it = parent.find(id); - if (it != parent.cend()) - id = it->second; - } while (level < layers.size() && it != parent.cend()); - return level == layers.size(); -} -} // namespace - -FeaturesLayerPathFinder::FeaturesLayerPathFinder(my::Cancellable const & cancellable) - : m_cancellable(cancellable) -{ -} - -void FeaturesLayerPathFinder::FindReachableVertices(FeaturesLayerMatcher & matcher, - vector const & layers, - vector & results) -{ - if (layers.empty()) - return; - - uint64_t const topDownCost = CalcTopDownPassCost(layers); - uint64_t const bottomUpCost = CalcBottomUpPassCost(layers); - - if (bottomUpCost < topDownCost) - FindReachableVerticesBottomUp(matcher, layers, results); - else - FindReachableVerticesTopDown(matcher, layers, results); -} - -void FeaturesLayerPathFinder::FindReachableVerticesTopDown( - FeaturesLayerMatcher & matcher, vector const & layers, - vector & results) -{ - ASSERT(!layers.empty(), ()); - - vector reachable = *(layers.back()->m_sortedFeatures); - vector buffer; - - TParentGraph parent; - - auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) - { - parent[childFeature] = parentFeature; - buffer.push_back(childFeature); - }; - - for (size_t i = layers.size() - 1; i != 0; --i) - { - BailIfCancelled(m_cancellable); - - if (reachable.empty()) - return; - - FeaturesLayer parent(*layers[i]); - if (i != layers.size() - 1) - my::SortUnique(reachable); - parent.m_sortedFeatures = &reachable; - parent.m_hasDelayedFeatures = false; - - FeaturesLayer child(*layers[i - 1]); - child.m_hasDelayedFeatures = child.m_type == SearchModel::SEARCH_TYPE_BUILDING && - LooksLikeHouseNumber(child.m_subQuery, child.m_lastTokenIsPrefix); - - buffer.clear(); - matcher.Match(child, parent, addEdge); - reachable.swap(buffer); - } - - IntersectionResult result; - for (auto const & id : reachable) - { - if (GetPath(id, layers, parent, result)) - results.push_back(result); - } -} - -void 
FeaturesLayerPathFinder::FindReachableVerticesBottomUp( - FeaturesLayerMatcher & matcher, vector const & layers, - vector & results) -{ - ASSERT(!layers.empty(), ()); - - vector reachable = *(layers.front()->m_sortedFeatures); - vector buffer; - - TParentGraph parent; - - auto addEdge = [&](uint32_t childFeature, uint32_t parentFeature) - { - parent[childFeature] = parentFeature; - buffer.push_back(parentFeature); - }; - - for (size_t i = 0; i + 1 != layers.size(); ++i) - { - BailIfCancelled(m_cancellable); - - if (reachable.empty()) - return; - - FeaturesLayer child(*layers[i]); - if (i != 0) - my::SortUnique(reachable); - child.m_sortedFeatures = &reachable; - child.m_hasDelayedFeatures = false; - - FeaturesLayer parent(*layers[i + 1]); - parent.m_hasDelayedFeatures = - parent.m_type == SearchModel::SEARCH_TYPE_BUILDING && - LooksLikeHouseNumber(parent.m_subQuery, parent.m_lastTokenIsPrefix); - - buffer.clear(); - matcher.Match(child, parent, addEdge); - reachable.swap(buffer); - } - - IntersectionResult result; - for (auto const & id : *(layers.front()->m_sortedFeatures)) - { - if (GetPath(id, layers, parent, result)) - results.push_back(result); - } -} -} // namespace v2 -} // namespace search diff --git a/search/v2/features_layer_path_finder.hpp b/search/v2/features_layer_path_finder.hpp deleted file mode 100644 index 04684c5ca8..0000000000 --- a/search/v2/features_layer_path_finder.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#pragma once - -#include "search/v2/features_layer.hpp" -#include "search/v2/intersection_result.hpp" - -#include "std/vector.hpp" - -#if defined(DEBUG) -#include "base/logging.hpp" -#include "base/timer.hpp" -#endif // defined(DEBUG) - -class FeaturesVector; -class MwmValue; - -namespace my -{ -class Cancellable; -} - -namespace search -{ -namespace v2 -{ -class FeaturesLayerMatcher; - -// This class is able to find all paths through a layered graph, with -// vertices as features, and edges as pairs of vertices satisfying -// belongs-to 
relation. For more details on belongs-to relation see -// documentation for FeaturesLayerMatcher. -// -// In short, this class is able to find all features matching to a -// given interpretation of a search query. -// -// NOTE: this class *IS* thread-safe. -class FeaturesLayerPathFinder -{ -public: - FeaturesLayerPathFinder(my::Cancellable const & cancellable); - - template - void ForEachReachableVertex(FeaturesLayerMatcher & matcher, - vector const & layers, TFn && fn) - { - if (layers.empty()) - return; - - // TODO (@y): remove following code as soon as - // FindReachableVertices() will work fast for most cases - // (significantly less than 1 second). -#if defined(DEBUG) - for (auto const * layer : layers) - LOG(LINFO, (DebugPrint(*layer))); - my::Timer timer; -#endif // defined(DEBUG) - - vector results; - FindReachableVertices(matcher, layers, results); - -#if defined(DEBUG) - LOG(LINFO, ("Found:", results.size(), "elapsed:", timer.ElapsedSeconds(), "seconds")); -#endif // defined(DEBUG) - - for_each(results.begin(), results.end(), forward(fn)); - } - -private: - void FindReachableVertices(FeaturesLayerMatcher & matcher, - vector const & layers, - vector & results); - - // Tries to find all |reachable| features from the lowest layer in a - // high level -> low level pass. - void FindReachableVerticesTopDown(FeaturesLayerMatcher & matcher, - vector const & layers, - vector & results); - - // Tries to find all |reachable| features from the lowest layer in a - // low level -> high level pass. 
- void FindReachableVerticesBottomUp(FeaturesLayerMatcher & matcher, - vector const & layers, - vector & results); - - my::Cancellable const & m_cancellable; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/geocoder.cpp b/search/v2/geocoder.cpp deleted file mode 100644 index a08cac2575..0000000000 --- a/search/v2/geocoder.cpp +++ /dev/null @@ -1,1607 +0,0 @@ -#include "search/v2/geocoder.hpp" - -#include "search/dummy_rank_table.hpp" -#include "search/processor.hpp" -#include "search/retrieval.hpp" -#include "search/v2/cbv_ptr.hpp" -#include "search/v2/features_filter.hpp" -#include "search/v2/features_layer_matcher.hpp" -#include "search/v2/locality_scorer.hpp" -#include "search/v2/token_slice.hpp" - -#include "indexer/classificator.hpp" -#include "indexer/feature_decl.hpp" -#include "indexer/feature_impl.hpp" -#include "indexer/ftypes_matcher.hpp" -#include "indexer/index.hpp" -#include "indexer/postcodes_matcher.hpp" -#include "indexer/rank_table.hpp" -#include "indexer/search_delimiters.hpp" -#include "indexer/search_string_utils.hpp" - -#include "storage/country_info_getter.hpp" - -#include "coding/multilang_utf8_string.hpp" - -#include "platform/preferred_languages.hpp" - -#include "geometry/mercator.hpp" - -#include "base/assert.hpp" -#include "base/logging.hpp" -#include "base/macros.hpp" -#include "base/scope_guard.hpp" -#include "base/stl_add.hpp" -#include "base/stl_helpers.hpp" - -#include "std/algorithm.hpp" -#include "std/bind.hpp" -#include "std/iterator.hpp" -#include "std/sstream.hpp" -#include "std/target_os.hpp" -#include "std/transform_iterator.hpp" - -#include "defines.hpp" - -#if defined(DEBUG) -#include "base/timer.hpp" -#endif - -#if defined(USE_GOOGLE_PROFILER) && defined(OMIM_OS_LINUX) -#include -#endif - -namespace search -{ -namespace v2 -{ -namespace -{ -size_t constexpr kMaxNumCities = 5; -size_t constexpr kMaxNumStates = 5; -size_t constexpr kMaxNumVillages = 5; -size_t constexpr kMaxNumCountries = 5; - -// This 
constant limits number of localities that will be extracted -// from World map. Villages are not counted here as they're not -// included into World map. -// @vng Set this value to possible maximum. -size_t const kMaxNumLocalities = LocalityScorer::kDefaultReadLimit; - -size_t constexpr kPivotRectsCacheSize = 10; -size_t constexpr kLocalityRectsCacheSize = 10; - -strings::UniString const kUniSpace(strings::MakeUniString(" ")); - -struct ScopedMarkTokens -{ - ScopedMarkTokens(vector & usedTokens, size_t from, size_t to) - : m_usedTokens(usedTokens), m_from(from), m_to(to) - { - ASSERT_LESS_OR_EQUAL(m_from, m_to, ()); - ASSERT_LESS_OR_EQUAL(m_to, m_usedTokens.size(), ()); -#if defined(DEBUG) - for (size_t i = m_from; i != m_to; ++i) - ASSERT(!m_usedTokens[i], (i)); -#endif - fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, true /* used */); - } - - ~ScopedMarkTokens() - { - fill(m_usedTokens.begin() + m_from, m_usedTokens.begin() + m_to, false /* used */); - } - - vector & m_usedTokens; - size_t const m_from; - size_t const m_to; -}; - -class LazyRankTable : public RankTable -{ - public: - LazyRankTable(MwmValue const & value) : m_value(value) {} - - uint8_t Get(uint64_t i) const override - { - EnsureTableLoaded(); - return m_table->Get(i); - } - - uint64_t Size() const override - { - EnsureTableLoaded(); - return m_table->Size(); - } - - RankTable::Version GetVersion() const override - { - EnsureTableLoaded(); - return m_table->GetVersion(); - } - - void Serialize(Writer & writer, bool preserveHostEndiannes) override - { - EnsureTableLoaded(); - m_table->Serialize(writer, preserveHostEndiannes); - } - - private: - void EnsureTableLoaded() const - { - if (m_table) - return; - m_table = search::RankTable::Load(m_value.m_cont); - if (!m_table) - m_table = make_unique(); - } - - MwmValue const & m_value; - mutable unique_ptr m_table; -}; - -class LocalityScorerDelegate : public LocalityScorer::Delegate -{ -public: - LocalityScorerDelegate(MwmContext 
const & context, Geocoder::Params const & params) - : m_context(context), m_params(params), m_ranks(m_context.m_value) - { - } - - // LocalityScorer::Delegate overrides: - void GetNames(uint32_t featureId, vector & names) const override - { - FeatureType ft; - if (!m_context.GetFeature(featureId, ft)) - return; - for (auto const & lang : m_params.m_langs) - { - string name; - if (ft.GetName(lang, name)) - names.push_back(name); - } - } - - uint8_t GetRank(uint32_t featureId) const override { return m_ranks.Get(featureId); } - -private: - MwmContext const & m_context; - Geocoder::Params const & m_params; - LazyRankTable m_ranks; -}; - -class StreetCategories -{ -public: - static StreetCategories const & Instance() - { - static StreetCategories const instance; - return instance; - } - - template - void ForEach(TFn && fn) const - { - for_each(m_categories.cbegin(), m_categories.cend(), forward(fn)); - } - - bool Contains(strings::UniString const & category) const - { - return binary_search(m_categories.cbegin(), m_categories.cend(), category); - } - - vector const & GetCategories() const { return m_categories; } - -private: - StreetCategories() - { - auto const & classificator = classif(); - auto addCategory = [&](uint32_t type) - { - uint32_t const index = classificator.GetIndexForType(type); - m_categories.push_back(FeatureTypeToString(index)); - }; - ftypes::IsStreetChecker::Instance().ForEachType(addCategory); - sort(m_categories.begin(), m_categories.end()); - } - - vector m_categories; - - DISALLOW_COPY_AND_MOVE(StreetCategories); -}; - -void JoinQueryTokens(QueryParams const & params, size_t curToken, size_t endToken, - strings::UniString const & sep, strings::UniString & res) -{ - ASSERT_LESS_OR_EQUAL(curToken, endToken, ()); - for (size_t i = curToken; i < endToken; ++i) - { - if (i < params.m_tokens.size()) - { - res.append(params.m_tokens[i].front()); - } - else - { - CHECK_EQUAL(i, params.m_tokens.size(), ()); - CHECK(!params.m_prefixTokens.empty(), ()); - 
res.append(params.m_prefixTokens.front()); - } - - if (i + 1 != endToken) - res.append(sep); - } -} - -void GetAffiliationName(FeatureType const & ft, string & name) -{ - VERIFY(ft.GetName(StringUtf8Multilang::kDefaultCode, name), ()); - ASSERT(!name.empty(), ()); -} - -// todo(@m) Refactor at least here, or even at indexer/ftypes_matcher.hpp. -vector GetVillageCategories() -{ - vector categories; - - auto const & classificator = classif(); - auto addCategory = [&](uint32_t type) - { - uint32_t const index = classificator.GetIndexForType(type); - categories.push_back(FeatureTypeToString(index)); - }; - ftypes::IsVillageChecker::Instance().ForEachType(addCategory); - - return categories; -} - -bool HasSearchIndex(MwmValue const & value) { return value.m_cont.IsExist(SEARCH_INDEX_FILE_TAG); } - -bool HasGeometryIndex(MwmValue & value) { return value.m_cont.IsExist(INDEX_FILE_TAG); } - -MwmSet::MwmHandle FindWorld(Index & index, vector> const & infos) -{ - MwmSet::MwmHandle handle; - for (auto const & info : infos) - { - if (info->GetType() == MwmInfo::WORLD) - { - handle = index.GetMwmHandleById(MwmSet::MwmId(info)); - break; - } - } - return handle; -} - -strings::UniString AsciiToUniString(char const * s) -{ - return strings::UniString(s, s + strlen(s)); -} - -bool IsStopWord(strings::UniString const & s) -{ - /// @todo Get all common used stop words and factor out this array into - /// search_string_utils.cpp module for example. - static char const * arr[] = { "a", "de", "da", "la" }; - - static set const kStopWords( - make_transform_iterator(arr, &AsciiToUniString), - make_transform_iterator(arr + ARRAY_SIZE(arr), &AsciiToUniString)); - - return kStopWords.count(s) > 0; -} - -double Area(m2::RectD const & rect) -{ - return rect.IsValid() ? rect.SizeX() * rect.SizeY() : 0; -} - -// Computes an average similaty between |rect| and |pivot|. By -// similarity between two rects we mean a fraction of the area of -// rects intersection to the area of the smallest rect. 
-double GetSimilarity(m2::RectD const & pivot, m2::RectD const & rect) -{ - double const area = min(Area(pivot), Area(rect)); - if (area == 0.0) - return 0.0; - m2::RectD p = pivot; - if (!p.Intersect(rect)) - return 0.0; - return Area(p) / area; -} - -// Returns shortest distance from the |pivot| to the |rect|. -// -// *NOTE* calculations below are incorrect, because shortest distance -// on the Mercator's plane is not the same as shortest distance on the -// Earth. But we assume that it is not an issue here. -double GetDistanceMeters(m2::PointD const & pivot, m2::RectD const & rect) -{ - if (rect.IsPointInside(pivot)) - return 0.0; - - double distance = numeric_limits::max(); - m2::ProjectionToSection proj; - - proj.SetBounds(rect.LeftTop(), rect.RightTop()); - distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); - - proj.SetBounds(rect.LeftBottom(), rect.RightBottom()); - distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); - - proj.SetBounds(rect.LeftTop(), rect.LeftBottom()); - distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); - - proj.SetBounds(rect.RightTop(), rect.RightBottom()); - distance = min(distance, MercatorBounds::DistanceOnEarth(pivot, proj(pivot))); - - return distance; -} - -struct KeyedMwmInfo -{ - KeyedMwmInfo(shared_ptr const & info, m2::RectD const & pivot) : m_info(info) - { - auto const & rect = m_info->m_limitRect; - m_similarity = GetSimilarity(pivot, rect); - m_distance = GetDistanceMeters(pivot.Center(), rect); - } - - bool operator<(KeyedMwmInfo const & rhs) const - { - if (m_distance == 0.0 && rhs.m_distance == 0.0) - return m_similarity > rhs.m_similarity; - return m_distance < rhs.m_distance; - } - - shared_ptr m_info; - double m_similarity; - double m_distance; -}; - -// Reorders maps in a way that prefix consists of maps intersecting -// with pivot, suffix consists of all other maps ordered by minimum -// distance from pivot. 
Returns number of maps in prefix. -size_t OrderCountries(m2::RectD const & pivot, vector> & infos) -{ - // TODO (@y): remove this if crashes in this function - // disappear. Otherwise, remove null infos and re-check MwmSet - // again. - for (auto const & info : infos) - { - CHECK(info.get(), - ("MwmSet invariant violated. Please, contact @y if you know how to reproduce this.")); - } - - vector keyedInfos; - keyedInfos.reserve(infos.size()); - for (auto const & info : infos) - keyedInfos.emplace_back(info, pivot); - sort(keyedInfos.begin(), keyedInfos.end()); - - infos.clear(); - for (auto const & info : keyedInfos) - infos.emplace_back(info.m_info); - - auto intersects = [&](shared_ptr const & info) -> bool - { - return pivot.IsIntersect(info->m_limitRect); - }; - - auto const sep = stable_partition(infos.begin(), infos.end(), intersects); - return distance(infos.begin(), sep); -} - -// Performs pairwise union of adjacent bit vectors -// until at most one bit vector is left. -void UniteCBVs(vector> & cbvs) -{ - while (cbvs.size() > 1) - { - size_t i = 0; - size_t j = 0; - for (; j + 1 < cbvs.size(); j += 2) - cbvs[i++] = coding::CompressedBitVector::Union(*cbvs[j], *cbvs[j + 1]); - for (; j < cbvs.size(); ++j) - cbvs[i++] = move(cbvs[j]); - cbvs.resize(i); - } -} -} // namespace - -// Geocoder::Params -------------------------------------------------------------------------------- -Geocoder::Params::Params() : m_mode(Mode::Everywhere), m_accuratePivotCenter(0, 0) {} - -// Geocoder::Geocoder ------------------------------------------------------------------------------ -Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter) - : m_index(index) - , m_infoGetter(infoGetter) - , m_numTokens(0) - , m_model(SearchModel::Instance()) - , m_pivotRectsCache(kPivotRectsCacheSize, static_cast(*this), - Processor::kMaxViewportRadiusM) - , m_localityRectsCache(kLocalityRectsCacheSize, static_cast(*this)) - , m_pivotFeatures(index) - , m_villages(nullptr) 
- , m_filter(nullptr) - , m_matcher(nullptr) - , m_finder(static_cast(*this)) - , m_lastMatchedRegion(nullptr) - , m_preRanker(nullptr) -{ -} - -Geocoder::~Geocoder() {} - -void Geocoder::SetParams(Params const & params) -{ - m_params = params; - - // Filter stop words. - if (m_params.m_tokens.size() > 1) - { - for (auto & v : m_params.m_tokens) - my::EraseIf(v, &IsStopWord); - - auto & v = m_params.m_tokens; - my::EraseIf(v, mem_fn(&Params::TSynonymsVector::empty)); - - // If all tokens are stop words - give up. - if (m_params.m_tokens.empty()) - m_params = params; - } - - m_retrievalParams = m_params; - m_numTokens = m_params.m_tokens.size(); - if (!m_params.m_prefixTokens.empty()) - ++m_numTokens; - - // Remove all category synonyms for streets, as they're extracted - // individually via LoadStreets. - for (size_t i = 0; i < m_numTokens; ++i) - { - auto & synonyms = m_params.GetTokens(i); - ASSERT(!synonyms.empty(), ()); - - if (IsStreetSynonym(synonyms.front())) - { - auto b = synonyms.begin(); - auto e = synonyms.end(); - auto const & categories = StreetCategories::Instance(); - synonyms.erase(remove_if(b + 1, e, bind(&StreetCategories::Contains, cref(categories), _1)), - e); - } - } - - LOG(LDEBUG, ("Languages =", m_params.m_langs)); -} - -void Geocoder::GoEverywhere(PreRanker & preRanker) -{ - // TODO (@y): remove following code as soon as Geocoder::Go() will - // work fast for most cases (significantly less than 1 second). 
-#if defined(DEBUG) - my::Timer timer; - MY_SCOPE_GUARD(printDuration, [&timer]() - { - LOG(LINFO, ("Total geocoding time:", timer.ElapsedSeconds(), "seconds")); - }); -#endif -#if defined(USE_GOOGLE_PROFILER) && defined(OMIM_OS_LINUX) - ProfilerStart("/tmp/geocoder.prof"); - MY_SCOPE_GUARD(stopProfiler, &ProfilerStop); -#endif - - if (m_numTokens == 0) - return; - - vector> infos; - m_index.GetMwmsInfo(infos); - - GoImpl(preRanker, infos, false /* inViewport */); -} - -void Geocoder::GoInViewport(PreRanker & preRanker) -{ - if (m_numTokens == 0) - return; - - vector> infos; - m_index.GetMwmsInfo(infos); - - my::EraseIf(infos, [this](shared_ptr const & info) - { - return !m_params.m_pivot.IsIntersect(info->m_limitRect); - }); - - GoImpl(preRanker, infos, true /* inViewport */); -} - -void Geocoder::GoImpl(PreRanker & preRanker, vector> & infos, bool inViewport) -{ - m_preRanker = &preRanker; - - try - { - // Tries to find world and fill localities table. - { - m_cities.clear(); - for (auto & regions : m_regions) - regions.clear(); - MwmSet::MwmHandle handle = FindWorld(m_index, infos); - if (handle.IsAlive()) - { - auto & value = *handle.GetValue(); - - // All MwmIds are unique during the application lifetime, so - // it's ok to save MwmId. - m_worldId = handle.GetId(); - m_context = make_unique(move(handle)); - if (HasSearchIndex(value)) - { - PrepareAddressFeatures(); - FillLocalitiesTable(); - } - m_context.reset(); - } - } - - // Orders countries by distance from viewport center and position. - // This order is used during MatchAroundPivot() stage - we try to - // match as many features as possible without trying to match - // locality (COUNTRY or CITY), and only when there are too many - // features, viewport and position vicinity filter is used. 
To - // prevent full search in all mwms, we need to limit somehow a set - // of mwms for MatchAroundPivot(), so, we always call - // MatchAroundPivot() on maps intersecting with pivot rect, other - // maps are ordered by distance from pivot, and we stop to call - // MatchAroundPivot() on them as soon as at least one feature is - // found. - size_t const numIntersectingMaps = OrderCountries(m_params.m_pivot, infos); - - // MatchAroundPivot() should always be matched in mwms - // intersecting with position and viewport. - auto const & cancellable = static_cast(*this); - auto processCountry = [&](size_t index, unique_ptr context) - { - ASSERT(context, ()); - m_context = move(context); - MY_SCOPE_GUARD(cleanup, [&]() - { - LOG(LDEBUG, (m_context->GetName(), "geocoding complete.")); - m_matcher->OnQueryFinished(); - m_matcher = nullptr; - m_context.reset(); - m_addressFeatures.clear(); - m_streets = nullptr; - m_villages = nullptr; - }); - - auto it = m_matchersCache.find(m_context->GetId()); - if (it == m_matchersCache.end()) - { - it = m_matchersCache.insert(make_pair(m_context->GetId(), make_unique( - m_index, cancellable))).first; - } - m_matcher = it->second.get(); - m_matcher->SetContext(m_context.get()); - - PrepareAddressFeatures(); - - coding::CompressedBitVector const * viewportCBV = nullptr; - if (inViewport) - viewportCBV = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); - - if (viewportCBV) - { - for (size_t i = 0; i < m_numTokens; ++i) - { - m_addressFeatures[i] = - coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV); - } - } - - // |m_streets| will be initialized in LimitedSearch() and its - // callees, if needed. 
- m_streets = nullptr; - - m_villages = LoadVillages(*m_context); - - auto citiesFromWorld = m_cities; - FillVillageLocalities(); - MY_SCOPE_GUARD(remove_villages, [&]() - { - m_cities = citiesFromWorld; - }); - - m_usedTokens.assign(m_numTokens, false); - - m_lastMatchedRegion = nullptr; - MatchRegions(REGION_TYPE_COUNTRY); - - if (index < numIntersectingMaps || m_preRanker->IsEmpty()) - MatchAroundPivot(); - }; - - // Iterates through all alive mwms and performs geocoding. - ForEachCountry(infos, processCountry); - } - catch (CancelException & e) - { - } - - // Fill results ranks, as they were missed. - FillMissingFieldsInResults(); -} - -void Geocoder::ClearCaches() -{ - m_pivotRectsCache.Clear(); - m_localityRectsCache.Clear(); - m_pivotFeatures.Clear(); - - m_addressFeatures.clear(); - m_matchersCache.clear(); - m_streetsCache.clear(); - m_villages.reset(); - m_postcodes.Clear(); -} - -void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken) -{ - ASSERT_LESS(curToken, endToken, ()); - ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ()); - - m_retrievalParams.m_tokens.clear(); - m_retrievalParams.m_prefixTokens.clear(); - - // TODO (@y): possibly it's not cheap to copy vectors of strings. - // Profile it, and in case of serious performance loss, refactor - // QueryParams to support subsets of tokens. 
- for (size_t i = curToken; i < endToken; ++i) - { - if (i < m_params.m_tokens.size()) - m_retrievalParams.m_tokens.push_back(m_params.m_tokens[i]); - else - m_retrievalParams.m_prefixTokens = m_params.m_prefixTokens; - } -} - -void Geocoder::PrepareAddressFeatures() -{ - m_addressFeatures.resize(m_numTokens); - for (size_t i = 0; i < m_numTokens; ++i) - { - PrepareRetrievalParams(i, i + 1); - m_addressFeatures[i] = RetrieveAddressFeatures( - m_context->GetId(), m_context->m_value, static_cast(*this), - m_retrievalParams); - ASSERT(m_addressFeatures[i], ()); - } -} - -void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, - FeaturesLayer & layer) -{ - layer.Clear(); - layer.m_type = type; - layer.m_startToken = startToken; - layer.m_endToken = endToken; - - JoinQueryTokens(m_params, layer.m_startToken, layer.m_endToken, kUniSpace /* sep */, - layer.m_subQuery); - layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size()); -} - -void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter, - size_t const maxNumLocalities, - vector & preLocalities) -{ - preLocalities.clear(); - - for (size_t startToken = 0; startToken < m_numTokens; ++startToken) - { - CBVPtr intersection; - intersection.SetFull(); - if (filter) - intersection.Intersect(filter); - intersection.Intersect(m_addressFeatures[startToken].get()); - if (intersection.IsEmpty()) - continue; - - for (size_t endToken = startToken + 1; endToken <= m_numTokens; ++endToken) - { - // Skip locality candidates that match only numbers. 
- if (!m_params.IsNumberTokens(startToken, endToken)) - { - intersection.ForEach([&](uint32_t featureId) - { - Locality l; - l.m_countryId = m_context->GetId(); - l.m_featureId = featureId; - l.m_startToken = startToken; - l.m_endToken = endToken; - preLocalities.push_back(l); - }); - } - - if (endToken < m_numTokens) - { - intersection.Intersect(m_addressFeatures[endToken].get()); - if (intersection.IsEmpty()) - break; - } - } - } - - LocalityScorerDelegate delegate(*m_context, m_params); - LocalityScorer scorer(m_params, delegate); - scorer.GetTopLocalities(maxNumLocalities, preLocalities); -} - -void Geocoder::FillLocalitiesTable() -{ - vector preLocalities; - FillLocalityCandidates(nullptr, kMaxNumLocalities, preLocalities); - - size_t numCities = 0; - size_t numStates = 0; - size_t numCountries = 0; - for (auto & l : preLocalities) - { - FeatureType ft; - m_context->GetFeature(l.m_featureId, ft); - - auto addRegionMaps = [&](size_t & count, size_t maxCount, RegionType type) - { - if (count < maxCount && ft.GetFeatureType() == feature::GEOM_POINT) - { - Region region(l, type); - region.m_center = ft.GetCenter(); - - string name; - GetAffiliationName(ft, region.m_enName); - LOG(LDEBUG, ("Region =", region.m_enName)); - - m_infoGetter.GetMatchedRegions(region.m_enName, region.m_ids); - if (region.m_ids.empty()) - LOG(LWARNING, ("Maps not found for region", region.m_enName)); - - ++count; - m_regions[type][make_pair(l.m_startToken, l.m_endToken)].push_back(region); - } - }; - - switch (m_model.GetSearchType(ft)) - { - case SearchModel::SEARCH_TYPE_CITY: - { - if (numCities < kMaxNumCities && ft.GetFeatureType() == feature::GEOM_POINT) - { - ++numCities; - - auto const center = feature::GetCenter(ft); - auto const population = ft.GetPopulation(); - auto const radius = ftypes::GetRadiusByPopulation(population); - - City city(l, SearchModel::SEARCH_TYPE_CITY); - city.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius); - -#if defined(DEBUG) - 
ft.GetName(StringUtf8Multilang::kDefaultCode, city.m_defaultName); - LOG(LDEBUG, ("City =", city.m_defaultName, radius)); -#endif - - m_cities[{l.m_startToken, l.m_endToken}].push_back(city); - } - break; - } - case SearchModel::SEARCH_TYPE_STATE: - { - addRegionMaps(numStates, kMaxNumStates, REGION_TYPE_STATE); - break; - } - case SearchModel::SEARCH_TYPE_COUNTRY: - { - addRegionMaps(numCountries, kMaxNumCountries, REGION_TYPE_COUNTRY); - break; - } - default: - break; - } - } -} - -void Geocoder::FillVillageLocalities() -{ - vector preLocalities; - FillLocalityCandidates(m_villages.get(), kMaxNumVillages, preLocalities); - - size_t numVillages = 0; - - for (auto & l : preLocalities) - { - FeatureType ft; - m_context->GetFeature(l.m_featureId, ft); - - if (m_model.GetSearchType(ft) != SearchModel::SEARCH_TYPE_VILLAGE) - continue; - - // We accept lines and areas as village features. - auto const center = feature::GetCenter(ft); - ++numVillages; - City village(l, SearchModel::SEARCH_TYPE_VILLAGE); - - auto const population = ft.GetPopulation(); - double const radius = ftypes::GetRadiusByPopulation(population); - village.m_rect = MercatorBounds::RectByCenterXYAndSizeInMeters(center, radius); - -#if defined(DEBUG) - ft.GetName(StringUtf8Multilang::kDefaultCode, village.m_defaultName); - LOG(LDEBUG, ("Village =", village.m_defaultName)); -#endif - - m_cities[{l.m_startToken, l.m_endToken}].push_back(village); - if (numVillages >= kMaxNumVillages) - break; - } -} - -template -void Geocoder::ForEachCountry(vector> const & infos, TFn && fn) -{ - for (size_t i = 0; i < infos.size(); ++i) - { - auto const & info = infos[i]; - if (info->GetType() != MwmInfo::COUNTRY && info->GetType() != MwmInfo::WORLD) - continue; - if (info->GetType() == MwmInfo::COUNTRY && m_params.m_mode == Mode::World) - continue; - - auto handle = m_index.GetMwmHandleById(MwmSet::MwmId(info)); - if (!handle.IsAlive()) - continue; - auto & value = *handle.GetValue(); - if (!HasSearchIndex(value) || 
!HasGeometryIndex(value)) - continue; - fn(i, make_unique(move(handle))); - } -} - -void Geocoder::MatchRegions(RegionType type) -{ - switch (type) - { - case REGION_TYPE_STATE: - // Tries to skip state matching and go to cities matching. - // Then, performs states matching. - MatchCities(); - break; - case REGION_TYPE_COUNTRY: - // Tries to skip country matching and go to states matching. - // Then, performs countries matching. - MatchRegions(REGION_TYPE_STATE); - break; - case REGION_TYPE_COUNT: - ASSERT(false, ("Invalid region type.")); - return; - } - - auto const & regions = m_regions[type]; - - auto const & fileName = m_context->GetName(); - bool const isWorld = m_context->GetInfo()->GetType() == MwmInfo::WORLD; - - // Try to match regions. - for (auto const & p : regions) - { - BailIfCancelled(); - - size_t const startToken = p.first.first; - size_t const endToken = p.first.second; - if (HasUsedTokensInRange(startToken, endToken)) - continue; - - for (auto const & region : p.second) - { - bool matches = false; - - // On the World.mwm we need to check that CITY - STATE - COUNTRY - // form a nested sequence. Otherwise, as mwm borders do not - // intersect state or country boundaries, it's enough to check - // mwm that is currently being processed belongs to region. - if (isWorld) - { - matches = m_lastMatchedRegion == nullptr || - m_infoGetter.IsBelongToRegions(region.m_center, m_lastMatchedRegion->m_ids); - } - else - { - matches = m_infoGetter.IsBelongToRegions(fileName, region.m_ids); - } - - if (!matches) - continue; - - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - if (AllTokensUsed()) - { - // Region matches to search query, we need to emit it as is. 
- EmitResult(region, startToken, endToken); - continue; - } - - m_lastMatchedRegion = ®ion; - MY_SCOPE_GUARD(cleanup, [this]() { m_lastMatchedRegion = nullptr; }); - switch (type) - { - case REGION_TYPE_STATE: - MatchCities(); - break; - case REGION_TYPE_COUNTRY: - MatchRegions(REGION_TYPE_STATE); - break; - case REGION_TYPE_COUNT: - ASSERT(false, ("Invalid region type.")); - break; - } - } - } -} - -void Geocoder::MatchCities() -{ - // Localities are ordered my (m_startToken, m_endToken) pairs. - for (auto const & p : m_cities) - { - size_t const startToken = p.first.first; - size_t const endToken = p.first.second; - if (HasUsedTokensInRange(startToken, endToken)) - continue; - - for (auto const & city : p.second) - { - BailIfCancelled(); - - if (m_lastMatchedRegion && - !m_infoGetter.IsBelongToRegions(city.m_rect.Center(), m_lastMatchedRegion->m_ids)) - { - continue; - } - - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - if (AllTokensUsed()) - { - // City matches to search query, we need to emit it as is. - EmitResult(city, startToken, endToken); - continue; - } - - // No need to search features in the World map. 
- if (m_context->GetInfo()->GetType() == MwmInfo::WORLD) - continue; - - auto const * cityFeatures = - RetrieveGeometryFeatures(*m_context, city.m_rect, RECT_ID_LOCALITY); - - if (coding::CompressedBitVector::IsEmpty(cityFeatures)) - continue; - - LocalityFilter filter(*cityFeatures); - LimitedSearch(filter); - } - } -} - -void Geocoder::MatchAroundPivot() -{ - auto const * features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT); - - if (!features) - return; - - ViewportFilter filter(*features, m_preRanker->Limit() /* threshold */); - LimitedSearch(filter); -} - -void Geocoder::LimitedSearch(FeaturesFilter const & filter) -{ - m_filter = &filter; - MY_SCOPE_GUARD(resetFilter, [&]() { m_filter = nullptr; }); - - if (!m_streets) - m_streets = LoadStreets(*m_context); - - MatchUnclassified(0 /* curToken */); - - auto search = [this]() - { - GreedilyMatchStreets(); - MatchPOIsAndBuildings(0 /* curToken */); - }; - - WithPostcodes(search); - search(); -} - -template -void Geocoder::WithPostcodes(TFn && fn) -{ - size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode(); - - for (size_t startToken = 0; startToken != m_numTokens; ++startToken) - { - size_t endToken = startToken; - for (size_t n = 1; startToken + n <= m_numTokens && n <= maxPostcodeTokens; ++n) - { - if (m_usedTokens[startToken + n - 1]) - break; - - TokenSlice slice(m_params, startToken, startToken + n); - auto const isPrefix = startToken + n == m_numTokens; - if (LooksLikePostcode(QuerySlice(slice), isPrefix)) - endToken = startToken + n; - } - if (startToken == endToken) - continue; - - auto postcodes = - RetrievePostcodeFeatures(*m_context, TokenSlice(m_params, startToken, endToken)); - MY_SCOPE_GUARD(cleanup, [&]() { m_postcodes.Clear(); }); - - if (!coding::CompressedBitVector::IsEmpty(postcodes)) - { - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - - m_postcodes.Clear(); - m_postcodes.m_startToken = startToken; - m_postcodes.m_endToken = endToken; - 
m_postcodes.m_features = move(postcodes); - - fn(); - } - } -} - -void Geocoder::GreedilyMatchStreets() -{ - for (size_t startToken = 0; startToken < m_numTokens; ++startToken) - { - if (m_usedTokens[startToken]) - continue; - - // Here we try to match as many tokens as possible while - // intersection is a non-empty bit vector of streets. All tokens - // that are synonyms to streets are ignored. Moreover, each time - // a token that looks like a beginning of a house number is met, - // we try to use current intersection of tokens as a street layer - // and try to match buildings or pois. - unique_ptr allFeatures; - - size_t curToken = startToken; - - // This variable is used for prevention of duplicate calls to - // CreateStreetsLayerAndMatchLowerLayers() with the same - // arguments. - size_t lastStopToken = curToken; - - for (; curToken < m_numTokens && !m_usedTokens[curToken]; ++curToken) - { - auto const & token = m_params.GetTokens(curToken).front(); - if (IsStreetSynonymPrefix(token)) - continue; - - if (feature::IsHouseNumber(token)) - { - CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures); - lastStopToken = curToken; - } - - unique_ptr buffer; - if (startToken == curToken || coding::CompressedBitVector::IsEmpty(allFeatures)) - buffer = coding::CompressedBitVector::Intersect(*m_streets, *m_addressFeatures[curToken]); - else - buffer = coding::CompressedBitVector::Intersect(*allFeatures, *m_addressFeatures[curToken]); - - if (coding::CompressedBitVector::IsEmpty(buffer)) - break; - - allFeatures.swap(buffer); - } - - if (curToken != lastStopToken) - CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, allFeatures); - } -} - -void Geocoder::CreateStreetsLayerAndMatchLowerLayers( - size_t startToken, size_t endToken, unique_ptr const & features) -{ - ASSERT(m_layers.empty(), ()); - - if (coding::CompressedBitVector::IsEmpty(features)) - return; - - CBVPtr filtered(features.get(), false /* isOwner */); - if 
(m_filter->NeedToFilter(*features)) - filtered.Set(m_filter->Filter(*features).release(), true /* isOwner */); - - m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); - - auto & layer = m_layers.back(); - InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer); - - vector sortedFeatures; - sortedFeatures.reserve(features->PopCount()); - filtered.ForEach(MakeBackInsertFunctor(sortedFeatures)); - layer.m_sortedFeatures = &sortedFeatures; - - ScopedMarkTokens mark(m_usedTokens, startToken, endToken); - MatchPOIsAndBuildings(0 /* curToken */); -} - -void Geocoder::MatchPOIsAndBuildings(size_t curToken) -{ - BailIfCancelled(); - - curToken = SkipUsedTokens(curToken); - if (curToken == m_numTokens) - { - // All tokens were consumed, find paths through layers, emit - // features. - if (m_postcodes.IsEmpty()) - return FindPaths(); - - // When there are no layers but user entered a postcode, we have - // to emit all features matching to the postcode. - if (m_layers.size() == 0) - { - CBVPtr filtered; - if (m_filter->NeedToFilter(*m_postcodes.m_features)) - filtered.Set(m_filter->Filter(*m_postcodes.m_features)); - else - filtered.Set(m_postcodes.m_features.get(), false /* isOwner */); - filtered.ForEach([&](uint32_t id) - { - EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(id), - m_postcodes.m_startToken, m_postcodes.m_endToken); - }); - return; - } - - if (!(m_layers.size() == 1 && m_layers[0].m_type == SearchModel::SEARCH_TYPE_STREET)) - return FindPaths(); - - // If there're only one street layer but user also entered a - // postcode, we need to emit all features matching to postcode on - // the given street. 
- m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); - - auto & layer = m_layers.back(); - InitLayer(SearchModel::SEARCH_TYPE_BUILDING, m_postcodes.m_startToken, m_postcodes.m_endToken, - layer); - - vector features; - coding::CompressedBitVectorEnumerator::ForEach(*m_postcodes.m_features, - MakeBackInsertFunctor(features)); - layer.m_sortedFeatures = &features; - return FindPaths(); - } - - m_layers.emplace_back(); - MY_SCOPE_GUARD(cleanupGuard, bind(&vector::pop_back, &m_layers)); - - // Clusters of features by search type. Each cluster is a sorted - // list of ids. - size_t const kNumClusters = SearchModel::SEARCH_TYPE_BUILDING + 1; - vector clusters[kNumClusters]; - - // Appends |featureId| to the end of the corresponding cluster, if - // any. - auto clusterize = [&](uint32_t featureId) - { - auto const searchType = GetSearchTypeInGeocoding(featureId); - - // All SEARCH_TYPE_CITY features were filtered in - // MatchCities(). All SEARCH_TYPE_STREET features were - // filtered in GreedilyMatchStreets(). - if (searchType < kNumClusters) - { - if (m_postcodes.IsEmpty() || m_postcodes.m_features->GetBit(featureId)) - clusters[searchType].push_back(featureId); - } - }; - - CBVPtr features; - features.SetFull(); - - // Try to consume [curToken, m_numTokens) tokens range. - for (size_t n = 1; curToken + n <= m_numTokens && !m_usedTokens[curToken + n - 1]; ++n) - { - // At this point |features| is the intersection of - // m_addressFeatures[curToken], m_addressFeatures[curToken + 1], - // ..., m_addressFeatures[curToken + n - 2]. 
- - BailIfCancelled(); - - { - auto & layer = m_layers.back(); - InitLayer(layer.m_type, curToken, curToken + n, layer); - } - - features.Intersect(m_addressFeatures[curToken + n - 1].get()); - ASSERT(features.Get(), ()); - - CBVPtr filtered; - if (m_filter->NeedToFilter(*features)) - filtered.Set(m_filter->Filter(*features)); - else - filtered.Set(features.Get(), false /* isOwner */); - ASSERT(filtered.Get(), ()); - - bool const looksLikeHouseNumber = feature::IsHouseNumber(m_layers.back().m_subQuery); - - if (filtered.IsEmpty() && !looksLikeHouseNumber) - break; - - if (n == 1) - { - filtered.ForEach(clusterize); - } - else - { - auto noFeature = [&filtered](uint32_t featureId) -> bool - { - return !filtered->GetBit(featureId); - }; - for (auto & cluster : clusters) - my::EraseIf(cluster, noFeature); - - size_t curs[kNumClusters] = {}; - size_t ends[kNumClusters]; - for (size_t i = 0; i < kNumClusters; ++i) - ends[i] = clusters[i].size(); - filtered.ForEach([&](uint32_t featureId) - { - bool found = false; - for (size_t i = 0; i < kNumClusters && !found; ++i) - { - size_t & cur = curs[i]; - size_t const end = ends[i]; - while (cur != end && clusters[i][cur] < featureId) - ++cur; - if (cur != end && clusters[i][cur] == featureId) - found = true; - } - if (!found) - clusterize(featureId); - }); - for (size_t i = 0; i < kNumClusters; ++i) - inplace_merge(clusters[i].begin(), clusters[i].begin() + ends[i], clusters[i].end()); - } - - for (size_t i = 0; i < kNumClusters; ++i) - { - // ATTENTION: DO NOT USE layer after recursive calls to - // MatchPOIsAndBuildings(). This may lead to use-after-free. 
- auto & layer = m_layers.back(); - layer.m_sortedFeatures = &clusters[i]; - - if (i == SearchModel::SEARCH_TYPE_BUILDING) - { - if (layer.m_sortedFeatures->empty() && !looksLikeHouseNumber) - continue; - } - else if (layer.m_sortedFeatures->empty()) - { - continue; - } - - layer.m_type = static_cast(i); - if (IsLayerSequenceSane()) - MatchPOIsAndBuildings(curToken + n); - } - } -} - -bool Geocoder::IsLayerSequenceSane() const -{ - ASSERT(!m_layers.empty(), ()); - static_assert(SearchModel::SEARCH_TYPE_COUNT <= 32, - "Select a wider type to represent search types mask."); - uint32_t mask = 0; - size_t buildingIndex = m_layers.size(); - size_t streetIndex = m_layers.size(); - - // Following loop returns false iff there're two different layers - // of the same search type. - for (size_t i = 0; i < m_layers.size(); ++i) - { - auto const & layer = m_layers[i]; - ASSERT_NOT_EQUAL(layer.m_type, SearchModel::SEARCH_TYPE_COUNT, ()); - - // TODO (@y): probably it's worth to check belongs-to-locality here. - uint32_t bit = 1U << layer.m_type; - if (mask & bit) - return false; - mask |= bit; - - if (layer.m_type == SearchModel::SEARCH_TYPE_BUILDING) - buildingIndex = i; - else if (layer.m_type == SearchModel::SEARCH_TYPE_STREET) - streetIndex = i; - } - - bool const hasBuildings = buildingIndex != m_layers.size(); - bool const hasStreets = streetIndex != m_layers.size(); - - // Checks that building and street layers are neighbours. - if (hasBuildings && hasStreets) - { - auto const & buildings = m_layers[buildingIndex]; - auto const & streets = m_layers[streetIndex]; - if (buildings.m_startToken != streets.m_endToken && - buildings.m_endToken != streets.m_startToken) - { - return false; - } - } - - return true; -} - -void Geocoder::FindPaths() -{ - if (m_layers.empty()) - return; - - // Layers ordered by search type. 
- vector sortedLayers; - sortedLayers.reserve(m_layers.size()); - for (auto & layer : m_layers) - sortedLayers.push_back(&layer); - sort(sortedLayers.begin(), sortedLayers.end(), my::LessBy(&FeaturesLayer::m_type)); - - auto const & innermostLayer = *sortedLayers.front(); - - m_matcher->SetPostcodes(m_postcodes.m_features.get()); - m_finder.ForEachReachableVertex(*m_matcher, sortedLayers, - [this, &innermostLayer](IntersectionResult const & result) - { - ASSERT(result.IsValid(), ()); - // TODO(@y, @m, @vng): use rest fields of IntersectionResult for - // better scoring. - EmitResult(m_context->GetId(), result.InnermostResult(), innermostLayer.m_type, - innermostLayer.m_startToken, innermostLayer.m_endToken); - }); -} - -void Geocoder::EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type, - size_t startToken, size_t endToken) -{ - FeatureID id(mwmId, ftId); - - // Distance and rank will be filled at the end, for all results at once. - // - // TODO (@y, @m): need to skip zero rank features that are too - // distant from the pivot when there're enough results close to the - // pivot. 
- PreRankingInfo info; - info.m_searchType = type; - info.m_startToken = startToken; - info.m_endToken = endToken; - - m_preRanker->Emplace(id, info); -} - -void Geocoder::EmitResult(Region const & region, size_t startToken, size_t endToken) -{ - SearchModel::SearchType type; - switch (region.m_type) - { - case REGION_TYPE_STATE: type = SearchModel::SEARCH_TYPE_STATE; break; - case REGION_TYPE_COUNTRY: type = SearchModel::SEARCH_TYPE_COUNTRY; break; - case REGION_TYPE_COUNT: type = SearchModel::SEARCH_TYPE_COUNT; break; - } - EmitResult(m_worldId, region.m_featureId, type, startToken, endToken); -} - -void Geocoder::EmitResult(City const & city, size_t startToken, size_t endToken) -{ - EmitResult(city.m_countryId, city.m_featureId, city.m_type, startToken, endToken); -} - -void Geocoder::FillMissingFieldsInResults() -{ - MwmSet::MwmId mwmId; - MwmSet::MwmHandle mwmHandle; - unique_ptr rankTable; - - m_preRanker->ForEachInfo([&](FeatureID const & id, PreRankingInfo & info) - { - if (id.m_mwmId != mwmId) - { - mwmId = id.m_mwmId; - mwmHandle = m_index.GetMwmHandleById(mwmId); - if (mwmHandle.IsAlive()) - rankTable = RankTable::Load(mwmHandle.GetValue()->m_cont); - else - rankTable = make_unique(); - } - - info.m_rank = rankTable->Get(id.m_index); - }); - - if (m_preRanker->Size() > m_preRanker->Limit()) - { - m_pivotFeatures.SetPosition(m_params.m_accuratePivotCenter, m_params.m_scale); - m_preRanker->ForEachInfo([&](FeatureID const & id, PreRankingInfo & info) - { - info.m_distanceToPivot = - m_pivotFeatures.GetDistanceToFeatureMeters(id); - }); - } -} - -void Geocoder::MatchUnclassified(size_t curToken) -{ - ASSERT(m_layers.empty(), ()); - - // We need to match all unused tokens to UNCLASSIFIED features, - // therefore unused tokens must be adjacent to each other. 
For - // example, as parks are UNCLASSIFIED now, it's ok to match "London - // Hyde Park", because London will be matched as a city and rest - // adjacent tokens will be matched to "Hyde Park", whereas it's not - // ok to match something to "Park London Hyde", because tokens - // "Park" and "Hyde" are not adjacent. - if (NumUnusedTokensGroups() != 1) - return; - - CBVPtr allFeatures; - allFeatures.SetFull(); - - auto startToken = curToken; - for (curToken = SkipUsedTokens(curToken); curToken < m_numTokens && !m_usedTokens[curToken]; - ++curToken) - { - allFeatures.Intersect(m_addressFeatures[curToken].get()); - } - - if (m_filter->NeedToFilter(*allFeatures)) - allFeatures.Set(m_filter->Filter(*allFeatures).release(), true /* isOwner */); - - if (allFeatures.IsEmpty()) - return; - - auto emitUnclassified = [&](uint32_t featureId) - { - auto type = GetSearchTypeInGeocoding(featureId); - if (type == SearchModel::SEARCH_TYPE_UNCLASSIFIED) - EmitResult(m_context->GetId(), featureId, type, startToken, curToken); - }; - allFeatures.ForEach(emitUnclassified); -} - -unique_ptr Geocoder::LoadCategories( - MwmContext & context, vector const & categories) -{ - ASSERT(context.m_handle.IsAlive(), ()); - ASSERT(HasSearchIndex(context.m_value), ()); - - m_retrievalParams.m_tokens.resize(1); - m_retrievalParams.m_tokens[0].resize(1); - m_retrievalParams.m_prefixTokens.clear(); - - vector> cbvs; - - for_each(categories.begin(), categories.end(), [&](strings::UniString const & category) - { - m_retrievalParams.m_tokens[0][0] = category; - auto cbv = RetrieveAddressFeatures( - context.GetId(), context.m_value, static_cast(*this), - m_retrievalParams); - if (!coding::CompressedBitVector::IsEmpty(cbv)) - cbvs.push_back(move(cbv)); - }); - - UniteCBVs(cbvs); - if (cbvs.empty()) - cbvs.push_back(make_unique()); - - return move(cbvs[0]); -} - -coding::CompressedBitVector const * Geocoder::LoadStreets(MwmContext & context) -{ - if (!context.m_handle.IsAlive() || 
!HasSearchIndex(context.m_value)) - return nullptr; - - auto mwmId = context.m_handle.GetId(); - auto const it = m_streetsCache.find(mwmId); - if (it != m_streetsCache.cend()) - return it->second.get(); - - auto streets = LoadCategories(context, StreetCategories::Instance().GetCategories()); - - auto const * result = streets.get(); - m_streetsCache[mwmId] = move(streets); - return result; -} - -unique_ptr Geocoder::LoadVillages(MwmContext & context) -{ - if (!context.m_handle.IsAlive() || !HasSearchIndex(context.m_value)) - return make_unique(); - - return LoadCategories(context, GetVillageCategories()); -} - -unique_ptr Geocoder::RetrievePostcodeFeatures( - MwmContext const & context, TokenSlice const & slice) -{ - return ::search::v2::RetrievePostcodeFeatures(context.GetId(), context.m_value, - static_cast(*this), slice); -} - -coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures(MwmContext const & context, - m2::RectD const & rect, - RectId id) -{ - switch (id) - { - case RECT_ID_PIVOT: return m_pivotRectsCache.Get(context, rect, m_params.m_scale); - case RECT_ID_LOCALITY: return m_localityRectsCache.Get(context, rect, m_params.m_scale); - case RECT_ID_COUNT: ASSERT(false, ("Invalid RectId.")); return nullptr; - } -} - -SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId) -{ - if (m_streets->GetBit(featureId)) - return SearchModel::SEARCH_TYPE_STREET; - if (m_villages->GetBit(featureId)) - return SearchModel::SEARCH_TYPE_VILLAGE; - - FeatureType feature; - m_context->GetFeature(featureId, feature); - return m_model.GetSearchType(feature); -} - -bool Geocoder::AllTokensUsed() const -{ - return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor()); -} - -bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const -{ - return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor()); -} - -size_t Geocoder::NumUnusedTokensGroups() const -{ - size_t numGroups = 0; - for (size_t i = 0; i < 
m_usedTokens.size(); ++i) - { - if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1])) - ++numGroups; - } - return numGroups; -} - -size_t Geocoder::SkipUsedTokens(size_t curToken) const -{ - while (curToken != m_usedTokens.size() && m_usedTokens[curToken]) - ++curToken; - return curToken; -} - -string DebugPrint(Geocoder::Locality const & locality) -{ - ostringstream os; - os << "Locality [" << DebugPrint(locality.m_countryId) << ", featureId=" << locality.m_featureId - << ", startToken=" << locality.m_startToken << ", endToken=" << locality.m_endToken << "]"; - return os.str(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/geocoder.hpp b/search/v2/geocoder.hpp deleted file mode 100644 index bdc8ef0d52..0000000000 --- a/search/v2/geocoder.hpp +++ /dev/null @@ -1,392 +0,0 @@ -#pragma once - -#include "search/cancel_exception.hpp" -#include "search/mode.hpp" -#include "search/query_params.hpp" -#include "search/v2/features_layer.hpp" -#include "search/v2/features_layer_path_finder.hpp" -#include "search/v2/geometry_cache.hpp" -#include "search/v2/mwm_context.hpp" -#include "search/v2/nested_rects_cache.hpp" -#include "search/v2/pre_ranking_info.hpp" -#include "search/v2/ranking_utils.hpp" -#include "search/v2/search_model.hpp" - -#include "indexer/index.hpp" -#include "indexer/mwm_set.hpp" - -#include "storage/country_info_getter.hpp" - -#include "coding/compressed_bit_vector.hpp" - -#include "geometry/rect2d.hpp" - -#include "base/buffer_vector.hpp" -#include "base/cancellable.hpp" -#include "base/macros.hpp" -#include "base/string_utils.hpp" - -#include "std/limits.hpp" -#include "std/set.hpp" -#include "std/string.hpp" -#include "std/unique_ptr.hpp" -#include "std/unordered_map.hpp" -#include "std/vector.hpp" - -class MwmInfo; -class MwmValue; - -namespace coding -{ -class CompressedBitVector; -} - -namespace storage -{ -class CountryInfoGetter; -} // namespace storage - -namespace search -{ -class PreRanker; - -namespace v2 -{ -class 
FeaturesFilter; -class FeaturesLayerMatcher; -class SearchModel; -class TokenSlice; - -// This class is used to retrieve all features corresponding to a -// search query. Search query is represented as a sequence of tokens -// (including synonyms for these tokens), and Geocoder tries to build -// all possible partitions (or layers) of the search query, where each -// layer is a set of features corresponding to some search class -// (e.g. POI, BUILDING, STREET, etc., see search/v2/search_model.hpp). -// Then, Geocoder builds a layered graph, with edges between features -// on adjacent layers (e.g. between BUILDING ans STREET, STREET and -// CITY, etc.). Usually an edge between two features means that a -// feature from the lowest layer geometrically belongs to a feature -// from the highest layer (BUILDING is located on STREET, STREET is -// located inside CITY, CITY is located inside STATE, etc.). Final -// part is to find all paths through this layered graph and report all -// features from the lowest layer, that are reachable from the -// highest layer. -class Geocoder : public my::Cancellable -{ -public: - struct Params : public QueryParams - { - Params(); - - Mode m_mode; - - // We need to pass both pivot and pivot center because pivot is - // usually a rectangle created by radius and center, and due to - // precision loss, |m_pivot|.Center() may differ from - // |m_accuratePivotCenter|. Therefore |m_pivot| should be used for - // fast filtering of features outside of the rectangle, while - // |m_accuratePivotCenter| should be used when it's needed to - // compute a distance from a feature to the pivot. 
- m2::RectD m_pivot; - m2::PointD m_accuratePivotCenter; - }; - - enum RegionType - { - REGION_TYPE_STATE, - REGION_TYPE_COUNTRY, - REGION_TYPE_COUNT - }; - - struct Locality - { - Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {} - - Locality(uint32_t featureId, size_t startToken, size_t endToken) - : m_featureId(featureId), m_startToken(startToken), m_endToken(endToken) - { - } - - MwmSet::MwmId m_countryId; - uint32_t m_featureId; - size_t m_startToken; - size_t m_endToken; - }; - - // This struct represents a country or US- or Canadian- state. It - // is used to filter maps before search. - struct Region : public Locality - { - Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {} - - storage::CountryInfoGetter::TRegionIdSet m_ids; - string m_enName; - m2::PointD m_center; - RegionType m_type; - }; - - // This struct represents a city or a village. It is used to filter features - // during search. - // todo(@m) It works well as is, but consider a new naming scheme - // when counties etc. are added. E.g., Region for countries and - // states and Locality for smaller settlements. - struct City : public Locality - { - City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {} - - m2::RectD m_rect; - SearchModel::SearchType m_type; -#if defined(DEBUG) - string m_defaultName; -#endif - }; - - Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter); - - ~Geocoder() override; - - // Sets search query params. - void SetParams(Params const & params); - - // Starts geocoding, retrieved features will be appended to - // |results|. 
- void GoEverywhere(PreRanker & preRanker); - void GoInViewport(PreRanker & preRanker); - - void ClearCaches(); - -private: - enum RectId - { - RECT_ID_PIVOT, - RECT_ID_LOCALITY, - RECT_ID_COUNT - }; - - struct Postcodes - { - void Clear() - { - m_startToken = 0; - m_endToken = 0; - m_features.reset(); - } - - inline bool IsEmpty() const { return coding::CompressedBitVector::IsEmpty(m_features); } - - size_t m_startToken = 0; - size_t m_endToken = 0; - unique_ptr m_features; - }; - - void GoImpl(PreRanker & preRanker, vector> & infos, bool inViewport); - - template - using TLocalitiesCache = map, vector>; - - QueryParams::TSynonymsVector const & GetTokens(size_t i) const; - - // Fills |m_retrievalParams| with [curToken, endToken) subsequence - // of search query tokens. - void PrepareRetrievalParams(size_t curToken, size_t endToken); - - // Creates a cache of posting lists corresponding to features in m_context - // for each token and saves it to m_addressFeatures. - void PrepareAddressFeatures(); - - void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken, - FeaturesLayer & layer); - - void FillLocalityCandidates(coding::CompressedBitVector const * filter, - size_t const maxNumLocalities, vector & preLocalities); - - void FillLocalitiesTable(); - - void FillVillageLocalities(); - - template - void ForEachCountry(vector> const & infos, TFn && fn); - - // Throws CancelException if cancelled. - inline void BailIfCancelled() - { - ::search::BailIfCancelled(static_cast(*this)); - } - - // Tries to find all countries and states in a search query and then - // performs matching of cities in found maps. - void MatchRegions(RegionType type); - - // Tries to find all cities in a search query and then performs - // matching of streets in found cities. - void MatchCities(); - - // Tries to do geocoding without localities, ie. find POIs, - // BUILDINGs and STREETs without knowledge about country, state, - // city or village. 
If during the geocoding too many features are - // retrieved, viewport is used to throw away excess features. - void MatchAroundPivot(); - - // Tries to do geocoding in a limited scope, assuming that knowledge - // about high-level features, like cities or countries, is - // incorporated into |filter|. - void LimitedSearch(FeaturesFilter const & filter); - - template - void WithPostcodes(TFn && fn); - - // Tries to match some adjacent tokens in the query as streets and - // then performs geocoding in street vicinities. - void GreedilyMatchStreets(); - - void CreateStreetsLayerAndMatchLowerLayers( - size_t startToken, size_t endToken, unique_ptr const & features); - - // Tries to find all paths in a search tree, where each edge is - // marked with some substring of the query tokens. These paths are - // called "layer sequence" and current path is stored in |m_layers|. - void MatchPOIsAndBuildings(size_t curToken); - - // Returns true if current path in the search tree (see comment for - // MatchPOIsAndBuildings()) looks sane. This method is used as a fast - // pre-check to cut off unnecessary work. - bool IsLayerSequenceSane() const; - - // Finds all paths through layers and emits reachable features from - // the lowest layer. - void FindPaths(); - - // Forms result and feeds it to |m_preRanker|. - void EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type, - size_t startToken, size_t endToken); - void EmitResult(Region const & region, size_t startToken, size_t endToken); - void EmitResult(City const & city, size_t startToken, size_t endToken); - - // Computes missing fields for all results in |m_preRanker|. - void FillMissingFieldsInResults(); - - // Tries to match unclassified objects from lower layers, like - // parks, forests, lakes, rivers, etc. This method finds all - // UNCLASSIFIED objects that match to all currently unused tokens. 
- void MatchUnclassified(size_t curToken); - - unique_ptr LoadCategories( - MwmContext & context, vector const & categories); - - coding::CompressedBitVector const * LoadStreets(MwmContext & context); - - unique_ptr LoadVillages(MwmContext & context); - - // A wrapper around RetrievePostcodeFeatures. - unique_ptr RetrievePostcodeFeatures(MwmContext const & context, - TokenSlice const & slice); - - // A caching wrapper around Retrieval::RetrieveGeometryFeatures. - coding::CompressedBitVector const * RetrieveGeometryFeatures(MwmContext const & context, - m2::RectD const & rect, RectId id); - - // This is a faster wrapper around SearchModel::GetSearchType(), as - // it uses pre-loaded lists of streets and villages. - SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId); - - // Returns true iff all tokens are used. - bool AllTokensUsed() const; - - // Returns true if there exists at least one used token in [from, - // to). - bool HasUsedTokensInRange(size_t from, size_t to) const; - - // Counts number of groups of consecutive unused tokens. - size_t NumUnusedTokensGroups() const; - - // Advances |curToken| to the nearest unused token, or to the end of - // |m_usedTokens| if there are no unused tokens. - size_t SkipUsedTokens(size_t curToken) const; - - Index & m_index; - - storage::CountryInfoGetter const & m_infoGetter; - - // Geocoder params. - Params m_params; - - // Total number of search query tokens. - size_t m_numTokens; - - // This field is used to map features to a limited number of search - // classes. - SearchModel const & m_model; - - // Following fields are set up by Search() method and can be - // modified and used only from Search() or its callees. - - MwmSet::MwmId m_worldId; - - // Context of the currently processed mwm. - unique_ptr m_context; - - // m_cities stores both big cities that are visible at World.mwm - // and small villages and hamlets that are not. 
- TLocalitiesCache m_cities; - TLocalitiesCache m_regions[REGION_TYPE_COUNT]; - - // Caches of features in rects. These caches are separated from - // TLocalitiesCache because the latter are quite lightweight and not - // all of them are needed. - PivotRectsCache m_pivotRectsCache; - LocalityRectsCache m_localityRectsCache; - - // Cache of nested rects used to estimate distance from a feature to the pivot. - NestedRectsCache m_pivotFeatures; - - // Cache of posting lists for each token in the query. TODO (@y, - // @m, @vng): consider to update this cache lazily, as user inputs - // tokens one-by-one. - vector> m_addressFeatures; - - // Cache of street ids in mwms. - map> m_streetsCache; - - // Street features in the mwm that is currently being processed. - // The initialization of m_streets is postponed in order to gain - // some speed. Therefore m_streets may be used only in - // LimitedSearch() and in all its callees. - coding::CompressedBitVector const * m_streets; - - // Village features in the mwm that is currently being processed. - unique_ptr m_villages; - - // Postcodes features in the mwm that is currently being processed. - Postcodes m_postcodes; - - // This vector is used to indicate what tokens were matched by - // locality and can't be re-used during the geocoding process. - vector m_usedTokens; - - // This filter is used to throw away excess features. - FeaturesFilter const * m_filter; - - // Features matcher for layers intersection. - map> m_matchersCache; - FeaturesLayerMatcher * m_matcher; - - // Path finder for interpretations. - FeaturesLayerPathFinder m_finder; - - // Search query params prepared for retrieval. - QueryParams m_retrievalParams; - - // Pointer to the most nested region filled during geocoding. - Region const * m_lastMatchedRegion; - - // Stack of layers filled during geocoding. - vector m_layers; - - // Non-owning. 
- PreRanker * m_preRanker; -}; - -string DebugPrint(Geocoder::Locality const & locality); -} // namespace v2 -} // namespace search diff --git a/search/v2/geometry_cache.cpp b/search/v2/geometry_cache.cpp deleted file mode 100644 index 2884e76aa1..0000000000 --- a/search/v2/geometry_cache.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include "search/v2/geometry_cache.hpp" - -#include "search/geometry_utils.hpp" -#include "search/retrieval.hpp" -#include "search/v2/mwm_context.hpp" - -#include "geometry/mercator.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -double constexpr kCellEps = MercatorBounds::GetCellID2PointAbsEpsilon(); -} // namespace - -// GeometryCache ----------------------------------------------------------------------------------- -GeometryCache::GeometryCache(size_t maxNumEntries, my::Cancellable const & cancellable) - : m_maxNumEntries(maxNumEntries), m_cancellable(cancellable) -{ - CHECK_GREATER(m_maxNumEntries, 0, ()); -} - -void GeometryCache::InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, - Entry & entry) -{ - entry.m_rect = rect; - entry.m_cbv = v2::RetrieveGeometryFeatures(context, m_cancellable, rect, scale); - entry.m_scale = scale; -} - -// PivotRectsCache --------------------------------------------------------------------------------- -PivotRectsCache::PivotRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable, - double maxRadiusMeters) - : GeometryCache(maxNumEntries, cancellable), m_maxRadiusMeters(maxRadiusMeters) -{ -} - -coding::CompressedBitVector const * PivotRectsCache::Get(MwmContext const & context, - m2::RectD const & rect, int scale) -{ - auto p = FindOrCreateEntry(context.GetId(), [&rect, &scale](Entry const & entry) - { - return scale == entry.m_scale && - (entry.m_rect.IsRectInside(rect) || IsEqualMercator(rect, entry.m_rect, kCellEps)); - }); - auto & entry = p.first; - if (p.second) - { - m2::RectD normRect = - MercatorBounds::RectByCenterXYAndSizeInMeters(rect.Center(), 
m_maxRadiusMeters); - if (!normRect.IsRectInside(rect)) - normRect = rect; - InitEntry(context, normRect, scale, entry); - } - return entry.m_cbv.get(); -} - -// LocalityRectsCache ------------------------------------------------------------------------------ -LocalityRectsCache::LocalityRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable) - : GeometryCache(maxNumEntries, cancellable) -{ -} - -coding::CompressedBitVector const * LocalityRectsCache::Get(MwmContext const & context, - m2::RectD const & rect, int scale) -{ - auto p = FindOrCreateEntry(context.GetId(), [&rect, &scale](Entry const & entry) - { - return scale == entry.m_scale && IsEqualMercator(rect, entry.m_rect, kCellEps); - }); - auto & entry = p.first; - if (p.second) - InitEntry(context, rect, scale, entry); - return entry.m_cbv.get(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/geometry_cache.hpp b/search/v2/geometry_cache.hpp deleted file mode 100644 index 62805918a5..0000000000 --- a/search/v2/geometry_cache.hpp +++ /dev/null @@ -1,109 +0,0 @@ -#pragma once - -#include "indexer/mwm_set.hpp" - -#include "coding/compressed_bit_vector.hpp" - -#include "geometry/rect2d.hpp" - -#include "base/assert.hpp" - -#include "std/algorithm.hpp" -#include "std/cstdint.hpp" -#include "std/deque.hpp" -#include "std/map.hpp" -#include "std/unique_ptr.hpp" -#include "std/utility.hpp" - -namespace my -{ -class Cancellable; -}; - -namespace search -{ -namespace v2 -{ -class MwmContext; - -// This class represents a simple cache of features in rects for all mwms. -// -// *NOTE* This class is not thread-safe. -class GeometryCache -{ -public: - virtual ~GeometryCache() = default; - - // Returns (hopefully, cached) list of features in a given - // rect. Note that return value may be invalidated on next calls to - // this method. 
- virtual coding::CompressedBitVector const * Get(MwmContext const & context, - m2::RectD const & rect, int scale) = 0; - - inline void Clear() { m_entries.clear(); } - -protected: - struct Entry - { - m2::RectD m_rect; - unique_ptr m_cbv; - int m_scale = 0; - }; - - // |maxNumEntries| denotes the maximum number of rectangles that - // will be cached for each mwm individually. - GeometryCache(size_t maxNumEntries, my::Cancellable const & cancellable); - - template - pair FindOrCreateEntry(MwmSet::MwmId const & id, TPred && pred) - { - auto & entries = m_entries[id]; - auto it = find_if(entries.begin(), entries.end(), forward(pred)); - if (it != entries.end()) - { - if (it != entries.begin()) - iter_swap(entries.begin(), it); - return pair(entries.front(), false); - } - - entries.emplace_front(); - if (entries.size() == m_maxNumEntries + 1) - entries.pop_back(); - - ASSERT_LESS_OR_EQUAL(entries.size(), m_maxNumEntries, ()); - ASSERT(!entries.empty(), ()); - return pair(entries.front(), true); - } - - void InitEntry(MwmContext const & context, m2::RectD const & rect, int scale, Entry & entry); - - map> m_entries; - size_t const m_maxNumEntries; - my::Cancellable const & m_cancellable; -}; - -class PivotRectsCache : public GeometryCache -{ -public: - PivotRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable, - double maxRadiusMeters); - - // GeometryCache overrides: - coding::CompressedBitVector const * Get(MwmContext const & context, m2::RectD const & rect, - int scale) override; - -private: - double const m_maxRadiusMeters; -}; - -class LocalityRectsCache : public GeometryCache -{ -public: - LocalityRectsCache(size_t maxNumEntries, my::Cancellable const & cancellable); - - // GeometryCache overrides: - coding::CompressedBitVector const * Get(MwmContext const & context, m2::RectD const & rect, - int scale) override; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/house_numbers_matcher.cpp b/search/v2/house_numbers_matcher.cpp 
deleted file mode 100644 index 3acd0d6341..0000000000 --- a/search/v2/house_numbers_matcher.cpp +++ /dev/null @@ -1,322 +0,0 @@ -#include "search/v2/house_numbers_matcher.hpp" - -#include "std/algorithm.hpp" -#include "std/iterator.hpp" -#include "std/limits.hpp" -#include "std/sstream.hpp" - -#include "base/logging.hpp" - -using namespace strings; - -namespace search -{ -namespace v2 -{ -namespace -{ -size_t constexpr kInvalidNum = numeric_limits::max(); - -HouseNumberTokenizer::CharClass GetCharClass(UniChar c) -{ - static UniString const kSeps = MakeUniString("\"\\/(),. \t№#-"); - if (c >= '0' && c <= '9') - return HouseNumberTokenizer::CharClass::Digit; - if (find(kSeps.begin(), kSeps.end(), c) != kSeps.end()) - return HouseNumberTokenizer::CharClass::Separator; - return HouseNumberTokenizer::CharClass::Other; -} - -bool IsShortWord(HouseNumberTokenizer::Token const & t) -{ - return t.m_klass == HouseNumberTokenizer::CharClass::Other && t.m_token.size() <= 3; -} - -bool IsNumber(HouseNumberTokenizer::Token const & t) -{ - return t.m_klass == HouseNumberTokenizer::CharClass::Digit; -} - -bool IsNumberOrShortWord(HouseNumberTokenizer::Token const & t) -{ - return IsNumber(t) || IsShortWord(t); -} - -bool IsBuildingSynonymPrefix(UniString const & p) -{ - static UniString kSynonyms[] = { - MakeUniString("building"), MakeUniString("bld"), MakeUniString("unit"), - MakeUniString("block"), MakeUniString("blk"), MakeUniString("корпус"), - MakeUniString("литер"), MakeUniString("строение"), MakeUniString("блок")}; - - for (UniString const & s : kSynonyms) - { - if (StartsWith(s, p)) - return true; - } - return false; -} - -size_t GetNumTokensForBuildingPart(vector const & ts, size_t i, - vector & memory); - -size_t GetNumTokensForBuildingPartImpl(vector const & ts, size_t i, - vector & memory) -{ - ASSERT_LESS(i, ts.size(), ()); - - auto const & token = ts[i]; - if (token.m_klass != HouseNumberTokenizer::CharClass::Other) - return 0; - - if 
(!IsBuildingSynonymPrefix(token.m_token)) - return 0; - - // No sense in single "корпус" or "литер". - if (i + 1 >= ts.size()) - return 0; - - if (!IsNumberOrShortWord(ts[i + 1])) - return 0; - - // No sense in "корпус корпус" or "литер литер". - if (ts[i + 1].m_token == token.m_token) - return 0; - - // Consume next token, either number or short word. - size_t j = i + 2; - - // Consume one more number of short word, if possible. - if (j < ts.size() && IsNumberOrShortWord(ts[j]) && ts[j].m_klass != ts[j - 1].m_klass && - GetNumTokensForBuildingPart(ts, j, memory) == 0) - { - ++j; - } - - return j - i; -} - -// Returns number of tokens starting at position |i|, where the first -// token is some way of writing of "корпус", or "building", second -// token is a number or a letter, and (possibly) third token which can -// be a letter when second token is a number. |memory| is used here to -// store results of previous calls and prevents degradation to -// non-linear time. -// -// TODO (@y, @m): the parser is quite complex now. Consider to just -// throw out all prefixes of "building" or "литер" and sort rest -// tokens. Number of false positives will be higher but the parser -// will be more robust, simple and faster. -size_t GetNumTokensForBuildingPart(vector const & ts, size_t i, - vector & memory) -{ - if (i >= ts.size()) - return 0; - if (memory[i] == kInvalidNum) - memory[i] = GetNumTokensForBuildingPartImpl(ts, i, memory); - return memory[i]; -} - -void MergeTokens(vector const & ts, vector & rs) -{ - vector memory(ts.size(), kInvalidNum); - - size_t i = 0; - while (i < ts.size()) - { - switch (ts[i].m_klass) - { - case HouseNumberTokenizer::CharClass::Digit: - { - UniString token = ts[i].m_token; - ++i; - // Process cases like "123 б" or "9PQ". 
- if (i < ts.size() && IsShortWord(ts[i]) && GetNumTokensForBuildingPart(ts, i, memory) == 0) - { - token.append(ts[i].m_token.begin(), ts[i].m_token.end()); - ++i; - } - rs.push_back(move(token)); - break; - } - case HouseNumberTokenizer::CharClass::Separator: - { - ASSERT(false, ("Seps can't be merged.")); - ++i; - break; - } - case HouseNumberTokenizer::CharClass::Other: - { - if (size_t numTokens = GetNumTokensForBuildingPart(ts, i, memory)) - { - UniString token; - ++i; - for (size_t j = 1; j < numTokens; ++j, ++i) - token.append(ts[i].m_token.begin(), ts[i].m_token.end()); - rs.push_back(move(token)); - break; - } - - rs.push_back(ts[i].m_token); - ++i; - break; - } - } - } - - if (!rs.empty()) - sort(rs.begin() + 1, rs.end()); -} - -bool ParsesMatch(Parse const & houseNumberParse, Parse const & queryParse) -{ - if (houseNumberParse.IsEmpty() || queryParse.IsEmpty()) - return false; - - auto const & h = houseNumberParse.m_parts; - auto const & q = queryParse.m_parts; - - // Check first tokens, hope, house numbers. - if (h[0] != q[0]) - return false; - - size_t i = 1, j = 1; - while (i != h.size() && j != q.size()) - { - while (i != h.size() && h[i] < q[j]) - ++i; - if (i == h.size() || h[i] != q[j]) - return false; - ++i; - ++j; - } - - if (queryParse.m_hasTrailingBuildingPrefixSynonym) - { - // In this case, at least one more unmatched part must be in a - // house number. 
- return j == q.size() && h.size() > q.size(); - } - - return j == q.size(); -} -} // namespace - -// static -void HouseNumberTokenizer::Tokenize(UniString const & s, vector & ts) -{ - size_t i = 0; - while (i < s.size()) - { - CharClass klass = GetCharClass(s[i]); - - size_t j = i; - while (j < s.size() && GetCharClass(s[j]) == klass) - ++j; - - if (klass != CharClass::Separator) - { - UniString token(s.begin() + i, s.begin() + j); - ts.emplace_back(move(token), klass); - } - - i = j; - } -} - -void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps) -{ - vector tokens; - HouseNumberTokenizer::Tokenize(MakeLowerCase(query), tokens); - - { - ps.emplace_back(); - Parse & p = ps.back(); - MergeTokens(tokens, p.m_parts); - } - - // *NOTE* |tokens| is modified in the following block. - if (queryIsPrefix && !tokens.empty() && - tokens.back().m_klass == HouseNumberTokenizer::CharClass::Other && - IsBuildingSynonymPrefix(tokens.back().m_token)) - { - tokens.pop_back(); - ps.emplace_back(); - Parse & p = ps.back(); - MergeTokens(tokens, p.m_parts); - p.m_hasTrailingBuildingPrefixSynonym = true; - } -} - -bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query, - bool queryIsPrefix) -{ - if (houseNumber == query) - return true; - - vector queryParses; - ParseQuery(query, queryIsPrefix, queryParses); - - return HouseNumbersMatch(houseNumber, queryParses); -} - -bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses) -{ - if (houseNumber.empty() || queryParses.empty()) - return false; - - // Fast pre-check, helps to early exit without complex house number - // parsing. 
- bool good = false; - for (auto const & queryParse : queryParses) - { - if (!queryParse.IsEmpty() && houseNumber[0] == queryParse.m_parts.front()[0]) - { - good = true; - break; - } - } - if (!good) - return false; - - Parse houseNumberParse; - { - vector tokens; - HouseNumberTokenizer::Tokenize(MakeLowerCase(houseNumber), tokens); - MergeTokens(tokens, houseNumberParse.m_parts); - } - - for (auto const & queryParse : queryParses) - { - if (ParsesMatch(houseNumberParse, queryParse)) - return true; - } - return false; -} - -string DebugPrint(HouseNumberTokenizer::CharClass charClass) -{ - switch (charClass) - { - case HouseNumberTokenizer::CharClass::Separator: return "Separator"; - case HouseNumberTokenizer::CharClass::Digit: return "Digit"; - case HouseNumberTokenizer::CharClass::Other: return "Other"; - } - return "Unknown"; -} - -string DebugPrint(HouseNumberTokenizer::Token const & token) -{ - ostringstream os; - os << "Token [" << DebugPrint(token.m_token) << ", " << DebugPrint(token.m_klass) << "]"; - return os.str(); -} - -string DebugPrint(Parse const & parse) -{ - ostringstream os; - os << "Parse [" << DebugPrint(parse.m_parts) << "]"; - return os.str(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/house_numbers_matcher.hpp b/search/v2/house_numbers_matcher.hpp deleted file mode 100644 index 53bf7d6bb7..0000000000 --- a/search/v2/house_numbers_matcher.hpp +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include "base/string_utils.hpp" - -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -namespace v2 -{ -// This class splits a string representing a house number to groups of -// symbols from the same class (separators, digits or other symbols, -// hope, letters). 
-class HouseNumberTokenizer -{ -public: - enum class CharClass - { - Separator, - Digit, - Other, - }; - - struct Token - { - Token() : m_klass(CharClass::Separator) {} - Token(strings::UniString const & token, CharClass klass) : m_token(token), m_klass(klass) {} - Token(strings::UniString && token, CharClass klass) : m_token(move(token)), m_klass(klass) {} - - strings::UniString m_token; - CharClass m_klass; - }; - - // Performs greedy split of |s| by character classes. Note that this - // function never emits Tokens corresponding to Separator classes. - static void Tokenize(strings::UniString const & s, vector & ts); -}; - -struct Parse -{ - inline bool IsEmpty() const { return m_parts.empty(); } - - vector m_parts; - bool m_hasTrailingBuildingPrefixSynonym = false; -}; - -// Parses query for later faster processing, when multiple buildings -// are matched against the query. -void ParseQuery(strings::UniString const & query, bool queryIsPrefix, vector & ps); - -// Returns true when |query| matches to |houseNumber|. -bool HouseNumbersMatch(strings::UniString const & houseNumber, strings::UniString const & query, - bool queryIsPrefix); - -// Returns true when at least one parse of the query matches to -// |houseNumber|. 
-bool HouseNumbersMatch(strings::UniString const & houseNumber, vector const & queryParses); - -string DebugPrint(HouseNumberTokenizer::CharClass charClass); - -string DebugPrint(HouseNumberTokenizer::Token const & token); - -string DebugPrint(Parse const & parse); -} // namespace v2 -} // namespace search diff --git a/search/v2/house_to_street_table.cpp b/search/v2/house_to_street_table.cpp deleted file mode 100644 index 8c15dc59f3..0000000000 --- a/search/v2/house_to_street_table.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "search/v2/house_to_street_table.hpp" - -#include "indexer/index.hpp" - -#include "platform/mwm_traits.hpp" - -#include "coding/fixed_bits_ddvector.hpp" -#include "coding/reader.hpp" - -#include "base/assert.hpp" - -#include "defines.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -class Fixed3BitsTable : public HouseToStreetTable -{ -public: - using TVector = FixedBitsDDVector<3, ModelReaderPtr>; - - Fixed3BitsTable(MwmValue & value) - : m_vector(TVector::Create(value.m_cont.GetReader(SEARCH_ADDRESS_FILE_TAG))) - { - ASSERT(m_vector.get(), ("Can't instantiate FixedBitsDDVector.")); - } - - // HouseToStreetTable overrides: - bool Get(uint32_t houseId, uint32_t & streetIndex) const override - { - return m_vector->Get(houseId, streetIndex); - } - -private: - unique_ptr m_vector; -}; - -class DummyTable : public HouseToStreetTable -{ -public: - // HouseToStreetTable overrides: - bool Get(uint32_t /* houseId */, uint32_t & /* streetIndex */) const override { return false; } -}; -} // namespace - -unique_ptr HouseToStreetTable::Load(MwmValue & value) -{ - version::MwmTraits traits(value.GetMwmVersion().GetFormat()); - auto const format = traits.GetHouseToStreetTableFormat(); - - unique_ptr result; - - try - { - if (format == version::MwmTraits::HouseToStreetTableFormat::Fixed3BitsDDVector) - result.reset(new Fixed3BitsTable(value)); - } - catch (Reader::OpenException const & ex) - { - LOG(LWARNING, (ex.Msg())); - } - - if (!result) - 
result.reset(new DummyTable()); - return result; -} -} // namespace v2 -} // namespace search diff --git a/search/v2/house_to_street_table.hpp b/search/v2/house_to_street_table.hpp deleted file mode 100644 index 4d991c70f1..0000000000 --- a/search/v2/house_to_street_table.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include "std/limits.hpp" -#include "std/unique_ptr.hpp" - -class MwmValue; - -namespace search -{ -namespace v2 -{ -class HouseToStreetTable -{ -public: - virtual ~HouseToStreetTable() = default; - - /// @todo Actually, value may be nullptr in the very common case. - /// It's better to construct a table from MwmHandle. - static unique_ptr Load(MwmValue & value); - - // Returns true and stores to |streetIndex| the index number of the - // correct street corresponding to the house in the list of streets - // generated by ReverseGeocoder. Returns false if there is no such - // street. - virtual bool Get(uint32_t houseId, uint32_t & streetIndex) const = 0; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/intersection_result.cpp b/search/v2/intersection_result.cpp deleted file mode 100644 index 0733ac2e26..0000000000 --- a/search/v2/intersection_result.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "search/v2/intersection_result.hpp" - -#include "std/limits.hpp" -#include "std/sstream.hpp" - -namespace search -{ -namespace v2 -{ -// static -uint32_t const IntersectionResult::kInvalidId = numeric_limits::max(); - -IntersectionResult::IntersectionResult() - : m_poi(kInvalidId), m_building(kInvalidId), m_street(kInvalidId) -{ -} - -void IntersectionResult::Set(SearchModel::SearchType type, uint32_t id) -{ - switch (type) - { - case SearchModel::SEARCH_TYPE_POI: m_poi = id; break; - case SearchModel::SEARCH_TYPE_BUILDING: m_building = id; break; - case SearchModel::SEARCH_TYPE_STREET: m_street = id; break; - case SearchModel::SEARCH_TYPE_CITY: - case SearchModel::SEARCH_TYPE_VILLAGE: - case SearchModel::SEARCH_TYPE_STATE: - case 
SearchModel::SEARCH_TYPE_COUNTRY: - case SearchModel::SEARCH_TYPE_UNCLASSIFIED: - case SearchModel::SEARCH_TYPE_COUNT: ASSERT(false, ("Unsupported type.")); break; - } -} - -uint32_t IntersectionResult::InnermostResult() const -{ - if (m_poi != kInvalidId) - return m_poi; - if (m_building != kInvalidId) - return m_building; - if (m_street != kInvalidId) - return m_street; - return kInvalidId; -} - -void IntersectionResult::Clear() -{ - m_poi = kInvalidId; - m_building = kInvalidId; - m_street = kInvalidId; -} - -string DebugPrint(IntersectionResult const & result) -{ - ostringstream os; - os << "IntersectionResult [ "; - if (result.m_poi != IntersectionResult::kInvalidId) - os << "POI:" << result.m_poi << " "; - if (result.m_building != IntersectionResult::kInvalidId) - os << "BUILDING:" << result.m_building << " "; - if (result.m_street != IntersectionResult::kInvalidId) - os << "STREET:" << result.m_street << " "; - os << "]"; - return os.str(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/intersection_result.hpp b/search/v2/intersection_result.hpp deleted file mode 100644 index 53f755c622..0000000000 --- a/search/v2/intersection_result.hpp +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include "search/v2/search_model.hpp" - -#include "std/cstdint.hpp" -#include "std/string.hpp" - -namespace search -{ -namespace v2 -{ -// This class holds higher-level features for an intersection result, -// i.e. BUILDING and STREET for POI or STREET for BUILDING. -struct IntersectionResult -{ - static uint32_t const kInvalidId; - - IntersectionResult(); - - void Set(SearchModel::SearchType type, uint32_t id); - - // Returns the first valid feature among the [POI, BUILDING, - // STREET]. - uint32_t InnermostResult() const; - - // Returns true when at least one valid feature exists. - inline bool IsValid() const { return InnermostResult() != kInvalidId; } - - // Clears all fields to an invalid state. 
- void Clear(); - - uint32_t m_poi; - uint32_t m_building; - uint32_t m_street; -}; - -string DebugPrint(IntersectionResult const & result); -} // namespace v2 -} // namespace search diff --git a/search/v2/locality_scorer.cpp b/search/v2/locality_scorer.cpp deleted file mode 100644 index e72ed54dfa..0000000000 --- a/search/v2/locality_scorer.cpp +++ /dev/null @@ -1,139 +0,0 @@ -#include "search/v2/locality_scorer.hpp" - -#include "search/v2/token_slice.hpp" - -#include "std/algorithm.hpp" - -namespace search -{ -namespace v2 -{ -// static -size_t const LocalityScorer::kDefaultReadLimit = 100; - -namespace -{ -bool IsAlmostFullMatch(NameScore score) -{ - return score == NAME_SCORE_FULL_MATCH_PREFIX || score == NAME_SCORE_FULL_MATCH; -} -} // namespace - -// LocalityScorer::ExLocality ---------------------------------------------------------------------- -LocalityScorer::ExLocality::ExLocality() : m_numTokens(0), m_rank(0), m_nameScore(NAME_SCORE_ZERO) -{ -} - -LocalityScorer::ExLocality::ExLocality(Geocoder::Locality const & locality) - : m_locality(locality) - , m_numTokens(locality.m_endToken - locality.m_startToken) - , m_rank(0) - , m_nameScore(NAME_SCORE_ZERO) -{ -} - -// LocalityScorer ---------------------------------------------------------------------------------- -LocalityScorer::LocalityScorer(QueryParams const & params, Delegate const & delegate) - : m_params(params), m_delegate(delegate) -{ -} - -void LocalityScorer::GetTopLocalities(size_t limit, vector & localities) const -{ - vector ls; - ls.reserve(localities.size()); - for (auto const & locality : localities) - ls.emplace_back(locality); - - RemoveDuplicates(ls); - LeaveTopByRank(std::max(limit, kDefaultReadLimit), ls); - SortByName(ls); - if (ls.size() > limit) - ls.resize(limit); - - localities.clear(); - localities.reserve(ls.size()); - for (auto const & l : ls) - localities.push_back(l.m_locality); -} - -void LocalityScorer::RemoveDuplicates(vector & ls) const -{ - sort(ls.begin(), ls.end(), 
[](ExLocality const & lhs, ExLocality const & rhs) - { - if (lhs.GetId() != rhs.GetId()) - return lhs.GetId() < rhs.GetId(); - return lhs.m_numTokens > rhs.m_numTokens; - }); - ls.erase(unique(ls.begin(), ls.end(), - [](ExLocality const & lhs, ExLocality const & rhs) - { - return lhs.GetId() == rhs.GetId(); - }), - ls.end()); -} - -void LocalityScorer::LeaveTopByRank(size_t limit, vector & ls) const -{ - if (ls.size() <= limit) - return; - - for (auto & l : ls) - l.m_rank = m_delegate.GetRank(l.GetId()); - - sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs) - { - if (lhs.m_rank != rhs.m_rank) - return lhs.m_rank > rhs.m_rank; - return lhs.m_numTokens > rhs.m_numTokens; - }); - ls.resize(limit); -} - -void LocalityScorer::SortByName(vector & ls) const -{ - vector names; - for (auto & l : ls) - { - names.clear(); - m_delegate.GetNames(l.GetId(), names); - - auto score = NAME_SCORE_ZERO; - for (auto const & name : names) - { - score = max(score, GetNameScore(name, v2::TokenSlice(m_params, l.m_locality.m_startToken, - l.m_locality.m_endToken))); - } - l.m_nameScore = score; - } - - sort(ls.begin(), ls.end(), [](ExLocality const & lhs, ExLocality const & rhs) - { - if (IsAlmostFullMatch(lhs.m_nameScore) && IsAlmostFullMatch(rhs.m_nameScore)) - { - // When both localities match well, e.g. full or full prefix - // match, the one with larger number of tokens is selected. In - // case of tie, the one with better score is selected. - if (lhs.m_numTokens != rhs.m_numTokens) - return lhs.m_numTokens > rhs.m_numTokens; - if (lhs.m_nameScore != rhs.m_nameScore) - return lhs.m_nameScore > rhs.m_nameScore; - } - else - { - // When name scores differ, the one with better name score is - // selected. In case of tie, the one with larger number of - // matched tokens is selected. 
- if (lhs.m_nameScore != rhs.m_nameScore) - return lhs.m_nameScore > rhs.m_nameScore; - if (lhs.m_numTokens != rhs.m_numTokens) - return lhs.m_numTokens > rhs.m_numTokens; - } - - // Okay, in case of tie we select the one with better rank. This - // is a quite arbitrary decision and definitely may be improved. - return lhs.m_rank > rhs.m_rank; - }); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/locality_scorer.hpp b/search/v2/locality_scorer.hpp deleted file mode 100644 index d65a82edac..0000000000 --- a/search/v2/locality_scorer.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include "search/v2/geocoder.hpp" -#include "search/v2/ranking_utils.hpp" - -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -struct QueryParams; - -namespace v2 -{ -class LocalityScorer -{ -public: - static size_t const kDefaultReadLimit; - - class Delegate - { - public: - virtual ~Delegate() = default; - - virtual void GetNames(uint32_t featureId, vector & names) const = 0; - virtual uint8_t GetRank(uint32_t featureId) const = 0; - }; - - LocalityScorer(QueryParams const & params, Delegate const & delegate); - - // Leaves at most |limit| elements of |localities|, ordered by some - // combination of ranks and number of matched tokens. 
- void GetTopLocalities(size_t limit, vector & localities) const; - -private: - struct ExLocality - { - ExLocality(); - explicit ExLocality(Geocoder::Locality const & locality); - - inline uint32_t GetId() const { return m_locality.m_featureId; } - - Geocoder::Locality m_locality; - size_t m_numTokens; - uint8_t m_rank; - NameScore m_nameScore; - }; - - void RemoveDuplicates(vector & ls) const; - void LeaveTopByRank(size_t limit, vector & ls) const; - void SortByName(vector & ls) const; - - QueryParams const & m_params; - Delegate const & m_delegate; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/mwm_context.cpp b/search/v2/mwm_context.cpp deleted file mode 100644 index 69a3fd140e..0000000000 --- a/search/v2/mwm_context.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "search/v2/mwm_context.hpp" - - -namespace search -{ -namespace v2 -{ - -void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result) -{ - covering::CoveringGetter covering(rect, covering::ViewportWithLowLevels); - auto const & intervals = covering.Get(scale); - result.insert(result.end(), intervals.begin(), intervals.end()); -} - -MwmContext::MwmContext(MwmSet::MwmHandle handle) - : m_handle(move(handle)) - , m_value(*m_handle.GetValue()) - , m_vector(m_value.m_cont, m_value.GetHeader(), m_value.m_table) - , m_index(m_value.m_cont.GetReader(INDEX_FILE_TAG), m_value.m_factory) -{ -} - -bool MwmContext::GetFeature(uint32_t index, FeatureType & ft) const -{ - switch (GetEditedStatus(index)) - { - case osm::Editor::FeatureStatus::Deleted: - return false; - case osm::Editor::FeatureStatus::Modified: - case osm::Editor::FeatureStatus::Created: - VERIFY(osm::Editor::Instance().GetEditedFeature(GetId(), index, ft), ()); - return true; - case osm::Editor::FeatureStatus::Untouched: - m_vector.GetByIndex(index, ft); - ft.SetID(FeatureID(GetId(), index)); - return true; - } -} - -bool MwmContext::GetStreetIndex(uint32_t houseId, uint32_t & streetId) -{ - if 
(!m_houseToStreetTable) - { - m_houseToStreetTable = HouseToStreetTable::Load(m_value); - ASSERT(m_houseToStreetTable, ()); - } - return m_houseToStreetTable->Get(houseId, streetId); -} - -} // namespace v2 -} // namespace search diff --git a/search/v2/mwm_context.hpp b/search/v2/mwm_context.hpp deleted file mode 100644 index 191d2eea69..0000000000 --- a/search/v2/mwm_context.hpp +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once - -#include "search/v2/house_to_street_table.hpp" - -#include "indexer/features_vector.hpp" -#include "indexer/index.hpp" -#include "indexer/scale_index.hpp" - -#include "base/macros.hpp" - -#include "std/unique_ptr.hpp" - -class MwmValue; - -namespace search -{ -namespace v2 -{ - -void CoverRect(m2::RectD const & rect, int scale, covering::IntervalsT & result); - -/// @todo Move this class into "index" library and make it more generic. -/// Now it duplicates "Index" functionality. -class MwmContext -{ -public: - MwmSet::MwmHandle m_handle; - MwmValue & m_value; - -private: - FeaturesVector m_vector; - ScaleIndex m_index; - unique_ptr m_houseToStreetTable; - -public: - explicit MwmContext(MwmSet::MwmHandle handle); - - inline MwmSet::MwmId const & GetId() const { return m_handle.GetId(); } - inline string const & GetName() const { return GetInfo()->GetCountryName(); } - inline shared_ptr const & GetInfo() const { return GetId().GetInfo(); } - - template void ForEachIndex(covering::IntervalsT const & intervals, - uint32_t scale, TFn && fn) const - { - ForEachIndexImpl(intervals, scale, [&](uint32_t index) - { - // TODO: Optimize deleted checks by getting vector of deleted indexes from the Editor. 
- if (GetEditedStatus(index) != osm::Editor::FeatureStatus::Deleted) - fn(index); - }); - } - - template void ForEachFeature(m2::RectD const & rect, TFn && fn) const - { - uint32_t const scale = m_value.GetHeader().GetLastScale(); - covering::IntervalsT intervals; - CoverRect(rect, scale, intervals); - - ForEachIndexImpl(intervals, scale, - [&](uint32_t index) - { - FeatureType ft; - if (GetFeature(index, ft)) - fn(ft); - }); - } - - // @returns false if feature was deleted by user. - bool GetFeature(uint32_t index, FeatureType & ft) const; - - bool GetStreetIndex(uint32_t houseId, uint32_t & streetId); - -private: - osm::Editor::FeatureStatus GetEditedStatus(uint32_t index) const - { - return osm::Editor::Instance().GetFeatureStatus(GetId(), index); - } - - template void ForEachIndexImpl(covering::IntervalsT const & intervals, - uint32_t scale, TFn && fn) const - { - CheckUniqueIndexes checkUnique(m_value.GetHeader().GetFormat() >= version::Format::v5); - for (auto const & i : intervals) - m_index.ForEachInIntervalAndScale( - [&] (uint32_t index) - { - if (checkUnique(index)) - fn(index); - }, i.first, i.second, scale); - } - - DISALLOW_COPY_AND_MOVE(MwmContext); -}; - -} // namespace v2 -} // namespace search diff --git a/search/v2/nested_rects_cache.cpp b/search/v2/nested_rects_cache.cpp deleted file mode 100644 index 8708e4b00a..0000000000 --- a/search/v2/nested_rects_cache.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "search/v2/nested_rects_cache.hpp" - -#include "search/v2/ranking_info.hpp" - -#include "indexer/index.hpp" - -#include "geometry/mercator.hpp" -#include "geometry/rect2d.hpp" - -#include "base/assert.hpp" -#include "base/stl_add.hpp" - -#include "std/algorithm.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -double const kPositionToleranceMeters = 15.0; -} // namespace - -NestedRectsCache::NestedRectsCache(Index & index) - : m_index(index), m_scale(0), m_position(0, 0), m_valid(false) -{ -} - -void 
NestedRectsCache::SetPosition(m2::PointD const & position, int scale) -{ - double distance = MercatorBounds::DistanceOnEarth(position, m_position); - if (distance < kPositionToleranceMeters && scale == m_scale && m_valid) - return; - m_position = position; - m_scale = scale; - Update(); -} - -double NestedRectsCache::GetDistanceToFeatureMeters(FeatureID const & id) const -{ - if (!m_valid) - return RankingInfo::kMaxDistMeters; - - size_t bucket = 0; - for (; bucket != RECT_SCALE_COUNT; ++bucket) - { - if (binary_search(m_features[bucket].begin(), m_features[bucket].end(), id)) - break; - } - auto const scale = static_cast(bucket); - - if (scale != RECT_SCALE_COUNT) - return GetRadiusMeters(scale); - - if (auto const & info = id.m_mwmId.GetInfo()) - { - auto const & rect = info->m_limitRect; - return max(MercatorBounds::DistanceOnEarth(rect.Center(), m_position), GetRadiusMeters(scale)); - } - - return RankingInfo::kMaxDistMeters; -} - -void NestedRectsCache::Clear() -{ - for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale) - { - m_features[scale].clear(); - m_features[scale].shrink_to_fit(); - } - m_valid = false; -} - -// static -double NestedRectsCache::GetRadiusMeters(RectScale scale) -{ - switch (scale) - { - case RECT_SCALE_TINY: return 100.0; - case RECT_SCALE_SMALL: return 300.0; - case RECT_SCALE_MEDIUM: return 1000.0; - case RECT_SCALE_LARGE: return 2500.0; - case RECT_SCALE_COUNT: return 5000.0; - } -} - -void NestedRectsCache::Update() -{ - for (int scale = 0; scale != RECT_SCALE_COUNT; ++scale) - { - auto & features = m_features[scale]; - - features.clear(); - m2::RectD const rect = MercatorBounds::RectByCenterXYAndSizeInMeters( - m_position, GetRadiusMeters(static_cast(scale))); - auto addId = MakeBackInsertFunctor(features); - m_index.ForEachFeatureIDInRect(addId, rect, m_scale); - sort(features.begin(), features.end()); - } - - m_valid = true; -} -} // namespace v2 -} // namespace search diff --git a/search/v2/nested_rects_cache.hpp 
b/search/v2/nested_rects_cache.hpp deleted file mode 100644 index a4f7387a04..0000000000 --- a/search/v2/nested_rects_cache.hpp +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "indexer/feature_decl.hpp" - -#include "geometry/point2d.hpp" - -class Index; - -namespace search -{ -namespace v2 -{ -class NestedRectsCache -{ -public: - explicit NestedRectsCache(Index & index); - - void SetPosition(m2::PointD const & position, int scale); - - double GetDistanceToFeatureMeters(FeatureID const & id) const; - - void Clear(); - -private: - enum RectScale - { - RECT_SCALE_TINY, - RECT_SCALE_SMALL, - RECT_SCALE_MEDIUM, - RECT_SCALE_LARGE, - - RECT_SCALE_COUNT - }; - - static double GetRadiusMeters(RectScale scale); - - void Update(); - - Index & m_index; - int m_scale; - m2::PointD m_position; - bool m_valid; - - // Sorted lists of features. - vector m_features[RECT_SCALE_COUNT]; -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/pre_ranking_info.cpp b/search/v2/pre_ranking_info.cpp deleted file mode 100644 index b45f92dc08..0000000000 --- a/search/v2/pre_ranking_info.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "search/v2/pre_ranking_info.hpp" - -#include "std/sstream.hpp" - -namespace search -{ -namespace v2 -{ -string DebugPrint(PreRankingInfo const & info) -{ - ostringstream os; - os << "PreRankingInfo ["; - os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; - os << "m_startToken:" << info.m_startToken << ","; - os << "m_endToken:" << info.m_endToken << ","; - os << "m_rank:" << info.m_rank << ","; - os << "m_searchType:" << info.m_searchType; - os << "]"; - return os.str(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/pre_ranking_info.hpp b/search/v2/pre_ranking_info.hpp deleted file mode 100644 index 5459e4c285..0000000000 --- a/search/v2/pre_ranking_info.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "search/v2/search_model.hpp" - -#include "std/cstdint.hpp" - -namespace search -{ -namespace v2 -{ 
-struct PreRankingInfo -{ - inline size_t GetNumTokens() const { return m_endToken - m_startToken; } - - // An abstract distance from the feature to the pivot. Measurement - // units do not matter here. - double m_distanceToPivot = 0; - - // Tokens [m_startToken, m_endToken) match to the feature name or - // house number. - size_t m_startToken = 0; - size_t m_endToken = 0; - - // Rank of the feature. - uint8_t m_rank = 0; - - // Search type for the feature. - SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT; -}; - -string DebugPrint(PreRankingInfo const & info); -} // namespace v2 -} // namespace search diff --git a/search/v2/rank_table_cache.cpp b/search/v2/rank_table_cache.cpp deleted file mode 100644 index cfaf1ee62e..0000000000 --- a/search/v2/rank_table_cache.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "search/v2/rank_table_cache.hpp" - -#include "search/dummy_rank_table.hpp" - -#include "indexer/rank_table.hpp" - -namespace search -{ -namespace v2 -{ - -RankTable const & RankTableCache::Get(Index & index, TId const & mwmId) -{ - auto const it = m_ranks.find(TKey(mwmId)); - if (it != m_ranks.end()) - return *it->second; - - TKey handle(index.GetMwmHandleById(mwmId)); - auto table = RankTable::Load(handle.GetValue()->m_cont); - if (!table) - table.reset(new DummyRankTable()); - - return *(m_ranks.emplace(move(handle), move(table)).first->second.get()); -} - -void RankTableCache::Remove(TId const & id) -{ - m_ranks.erase(TKey(id)); -} - -void RankTableCache::Clear() -{ - m_ranks.clear(); -} - -} // namespace v2 -} // namespace search diff --git a/search/v2/rank_table_cache.hpp b/search/v2/rank_table_cache.hpp deleted file mode 100644 index d4cac4041a..0000000000 --- a/search/v2/rank_table_cache.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include "indexer/index.hpp" - -#include "base/macros.hpp" - -#include "std/map.hpp" -#include "std/unique_ptr.hpp" - -namespace search -{ -class RankTable; - -namespace v2 -{ -class RankTableCache 
-{ - using TId = MwmSet::MwmId; - - struct TKey : public MwmSet::MwmHandle - { - TKey() = default; - TKey(TKey &&) = default; - - explicit TKey(TId const & id) - { - this->m_mwmId = id; - } - explicit TKey(MwmSet::MwmHandle && handle) - : MwmSet::MwmHandle(move(handle)) - { - } - }; - -public: - RankTableCache() = default; - - RankTable const & Get(Index & index, TId const & mwmId); - - void Remove(TId const & id); - void Clear(); - -private: - struct Compare - { - bool operator() (TKey const & r1, TKey const & r2) const - { - return (r1.GetId() < r2.GetId()); - } - }; - - map, Compare> m_ranks; - - DISALLOW_COPY_AND_MOVE(RankTableCache); -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/ranking_info.cpp b/search/v2/ranking_info.cpp deleted file mode 100644 index 0de02d483e..0000000000 --- a/search/v2/ranking_info.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "search/v2/ranking_info.hpp" - -#include "std/cmath.hpp" -#include "std/iomanip.hpp" -#include "std/limits.hpp" -#include "std/sstream.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -// See search/search_quality/scoring_model.py for details. In short, -// these coeffs correspond to coeffs in a linear model. 
-double const kDistanceToPivot = -0.7579760; -double const kRank = 0.9185310; -double const kFalseCats = -0.7996119; -double const kNameScore[NameScore::NAME_SCORE_COUNT] = { - -1.0000000 /* Zero */, - 0.3585180 /* Substring Prefix */, - 0.3198080 /* Substring */, - 0.3216740 /* Full Match Prefix */, - 0.3216740 /* Full Match */ -}; -double const kSearchType[SearchModel::SEARCH_TYPE_COUNT] = { - -0.1419479 /* POI */, - -0.1419479 /* Building */, - -0.1462099 /* Street */, - -0.1509122 /* Unclassified */, - -0.2591458 /* Village */, - -0.0451342 /* City */, - 0.2515398 /* State */, - 0.4918102 /* Country */ -}; - -double TransformDistance(double distance) -{ - return min(distance, RankingInfo::kMaxDistMeters) / RankingInfo::kMaxDistMeters; -} -} // namespace - -// static -double const RankingInfo::kMaxDistMeters = 2e6; - -// static -void RankingInfo::PrintCSVHeader(ostream & os) -{ - os << "DistanceToPivot" - << ",Rank" - << ",NameScore" - << ",SearchType" - << ",PureCats" - << ",FalseCats"; -} - -string DebugPrint(RankingInfo const & info) -{ - ostringstream os; - os << "RankingInfo ["; - os << "m_distanceToPivot:" << info.m_distanceToPivot << ","; - os << "m_rank:" << static_cast(info.m_rank) << ","; - os << "m_nameScore:" << DebugPrint(info.m_nameScore) << ","; - os << "m_searchType:" << DebugPrint(info.m_searchType) << ","; - os << "m_pureCats:" << info.m_pureCats << ","; - os << "m_falseCats:" << info.m_falseCats; - os << "]"; - return os.str(); -} - -void RankingInfo::ToCSV(ostream & os) const -{ - os << fixed; - os << m_distanceToPivot << "," << static_cast(m_rank) << "," << DebugPrint(m_nameScore) - << "," << DebugPrint(m_searchType) << "," << m_pureCats << "," << m_falseCats; -} - -double RankingInfo::GetLinearModelRank() const -{ - // NOTE: this code must be consistent with scoring_model.py. Keep - // this in mind when you're going to change scoring_model.py or this - // code. 
We're working on automatic rank calculation code generator - // integrated in the build system. - double const distanceToPivot = TransformDistance(m_distanceToPivot); - double const rank = static_cast(m_rank) / numeric_limits::max(); - - auto nameScore = m_nameScore; - if (m_pureCats || m_falseCats) - { - // If the feature was matched only by categorial tokens, it's - // better for ranking to set name score to zero. For example, - // when we're looking for a "cafe", cafes "Cafe Pushkin" and - // "Lermontov" both match to the request, but must be ranked in - // accordance to their distances to the user position or viewport, - // in spite of "Cafe Pushkin" has a non-zero name rank. - nameScore = NAME_SCORE_ZERO; - } - - return kDistanceToPivot * distanceToPivot + kRank * rank + kNameScore[nameScore] + - kSearchType[m_searchType] + m_falseCats * kFalseCats; -} -} // namespace v2 -} // namespace search diff --git a/search/v2/ranking_info.hpp b/search/v2/ranking_info.hpp deleted file mode 100644 index 8a1cf70896..0000000000 --- a/search/v2/ranking_info.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "search/v2/pre_ranking_info.hpp" -#include "search/v2/ranking_utils.hpp" -#include "search/v2/search_model.hpp" - -#include "std/iostream.hpp" - -class FeatureType; - -namespace search -{ -namespace v2 -{ -struct RankingInfo -{ - static double const kMaxDistMeters; - - // Distance from the feature to the pivot point. - double m_distanceToPivot = kMaxDistMeters; - - // Rank of the feature. - uint8_t m_rank = 0; - - // Score for the feature's name. - NameScore m_nameScore = NAME_SCORE_ZERO; - - // Search type for the feature. - SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT; - - // True if all of the tokens that the feature was matched by - // correspond to this feature's categories. 
- bool m_pureCats = false; - - // True if none of the tokens that the feature was matched by - // corresponds to this feature's categories although all of the - // tokens are categorial ones. - bool m_falseCats = false; - - static void PrintCSVHeader(ostream & os); - - void ToCSV(ostream & os) const; - - // Returns rank calculated by a linear model. Large values - // correspond to important features. - double GetLinearModelRank() const; -}; - -string DebugPrint(RankingInfo const & info); -} // namespace v2 -} // namespace search diff --git a/search/v2/ranking_utils.cpp b/search/v2/ranking_utils.cpp deleted file mode 100644 index f865f3336e..0000000000 --- a/search/v2/ranking_utils.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "search/v2/ranking_utils.hpp" - -#include "std/algorithm.hpp" - -using namespace strings; - -namespace search -{ -namespace v2 -{ -namespace impl -{ -bool Match(vector const & tokens, UniString const & token) -{ - return find(tokens.begin(), tokens.end(), token) != tokens.end(); -} - -bool PrefixMatch(vector const & prefixes, UniString const & token) -{ - for (auto const & prefix : prefixes) - { - if (StartsWith(token, prefix)) - return true; - } - return false; -} -} // namespace impl - -string DebugPrint(NameScore score) -{ - switch (score) - { - case NAME_SCORE_ZERO: return "Zero"; - case NAME_SCORE_SUBSTRING_PREFIX: return "Substring Prefix"; - case NAME_SCORE_SUBSTRING: return "Substring"; - case NAME_SCORE_FULL_MATCH_PREFIX: return "Full Match Prefix"; - case NAME_SCORE_FULL_MATCH: return "Full Match"; - case NAME_SCORE_COUNT: return "Count"; - } - return "Unknown"; -} -} // namespace v2 -} // namespace search diff --git a/search/v2/ranking_utils.hpp b/search/v2/ranking_utils.hpp deleted file mode 100644 index 1d0d1d2c65..0000000000 --- a/search/v2/ranking_utils.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include "search/query_params.hpp" -#include "search/v2/search_model.hpp" - -#include "indexer/search_delimiters.hpp" -#include 
"indexer/search_string_utils.hpp" - -#include "base/stl_add.hpp" -#include "base/string_utils.hpp" - -#include "std/cstdint.hpp" -#include "std/limits.hpp" -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -struct QueryParams; - -namespace v2 -{ -namespace impl -{ -bool Match(vector const & tokens, strings::UniString const & token); - -bool PrefixMatch(vector const & prefixes, strings::UniString const & token); -} // namespace impl - -// The order and numeric values are important here. Please, check all -// use-cases before changing this enum. -enum NameScore -{ - NAME_SCORE_ZERO = 0, - NAME_SCORE_SUBSTRING_PREFIX = 1, - NAME_SCORE_SUBSTRING = 2, - NAME_SCORE_FULL_MATCH_PREFIX = 3, - NAME_SCORE_FULL_MATCH = 4, - - NAME_SCORE_COUNT -}; - -template -NameScore GetNameScore(string const & name, TSlice const & slice) -{ - if (slice.Empty()) - return NAME_SCORE_ZERO; - - vector tokens; - SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters()); - return GetNameScore(tokens, slice); -} - -template -NameScore GetNameScore(vector const & tokens, TSlice const & slice) -{ - if (slice.Empty()) - return NAME_SCORE_ZERO; - - size_t const n = tokens.size(); - size_t const m = slice.Size(); - - bool const lastTokenIsPrefix = slice.IsPrefix(m - 1); - - NameScore score = NAME_SCORE_ZERO; - for (int offset = 0; offset + m <= n; ++offset) - { - bool match = true; - for (int i = 0; i < m - 1 && match; ++i) - match = match && impl::Match(slice.Get(i), tokens[offset + i]); - if (!match) - continue; - - if (impl::Match(slice.Get(m - 1), tokens[offset + m - 1])) - { - if (m == n) - return NAME_SCORE_FULL_MATCH; - score = max(score, NAME_SCORE_SUBSTRING); - } - if (lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1])) - { - if (m == n) - return NAME_SCORE_FULL_MATCH_PREFIX; - score = max(score, NAME_SCORE_SUBSTRING_PREFIX); - } - } - return score; -} - -string DebugPrint(NameScore score); -} // 
namespace v2 -} // namespace search diff --git a/search/v2/search_model.cpp b/search/v2/search_model.cpp deleted file mode 100644 index 5117d6e3a7..0000000000 --- a/search/v2/search_model.cpp +++ /dev/null @@ -1,149 +0,0 @@ -#include "search/v2/search_model.hpp" - -#include "indexer/classificator.hpp" -#include "indexer/feature.hpp" - -#include "base/macros.hpp" - -using namespace ftypes; - -namespace search -{ -namespace v2 -{ -TwoLevelPOIChecker::TwoLevelPOIChecker() : ftypes::BaseChecker(2 /* level */) -{ - Classificator const & c = classif(); - StringIL arr[] = { - {"highway", "bus_stop"}, - {"highway", "speed_camera"}, - {"waterway", "waterfall"}, - {"natural", "volcano"}, - {"natural", "cave_entrance"}, - {"natural", "beach"} - }; - - for (size_t i = 0; i < ARRAY_SIZE(arr); ++i) - m_types.push_back(c.GetTypeByPath(arr[i])); -} - -namespace -{ -/// Should be similar with ftypes::IsAddressObjectChecker object classes. -class OneLevelPOIChecker : public ftypes::BaseChecker -{ -public: - OneLevelPOIChecker() : ftypes::BaseChecker(1 /* level */) - { - Classificator const & c = classif(); - - auto paths = {"amenity", "historic", "office", "railway", "shop", "sport", "tourism", "craft"}; - for (auto const & path : paths) - m_types.push_back(c.GetTypeByPath({path})); - } -}; - -class IsPoiChecker -{ -public: - IsPoiChecker() {} - - static IsPoiChecker const & Instance() - { - static const IsPoiChecker inst; - return inst; - } - - bool operator()(FeatureType const & ft) const { return m_oneLevel(ft) || m_twoLevel(ft); } - -private: - OneLevelPOIChecker const m_oneLevel; - TwoLevelPOIChecker const m_twoLevel; -}; - -class CustomIsBuildingChecker -{ -public: - static CustomIsBuildingChecker const & Instance() - { - static const CustomIsBuildingChecker inst; - return inst; - } - - bool operator()(FeatureType const & ft) const - { - return !ft.GetHouseNumber().empty() || IsBuildingChecker::Instance()(ft); - } - -private: - CustomIsBuildingChecker() {} -}; -} // namespace 
- -// static -SearchModel const & SearchModel::Instance() -{ - static SearchModel model; - return model; -} - -SearchModel::SearchType SearchModel::GetSearchType(FeatureType const & feature) const -{ - static auto const & buildingChecker = CustomIsBuildingChecker::Instance(); - static auto const & streetChecker = IsStreetChecker::Instance(); - static auto const & localityChecker = IsLocalityChecker::Instance(); - static auto const & poiChecker = IsPoiChecker::Instance(); - - if (buildingChecker(feature)) - return SEARCH_TYPE_BUILDING; - - if (streetChecker(feature)) - return SEARCH_TYPE_STREET; - - if (localityChecker(feature)) - { - Type type = localityChecker.GetType(feature); - switch (type) - { - case NONE: - ASSERT(false, ("Unknown locality.")); - return SEARCH_TYPE_UNCLASSIFIED; - case STATE: - return SEARCH_TYPE_STATE; - case COUNTRY: - return SEARCH_TYPE_COUNTRY; - case CITY: - case TOWN: - return SEARCH_TYPE_CITY; - case VILLAGE: - return SEARCH_TYPE_VILLAGE; - case LOCALITY_COUNT: - return SEARCH_TYPE_UNCLASSIFIED; - } - } - - if (poiChecker(feature)) - return SEARCH_TYPE_POI; - - return SEARCH_TYPE_UNCLASSIFIED; -} - -string DebugPrint(SearchModel::SearchType type) -{ - switch (type) - { - case SearchModel::SEARCH_TYPE_POI: return "POI"; - case SearchModel::SEARCH_TYPE_BUILDING: return "Building"; - case SearchModel::SEARCH_TYPE_STREET: return "Street"; - case SearchModel::SEARCH_TYPE_CITY: return "City"; - case SearchModel::SEARCH_TYPE_VILLAGE: return "Village"; - case SearchModel::SEARCH_TYPE_STATE: return "State"; - case SearchModel::SEARCH_TYPE_COUNTRY: return "Country"; - case SearchModel::SEARCH_TYPE_UNCLASSIFIED: return "Unclassified"; - case SearchModel::SEARCH_TYPE_COUNT: return "Count"; - } - ASSERT(false, ("Unknown search type:", static_cast(type))); - return string(); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/search_model.hpp b/search/v2/search_model.hpp deleted file mode 100644 index 5b81678944..0000000000 --- 
a/search/v2/search_model.hpp +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include "indexer/ftypes_matcher.hpp" - -#include "std/string.hpp" -#include "std/vector.hpp" - -#include "base/macros.hpp" - -class FeatureType; - -namespace search -{ -namespace v2 -{ - -/// Describes 2-level POI-exception types that don't belong to any POI-common classes -/// (amenity, shop, tourism, ...). Used in search algo and search categories index generation. -class TwoLevelPOIChecker : public ftypes::BaseChecker -{ -public: - TwoLevelPOIChecker(); -}; - -// This class is used to map feature types to a restricted set of -// different search classes (do not confuse these classes with search -// categories - they are completely different things). -class SearchModel -{ -public: - enum SearchType - { - // Low-level features such as amenities, offices, shops, buildings - // without house number, etc. - SEARCH_TYPE_POI, - - // All features with set house number. - SEARCH_TYPE_BUILDING, - - SEARCH_TYPE_STREET, - - // All low-level features except POI, BUILDING and STREET. 
- SEARCH_TYPE_UNCLASSIFIED, - - SEARCH_TYPE_VILLAGE, - SEARCH_TYPE_CITY, - SEARCH_TYPE_STATE, // US or Canadian states - SEARCH_TYPE_COUNTRY, - - SEARCH_TYPE_COUNT - }; - - static SearchModel const & Instance(); - - SearchType GetSearchType(FeatureType const & feature) const; - -private: - SearchModel() = default; - - DISALLOW_COPY_AND_MOVE(SearchModel); -}; - -string DebugPrint(SearchModel::SearchType type); -} // namespace v2 -} // namespace search diff --git a/search/v2/stats_cache.hpp b/search/v2/stats_cache.hpp deleted file mode 100644 index 42cfee0e0e..0000000000 --- a/search/v2/stats_cache.hpp +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include "base/logging.hpp" - -#include "std/unordered_map.hpp" -#include "std/utility.hpp" - - -namespace search -{ -namespace v2 -{ - -template -class Cache -{ - unordered_map m_map; - - /// query statistics - size_t m_accesses; - size_t m_misses; - - size_t m_emptyQueriesCount; /// empty queries count at a row - string m_name; /// cache name for print functions - -public: - explicit Cache(string const & name) - : m_accesses(0), m_misses(0), m_emptyQueriesCount(0), m_name(name) - { - } - - pair Get(TKey const & key) - { - auto r = m_map.insert(make_pair(key, TValue())); - - ++m_accesses; - if (r.second) - ++m_misses; - - return pair(r.first->second, r.second); - } - - void Clear() - { - m_map.clear(); - m_accesses = m_misses = 0; - m_emptyQueriesCount = 0; - } - - /// Called at the end of every search query. 
- void ClearIfNeeded() - { - if (m_accesses != 0) - { - LOG(LDEBUG, ("Cache", m_name, "Queries =", m_accesses, - "From cache =", m_accesses - m_misses, "Added =", m_misses)); - m_accesses = m_misses = 0; - m_emptyQueriesCount = 0; - } - else if (++m_emptyQueriesCount > 5) - { - LOG(LDEBUG, ("Clearing cache", m_name)); - Clear(); - } - } -}; - -} // namespace v2 -} // namespace search diff --git a/search/v2/street_vicinity_loader.cpp b/search/v2/street_vicinity_loader.cpp deleted file mode 100644 index b5a4bddb63..0000000000 --- a/search/v2/street_vicinity_loader.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "search/v2/street_vicinity_loader.hpp" - -#include "indexer/feature_covering.hpp" -#include "indexer/feature_decl.hpp" -#include "indexer/index.hpp" - -#include "geometry/mercator.hpp" - -#include "geometry/point2d.hpp" - -#include "base/math.hpp" -#include "base/stl_add.hpp" - -namespace search -{ -namespace v2 -{ -StreetVicinityLoader::StreetVicinityLoader(int scale, double offsetMeters) - : m_context(nullptr), m_scale(scale), m_offsetMeters(offsetMeters), m_cache("Streets") -{ -} - -void StreetVicinityLoader::SetContext(MwmContext * context) -{ - ASSERT(context, ()); - if (m_context == context) - return; - - m_context = context; - auto const scaleRange = m_context->m_value.GetHeader().GetScaleRange(); - m_scale = my::clamp(m_scale, scaleRange.first, scaleRange.second); -} - -void StreetVicinityLoader::OnQueryFinished() -{ - m_cache.ClearIfNeeded(); -} - -StreetVicinityLoader::Street const & StreetVicinityLoader::GetStreet(uint32_t featureId) -{ - auto r = m_cache.Get(featureId); - if (!r.second) - return r.first; - - LoadStreet(featureId, r.first); - return r.first; -} - -void StreetVicinityLoader::LoadStreet(uint32_t featureId, Street & street) -{ - FeatureType feature; - if (!m_context->GetFeature(featureId, feature)) - return; - - if (feature.GetFeatureType() != feature::GEOM_LINE) - return; - - vector points; - 
feature.ForEachPoint(MakeBackInsertFunctor(points), FeatureType::BEST_GEOMETRY); - ASSERT(!points.empty(), ()); - - for (auto const & point : points) - street.m_rect.Add(MercatorBounds::RectByCenterXYAndSizeInMeters(point, m_offsetMeters)); - - covering::CoveringGetter coveringGetter(street.m_rect, covering::ViewportWithLowLevels); - auto const & intervals = coveringGetter.Get(m_scale); - m_context->ForEachIndex(intervals, m_scale, MakeBackInsertFunctor(street.m_features)); - - street.m_calculator = make_unique(points); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/street_vicinity_loader.hpp b/search/v2/street_vicinity_loader.hpp deleted file mode 100644 index b4f308795e..0000000000 --- a/search/v2/street_vicinity_loader.hpp +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once - -#include "search/projection_on_street.hpp" -#include "search/v2/mwm_context.hpp" -#include "search/v2/stats_cache.hpp" - -#include "indexer/feature.hpp" -#include "indexer/feature_algo.hpp" - -#include "geometry/rect2d.hpp" - -#include "base/macros.hpp" - -#include "std/unordered_map.hpp" - - -namespace search -{ -namespace v2 -{ -class MwmContext; - -// This class is able to load features in a street's vicinity. -// -// NOTE: this class *IS NOT* thread-safe. -class StreetVicinityLoader -{ -public: - struct Street - { - Street() = default; - Street(Street && street) = default; - - inline bool IsEmpty() const { return !m_calculator || m_rect.IsEmptyInterior(); } - - vector m_features; - m2::RectD m_rect; - unique_ptr m_calculator; - - /// @todo Cache GetProjection results for features here, because - /// feature::GetCenter and ProjectionOnStreetCalculator::GetProjection are not so fast. - - DISALLOW_COPY(Street); - }; - - StreetVicinityLoader(int scale, double offsetMeters); - void SetContext(MwmContext * context); - - // Calls |fn| on each index in |sortedIds| where sortedIds[index] - // belongs to the street's vicinity. 
- template - void ForEachInVicinity(uint32_t streetId, vector const & sortedIds, - double offsetMeters, TFn const & fn) - { - // Passed offset param should be less than the cached one, or the cache is invalid otherwise. - ASSERT_LESS_OR_EQUAL(offsetMeters, m_offsetMeters, ()); - - Street const & street = GetStreet(streetId); - if (street.IsEmpty()) - return; - - ProjectionOnStreetCalculator const & calculator = *street.m_calculator; - ProjectionOnStreet proj; - for (uint32_t id : street.m_features) - { - // Load center and check projection only when |id| is in |sortedIds|. - if (!binary_search(sortedIds.begin(), sortedIds.end(), id)) - continue; - - FeatureType ft; - if (!m_context->GetFeature(id, ft)) - continue; // Feature was deleted. - - if (calculator.GetProjection(feature::GetCenter(ft, FeatureType::WORST_GEOMETRY), proj) && - proj.m_distMeters <= offsetMeters) - { - fn(id); - } - } - } - - void OnQueryFinished(); - - Street const & GetStreet(uint32_t featureId); - -private: - void LoadStreet(uint32_t featureId, Street & street); - - MwmContext * m_context; - int m_scale; - double const m_offsetMeters; - - Cache m_cache; - - DISALLOW_COPY_AND_MOVE(StreetVicinityLoader); -}; -} // namespace v2 -} // namespace search diff --git a/search/v2/token_slice.cpp b/search/v2/token_slice.cpp deleted file mode 100644 index 2c230eae05..0000000000 --- a/search/v2/token_slice.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "search/v2/token_slice.hpp" - -#include "std/sstream.hpp" - -namespace search -{ -namespace v2 -{ -namespace -{ -template -string SliceToString(string const & name, TSlice const & slice) -{ - ostringstream os; - os << name << " ["; - for (size_t i = 0; i < slice.Size(); ++i) - { - os << DebugPrint(slice.Get(i)); - if (i + 1 != slice.Size()) - os << ", "; - } - os << "]"; - return os.str(); -} -} // namespace - -TokenSlice::TokenSlice(QueryParams const & params, size_t startToken, size_t endToken) - : m_params(params), m_offset(startToken), m_size(endToken - 
startToken) -{ - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); -} - -bool TokenSlice::IsPrefix(size_t i) const -{ - ASSERT_LESS(i, Size(), ()); - return m_offset + i == m_params.m_tokens.size(); -} - -bool TokenSlice::IsLast(size_t i) const -{ - ASSERT_LESS(i, Size(), ()); - if (m_params.m_prefixTokens.empty()) - return m_offset + i + 1 == m_params.m_tokens.size(); - return m_offset + i == m_params.m_tokens.size(); -} - -TokenSliceNoCategories::TokenSliceNoCategories(QueryParams const & params, size_t startToken, - size_t endToken) - : m_params(params) -{ - ASSERT_LESS_OR_EQUAL(startToken, endToken, ()); - - m_indexes.reserve(endToken - startToken); - for (size_t i = startToken; i < endToken; ++i) - { - if (!m_params.m_isCategorySynonym[i]) - m_indexes.push_back(i); - } -} - -string DebugPrint(TokenSlice const & slice) -{ - return SliceToString("TokenSlice", slice); -} - -string DebugPrint(TokenSliceNoCategories const & slice) -{ - return SliceToString("TokenSliceNoCategories", slice); -} -} // namespace v2 -} // namespace search diff --git a/search/v2/token_slice.hpp b/search/v2/token_slice.hpp deleted file mode 100644 index 725ea98355..0000000000 --- a/search/v2/token_slice.hpp +++ /dev/null @@ -1,112 +0,0 @@ -#pragma once - -#include "search/query_params.hpp" - -#include "indexer/string_slice.hpp" - -#include "base/assert.hpp" - -#include "std/cstdint.hpp" -#include "std/string.hpp" -#include "std/vector.hpp" - -namespace search -{ -namespace v2 -{ -class TokenSlice -{ -public: - TokenSlice(QueryParams const & params, size_t startToken, size_t endToken); - - inline QueryParams::TSynonymsVector const & Get(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_params.GetTokens(m_offset + i); - } - - inline size_t Size() const { return m_size; } - - inline bool Empty() const { return Size() == 0; } - - // Returns true if the |i|-th token in the slice is the incomplete - // (prefix) token. 
- bool IsPrefix(size_t i) const; - - // Returns true if the |i|-th token in the slice is the last - // (regardless - full or not) token in the query. - bool IsLast(size_t i) const; - -private: - QueryParams const & m_params; - size_t const m_offset; - size_t const m_size; -}; - -class TokenSliceNoCategories -{ -public: - TokenSliceNoCategories(QueryParams const & params, size_t startToken, size_t endToken); - - inline QueryParams::TSynonymsVector const & Get(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_params.GetTokens(m_indexes[i]); - } - - inline size_t Size() const { return m_indexes.size(); } - - inline bool Empty() const { return Size() == 0; } - - inline bool IsPrefix(size_t i) const - { - ASSERT_LESS(i, Size(), ()); - return m_indexes[i] == m_params.m_tokens.size(); - } - -private: - QueryParams const & m_params; - vector m_indexes; -}; - -class QuerySlice : public StringSliceBase -{ -public: - QuerySlice(TokenSlice const & slice) : m_slice(slice) {} - - // QuerySlice overrides: - QueryParams::TString const & Get(size_t i) const override { return m_slice.Get(i).front(); } - size_t Size() const override { return m_slice.Size(); } - -private: - TokenSlice const m_slice; -}; - -template -class QuerySliceOnRawStrings : public StringSliceBase -{ -public: - QuerySliceOnRawStrings(TCont const & tokens, TString const & prefix) - : m_tokens(tokens), m_prefix(prefix) - { - } - - // QuerySlice overrides: - QueryParams::TString const & Get(size_t i) const override - { - ASSERT_LESS(i, Size(), ()); - return i == m_tokens.size() ? m_prefix : m_tokens[i]; - } - - size_t Size() const override { return m_tokens.size() + (m_prefix.empty() ? 0 : 1); } - - private: - TCont const & m_tokens; - TString const & m_prefix; -}; - -string DebugPrint(TokenSlice const & slice); - -string DebugPrint(TokenSliceNoCategories const & slice); -} // namespace v2 -} // namespace search -- cgit v1.2.3