Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'search/geocoder.hpp')
-rw-r--r--search/geocoder.hpp390
1 files changed, 390 insertions, 0 deletions
diff --git a/search/geocoder.hpp b/search/geocoder.hpp
new file mode 100644
index 0000000000..13810f27f4
--- /dev/null
+++ b/search/geocoder.hpp
@@ -0,0 +1,390 @@
+#pragma once
+
+#include "search/cancel_exception.hpp"
+#include "search/features_layer.hpp"
+#include "search/features_layer_path_finder.hpp"
+#include "search/geometry_cache.hpp"
+#include "search/mode.hpp"
+#include "search/mwm_context.hpp"
+#include "search/nested_rects_cache.hpp"
+#include "search/pre_ranking_info.hpp"
+#include "search/query_params.hpp"
+#include "search/ranking_utils.hpp"
+#include "search/search_model.hpp"
+
+#include "indexer/index.hpp"
+#include "indexer/mwm_set.hpp"
+
+#include "storage/country_info_getter.hpp"
+
+#include "coding/compressed_bit_vector.hpp"
+
+#include "geometry/rect2d.hpp"
+
+#include "base/buffer_vector.hpp"
+#include "base/cancellable.hpp"
+#include "base/macros.hpp"
+#include "base/string_utils.hpp"
+
+#include "std/limits.hpp"
+#include "std/set.hpp"
+#include "std/string.hpp"
+#include "std/unique_ptr.hpp"
+#include "std/unordered_map.hpp"
+#include "std/vector.hpp"
+
+class MwmInfo;
+class MwmValue;
+
+namespace coding
+{
+class CompressedBitVector;
+}
+
+namespace storage
+{
+class CountryInfoGetter;
+} // namespace storage
+
+namespace search
+{
+class PreRanker;
+
+class FeaturesFilter;
+class FeaturesLayerMatcher;
+class SearchModel;
+class TokenSlice;
+
+// This class is used to retrieve all features corresponding to a
+// search query. Search query is represented as a sequence of tokens
+// (including synonyms for these tokens), and Geocoder tries to build
+// all possible partitions (or layers) of the search query, where each
+// layer is a set of features corresponding to some search class
+// (e.g. POI, BUILDING, STREET, etc., see search_model.hpp).
+// Then, Geocoder builds a layered graph, with edges between features
+// on adjacent layers (e.g. between BUILDING ans STREET, STREET and
+// CITY, etc.). Usually an edge between two features means that a
+// feature from the lowest layer geometrically belongs to a feature
+// from the highest layer (BUILDING is located on STREET, STREET is
+// located inside CITY, CITY is located inside STATE, etc.). Final
+// part is to find all paths through this layered graph and report all
+// features from the lowest layer, that are reachable from the
+// highest layer.
+class Geocoder : public my::Cancellable
+{
+public:
+ struct Params : public QueryParams
+ {
+ Params();
+
+ Mode m_mode;
+
+ // We need to pass both pivot and pivot center because pivot is
+ // usually a rectangle created by radius and center, and due to
+ // precision loss, |m_pivot|.Center() may differ from
+ // |m_accuratePivotCenter|. Therefore |m_pivot| should be used for
+ // fast filtering of features outside of the rectangle, while
+ // |m_accuratePivotCenter| should be used when it's needed to
+ // compute a distance from a feature to the pivot.
+ m2::RectD m_pivot;
+ m2::PointD m_accuratePivotCenter;
+ };
+
+ enum RegionType
+ {
+ REGION_TYPE_STATE,
+ REGION_TYPE_COUNTRY,
+ REGION_TYPE_COUNT
+ };
+
+ struct Locality
+ {
+ Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {}
+
+ Locality(uint32_t featureId, size_t startToken, size_t endToken)
+ : m_featureId(featureId), m_startToken(startToken), m_endToken(endToken)
+ {
+ }
+
+ MwmSet::MwmId m_countryId;
+ uint32_t m_featureId;
+ size_t m_startToken;
+ size_t m_endToken;
+ };
+
+ // This struct represents a country or US- or Canadian- state. It
+ // is used to filter maps before search.
+ struct Region : public Locality
+ {
+ Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {}
+
+ storage::CountryInfoGetter::TRegionIdSet m_ids;
+ string m_enName;
+ m2::PointD m_center;
+ RegionType m_type;
+ };
+
+ // This struct represents a city or a village. It is used to filter features
+ // during search.
+ // todo(@m) It works well as is, but consider a new naming scheme
+ // when counties etc. are added. E.g., Region for countries and
+ // states and Locality for smaller settlements.
+ struct City : public Locality
+ {
+ City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {}
+
+ m2::RectD m_rect;
+ SearchModel::SearchType m_type;
+#if defined(DEBUG)
+ string m_defaultName;
+#endif
+ };
+
+ Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter);
+
+ ~Geocoder() override;
+
+ // Sets search query params.
+ void SetParams(Params const & params);
+
+ // Starts geocoding, retrieved features will be appended to
+ // |results|.
+ void GoEverywhere(PreRanker & preRanker);
+ void GoInViewport(PreRanker & preRanker);
+
+ void ClearCaches();
+
+private:
+ enum RectId
+ {
+ RECT_ID_PIVOT,
+ RECT_ID_LOCALITY,
+ RECT_ID_COUNT
+ };
+
+ struct Postcodes
+ {
+ void Clear()
+ {
+ m_startToken = 0;
+ m_endToken = 0;
+ m_features.reset();
+ }
+
+ inline bool IsEmpty() const { return coding::CompressedBitVector::IsEmpty(m_features); }
+
+ size_t m_startToken = 0;
+ size_t m_endToken = 0;
+ unique_ptr<coding::CompressedBitVector> m_features;
+ };
+
+ void GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos, bool inViewport);
+
+ template <typename TLocality>
+ using TLocalitiesCache = map<pair<size_t, size_t>, vector<TLocality>>;
+
+ QueryParams::TSynonymsVector const & GetTokens(size_t i) const;
+
+ // Fills |m_retrievalParams| with [curToken, endToken) subsequence
+ // of search query tokens.
+ void PrepareRetrievalParams(size_t curToken, size_t endToken);
+
+ // Creates a cache of posting lists corresponding to features in m_context
+ // for each token and saves it to m_addressFeatures.
+ void PrepareAddressFeatures();
+
+ void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
+ FeaturesLayer & layer);
+
+ void FillLocalityCandidates(coding::CompressedBitVector const * filter,
+ size_t const maxNumLocalities, vector<Locality> & preLocalities);
+
+ void FillLocalitiesTable();
+
+ void FillVillageLocalities();
+
+ template <typename TFn>
+ void ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn && fn);
+
+ // Throws CancelException if cancelled.
+ inline void BailIfCancelled()
+ {
+ ::search::BailIfCancelled(static_cast<my::Cancellable const &>(*this));
+ }
+
+ // Tries to find all countries and states in a search query and then
+ // performs matching of cities in found maps.
+ void MatchRegions(RegionType type);
+
+ // Tries to find all cities in a search query and then performs
+ // matching of streets in found cities.
+ void MatchCities();
+
+ // Tries to do geocoding without localities, ie. find POIs,
+ // BUILDINGs and STREETs without knowledge about country, state,
+ // city or village. If during the geocoding too many features are
+ // retrieved, viewport is used to throw away excess features.
+ void MatchAroundPivot();
+
+ // Tries to do geocoding in a limited scope, assuming that knowledge
+ // about high-level features, like cities or countries, is
+ // incorporated into |filter|.
+ void LimitedSearch(FeaturesFilter const & filter);
+
+ template <typename TFn>
+ void WithPostcodes(TFn && fn);
+
+ // Tries to match some adjacent tokens in the query as streets and
+ // then performs geocoding in street vicinities.
+ void GreedilyMatchStreets();
+
+ void CreateStreetsLayerAndMatchLowerLayers(
+ size_t startToken, size_t endToken, unique_ptr<coding::CompressedBitVector> const & features);
+
+ // Tries to find all paths in a search tree, where each edge is
+ // marked with some substring of the query tokens. These paths are
+ // called "layer sequence" and current path is stored in |m_layers|.
+ void MatchPOIsAndBuildings(size_t curToken);
+
+ // Returns true if current path in the search tree (see comment for
+ // MatchPOIsAndBuildings()) looks sane. This method is used as a fast
+ // pre-check to cut off unnecessary work.
+ bool IsLayerSequenceSane() const;
+
+ // Finds all paths through layers and emits reachable features from
+ // the lowest layer.
+ void FindPaths();
+
+ // Forms result and feeds it to |m_preRanker|.
+ void EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type,
+ size_t startToken, size_t endToken);
+ void EmitResult(Region const & region, size_t startToken, size_t endToken);
+ void EmitResult(City const & city, size_t startToken, size_t endToken);
+
+ // Computes missing fields for all results in |m_preRanker|.
+ void FillMissingFieldsInResults();
+
+ // Tries to match unclassified objects from lower layers, like
+ // parks, forests, lakes, rivers, etc. This method finds all
+ // UNCLASSIFIED objects that match to all currently unused tokens.
+ void MatchUnclassified(size_t curToken);
+
+ unique_ptr<coding::CompressedBitVector> LoadCategories(
+ MwmContext & context, vector<strings::UniString> const & categories);
+
+ coding::CompressedBitVector const * LoadStreets(MwmContext & context);
+
+ unique_ptr<coding::CompressedBitVector> LoadVillages(MwmContext & context);
+
+ // A wrapper around RetrievePostcodeFeatures.
+ unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(MwmContext const & context,
+ TokenSlice const & slice);
+
+ // A caching wrapper around Retrieval::RetrieveGeometryFeatures.
+ coding::CompressedBitVector const * RetrieveGeometryFeatures(MwmContext const & context,
+ m2::RectD const & rect, RectId id);
+
+ // This is a faster wrapper around SearchModel::GetSearchType(), as
+ // it uses pre-loaded lists of streets and villages.
+ SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId);
+
+ // Returns true iff all tokens are used.
+ bool AllTokensUsed() const;
+
+ // Returns true if there exists at least one used token in [from,
+ // to).
+ bool HasUsedTokensInRange(size_t from, size_t to) const;
+
+ // Counts number of groups of consecutive unused tokens.
+ size_t NumUnusedTokensGroups() const;
+
+ // Advances |curToken| to the nearest unused token, or to the end of
+ // |m_usedTokens| if there are no unused tokens.
+ size_t SkipUsedTokens(size_t curToken) const;
+
+ Index & m_index;
+
+ storage::CountryInfoGetter const & m_infoGetter;
+
+ // Geocoder params.
+ Params m_params;
+
+ // Total number of search query tokens.
+ size_t m_numTokens;
+
+ // This field is used to map features to a limited number of search
+ // classes.
+ SearchModel const & m_model;
+
+ // Following fields are set up by Search() method and can be
+ // modified and used only from Search() or its callees.
+
+ MwmSet::MwmId m_worldId;
+
+ // Context of the currently processed mwm.
+ unique_ptr<MwmContext> m_context;
+
+ // m_cities stores both big cities that are visible at World.mwm
+ // and small villages and hamlets that are not.
+ TLocalitiesCache<City> m_cities;
+ TLocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];
+
+ // Caches of features in rects. These caches are separated from
+ // TLocalitiesCache because the latter are quite lightweight and not
+ // all of them are needed.
+ PivotRectsCache m_pivotRectsCache;
+ LocalityRectsCache m_localityRectsCache;
+
+ // Cache of nested rects used to estimate distance from a feature to the pivot.
+ NestedRectsCache m_pivotFeatures;
+
+ // Cache of posting lists for each token in the query. TODO (@y,
+ // @m, @vng): consider to update this cache lazily, as user inputs
+ // tokens one-by-one.
+ vector<unique_ptr<coding::CompressedBitVector>> m_addressFeatures;
+
+ // Cache of street ids in mwms.
+ map<MwmSet::MwmId, unique_ptr<coding::CompressedBitVector>> m_streetsCache;
+
+ // Street features in the mwm that is currently being processed.
+ // The initialization of m_streets is postponed in order to gain
+ // some speed. Therefore m_streets may be used only in
+ // LimitedSearch() and in all its callees.
+ coding::CompressedBitVector const * m_streets;
+
+ // Village features in the mwm that is currently being processed.
+ unique_ptr<coding::CompressedBitVector> m_villages;
+
+ // Postcodes features in the mwm that is currently being processed.
+ Postcodes m_postcodes;
+
+ // This vector is used to indicate what tokens were matched by
+ // locality and can't be re-used during the geocoding process.
+ vector<bool> m_usedTokens;
+
+ // This filter is used to throw away excess features.
+ FeaturesFilter const * m_filter;
+
+ // Features matcher for layers intersection.
+ map<MwmSet::MwmId, unique_ptr<FeaturesLayerMatcher>> m_matchersCache;
+ FeaturesLayerMatcher * m_matcher;
+
+ // Path finder for interpretations.
+ FeaturesLayerPathFinder m_finder;
+
+ // Search query params prepared for retrieval.
+ QueryParams m_retrievalParams;
+
+ // Pointer to the most nested region filled during geocoding.
+ Region const * m_lastMatchedRegion;
+
+ // Stack of layers filled during geocoding.
+ vector<FeaturesLayer> m_layers;
+
+ // Non-owning.
+ PreRanker * m_preRanker;
+};
+
+string DebugPrint(Geocoder::Locality const & locality);
+
+} // namespace search