Welcome to mirror list, hosted at ThFree Co, Russian Federation.

geocoder.hpp « search - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d6b30a1e1dcea7bb995932e201a2f3436337da9f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#pragma once

#include "search/cancel_exception.hpp"
#include "search/categories_cache.hpp"
#include "search/cbv.hpp"
#include "search/features_layer.hpp"
#include "search/features_layer_path_finder.hpp"
#include "search/geocoder_context.hpp"
#include "search/geometry_cache.hpp"
#include "search/mode.hpp"
#include "search/model.hpp"
#include "search/mwm_context.hpp"
#include "search/nested_rects_cache.hpp"
#include "search/pre_ranking_info.hpp"
#include "search/query_params.hpp"
#include "search/ranking_utils.hpp"
#include "search/streets_matcher.hpp"

#include "indexer/index.hpp"
#include "indexer/mwm_set.hpp"

#include "storage/country_info_getter.hpp"

#include "coding/compressed_bit_vector.hpp"

#include "geometry/rect2d.hpp"

#include "base/buffer_vector.hpp"
#include "base/cancellable.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"

#include "std/limits.hpp"
#include "std/set.hpp"
#include "std/string.hpp"
#include "std/unique_ptr.hpp"
#include "std/unordered_map.hpp"
#include "std/vector.hpp"

class MwmInfo;
class MwmValue;

namespace storage
{
class CountryInfoGetter;
}  // namespace storage

namespace search
{
class PreRanker;

class FeaturesFilter;
class FeaturesLayerMatcher;
class SearchModel;
class TokenSlice;

// This class is used to retrieve all features corresponding to a
// search query.  Search query is represented as a sequence of tokens
// (including synonyms for these tokens), and Geocoder tries to build
// all possible partitions (or layers) of the search query, where each
// layer is a set of features corresponding to some search class
// (e.g. POI, BUILDING, STREET, etc., see search_model.hpp).
// Then, Geocoder builds a layered graph, with edges between features
// on adjacent layers (e.g. between BUILDING ans STREET, STREET and
// CITY, etc.). Usually an edge between two features means that a
// feature from the lowest layer geometrically belongs to a feature
// from the highest layer (BUILDING is located on STREET, STREET is
// located inside CITY, CITY is located inside STATE, etc.). Final
// part is to find all paths through this layered graph and report all
// features from the lowest layer, that are reachable from the
// highest layer.
class Geocoder
{
public:
  struct Params : public QueryParams
  {
    Params();

    Mode m_mode;
    m2::RectD m_pivot;
  };

  enum RegionType
  {
    REGION_TYPE_STATE,
    REGION_TYPE_COUNTRY,
    REGION_TYPE_COUNT
  };

  struct Locality
  {
    Locality() : m_featureId(0), m_startToken(0), m_endToken(0), m_prob(0.0) {}

    Locality(uint32_t featureId, size_t startToken, size_t endToken)
      : m_featureId(featureId), m_startToken(startToken), m_endToken(endToken), m_prob(0.0)
    {
    }

    MwmSet::MwmId m_countryId;
    uint32_t m_featureId;
    size_t m_startToken;
    size_t m_endToken;

    // Measures our belief in the fact that tokens in the range [m_startToken, m_endToken)
    // indeed specify a locality. Currently it is set only for villages.
    double m_prob;

    string m_name;
  };

  // This struct represents a country or US- or Canadian- state.  It
  // is used to filter maps before search.
  struct Region : public Locality
  {
    Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {}

    storage::CountryInfoGetter::TRegionIdSet m_ids;
    string m_defaultName;
    m2::PointD m_center;
    RegionType m_type;
  };

  // This struct represents a city or a village. It is used to filter features
  // during search.
  // todo(@m) It works well as is, but consider a new naming scheme
  // when counties etc. are added. E.g., Region for countries and
  // states and Locality for smaller settlements.
  struct City : public Locality
  {
    City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {}

    m2::RectD m_rect;
    SearchModel::SearchType m_type;
#if defined(DEBUG)
    string m_defaultName;
#endif
  };

  Geocoder(Index const & index, storage::CountryInfoGetter const & infoGetter,
           PreRanker & preRanker, VillagesCache & villagesCache,
           my::Cancellable const & cancellable);

  ~Geocoder();

  // Sets search query params.
  void SetParams(Params const & params);

  // Starts geocoding, retrieved features will be appended to
  // |results|.
  void GoEverywhere();
  void GoInViewport();

  void ClearCaches();

private:
  enum RectId
  {
    RECT_ID_PIVOT,
    RECT_ID_LOCALITY,
    RECT_ID_COUNT
  };

  struct Postcodes
  {
    void Clear()
    {
      m_startToken = 0;
      m_endToken = 0;
      m_features.Reset();
    }

    size_t m_startToken = 0;
    size_t m_endToken = 0;
    CBV m_features;
  };

  void GoImpl(vector<shared_ptr<MwmInfo>> & infos, bool inViewport);

  template <typename TLocality>
  using TLocalitiesCache = map<pair<size_t, size_t>, vector<TLocality>>;

  QueryParams::TSynonymsVector const & GetTokens(size_t i) const;

  // Fills |m_retrievalParams| with [curToken, endToken) subsequence
  // of search query tokens.
  void PrepareRetrievalParams(size_t curToken, size_t endToken);

  // Creates a cache of posting lists corresponding to features in m_context
  // for each token and saves it to m_addressFeatures.
  void InitBaseContext(BaseContext & ctx);

  void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
                 FeaturesLayer & layer);

  void FillLocalityCandidates(BaseContext const & ctx, CBV const & filter,
                              size_t const maxNumLocalities, vector<Locality> & preLocalities);

  void FillLocalitiesTable(BaseContext const & ctx);

  void FillVillageLocalities(BaseContext const & ctx);

  template <typename TFn>
  void ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn && fn);

  // Throws CancelException if cancelled.
  inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }

  // Tries to find all countries and states in a search query and then
  // performs matching of cities in found maps.
  void MatchRegions(BaseContext & ctx, RegionType type);

  // Tries to find all cities in a search query and then performs
  // matching of streets in found cities.
  void MatchCities(BaseContext & ctx);

  // Tries to do geocoding without localities, ie. find POIs,
  // BUILDINGs and STREETs without knowledge about country, state,
  // city or village. If during the geocoding too many features are
  // retrieved, viewport is used to throw away excess features.
  void MatchAroundPivot(BaseContext & ctx);

  // Tries to do geocoding in a limited scope, assuming that knowledge
  // about high-level features, like cities or countries, is
  // incorporated into |filter|.
  void LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter);

  template <typename TFn>
  void WithPostcodes(BaseContext & ctx, TFn && fn);

  // Tries to match some adjacent tokens in the query as streets and
  // then performs geocoding in street vicinities.
  void GreedilyMatchStreets(BaseContext & ctx);

  void CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
                                             StreetsMatcher::Prediction const & prediction);

  // Tries to find all paths in a search tree, where each edge is
  // marked with some substring of the query tokens. These paths are
  // called "layer sequence" and current path is stored in |m_layers|.
  void MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken);

  // Returns true if current path in the search tree (see comment for
  // MatchPOIsAndBuildings()) looks sane. This method is used as a fast
  // pre-check to cut off unnecessary work.
  bool IsLayerSequenceSane() const;

  // Finds all paths through layers and emits reachable features from
  // the lowest layer.
  void FindPaths();

  // Forms result and feeds it to |m_preRanker|.
  void EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type,
                  size_t startToken, size_t endToken);
  void EmitResult(Region const & region, size_t startToken, size_t endToken);
  void EmitResult(City const & city, size_t startToken, size_t endToken);

  // Tries to match unclassified objects from lower layers, like
  // parks, forests, lakes, rivers, etc. This method finds all
  // UNCLASSIFIED objects that match to all currently unused tokens.
  void MatchUnclassified(BaseContext & ctx, size_t curToken);

  // A wrapper around RetrievePostcodeFeatures.
  CBV RetrievePostcodeFeatures(MwmContext const & context, TokenSlice const & slice);

  // A caching wrapper around Retrieval::RetrieveGeometryFeatures.
  CBV RetrieveGeometryFeatures(MwmContext const & context, m2::RectD const & rect, RectId id);

  // This is a faster wrapper around SearchModel::GetSearchType(), as
  // it uses pre-loaded lists of streets and villages.
  WARN_UNUSED_RESULT bool GetSearchTypeInGeocoding(BaseContext const & ctx, uint32_t featureId,
                                                   SearchModel::SearchType & searchType);

  Index const & m_index;

  storage::CountryInfoGetter const & m_infoGetter;

  StreetsCache m_streetsCache;
  VillagesCache & m_villagesCache;

  my::Cancellable const & m_cancellable;

  // Geocoder params.
  Params m_params;

  // This field is used to map features to a limited number of search
  // classes.
  SearchModel const & m_model;

  // Following fields are set up by Search() method and can be
  // modified and used only from Search() or its callees.

  MwmSet::MwmId m_worldId;

  // Context of the currently processed mwm.
  unique_ptr<MwmContext> m_context;

  // m_cities stores both big cities that are visible at World.mwm
  // and small villages and hamlets that are not.
  TLocalitiesCache<City> m_cities;
  TLocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];

  // Caches of features in rects. These caches are separated from
  // TLocalitiesCache because the latter are quite lightweight and not
  // all of them are needed.
  PivotRectsCache m_pivotRectsCache;
  LocalityRectsCache m_localityRectsCache;

  // Postcodes features in the mwm that is currently being processed.
  Postcodes m_postcodes;

  // This filter is used to throw away excess features.
  FeaturesFilter const * m_filter;

  // Features matcher for layers intersection.
  map<MwmSet::MwmId, unique_ptr<FeaturesLayerMatcher>> m_matchersCache;
  FeaturesLayerMatcher * m_matcher;

  // Path finder for interpretations.
  FeaturesLayerPathFinder m_finder;

  // Search query params prepared for retrieval.
  QueryParams m_retrievalParams;

  // Pointer to the most nested region filled during geocoding.
  Region const * m_lastMatchedRegion;

  // Stack of layers filled during geocoding.
  vector<FeaturesLayer> m_layers;

  PreRanker & m_preRanker;
};

string DebugPrint(Geocoder::Locality const & locality);
}  // namespace search