Welcome to mirror list, hosted at ThFree Co, Russian Federation.

geocoder.hpp « v2 « search - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: bdc8ef0d529194840a570e4b88eac72b0857ad3a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#pragma once

#include "search/cancel_exception.hpp"
#include "search/mode.hpp"
#include "search/query_params.hpp"
#include "search/v2/features_layer.hpp"
#include "search/v2/features_layer_path_finder.hpp"
#include "search/v2/geometry_cache.hpp"
#include "search/v2/mwm_context.hpp"
#include "search/v2/nested_rects_cache.hpp"
#include "search/v2/pre_ranking_info.hpp"
#include "search/v2/ranking_utils.hpp"
#include "search/v2/search_model.hpp"

#include "indexer/index.hpp"
#include "indexer/mwm_set.hpp"

#include "storage/country_info_getter.hpp"

#include "coding/compressed_bit_vector.hpp"

#include "geometry/rect2d.hpp"

#include "base/buffer_vector.hpp"
#include "base/cancellable.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"

#include "std/limits.hpp"
#include "std/set.hpp"
#include "std/string.hpp"
#include "std/unique_ptr.hpp"
#include "std/unordered_map.hpp"
#include "std/vector.hpp"

class MwmInfo;
class MwmValue;

namespace coding
{
class CompressedBitVector;
}

namespace storage
{
class CountryInfoGetter;
}  // namespace storage

namespace search
{
class PreRanker;

namespace v2
{
class FeaturesFilter;
class FeaturesLayerMatcher;
class SearchModel;
class TokenSlice;

// This class is used to retrieve all features corresponding to a
// search query.  Search query is represented as a sequence of tokens
// (including synonyms for these tokens), and Geocoder tries to build
// all possible partitions (or layers) of the search query, where each
// layer is a set of features corresponding to some search class
// (e.g. POI, BUILDING, STREET, etc., see search/v2/search_model.hpp).
// Then, Geocoder builds a layered graph, with edges between features
// on adjacent layers (e.g. between BUILDING ans STREET, STREET and
// CITY, etc.). Usually an edge between two features means that a
// feature from the lowest layer geometrically belongs to a feature
// from the highest layer (BUILDING is located on STREET, STREET is
// located inside CITY, CITY is located inside STATE, etc.). Final
// part is to find all paths through this layered graph and report all
// features from the lowest layer, that are reachable from the
// highest layer.
class Geocoder : public my::Cancellable
{
public:
  struct Params : public QueryParams
  {
    Params();

    Mode m_mode;

    // We need to pass both pivot and pivot center because pivot is
    // usually a rectangle created by radius and center, and due to
    // precision loss, |m_pivot|.Center() may differ from
    // |m_accuratePivotCenter|. Therefore |m_pivot| should be used for
    // fast filtering of features outside of the rectangle, while
    // |m_accuratePivotCenter| should be used when it's needed to
    // compute a distance from a feature to the pivot.
    m2::RectD m_pivot;
    m2::PointD m_accuratePivotCenter;
  };

  enum RegionType
  {
    REGION_TYPE_STATE,
    REGION_TYPE_COUNTRY,
    REGION_TYPE_COUNT
  };

  struct Locality
  {
    Locality() : m_featureId(0), m_startToken(0), m_endToken(0) {}

    Locality(uint32_t featureId, size_t startToken, size_t endToken)
      : m_featureId(featureId), m_startToken(startToken), m_endToken(endToken)
    {
    }

    MwmSet::MwmId m_countryId;
    uint32_t m_featureId;
    size_t m_startToken;
    size_t m_endToken;
  };

  // This struct represents a country or US- or Canadian- state.  It
  // is used to filter maps before search.
  struct Region : public Locality
  {
    Region(Locality const & l, RegionType type) : Locality(l), m_center(0, 0), m_type(type) {}

    storage::CountryInfoGetter::TRegionIdSet m_ids;
    string m_enName;
    m2::PointD m_center;
    RegionType m_type;
  };

  // This struct represents a city or a village. It is used to filter features
  // during search.
  // todo(@m) It works well as is, but consider a new naming scheme
  // when counties etc. are added. E.g., Region for countries and
  // states and Locality for smaller settlements.
  struct City : public Locality
  {
    City(Locality const & l, SearchModel::SearchType type) : Locality(l), m_type(type) {}

    m2::RectD m_rect;
    SearchModel::SearchType m_type;
#if defined(DEBUG)
    string m_defaultName;
#endif
  };

  Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter);

  ~Geocoder() override;

  // Sets search query params.
  void SetParams(Params const & params);

  // Starts geocoding, retrieved features will be appended to
  // |results|.
  void GoEverywhere(PreRanker & preRanker);
  void GoInViewport(PreRanker & preRanker);

  void ClearCaches();

private:
  enum RectId
  {
    RECT_ID_PIVOT,
    RECT_ID_LOCALITY,
    RECT_ID_COUNT
  };

  struct Postcodes
  {
    void Clear()
    {
      m_startToken = 0;
      m_endToken = 0;
      m_features.reset();
    }

    inline bool IsEmpty() const { return coding::CompressedBitVector::IsEmpty(m_features); }

    size_t m_startToken = 0;
    size_t m_endToken = 0;
    unique_ptr<coding::CompressedBitVector> m_features;
  };

  void GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos, bool inViewport);

  template <typename TLocality>
  using TLocalitiesCache = map<pair<size_t, size_t>, vector<TLocality>>;

  QueryParams::TSynonymsVector const & GetTokens(size_t i) const;

  // Fills |m_retrievalParams| with [curToken, endToken) subsequence
  // of search query tokens.
  void PrepareRetrievalParams(size_t curToken, size_t endToken);

  // Creates a cache of posting lists corresponding to features in m_context
  // for each token and saves it to m_addressFeatures.
  void PrepareAddressFeatures();

  void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
                 FeaturesLayer & layer);

  void FillLocalityCandidates(coding::CompressedBitVector const * filter,
                              size_t const maxNumLocalities, vector<Locality> & preLocalities);

  void FillLocalitiesTable();

  void FillVillageLocalities();

  template <typename TFn>
  void ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn && fn);

  // Throws CancelException if cancelled.
  inline void BailIfCancelled()
  {
    ::search::BailIfCancelled(static_cast<my::Cancellable const &>(*this));
  }

  // Tries to find all countries and states in a search query and then
  // performs matching of cities in found maps.
  void MatchRegions(RegionType type);

  // Tries to find all cities in a search query and then performs
  // matching of streets in found cities.
  void MatchCities();

  // Tries to do geocoding without localities, ie. find POIs,
  // BUILDINGs and STREETs without knowledge about country, state,
  // city or village. If during the geocoding too many features are
  // retrieved, viewport is used to throw away excess features.
  void MatchAroundPivot();

  // Tries to do geocoding in a limited scope, assuming that knowledge
  // about high-level features, like cities or countries, is
  // incorporated into |filter|.
  void LimitedSearch(FeaturesFilter const & filter);

  template<typename TFn>
  void WithPostcodes(TFn && fn);

  // Tries to match some adjacent tokens in the query as streets and
  // then performs geocoding in street vicinities.
  void GreedilyMatchStreets();

  void CreateStreetsLayerAndMatchLowerLayers(
      size_t startToken, size_t endToken, unique_ptr<coding::CompressedBitVector> const & features);

  // Tries to find all paths in a search tree, where each edge is
  // marked with some substring of the query tokens. These paths are
  // called "layer sequence" and current path is stored in |m_layers|.
  void MatchPOIsAndBuildings(size_t curToken);

  // Returns true if current path in the search tree (see comment for
  // MatchPOIsAndBuildings()) looks sane. This method is used as a fast
  // pre-check to cut off unnecessary work.
  bool IsLayerSequenceSane() const;

  // Finds all paths through layers and emits reachable features from
  // the lowest layer.
  void FindPaths();

  // Forms result and feeds it to |m_preRanker|.
  void EmitResult(MwmSet::MwmId const & mwmId, uint32_t ftId, SearchModel::SearchType type,
                  size_t startToken, size_t endToken);
  void EmitResult(Region const & region, size_t startToken, size_t endToken);
  void EmitResult(City const & city, size_t startToken, size_t endToken);

  // Computes missing fields for all results in |m_preRanker|.
  void FillMissingFieldsInResults();

  // Tries to match unclassified objects from lower layers, like
  // parks, forests, lakes, rivers, etc. This method finds all
  // UNCLASSIFIED objects that match to all currently unused tokens.
  void MatchUnclassified(size_t curToken);

  unique_ptr<coding::CompressedBitVector> LoadCategories(
      MwmContext & context, vector<strings::UniString> const & categories);

  coding::CompressedBitVector const * LoadStreets(MwmContext & context);

  unique_ptr<coding::CompressedBitVector> LoadVillages(MwmContext & context);

  // A wrapper around RetrievePostcodeFeatures.
  unique_ptr<coding::CompressedBitVector> RetrievePostcodeFeatures(MwmContext const & context,
                                                                   TokenSlice const & slice);

  // A caching wrapper around Retrieval::RetrieveGeometryFeatures.
  coding::CompressedBitVector const * RetrieveGeometryFeatures(MwmContext const & context,
                                                               m2::RectD const & rect, RectId id);

  // This is a faster wrapper around SearchModel::GetSearchType(), as
  // it uses pre-loaded lists of streets and villages.
  SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId);

  // Returns true iff all tokens are used.
  bool AllTokensUsed() const;

  // Returns true if there exists at least one used token in [from,
  // to).
  bool HasUsedTokensInRange(size_t from, size_t to) const;

  // Counts number of groups of consecutive unused tokens.
  size_t NumUnusedTokensGroups() const;

  // Advances |curToken| to the nearest unused token, or to the end of
  // |m_usedTokens| if there are no unused tokens.
  size_t SkipUsedTokens(size_t curToken) const;

  Index & m_index;

  storage::CountryInfoGetter const & m_infoGetter;

  // Geocoder params.
  Params m_params;

  // Total number of search query tokens.
  size_t m_numTokens;

  // This field is used to map features to a limited number of search
  // classes.
  SearchModel const & m_model;

  // Following fields are set up by Search() method and can be
  // modified and used only from Search() or its callees.

  MwmSet::MwmId m_worldId;

  // Context of the currently processed mwm.
  unique_ptr<MwmContext> m_context;

  // m_cities stores both big cities that are visible at World.mwm
  // and small villages and hamlets that are not.
  TLocalitiesCache<City> m_cities;
  TLocalitiesCache<Region> m_regions[REGION_TYPE_COUNT];

  // Caches of features in rects. These caches are separated from
  // TLocalitiesCache because the latter are quite lightweight and not
  // all of them are needed.
  PivotRectsCache m_pivotRectsCache;
  LocalityRectsCache m_localityRectsCache;

  // Cache of nested rects used to estimate distance from a feature to the pivot.
  NestedRectsCache m_pivotFeatures;

  // Cache of posting lists for each token in the query.  TODO (@y,
  // @m, @vng): consider to update this cache lazily, as user inputs
  // tokens one-by-one.
  vector<unique_ptr<coding::CompressedBitVector>> m_addressFeatures;

  // Cache of street ids in mwms.
  map<MwmSet::MwmId, unique_ptr<coding::CompressedBitVector>> m_streetsCache;

  // Street features in the mwm that is currently being processed.
  // The initialization of m_streets is postponed in order to gain
  // some speed. Therefore m_streets may be used only in
  // LimitedSearch() and in all its callees.
  coding::CompressedBitVector const * m_streets;

  // Village features in the mwm that is currently being processed.
  unique_ptr<coding::CompressedBitVector> m_villages;

  // Postcodes features in the mwm that is currently being processed.
  Postcodes m_postcodes;

  // This vector is used to indicate what tokens were matched by
  // locality and can't be re-used during the geocoding process.
  vector<bool> m_usedTokens;

  // This filter is used to throw away excess features.
  FeaturesFilter const * m_filter;

  // Features matcher for layers intersection.
  map<MwmSet::MwmId, unique_ptr<FeaturesLayerMatcher>> m_matchersCache;
  FeaturesLayerMatcher * m_matcher;

  // Path finder for interpretations.
  FeaturesLayerPathFinder m_finder;

  // Search query params prepared for retrieval.
  QueryParams m_retrievalParams;

  // Pointer to the most nested region filled during geocoding.
  Region const * m_lastMatchedRegion;

  // Stack of layers filled during geocoding.
  vector<FeaturesLayer> m_layers;

  // Non-owning.
  PreRanker * m_preRanker;
};

string DebugPrint(Geocoder::Locality const & locality);
}  // namespace v2
}  // namespace search