Welcome to mirror list, hosted at ThFree Co, Russian Federation.

utils.hpp « search - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c36ef22f8d1b07abbbf705b3791001b0cdb3566a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#pragma once

#include "search/common.hpp"
#include "search/feature_offset_match.hpp"
#include "search/token_slice.hpp"

#include "indexer/categories_holder.hpp"
#include "indexer/mwm_set.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "indexer/trie.hpp"

#include "base/levenshtein_dfa.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <vector>

class DataSourceBase;
class MwmInfo;

namespace search
{
// Declared here, defined outside of this header.
// NOTE(review): judging by the name, returns the maximum number of errors
// (misprints) tolerated when matching |token| - confirm against the definition.
size_t GetMaxErrorsForToken(strings::UniString const & token);

// Builds a strings::LevenshteinDFA for |s|. Declared here, defined outside
// of this header. ForEachCategoryTypeFuzzy below uses it to create the
// matcher for every token of its trie request.
// NOTE(review): the error budget of the automaton presumably comes from
// GetMaxErrorsForToken(s) - confirm against the definition.
strings::LevenshteinDFA BuildLevenshteinDFA(strings::UniString const & s);

// Calls |todo|(i, type) for every category type found in |categories| by the
// i-th token of |slice| in any of the |locales|.
//
// |todo| is invoked through lambdas that capture it by reference, so the
// callable is never copied: a stateful functor keeps its accumulated state
// and move-only callables are accepted. The value returned by |todo|
// (if any) is discarded.
template <typename ToDo>
void ForEachCategoryType(StringSliceBase const & slice, Locales const & locales,
                         CategoriesHolder const & categories, ToDo && todo)
{
  for (size_t i = 0; i < slice.Size(); ++i)
  {
    auto const & token = slice.Get(i);
    for (int8_t const locale : locales)
      categories.ForEachTypeByName(locale, token, [&todo, i](uint32_t type) { todo(i, type); });

    // Special case processing of 2 codepoints emoji (e.g. black guy on a bike).
    // Only emoji synonyms can have one codepoint, so a longer token is
    // additionally looked up by its first codepoint alone.
    if (token.size() > 1)
    {
      categories.ForEachTypeByName(CategoriesHolder::kEnglishCode, strings::UniString(1, token[0]),
                                   [&todo, i](uint32_t type) { todo(i, type); });
    }
  }
}

// Unlike ForEachCategoryType, which extracts types by exactly matching a token
// from |slice| to a token in the name of a category, this function matches
// with a Levenshtein automaton. In the worst case it has to loop through
// the tokens in all category synonyms in all |locales| in order to find a token
// whose edit distance is close enough to the required token from |slice|.
//
// Calls |todo|(i, type) for every type matched by the i-th token. |todo| is
// captured by reference, so it is never copied and keeps its state between calls.
// The value returned by |todo| (if any) is discarded.
template <typename ToDo>
void ForEachCategoryTypeFuzzy(StringSliceBase const & slice, Locales const & locales,
                              CategoriesHolder const & categories, ToDo && todo)
{
  using Iterator = trie::MemTrieIterator<strings::UniString, ::base::VectorValues<uint32_t>>;

  auto const & trie = categories.GetNameToTypesTrie();
  Iterator const iterator(trie.GetRootIterator());

  for (size_t i = 0; i < slice.Size(); ++i)
  {
    // todo(@m, @y). We build dfa twice for each token: here and in geocoder.cpp.
    // A possible optimization is to build each dfa once and save it. Note that
    // dfas for the prefix tokens differ, i.e. we ignore slice.IsPrefix(i) here.
    SearchTrieRequest<strings::LevenshteinDFA> request;
    request.m_names.push_back(BuildLevenshteinDFA(slice.Get(i)));
    request.SetLangs(locales);

    // The filter accepts every type; it needs no captures.
    MatchFeaturesInTrie(request, iterator, [](uint32_t /* type */) { return true; } /* filter */,
                        [&todo, i](uint32_t type) { todo(i, type); } /* todo */);
  }
}

// Checks whether the request given by |slice| is categorial, i.e. whether its
// tokens coincide one-to-one with the tokens of some category synonym in any
// of the |locales|. On success the matching types are stored in |types| and
// |true| is returned; otherwise |types| is left empty and |false| is returned.
// Categorial requests are expected to come mostly from clicking on a category
// button in the UI. It is assumed that typing a word that matches
// a category's name and a space after it means that no errors were made.
template <typename T>
bool FillCategories(QuerySliceOnRawStrings<T> const & slice, Locales const & locales,
                    CategoriesHolder const & catHolder, std::vector<uint32_t> & types)
{
  types.clear();

  // Requests containing a prefix token are never treated as categorial.
  if (!slice.HasPrefixToken())
  {
    catHolder.ForEachNameAndType(
        [&](CategoriesHolder::Category::Name const & synonym, uint32_t type) {
          auto const localeCode = static_cast<uint64_t>(synonym.m_locale);
          if (!locales.Contains(localeCode))
            return;

          // Tokenize the synonym the same way the query is compared against.
          std::vector<QueryParams::String> synonymTokens;
          SplitUniString(search::NormalizeAndSimplifyString(synonym.m_name),
                         MakeBackInsertFunctor(synonymTokens), search::Delimiters());

          // Token counts must agree before comparing token by token.
          if (slice.Size() != synonymTokens.size())
            return;

          for (size_t pos = 0; pos < slice.Size(); ++pos)
          {
            // Bail out on the first mismatching token.
            if (slice.Get(pos) != synonymTokens[pos])
              return;
          }

          types.push_back(type);
        });
  }

  return !types.empty();
}

// Both overloads are declared here and defined outside of this header.
// NOTE(review): judging by the name, they return a handle to the World map
// available through |dataSource|; the first overload presumably searches only
// among |infos| - confirm against the definitions.
MwmSet::MwmHandle FindWorld(DataSourceBase const & dataSource,
                            std::vector<std::shared_ptr<MwmInfo>> const &infos);
MwmSet::MwmHandle FindWorld(DataSourceBase const & dataSource);
}  // namespace search