Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ranking_utils.hpp « search - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 11c6083ebdd95fcdc522f2a54b3e57de6585e8d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#pragma once

#include "search/common.hpp"
#include "search/model.hpp"
#include "search/query_params.hpp"

#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"

#include "base/levenshtein_dfa.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"

#include <algorithm>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

class CategoriesHolder;

namespace feature
{
class TypesHolder;
}

namespace search
{
class QueryParams;
class TokenSlice;

class CategoriesInfo
{
public:
  CategoriesInfo(feature::TypesHolder const & holder, TokenSlice const & tokens,
                 Locales const & locales, CategoriesHolder const & categories);

  // Returns true when all tokens correspond to categories in
  // |holder|.
  bool IsPureCategories() const { return m_pureCategories; }

  // Returns true when all tokens are categories tokens but none of
  // them correspond to categories in |holder|.
  bool IsFalseCategories() const { return m_falseCategories; }

private:
  bool m_pureCategories = false;
  bool m_falseCategories = false;
};

struct ErrorsMade
{
  static size_t constexpr kInfiniteErrors = std::numeric_limits<size_t>::max();

  ErrorsMade() = default;
  explicit ErrorsMade(size_t errorsMade) : m_errorsMade(errorsMade) {}

  bool IsValid() const { return m_errorsMade != kInfiniteErrors; }

  template <typename Fn>
  static ErrorsMade Combine(ErrorsMade const & lhs, ErrorsMade const & rhs, Fn && fn)
  {
    if (!lhs.IsValid())
      return rhs;
    if (!rhs.IsValid())
      return lhs;
    return ErrorsMade(fn(lhs.m_errorsMade, rhs.m_errorsMade));
  }

  static ErrorsMade Min(ErrorsMade const & lhs, ErrorsMade const & rhs)
  {
    return Combine(lhs, rhs, [](size_t u, size_t v) { return std::min(u, v); });
  }

  static ErrorsMade Max(ErrorsMade const & lhs, ErrorsMade const & rhs)
  {
    return Combine(lhs, rhs, [](size_t u, size_t v) { return std::max(u, v); });
  }

  friend ErrorsMade operator+(ErrorsMade const & lhs, ErrorsMade const & rhs)
  {
    return Combine(lhs, rhs, [](size_t u, size_t v) { return u + v; });
  }

  ErrorsMade & operator+=(ErrorsMade const & rhs)
  {
    *this = *this + rhs;
    return *this;
  }

  bool operator==(ErrorsMade const & rhs) const { return m_errorsMade == rhs.m_errorsMade; }

  size_t m_errorsMade = kInfiniteErrors;
};

string DebugPrint(ErrorsMade const & errorsMade);

namespace impl
{
bool FullMatch(QueryParams::Token const & token, strings::UniString const & text);

bool PrefixMatch(QueryParams::Token const & token, strings::UniString const & text);

// Returns the minimum number of errors needed to match |text| with
// any of the |tokens|.  If it's not possible in accordance with
// GetMaxErrorsForToken(|text|), returns kInfiniteErrors.
ErrorsMade GetMinErrorsMade(std::vector<strings::UniString> const & tokens,
                            strings::UniString const & text);
}  // namespace impl

// The order and numeric values are important here.  Please, check all
// use-cases before changing this enum.
enum NameScore
{
  NAME_SCORE_ZERO = 0,
  NAME_SCORE_SUBSTRING = 1,
  NAME_SCORE_PREFIX = 2,
  NAME_SCORE_FULL_MATCH = 3,

  NAME_SCORE_COUNT
};

// Returns true when |s| is a stop-word and may be removed from a query.
bool IsStopWord(strings::UniString const & s);

// Normalizes, simplifies and splits string, removes stop-words.
void PrepareStringForMatching(std::string const & name, std::vector<strings::UniString> & tokens);

template <typename Slice>
NameScore GetNameScore(std::string const & name, Slice const & slice)
{
  if (slice.Empty())
    return NAME_SCORE_ZERO;

  std::vector<strings::UniString> tokens;
  SplitUniString(NormalizeAndSimplifyString(name), base::MakeBackInsertFunctor(tokens), Delimiters());
  return GetNameScore(tokens, slice);
}

template <typename Slice>
NameScore GetNameScore(std::vector<strings::UniString> const & tokens, Slice const & slice)
{
  if (slice.Empty())
    return NAME_SCORE_ZERO;

  size_t const n = tokens.size();
  size_t const m = slice.Size();

  bool const lastTokenIsPrefix = slice.IsPrefix(m - 1);

  NameScore score = NAME_SCORE_ZERO;
  for (size_t offset = 0; offset + m <= n; ++offset)
  {
    bool match = true;
    for (size_t i = 0; i < m - 1 && match; ++i)
      match = match && impl::FullMatch(slice.Get(i), tokens[offset + i]);
    if (!match)
      continue;

    bool const fullMatch = impl::FullMatch(slice.Get(m - 1), tokens[offset + m - 1]);
    bool const prefixMatch =
        lastTokenIsPrefix && impl::PrefixMatch(slice.Get(m - 1), tokens[offset + m - 1]);
    if (!fullMatch && !prefixMatch)
      continue;

    if (m == n && fullMatch)
      return NAME_SCORE_FULL_MATCH;

    if (offset == 0)
      score = max(score, NAME_SCORE_PREFIX);

    score = max(score, NAME_SCORE_SUBSTRING);
  }
  return score;
}

string DebugPrint(NameScore score);

// Returns total number of errors that were made during matching
// feature |tokens| by a query - query tokens are in |slice|.
template <typename Slice>
ErrorsMade GetErrorsMade(std::vector<strings::UniString> const & tokens, Slice const & slice)
{
  ErrorsMade totalErrorsMade;

  for (size_t i = 0; i < slice.Size(); ++i)
  {
    ErrorsMade errorsMade;
    slice.Get(i).ForEach([&](strings::UniString const & s) {
      errorsMade = ErrorsMade::Min(errorsMade, impl::GetMinErrorsMade(tokens, s));
    });

    totalErrorsMade += errorsMade;
  }

  return totalErrorsMade;
}

template <typename Slice>
ErrorsMade GetErrorsMade(std::string const & s, Slice const & slice)
{
  return GetErrorsMade({strings::MakeUniString(s)}, slice);
}
}  // namespace search