Welcome to mirror list, hosted at ThFree Co, Russian Federation.

sponsored_scoring.cpp « generator - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: af0e2a10ef0e0723120a71dbc48e4806dfcc70c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include "generator/sponsored_scoring.hpp"

#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"

#include "geometry/distance_on_sphere.hpp"

#include <algorithm>
#include <cmath>
#include <vector>

namespace
{
// A bag of words stored as (word, weight) pairs.
// WeightedBagsDotProduct walks two bags in lockstep and advances by comparing
// words with operator<, so bags are expected to be ordered by word.
using WeightedBagOfWords = std::vector<std::pair<strings::UniString, double>>;

std::vector<strings::UniString> StringToWords(std::string const & str)
{
  std::vector<strings::UniString> result;
  search::NormalizeAndTokenizeString(str, result, search::Delimiters{});
  std::sort(std::begin(result), std::end(result));
  return result;
}

// Collapses every run of consecutive equal words in |words| into a single
// (word, weight) entry. Each occurrence contributes the placeholder score, so
// the weight of an entry currently equals the length of its run.
WeightedBagOfWords MakeWeightedBagOfWords(std::vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords bag;
  for (auto const & word : words)
  {
    bool const continuesRun = !bag.empty() && bag.back().first == word;
    if (continuesRun)
    {
      // Same word as the previous one: accumulate into the existing entry.
      // TODO(mgsergio): tf-idf score for bag.back().first.
      bag.back().second += kTfIdfScorePlaceholder;
    }
    else
    {
      // A new run starts here.
      bag.emplace_back(word, kTfIdfScorePlaceholder);
    }
  }
  return bag;
}

// Computes the dot product of two weighted bags: the sum of
// lhsWeight * rhsWeight over words that are matched in both bags.
// The bags are traversed in lockstep; on a mismatch the side whose current
// word compares less is advanced. Returns 0.0 if nothing matches.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double dot = 0.0;

  size_t lhsPos = 0;
  size_t rhsPos = 0;
  while (lhsPos != lhs.size() && rhsPos != rhs.size())
  {
    auto const & left = lhs[lhsPos];
    auto const & right = rhs[rhsPos];

    if (left.first == right.first)
    {
      // Matching word: accumulate and advance both sides.
      dot += left.second * right.second;
      ++lhsPos;
      ++rhsPos;
      continue;
    }

    // No match: skip the smaller of the two current words.
    if (left.first < right.first)
      ++lhsPos;
    else
      ++rhsPos;
  }

  return dot;
}

// Returns the cosine similarity of two weighted bags of words:
//   dot(lhs, rhs) / (|lhs| * |rhs|),
// where |x| = sqrt(dot(x, x)).
// Returns 0.0 when the dot product of the bags is zero.
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const product = WeightedBagsDotProduct(lhs, rhs);

  // WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty(),
  // if every element of either lhs or rhs is 0.0, or if no word is matched
  // in both bags. Return early: the norms are not needed in that case, and
  // this keeps a zero norm from ever reaching the division below.
  if (product == 0.0)
    return 0.0;

  auto const lhsLength = std::sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsLength = std::sqrt(WeightedBagsDotProduct(rhs, rhs));

  return product / (lhsLength * rhsLength);
}
}  // namespace

namespace generator
{
namespace impl
{
// Maps |distance| linearly onto a score: |distance| is first clamped to
// [0.0, maxDistance], then the score is 1.0 minus the clamped distance
// divided by |maxDistance| (1.0 at distance 0, 0.0 at maxDistance).
// |maxDistance| must not be 0.
double GetLinearNormDistanceScore(double distance, double const maxDistance)
{
  CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));

  auto const clampedDistance = base::clamp(distance, 0.0, maxDistance);
  auto const fractionOfMax = clampedDistance / maxDistance;
  return 1.0 - fractionOfMax;
}

// Scores the similarity of two names as the cosine between their weighted
// bags of words.
// Special cases: if both names produce no words the score is 1.0; if exactly
// one of them produces no words the score is 0.0.
double GetNameSimilarityScore(std::string const & booking_name, std::string const & osm_name)
{
  auto const bookingBag = MakeWeightedBagOfWords(StringToWords(booking_name));
  auto const osmBag = MakeWeightedBagOfWords(StringToWords(osm_name));

  bool const bookingHasNoWords = bookingBag.empty();
  bool const osmHasNoWords = osmBag.empty();

  // Handle empty bags up front: two empty names are treated as identical,
  // an empty name and a non-empty one as completely different.
  if (bookingHasNoWords || osmHasNoWords)
    return (bookingHasNoWords && osmHasNoWords) ? 1.0 : 0.0;

  return WeightedBagOfWordsCos(bookingBag, osmBag);
}
}  // namespace impl
}  // namespace generator