Welcome to mirror list, hosted at ThFree Co, Russian Federation.

PhraseDistanceFeature.cpp « FF « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0544ec9fb7ea5bbafc0acaa7ce61416ec497bba0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#include "PhraseDistanceFeature.h"

#include <vector>
#include <boost/foreach.hpp>
#include "moses/InputType.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/StaticData.h"
#include "util/exception.hh"

using namespace std;

namespace Moses
{
PhraseDistanceFeature::PhraseDistanceFeature(const string &line)
  : StatelessFeatureFunction(2, line)
  , m_space("")
  , m_spaceID(0)
  , m_measure(EuclideanDistance)
{
  ReadParameters();
}

void PhraseDistanceFeature::EvaluateWithSourceContext(const InputType &input
    , const InputPath &inputPath
    , const TargetPhrase &targetPhrase
    , const StackVec *stackVec
    , ScoreComponentCollection &scoreBreakdown
    , ScoreComponentCollection *estimatedScores) const
{
  vector<float> scores(m_numScoreComponents, 0);
  bool broken = false;
  // Input coord
  map<size_t const, vector<float> >::const_iterator ii;
  if (input.m_coordMap) {
    ii = input.m_coordMap->find(m_spaceID);
  } else {
    TRACE_ERR("No coordinates for space " << m_space << " on input (specify with coord XML tag)" << endl);
    TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
    broken = true;
  }
  if (ii == input.m_coordMap->end()) {
    TRACE_ERR("No coordinates for space " << m_space << " on input (specify with coord XML tag)" << endl);
    TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
    broken = true;
  }
  // Target phrase coord
  vector<SPTR<vector<float> > > const* tpp = targetPhrase.GetCoordList(m_spaceID);
  if (tpp == NULL) {
    TRACE_ERR("No coordinates for space " << m_space << " on target phrase (PhraseDictionary implementation needs to set)" << endl);
    TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
    broken = true;
  }
  // Compute scores
  if (!broken) {
    vector<float> const& inputCoord = ii->second;
    vector<SPTR<vector<float> > > const& tpCoord = *tpp;
    // Centroid of target phrase instances (from phrase extraction)
    vector<float> centroid = vector<float>(inputCoord.size(), 0);
    BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
      for (size_t i = 0; i < inputCoord.size(); ++i) {
        centroid[i] += (*coord)[i];
      }
    }
    for (size_t i = 0; i < inputCoord.size(); ++i) {
      centroid[i] /= tpCoord.size();
    }
    // Average distance from the target phrase instances to (1) the input and
    // (2) the target phrase centroid
    float inputDistance = 0;
    float centroidDistance = 0;
    if (m_measure == EuclideanDistance) {
      BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
        float pointInputDistance = 0;
        float pointCentroidDistance = 0;
        for (size_t i = 0; i < inputCoord.size(); ++i) {
          pointInputDistance += pow(inputCoord[i] - (*coord)[i], 2);
          pointCentroidDistance += pow(centroid[i] - (*coord)[i], 2);
        }
        inputDistance += sqrt(pointInputDistance);
        centroidDistance += sqrt(pointCentroidDistance);
      }
    } else if (m_measure == TotalVariationDistance) {
      BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
        float pointInputDistance = 0;
        float pointCentroidDistance = 0;
        for (size_t i = 0; i < inputCoord.size(); ++i) {
          pointInputDistance += abs(inputCoord[i] - (*coord)[i]);
          pointCentroidDistance += abs(centroid[i] - (*coord)[i]);
        }
        inputDistance += pointInputDistance / 2;
        centroidDistance += pointCentroidDistance / 2;
      }
    }
    inputDistance /= tpCoord.size();
    centroidDistance /= tpCoord.size();
    // Log transform scores, max with float epsilon to avoid domain error
    scores[0] = log(max(inputDistance, Moses::FLOAT_EPSILON));
    scores[1] = log(max(centroidDistance, Moses::FLOAT_EPSILON));
  }
  // Set scores
  scoreBreakdown.Assign(this, scores);
  return;
}

void PhraseDistanceFeature::SetParameter(const string& key, const string& value)
{
  if (key == "space") {
    m_space = value;
    m_spaceID = StaticData::InstanceNonConst().MapCoordSpace(m_space);
  } else if (key == "measure") {
    if (value == "euc") {
      m_measure = EuclideanDistance;
    } else if (value == "var") {
      m_measure = TotalVariationDistance;
    } else {
      UTIL_THROW2("Unknown measure " << value << ", choices: euc var");
    }
  } else {
    StatelessFeatureFunction::SetParameter(key, value);
  }
}

} // namespace