Welcome to mirror list, hosted at ThFree Co, Russian Federation.

EditOps.cpp « FF « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: fa66acf1c40d8964938f606b98cf70edbd966702 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include <sstream>
#include "EditOps.h"
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"

#include <functional>

#include <boost/foreach.hpp>
#include <boost/algorithm/string.hpp>

#include "Diffs.h"

namespace Moses
{

using namespace std;

std::string ParseScores(const std::string &line, const std::string& defaultScores)
{
  std::vector<std::string> toks = Tokenize(line);
  UTIL_THROW_IF2(toks.empty(), "Empty line");

  for (size_t i = 1; i < toks.size(); ++i) {
    std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
    UTIL_THROW_IF2(args.size() != 2,
                   "Incorrect format for feature function arg: " << toks[i]);

    if (args[0] == "scores") {
      return args[1];
    }
  }
  return defaultScores;
}

/**
 * Build the feature from its configuration line.
 *
 * The number of score components handed to the base class is the length of
 * the "scores" string found in the line ("dis" when none is given). The same
 * string is stored in m_scores. The factor index starts at 0 and m_chars at
 * false; ReadParameters() is then invoked (presumably routing each argument
 * through SetParameter — confirm against the base class).
 */
EditOps::EditOps(const std::string &line)
  : StatelessFeatureFunction(ParseScores(line, "dis").size(), line),
    m_factorType(0),
    m_chars(false),
    m_scores(ParseScores(line, "dis"))
{
  std::cerr << "Initializing EditOps feature.." << std::endl;
  this->ReadParameters();
}

/**
 * Apply one key/value configuration argument.
 *
 * Handled here:
 *   - "factor": parsed with Scan<FactorType> into m_factorType
 *   - "chars" : parsed with Scan<bool> into m_chars
 *   - "scores": stored verbatim in m_scores
 * Any other key is forwarded to StatelessFeatureFunction::SetParameter.
 */
void EditOps::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factorType = Scan<FactorType>(value);
    return;
  }

  if (key == "chars") {
    m_chars = Scan<bool>(value);
    return;
  }

  if (key == "scores") {
    m_scores = value;
    return;
  }

  // Not one of ours: let the base class deal with it.
  StatelessFeatureFunction::SetParameter(key, value);
}

// Intentionally empty: this feature performs no work in Load().
void EditOps::Load()
{ }

void EditOps::EvaluateInIsolation(const Phrase &source
                                  , const TargetPhrase &target
                                  , ScoreComponentCollection &scoreBreakdown
                                  , ScoreComponentCollection &estimatedFutureScore) const
{
  ComputeFeatures(source, target, &scoreBreakdown);
}

void EditOps::ComputeFeatures(
  const Phrase &source,
  const TargetPhrase& target,
  ScoreComponentCollection* accumulator) const
{
  std::vector<float> ops(GetNumScoreComponents(), 0);

  if(m_chars) {
    std::vector<FactorType> factors;
    factors.push_back(m_factorType);

    std::string sourceStr = source.GetStringRep(factors);
    std::string targetStr = target.GetStringRep(factors);

    AddStats(sourceStr, targetStr, m_scores, ops);
  } else {
    std::vector<std::string> sourceTokens;
    //std::cerr << "Ed src: ";
    for(size_t i = 0; i < source.GetSize(); ++i) {
      if(!source.GetWord(i).IsNonTerminal())
        sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
      //std::cerr << sourceTokens.back() << " ";
    }
    //std::cerr << std::endl;

    std::vector<std::string> targetTokens;
    //std::cerr << "Ed trg: ";
    for(size_t i = 0; i < target.GetSize(); ++i) {
      if(!target.GetWord(i).IsNonTerminal())
        targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
      //std::cerr << targetTokens.back() << " ";
    }
    //std::cerr << std::endl;

    AddStats(sourceTokens, targetTokens, m_scores, ops);
  }

  accumulator->PlusEquals(this, ops);
}

/**
 * Report whether the configured factor is set in `mask`.
 *
 * @return the value of mask[m_factorType]
 */
bool EditOps::IsUseable(const FactorMask &mask) const
{
  return mask[m_factorType];
}

}