1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#include <sstream>
#include "EditOps.h"
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TranslationOption.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"
#include <functional>
#include <boost/foreach.hpp>
#include <boost/algorithm/string.hpp>
#include "Diffs.h"
namespace Moses
{
using namespace std;
std::string ParseScores(const std::string &line, const std::string& defaultScores)
{
std::vector<std::string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.empty(), "Empty line");
for (size_t i = 1; i < toks.size(); ++i) {
std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
UTIL_THROW_IF2(args.size() != 2,
"Incorrect format for feature function arg: " << toks[i]);
if (args[0] == "scores") {
return args[1];
}
}
return defaultScores;
}
/**
 * Build the feature from its configuration line.
 *
 * The length of the "scores" string (default "dis" when the line has no
 * "scores" argument) is handed to the StatelessFeatureFunction constructor
 * together with the line -- presumably as the number of score components;
 * confirm against the base class. The line is parsed twice because the
 * base-class initializer runs before m_scores can be set.
 *
 * @param line  the feature-function configuration line
 */
EditOps::EditOps(const std::string &line)
  : StatelessFeatureFunction(ParseScores(line, "dis").size(), line),
    m_factorType(0),
    m_chars(false),
    m_scores(ParseScores(line, "dis"))
{
  std::cerr << "Initializing EditOps feature.." << std::endl;

  // Let the key=value arguments of the line override the defaults above
  // (they are dispatched to SetParameter -- TODO confirm in the base class).
  ReadParameters();
}
/**
 * Apply one configuration argument to this feature.
 *
 * Recognised keys:
 *   - "factor": parsed as a FactorType and stored in m_factorType
 *   - "chars" : parsed as a bool and stored in m_chars
 *   - "scores": stored verbatim in m_scores
 * Any other key is forwarded to StatelessFeatureFunction::SetParameter.
 *
 * @param key    argument name
 * @param value  argument value as text
 */
void EditOps::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factorType = Scan<FactorType>(value);
    return;
  }

  if (key == "chars") {
    m_chars = Scan<bool>(value);
    return;
  }

  if (key == "scores") {
    m_scores = value;
    return;
  }

  // Not one of ours: let the base class deal with it.
  StatelessFeatureFunction::SetParameter(key, value);
}
// Intentionally empty: this implementation performs no work in Load().
void EditOps::Load()
{ }
void EditOps::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &target
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
ComputeFeatures(source, target, &scoreBreakdown);
}
void EditOps::ComputeFeatures(
const Phrase &source,
const TargetPhrase& target,
ScoreComponentCollection* accumulator) const
{
std::vector<float> ops(GetNumScoreComponents(), 0);
if(m_chars) {
std::vector<FactorType> factors;
factors.push_back(m_factorType);
std::string sourceStr = source.GetStringRep(factors);
std::string targetStr = target.GetStringRep(factors);
AddStats(sourceStr, targetStr, m_scores, ops);
} else {
std::vector<std::string> sourceTokens;
//std::cerr << "Ed src: ";
for(size_t i = 0; i < source.GetSize(); ++i) {
if(!source.GetWord(i).IsNonTerminal())
sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
//std::cerr << sourceTokens.back() << " ";
}
//std::cerr << std::endl;
std::vector<std::string> targetTokens;
//std::cerr << "Ed trg: ";
for(size_t i = 0; i < target.GetSize(); ++i) {
if(!target.GetWord(i).IsNonTerminal())
targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
//std::cerr << targetTokens.back() << " ";
}
//std::cerr << std::endl;
AddStats(sourceTokens, targetTokens, m_scores, ops);
}
accumulator->PlusEquals(this, ops);
}
/**
 * Report whether the factor this feature reads is present in the mask.
 *
 * @param mask  factor mask to test
 * @return the mask entry at index m_factorType
 */
bool EditOps::IsUseable(const FactorMask &mask) const
{
  return mask[m_factorType];
}
}
|