blob: c4480720566094ff55f119a3ab9db107a48d3443 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#include "AlignmentInfo.h"
#include "PhrasePairFeature.h"
#include "TargetPhrase.h"
#include "Hypothesis.h"
using namespace std;
namespace Moses {
// Short weight name of this score producer: always "pp".
// The unsigned argument is unnamed and has no influence on the result.
string PhrasePairFeature::GetScoreProducerWeightShortName(unsigned) const
{
  const string shortName("pp");
  return shortName;
}
// Number of input scores reported by this feature: always zero.
size_t PhrasePairFeature::GetNumInputScores() const
{
  const size_t numInputScores = 0;
  return numInputScores;
}
// Prepares the feature for a new input sentence.
//
// Replaces whatever m_local currently holds with a freshly allocated
// ThreadLocalStorage and records the address of `in` in it. Only the pointer
// is stored (no copy is made), so `in` must stay alive for as long as
// Evaluate() may dereference m_local->input.
// NOTE(review): judging by the type name, m_local is per-thread storage --
// confirm against the header.
void PhrasePairFeature::InitializeForInput( Sentence const& in )
{
  m_local.reset(new ThreadLocalStorage);
  m_local->input = &in;
}
void PhrasePairFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const {
const TargetPhrase& target = cur_hypo.GetCurrTargetPhrase();
const Phrase& source = target.GetSourcePhrase();
/* const AlignmentInfo& align = cur_hypo.GetAlignmentInfo();
for (AlignmentInfo::const_iterator i = align.begin(); i != align.end(); ++i) {
const Factor* sourceFactor = source.GetWord(i->first).GetFactor(m_sourceFactorId);
const Factor* targetFactor = cur_hypo.GetWord(i->second).GetFactor(m_targetFactorId);
ostringstream namestr;
namestr << sourceFactor->GetString();
namestr << ":";
namestr << targetFactor->GetString();
accumulator->PlusEquals(this,namestr.str(),1);
}*/
if (m_simple) {
ostringstream namestr;
namestr << "pp_";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < target.GetSize(); ++i) {
const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}
// temporary: limit training to particular phrases
if (!m_unrestricted) {
string feature = namestr.str();
if (m_limitedFeatures.find(feature) != m_limitedFeatures.end() )
accumulator->SparsePlusEquals(feature,1);
}
else
accumulator->SparsePlusEquals(namestr.str(),1);
}
if (m_sourceContext) {
const Sentence& input = *(m_local->input);
// range over source words to get context
for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
if (m_ignorePunctuation) {
// check if trigger is punctuation
char firstChar = sourceTrigger.at(0);
CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
if(charIterator != m_punctuationHash.end())
continue;
}
bool sourceTriggerExists = false;
// if (!m_unrestricted)
// sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
namestr << "pp_";
namestr << sourceTrigger;
namestr << "~";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < target.GetSize(); ++i) {
const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}
// temporary:
if (!m_unrestricted) {
string feature = namestr.str();
if (m_limitedFeatures.find(feature) != m_limitedFeatures.end() )
accumulator->SparsePlusEquals(feature,1);
}
else
accumulator->SparsePlusEquals(namestr.str(),1);
}
}
}
}
// Always answers false.
// NOTE(review): presumably this tells the caller that the feature value is not
// computed per translation option -- confirm against the base-class contract.
bool PhrasePairFeature::ComputeValueInTranslationOption() const {
  const bool computeInTranslationOption = false;
  return computeInTranslationOption;
}
}
|