Welcome to mirror list, hosted at ThFree Co, Russian Federation.

SourceWordDeletionFeature.cpp « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 4f247c7e7e1702923c63e15d92bd640c44f9526a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <sstream>
#include <vector>
#include "SourceWordDeletionFeature.h"
#include "Phrase.h"
#include "TargetPhrase.h"
#include "Hypothesis.h"
#include "ChartHypothesis.h"
#include "ScoreComponentCollection.h"

namespace Moses {

using namespace std;

/**
 * Populate the feature's vocabulary from a text file.
 *
 * Every line of the file is inserted verbatim into m_vocab. On success the
 * feature is switched out of unrestricted mode (m_unrestricted = false).
 *
 * @param filePath  path of the vocabulary file, one entry per line
 * @return true if the file was opened and read; false (after printing a
 *         message to stderr) if it could not be opened
 */
bool SourceWordDeletionFeature::Load(const std::string &filePath)
{
  std::ifstream vocabStream(filePath.c_str());
  if (vocabStream.fail()) {
    std::cerr << "could not open file " << filePath << std::endl;
    return false;
  }

  // one vocabulary entry per line, stored exactly as read
  for (std::string entry; std::getline(vocabStream, entry); ) {
    m_vocab.insert(entry);
  }
  vocabStream.close();

  m_unrestricted = false;
  return true;
}

void SourceWordDeletionFeature::Evaluate(const Hypothesis& cur_hypo,
                                         ScoreComponentCollection* accumulator) const
{
	TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
	const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
	const AlignmentInfo::CollType &alignment = alignmentInfo.GetAlignments();
	ComputeFeatures(targetPhrase, accumulator, alignment);
}

void SourceWordDeletionFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
		                   	 	 	 	 	 	 	 	 	 	 	 	ScoreComponentCollection* accumulator) const
{
	TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
	const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
	const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
	ComputeFeatures(targetPhrase, accumulator, alignment);
}

/**
 * Fire one feature for every unaligned terminal word on the source side of
 * a target phrase.
 *
 * A source position counts as aligned if it appears as the first component
 * of any alignment point in `alignment`. For each unaligned, non-non-terminal
 * source word other than "<s>" and "</s>":
 *   - if the feature is restricted (m_unrestricted is false) and the word is
 *     not in m_vocab, the feature named "OTHER" is incremented by 1;
 *   - otherwise the feature named after the word itself is incremented by 1.
 *
 * Nothing is fired for a 1-to-1 phrase whose target word carries
 * UNKNOWN_FACTOR as factor 1.
 *
 * @param targetPhrase  phrase whose source side is inspected
 * @param accumulator   score collection that receives the feature values
 * @param alignment     alignment points; `first` is used as the source index
 */
void SourceWordDeletionFeature::ComputeFeatures(const TargetPhrase& targetPhrase,
                                                ScoreComponentCollection* accumulator,
                                                const AlignmentInfo::CollType &alignment) const
{
  const Phrase &sourcePhrase = targetPhrase.GetSourcePhrase();
  const size_t targetLength = targetPhrase.GetSize();
  const size_t sourceLength = sourcePhrase.GetSize();

  // handle special case: unknown words (they have no word alignment)
  if (targetLength == 1 && sourceLength == 1) {
    const Factor* f1 = targetPhrase.GetWord(0).GetFactor(1);
    if (f1 && f1->GetString().compare(UNKNOWN_FACTOR) == 0) {
      return;
    }
  }

  // flag aligned words; the buffer is sized to the phrase so there is no
  // hard limit on the source length
  std::vector<bool> aligned(sourceLength, false);
  for (AlignmentInfo::const_iterator alignmentPoint = alignment.begin();
       alignmentPoint != alignment.end(); ++alignmentPoint) {
    const size_t sourcePos = alignmentPoint->first;
    // ignore points outside the source phrase instead of writing out of bounds
    if (sourcePos < sourceLength) {
      aligned[sourcePos] = true;
    }
  }

  // process unaligned source words
  for (size_t i = 0; i < sourceLength; ++i) {
    if (aligned[i]) {
      continue;
    }
    const Word &w = sourcePhrase.GetWord(i);
    if (w.IsNonTerminal()) {
      continue;
    }
    // a word may lack the configured factor; skip it rather than dereference null
    const Factor *factor = w.GetFactor(m_factorType);
    if (!factor) {
      continue;
    }
    const std::string &word = factor->GetString();
    if (word == "<s>" || word == "</s>") {
      continue;
    }
    if (!m_unrestricted && m_vocab.find(word) == m_vocab.end()) {
      accumulator->PlusEquals(this, "OTHER", 1);
    } else {
      accumulator->PlusEquals(this, word, 1);
    }
  }
}

}