Welcome to mirror list, hosted at ThFree Co, Russian Federation.

PhraseBoundaryFeature.cpp « FF « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 32b26ebba2c4ec1850e0ce4ad02e4f8df6bc592c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#include "PhraseBoundaryFeature.h"

#include "moses/Hypothesis.h"

using namespace std;

namespace Moses
{

int PhraseBoundaryState::Compare(const FFState& other) const
{
  const PhraseBoundaryState& rhs = dynamic_cast<const PhraseBoundaryState&>(other);
  int tgt = Word::Compare(*m_targetWord,*(rhs.m_targetWord));
  if (tgt) return tgt;
  return Word::Compare(*m_sourceWord,*(rhs.m_sourceWord));
}

/**
 * Builds the feature from its configuration line.
 *
 * After the base class has been constructed, every entry of m_args is read
 * as a key followed by a value. Recognised keys:
 *   - "source": value is tokenized on "," into m_sourceFactors
 *   - "target": value is tokenized on "," into m_targetFactors
 *
 * @param line configuration line, forwarded to StatefulFeatureFunction.
 * @throws std::string for an empty entry, an unknown key, or a known key
 *         that has no value.
 */
PhraseBoundaryFeature::PhraseBoundaryFeature(const std::string &line)
  : StatefulFeatureFunction("PhraseBoundaryFeature", 0, line)
{
  std::cerr << "Initializing phrase boundary feature.." << std::endl;

  for (size_t i = 0; i < m_args.size(); ++i) {
    const vector<string> &args = m_args[i];

    // Guard the indexing below: an entry without a key cannot be interpreted.
    if (args.empty()) {
      throw string("Empty argument for PhraseBoundaryFeature");
    }

    const string &key = args[0];
    const bool isSource = (key == "source");
    const bool isTarget = (key == "target");
    if (!isSource && !isTarget) {
      throw "Unknown argument " + key;
    }

    // A recognised key must come with a value to tokenize.
    if (args.size() < 2) {
      throw "Missing value for argument " + key;
    }

    if (isSource) {
      m_sourceFactors = Tokenize<FactorType>(args[1], ",");
    } else {
      m_targetFactors = Tokenize<FactorType>(args[1], ",");
    }
  }
}

/**
 * Creates the state attached to the empty hypothesis: a PhraseBoundaryState
 * constructed with two null arguments. The input sentence is not consulted.
 *
 * @return a newly heap-allocated state.
 */
const FFState* PhraseBoundaryFeature::EmptyHypothesisState(const InputType &) const
{
  const FFState* initialState = new PhraseBoundaryState(NULL, NULL);
  return initialState;
}


void PhraseBoundaryFeature::AddFeatures(
  const Word* leftWord, const Word* rightWord, const FactorList& factors, const string& side,
  ScoreComponentCollection* scores) const
{
  for (size_t i = 0; i < factors.size(); ++i) {
    ostringstream name;
    name << side << ":";
    name << factors[i];
    name << ":";
    if (leftWord) {
      name << leftWord->GetFactor(factors[i])->GetString();
    } else {
      name << BOS_;
    }
    name << ":";
    if (rightWord) {
      name << rightWord->GetFactor(factors[i])->GetString();
    } else {
      name << EOS_;
    }
    scores->PlusEquals(this,name.str(),1);
  }

}

/**
 * Scores the boundary between the previous state and the phrase pair applied
 * by `cur_hypo`, and returns the state for the extended hypothesis.
 *
 * If the current target phrase is empty, no features are added and a copy of
 * the previous state is returned. Otherwise:
 *   - a "tgt" boundary feature is added between the previous state's target
 *     word and the first word of the current target phrase;
 *   - a "src" boundary feature is added between the previous state's source
 *     word and the first word of the current source phrase;
 *   - if the hypothesis reports the source as completed, closing "src" and
 *     "tgt" features are added with a null right-hand word.
 *
 * @param cur_hypo   hypothesis being scored
 * @param prev_state state of the preceding hypothesis; used as a
 *                   PhraseBoundaryState without a null check after the cast
 * @param scores     collection that receives the feature values
 * @return newly heap-allocated state pointing at the last source and target
 *         words of the current phrase pair (or a copy of the previous state)
 */
FFState* PhraseBoundaryFeature::Evaluate
(const Hypothesis& cur_hypo, const FFState* prev_state,
 ScoreComponentCollection* scores) const
{
  const PhraseBoundaryState* previous =
    dynamic_cast<const PhraseBoundaryState*>(prev_state);

  const Phrase& tgtPhrase = cur_hypo.GetCurrTargetPhrase();
  if (tgtPhrase.GetSize() == 0) {
    // Nothing produced on the target side: carry the old boundary forward.
    return new PhraseBoundaryState(*previous);
  }

  // Target-side boundary: previous last word -> current first word.
  const Word* prevTgtEnd = previous->GetTargetWord();
  const Word* curTgtStart = &(tgtPhrase.GetWord(0));
  AddFeatures(prevTgtEnd, curTgtStart, m_targetFactors, "tgt", scores);

  // Source-side boundary: previous last word -> current first word.
  const Phrase* srcPhrase = cur_hypo.GetSourcePhrase();
  const Word* prevSrcEnd = previous->GetSourceWord();
  const Word* curSrcStart = &(srcPhrase->GetWord(0));
  AddFeatures(prevSrcEnd, curSrcStart, m_sourceFactors, "src", scores);

  // Last words of the current phrase pair become the next state's context.
  const Word* curSrcEnd = &(srcPhrase->GetWord(srcPhrase->GetSize() - 1));
  const Word* curTgtEnd = &(tgtPhrase.GetWord(tgtPhrase.GetSize() - 1));

  // Once the source is completed, close both sides with an end marker.
  if (cur_hypo.IsSourceCompleted()) {
    AddFeatures(curSrcEnd, NULL, m_sourceFactors, "src", scores);
    AddFeatures(curTgtEnd, NULL, m_targetFactors, "tgt", scores);
  }

  return new PhraseBoundaryState(curSrcEnd, curTgtEnd);
}

/**
 * Reports whether every factor listed in m_targetFactors is set in `mask`.
 * Only the target factors are checked; an empty list yields true.
 *
 * @param mask factor mask to test against
 * @return true if no target factor is missing from the mask
 */
bool PhraseBoundaryFeature::IsUseable(const FactorMask &mask) const
{
  // Advance past every factor present in the mask; stop at the first miss.
  size_t idx = 0;
  while (idx < m_targetFactors.size() && mask[m_targetFactors[idx]]) {
    ++idx;
  }
  return idx == m_targetFactors.size();
}

}