Welcome to mirror list, hosted at ThFree Co, Russian Federation.

OovHandler-inl.h « S2T « Syntax « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: e5ffe6370534fad8370c92c74fd25472d5468321 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#pragma once

#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/StaticData.h"

namespace Moses
{
namespace Syntax
{
namespace S2T
{

/**
 * Builds a rule trie holding synthetic rules for the OOV words in
 * [first, last).
 *
 * Words for which ShouldDrop() returns true contribute nothing.  For each
 * remaining word, one target phrase is synthesized per entry of the
 * unknown-LHS list obtained from StaticData (each entry supplies an LHS label
 * string and a probability) and is added to the trie's target phrase
 * collection for the word's source phrase.
 *
 * \param first  iterator to the first OOV word
 * \param last   iterator one past the final OOV word
 * \return       the newly constructed trie
 */
template<typename RuleTrie>
template<typename InputIterator>
boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
  InputIterator first, InputIterator last)
{
  const UnknownLHSList &unknownLhs = StaticData::Instance().GetUnknownLHS();

  boost::shared_ptr<RuleTrie> result(new RuleTrie(&m_ruleTableFF));

  for (; first != last; ++first) {
    const Word &word = *first;
    if (ShouldDrop(word)) {
      continue;
    }
    // One source phrase is shared by every rule built for this word; the
    // scoped_ptr releases it at the end of the iteration.
    boost::scoped_ptr<Phrase> source(SynthesizeSourcePhrase(word));

    UnknownLHSList::const_iterator lhs = unknownLhs.begin();
    const UnknownLHSList::const_iterator lhsEnd = unknownLhs.end();
    while (lhs != lhsEnd) {
// TODO Check ownership and fix any leaks.
      Word *lhsWord = SynthesizeTargetLhs(lhs->first);
      TargetPhrase *phrase =
        SynthesizeTargetPhrase(word, *source, *lhsWord, lhs->second);
      // TODO Check NULL is valid argument
      TargetPhraseCollection::shared_ptr collection =
        GetOrCreateTargetPhraseCollection(*result, *source, *phrase, NULL);
      collection->Add(phrase);
      ++lhs;
    }
  }

  return result;
}

/**
 * Allocates a source phrase consisting of a copy of sourceWord, with the
 * word at index 0 flagged as OOV.
 *
 * The phrase is created with `new` and returned as a raw pointer; the caller
 * is responsible for releasing it.
 *
 * \param sourceWord  the OOV word to wrap
 * \return            pointer to the newly allocated phrase
 */
template<typename RuleTrie>
Phrase *OovHandler<RuleTrie>::SynthesizeSourcePhrase(const Word &sourceWord)
{
  // The constructor argument is presumably a size hint -- TODO confirm.
  Phrase *result = new Phrase(1);
  Word &slot = result->AddWord();
  slot = sourceWord;
  result->GetWord(0).SetIsOOV(true);
  return result;
}

/**
 * Allocates a Word for a target-side LHS label parsed from lhsStr, using the
 * output factor order from StaticData.
 *
 * The Word is created with `new` and returned as a raw pointer; the caller is
 * responsible for its lifetime.  If initialisation fails (including the
 * null-factor check below) the partially built Word is released before the
 * exception propagates, so nothing leaks on the error path.
 *
 * \param lhsStr  string form of the LHS label
 * \return        pointer to the newly allocated Word
 * \throws        whatever UTIL_THROW_IF2 raises when factor 0 is NULL, and
 *                anything thrown by Word::CreateFromString
 */
template<typename RuleTrie>
Word *OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr)
{
  // The `true` arguments presumably mark the word as a non-terminal --
  // TODO confirm against Word's interface.
  Word *targetLhs = new Word(true);
  try {
    targetLhs->CreateFromString(Output,
                                StaticData::Instance().GetOutputFactorOrder(),
                                lhsStr, true);
    UTIL_THROW_IF2(targetLhs->GetFactor(0) == NULL,
                   "Null factor for target LHS");
  } catch (...) {
    // Nothing else refers to the Word yet, so free it here and rethrow the
    // original exception unchanged.
    delete targetLhs;
    throw;
  }
  return targetLhs;
}

/**
 * Allocates the target phrase for a synthetic OOV rule.
 *
 * The phrase contains a single unknown-word entry created from oov, carries a
 * score derived from prob that is assigned to the UnknownWordPenaltyProducer
 * feature, is evaluated in isolation against srcPhrase, and gets the
 * alignment "0-0".  When detailed tree-fragment reporting is enabled or a
 * tree structure is configured in StaticData, a "Tree" property of the form
 * "[ LHS WORD ]" is attached as well.
 *
 * \param oov        the OOV source word
 * \param srcPhrase  source phrase passed to EvaluateInIsolation()
 * \param targetLhs  target-side LHS label
 * \param prob       probability associated with targetLhs
 * \return           pointer to the newly allocated target phrase; the caller
 *                   is responsible for it
 */
template<typename RuleTrie>
TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
  const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
{
  const StaticData &config = StaticData::Instance();

  const UnknownWordPenaltyProducer &penaltyProducer =
    UnknownWordPenaltyProducer::Instance();

  TargetPhrase *result = new TargetPhrase();
  result->AddWord().CreateUnknownWord(oov);

  // scores
  float penalty = FloorScore(TransformScore(prob));
  result->GetScoreBreakdown().Assign(&penaltyProducer, penalty);
  result->EvaluateInIsolation(srcPhrase);

  // NOTE(review): the phrase is handed the address of targetLhs, so that Word
  // presumably has to outlive the phrase (or be owned by it) -- confirm.
  result->SetTargetLHS(&targetLhs);
  result->SetAlignmentInfo("0-0");

  const bool wantTree =
    config.IsDetailedTreeFragmentsTranslationReportingEnabled() ||
    config.GetTreeStructure() != NULL;
  if (wantTree) {
    // Factor 0 of each word supplies the text for the bracketed fragment.
    std::string tree("[ ");
    tree += targetLhs[0]->GetString().as_string();
    tree += " ";
    tree += oov[0]->GetString().as_string();
    tree += " ]";
    result->SetProperty("Tree", tree);
  }

  return result;
}

/**
 * Decides whether an OOV word should be skipped instead of receiving
 * synthetic rules.
 *
 * Returns false whenever StaticData's drop-unknown setting is off.  When it
 * is on, returns true exactly if the string of factor 0 contains at least one
 * ASCII digit.
 *
 * NOTE(review): as written, only OOVs that contain a digit are dropped;
 * confirm that this polarity (rather than dropping the digit-free ones) is
 * what is intended.
 *
 * \param oov  the OOV word to test
 * \return     true if the word should be dropped
 */
template<typename RuleTrie>
bool OovHandler<RuleTrie>::ShouldDrop(const Word &oov)
{
  if (StaticData::Instance().GetDropUnknown()) {
    // TODO hack. shouldn't know which factor is surface
    const Factor *surface = oov[0];
    const StringPiece text = surface->GetString();
    const bool hasDigit = text.find_first_of("0123456789") != std::string::npos;
    return hasDigit;
  }
  return false;
}

}  // S2T
}  // Syntax
}  // Moses