Welcome to mirror list, hosted at ThFree Co, Russian Federation.

StsgRuleWriter.cpp « extract-ghkm « phrase-extract - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c1f0b7ad11762d9f3794564fa8dd2bd058421b25 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include "StsgRuleWriter.h"

#include <cassert>
#include <cmath>
#include <ostream>
#include <map>
#include <sstream>
#include <vector>

#include "Alignment.h"
#include "Options.h"
#include "StsgRule.h"

namespace MosesTraining
{
namespace Syntax
{
namespace GHKM
{

void StsgRuleWriter::Write(const StsgRule &rule)
{
  std::ostringstream sourceSS;
  std::ostringstream targetSS;

  // Write the source side of the rule to sourceSS.
  const std::vector<Symbol> &sourceSide = rule.GetSourceSide();
  for (std::size_t i = 0; i < sourceSide.size(); ++i) {
    const Symbol &symbol = sourceSide[i];
    if (i > 0) {
      sourceSS << " ";
    }
    if (symbol.GetType() == NonTerminal) {
      sourceSS << "[X]";
    } else {
      sourceSS << symbol.GetValue();
    }
  }

  // Write the target side of the rule to targetSS.
  rule.GetTargetSide().PrintTree(targetSS);

  // Write the rule to the forward and inverse extract files.
  if (m_options.t2s) {
    // If model is tree-to-string then flip the source and target.
    m_fwd << targetSS.str() << " ||| " << sourceSS.str() << " |||";
    m_inv << sourceSS.str() << " ||| " << targetSS.str() << " |||";
  } else {
    m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
    m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
  }

  // Write the non-terminal alignments.
  const std::vector<int> &nonTermAlignment = rule.GetNonTermAlignment();
  for (int srcIndex = 0; srcIndex <  nonTermAlignment.size(); ++srcIndex) {
    int tgtIndex = nonTermAlignment[srcIndex];
    if (m_options.t2s) {
      // If model is tree-to-string then flip the source and target.
      m_fwd << " " << tgtIndex << "-" << srcIndex;
      m_inv << " " << srcIndex << "-" << tgtIndex;
    } else {
      m_fwd << " " << srcIndex << "-" << tgtIndex;
      m_inv << " " << tgtIndex << "-" << srcIndex;
    }
  }
  m_fwd << " |||";
  m_inv << " |||";

  // Write the symbol alignments.
  const Alignment &alignment = rule.GetAlignment();
  for (Alignment::const_iterator p = alignment.begin();
       p != alignment.end(); ++p) {
    if (m_options.t2s) {
      // If model is tree-to-string then flip the source and target.
      m_fwd << " " << p->second << "-" << p->first;
      m_inv << " " << p->first << "-" << p->second;
    } else {
      m_fwd << " " << p->first << "-" << p->second;
      m_inv << " " << p->second << "-" << p->first;
    }
  }

  // Write a count of 1.
  m_fwd << " ||| 1";
  m_inv << " ||| 1";

  // Write the PCFG score (if requested).
  if (m_options.pcfg) {
    m_fwd << " ||| " << std::exp(rule.GetTargetSide().GetPcfgScore());
  }

  m_fwd << std::endl;
  m_inv << std::endl;
}

}  // namespace GHKM
}  // namespace Syntax
}  // namespace MosesTraining