Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Word.cpp « SCFG « moses2 « contrib - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 1794706da6ad4c36b4b501ca4014763ea686cd2d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*
 * Word.cpp
 *
 *  Created on: 23 Oct 2015
 *      Author: hieu
 */
#include <boost/functional/hash.hpp>
#include "Word.h"
#include "Hypothesis.h"
#include "ActiveChart.h"
#include "TargetPhraseImpl.h"
#include "Sentence.h"
#include "../legacy/Util2.h"
#include "../System.h"
#include "../AlignmentInfo.h"
#include "../ManagerBase.h"

using namespace std;

namespace Moses2
{
namespace SCFG
{
/**
 * Copy constructor.
 *
 * Copy-constructs the Moses2::Word base from `copy` and carries over the
 * non-terminal flag.
 */
Word::Word(const SCFG::Word &copy)
  : Moses2::Word(copy),
    isNonTerminal(copy.isNonTerminal)
{
}

/**
 * Fill this word's factors from its textual form.
 *
 * A string that starts with '[' and ends with ']' is flagged as a
 * non-terminal. If such a string contains a further '[' after the first
 * character (e.g. "[X][Y]"), only the text between that later '[' and the
 * closing ']' is used; otherwise the text between the outer brackets is used.
 * Any other string is flagged as a terminal and used whole.
 *
 * The selected text is split on '|', and the i-th piece is stored in
 * m_factors[i] as the value returned by vocab.AddFactor().
 *
 * @param vocab   factor collection the tokens are passed to
 * @param system  forwarded unchanged to vocab.AddFactor()
 * @param str     textual representation of the word
 */
void Word::CreateFromString(FactorCollection &vocab,
    const System &system,
    const std::string &str)
{
  // Portion of str holding the '|'-separated factors; defaults to all of it.
  size_t factorBegin = 0;
  size_t factorLen = str.size();

  isNonTerminal = (str[0] == '[' && str[str.size() - 1] == ']');

  if (isNonTerminal) {
    // A second '[' means the label sits in the last bracket pair; without
    // one, the opening bracket at index 0 is the one to strip.
    const size_t innerOpen = str.find("[", 1);
    const size_t openPos = (innerOpen == string::npos) ? 0 : innerOpen;

    factorBegin = openPos + 1;
    factorLen = str.size() - openPos - 2;  // drop the '[' and the final ']'
  }

  const vector<string> toks = Tokenize(str.substr(factorBegin, factorLen), "|");

  // NOTE(review): nothing here bounds toks.size() against the capacity of
  // m_factors -- confirm callers never supply more factors than it can hold.
  for (size_t i = 0; i < toks.size(); ++i) {
    m_factors[i] = vocab.AddFactor(toks[i], system, isNonTerminal);
  }
}

/**
 * Hash of the whole word.
 *
 * Starts from the base-class hash and mixes in the non-terminal flag, so the
 * flag takes part in the resulting value.
 */
size_t Word::hash() const
{
  size_t seed = Moses2::Word::hash();
  boost::hash_combine(seed, isNonTerminal);
  return seed;
}

size_t Word::hash(const std::vector<FactorType> &factors) const
{
  size_t seed = isNonTerminal;
  for (size_t i = 0; i < factors.size(); ++i) {
	  FactorType factorType = factors[i];
	  const Factor *factor = m_factors[factorType];
	  boost::hash_combine(seed, factor);
  }
  return seed;
}

/**
 * Write the word to `out`.
 *
 * Terminals are written exactly as the base class writes them; non-terminals
 * get the same output wrapped in "[" and "]".
 */
void Word::OutputToStream(const System &system, std::ostream &out) const
{
  if (!isNonTerminal) {
    Moses2::Word::OutputToStream(system, out);
    return;
  }

  out << "[";
  Moses2::Word::OutputToStream(system, out);
  out << "]";
}

/**
 * Write the word at target position `targetPos` of a hypothesis to `out`.
 *
 * When the input options name a placeholder factor (placeholder_factor is not
 * NOT_FOUND) and `targetPos` is aligned to exactly one source position in the
 * target phrase's terminal alignment, the input word covered by that source
 * symbol is looked up. If that word has a non-null placeholder factor, the
 * factor's string is written instead of this word. In every other case the
 * word is written via OutputToStream(system, out).
 *
 * @param mgr        supplies the system options and the input sentence
 * @param targetPos  position of this word within the hypothesis' target phrase
 * @param hypo       hypothesis providing the target phrase and symbol binding
 * @param out        destination stream
 */
void Word::OutputToStream(
    const ManagerBase &mgr,
    size_t targetPos,
    const SCFG::Hypothesis &hypo,
    std::ostream &out) const
{
  const SCFG::TargetPhraseImpl &tp = hypo.GetTargetPhrase();
  const SCFG::SymbolBind &symbolBind = hypo.GetSymbolBind();
  const System &system = mgr.system;

  if (system.options.input.placeholder_factor != NOT_FOUND) {
    const AlignmentInfo &alignInfo = tp.GetAlignTerm();
    std::set<size_t> sourceAligns = alignInfo.GetAlignmentsForTarget(targetPos);

    // Substitution is only attempted for an unambiguous 1-to-1 alignment.
    if (sourceAligns.size() == 1) {
      size_t sourcePos = *sourceAligns.begin();
      /*
      cerr << "sourcePos=" << sourcePos << endl;
      cerr << "tp=" << tp.Debug(mgr.system) << endl;
      cerr << "m_symbolBind=" << symbolBind.Debug(mgr.system) << endl;
      */
      // NOTE(review): both checks below are asserts, so they are compiled out
      // under NDEBUG and the indexing that follows is then unchecked.
      assert(sourcePos < symbolBind.GetSize());
      const Range &inputRange = symbolBind.coll[sourcePos].GetRange();
      assert(inputRange.GetNumWordsCovered() == 1);

      const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence &>(mgr.GetInput());
      const SCFG::Word &sourceWord = sentence[inputRange.GetStartPos()];
      const Factor *factor = sourceWord[system.options.input.placeholder_factor];
      if (factor) {
        // The source word carries a placeholder value: emit it and stop.
        out << factor->GetString();
        return;
      }
    }
  }

  // No placeholder substitution applied; print the word itself.
  OutputToStream(system, out);
}

std::string Word::Debug(const System &system) const
{
  stringstream out;
  if (isNonTerminal) {
    out << "[";
  }
  out << Moses2::Word::Debug(system);
  if (isNonTerminal) {
      out << "]";
  }
  return out.str();
}

}
}