// Manager.h (moses/) -- from github.com/moses-smt/mosesdecoder.git
// blob: e125903e1bb7125531cc8ce689e05a5ad42cf029
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#ifndef moses_Manager_h
#define moses_Manager_h

#include <vector>
#include <list>
#include "InputType.h"
#include "Hypothesis.h"
#include "StaticData.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "TrellisPathList.h"
#include "SquareMatrix.h"
#include "Bitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
#include "BaseManager.h"

namespace Moses
{

class SentenceStats;
class TrellisPath;
class TranslationOptionCollection;
class LatticeMBRSolution;

/** Used to output the search graph */
struct SearchGraphNode {
  const Hypothesis* hypo;
  const Hypothesis* recombinationHypo;
  int forward;
  double fscore;

  SearchGraphNode(const Hypothesis* theHypo,
                  const Hypothesis* theRecombinationHypo,
                  int theForward,
                  double theFscore) :
    hypo(theHypo), recombinationHypo(theRecombinationHypo),
    forward(theForward), fscore(theFscore) {}

  bool operator<(const SearchGraphNode& sgn) const {
    return this->hypo->GetId() < sgn.hypo->GetId();
  }

};

/** The Manager class implements a stack decoding algorithm for phrase-based decoding.
 * Hypotheses are organized in stacks. One stack contains all hypotheses that have
 * the same number of foreign words translated.  The data structure for hypothesis
 * stacks is the class HypothesisStack. The data structure for a hypothesis
 * is the class Hypothesis.
 *
 * The main decoder loop in the function ProcessSentence() consists of the steps:
 * - Create the list of possible translation options. In phrase-based decoding
 *   (and also in the first mapping step of the factored model) a translation
 *   option is a phrase translation from the source to the target. Given a
 *   specific input sentence, only a limited number of phrase translations can
 *   be applied. For efficient lookup of the translation options later, these
 *   options are first collected in the function CreateTranslationOption (for
 *   more information check the class TranslationOptionCollection)
 * - Create initial hypothesis: Hypothesis stack 0 contains only one empty hypothesis.
 * - Going through stacks 0 ... (sentence_length-1):
 *   - The stack is pruned to the maximum size
 *   - Going through all hypotheses in the stack
 *     - Each hypothesis is expanded by ProcessOneHypothesis()
 *     - Expansion means applying a translation option to the hypothesis to create
 *       new hypotheses
 *     - What translation options may be applied depends on reordering limits and
 *       overlap with already translated words
 *     - With an applicable translation option and a hypothesis at hand, a new
 *       hypothesis can be created in ExpandHypothesis()
 *     - New hypotheses are either discarded (because they are too bad), added to
 *       the appropriate stack, or re-combined with existing hypotheses
 *
 * NOTE(review): ProcessSentence(), ProcessOneHypothesis() and ExpandHypothesis()
 * are not declared in this class; the only decoding entry point declared here is
 * Decode(). Presumably the loop described above now lives behind Decode() and
 * the Search object (m_search) -- confirm and update the names if so.
 **/

class Manager : public BaseManager
{
  // These three are declared before any access specifier, so they have the
  // class's default (private) access. No definitions are visible in this
  // header -- presumably they are left undefined to forbid default
  // construction, copying and assignment (confirm against Manager.cpp).
  Manager();
  Manager(Manager const&);
  void operator=(Manager const&);
private:

  // Helper functions to output search graph in HTK standard lattice format
  // (the size_t-returning overloads take and return an index; presumably the
  // next free feature index after the given feature function -- confirm).
  void OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream) const;
  size_t OutputFeatureWeightsForSLF(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const;
  void OutputFeatureValuesForSLF(const Hypothesis* hypo, bool zeros, std::ostream &outputSearchGraphStream) const;
  size_t OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypothesis* hypo, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const;

  // Helper functions to output search graph in the hypergraph format of Kenneth Heafield's lazy hypergraph decoder
  void OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostream &outputSearchGraphStream) const;


protected:
  // data
  TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
  Search *m_search; /**< Search instance associated with this manager (raw pointer; ownership is not visible from this header) */

  HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
  size_t interrupted_flag; /**< NOTE(review): presumably non-zero once decoding was interrupted -- confirm */
  // NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17.
  // This header also does not include <memory> itself, so the declaration
  // relies on a transitive include.
  std::auto_ptr<SentenceStats> m_sentenceStats;
  int m_hypoId; //used to number the hypos as they are created.

  // Both functions fill the two caller-supplied containers (passed by pointer).
  // NOTE(review): presumably pConnected maps a hypothesis id to a
  // "is connected" flag and pConnectedList lists the connected hypotheses;
  // the "Winner" variant presumably restricts this to winning hypotheses -- confirm.
  void GetConnectedGraph(
    std::map< int, bool >* pConnected,
    std::vector< const Hypothesis* >* pConnectedList) const;
  void GetWinnerConnectedGraph(
    std::map< int, bool >* pConnected,
    std::vector< const Hypothesis* >* pConnectedList) const;

  // output
  // nbest
  // Both buffers are `mutable` so that const member functions can write to them.
  mutable std::ostringstream m_latticeNBestOut;
  mutable std::ostringstream m_alignmentOut;
public:
  // Stream-level output helpers: each writes to the stream (or container)
  // passed as its first argument and does not modify the Manager (all const).
  void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList) const;
  void OutputSurface(std::ostream &out,
                     Hypothesis const& edge,
                     bool const recursive=false) const;

  void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const;
  void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
  void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;
  void OutputPassthroughInformation(std::ostream& os, const Hypothesis* hypo) const;

  // Returns a map keyed by size_t with Factor pointers as values.
  // NOTE(review): presumably the key is a target word position -- confirm.
  std::map<size_t, const Factor*>
  GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const;

  // linkId is taken by non-const reference, so the callee may update it.
  void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const;

  void OutputAlignment(std::ostringstream &out, const TrellisPath &path) const;

public:
  // Manager(InputType const& source);
  // The only constructor accessible to callers; takes a ttasksptr.
  Manager(ttasksptr const& ttask);
  ~Manager();
  const  TranslationOptionCollection* getSntTranslationOptions();

  // Decoding entry point and accessors for its results.
  void Decode();
  const Hypothesis *GetBestHypothesis() const;
  const Hypothesis *GetActualBestHypothesis() const;
  // `ret` is an output parameter; onlyDistinct defaults to 0, i.e. false.
  void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
  void CalcLatticeSamples(size_t count, TrellisPathList &ret) const;
  void PrintAllDerivations(long translationId, std::ostream& outputStream) const;
  void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
  void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
  void GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) const;
  void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
  // Non-const: NOTE(review): presumably hands out and advances m_hypoId -- confirm.
  int GetNextHypoId();

  void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId) const;
  void OutputBestHypo(const std::vector<Moses::Word>&  mbrBestHypo, std::ostream& out) const;
  void OutputBestHypo(const Moses::TrellisPath &path, std::ostream &out) const;

  // Only declared when the build defines HAVE_PROTOBUF.
#ifdef HAVE_PROTOBUF
  void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif

  // Search-graph output in several formats, plus GetSearchGraph(), which
  // fills the caller-supplied vector with SearchGraphNode entries.
  void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
  void OutputSearchGraphAsSLF(long translationId, std::ostream &outputSearchGraphStream) const;
  void OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const;
  void GetSearchGraph(std::vector<SearchGraphNode>& searchGraph) const;

  const InputType& GetSource() const;

  /***
   * To be called after processing a sentence (which may consist of more than
   * just calling ProcessSentence()).
   * NOTE(review): ProcessSentence() is not declared in this class; this
   * presumably refers to Decode() -- confirm.
   */
  void CalcDecoderStatistics() const;
  void ResetSentenceStats(const InputType& source);
  SentenceStats& GetSentenceStats() const;

  /***
   * For Lattice MBR.
   * All four arguments are caller-supplied output containers passed by pointer.
   * NOTE(review): presumably pOutgoingHyps maps each hypothesis to its
   * successors and pFwdBwdScores holds forward-backward scores -- confirm.
  */
  void
  GetForwardBackwardSearchGraph
  ( std::map< int, bool >* pConnected,
    std::vector< const Hypothesis* >* pConnectedList,
    std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps,
    std::vector< float>* pFwdBwdScores) const;

  // outputs
  // Collector-level output: each takes an OutputCollector pointer.
  // NOTE(review): presumably these implement the BaseManager interface -- confirm.
  void OutputBest(OutputCollector *collector)  const;
  void OutputNBest(OutputCollector *collector)  const;
  void OutputAlignment(OutputCollector *collector) const;
  void OutputLatticeSamples(OutputCollector *collector) const;
  void OutputDetailedTranslationReport(OutputCollector *collector) const;
  void OutputUnknowns(OutputCollector *collector) const;
  // Defined inline with an empty body: a no-op for this manager.
  void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const {
  }
  void OutputWordGraph(OutputCollector *collector) const;
  void OutputSearchGraph(OutputCollector *collector) const;
  void OutputSearchGraphSLF() const;
  // void OutputSearchGraphHypergraph() const;

};

}
#endif