Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Manager.h « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 4b45414eebd969b47977dfd805859e56c7fea85a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#ifndef moses_Manager_h
#define moses_Manager_h

#include <vector>
#include <list>
#include <ctime>
#include "InputType.h"
#include "Hypothesis.h"
#include "StaticData.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "TrellisPathList.h"
#include "SquareMatrix.h"
#include "WordsBitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
#if HAVE_CONFIG_H
#include "config.h"
#endif

namespace Moses
{

class SentenceStats;
class TrellisPath;
class TranslationOptionCollection;

/** Used to output the search graph */
struct SearchGraphNode {
  const Hypothesis* hypo;
  const Hypothesis* recombinationHypo;
  int forward;
  double fscore;

  SearchGraphNode(const Hypothesis* theHypo,
                  const Hypothesis* theRecombinationHypo,
                  int theForward,
                  double theFscore) :
    hypo(theHypo), recombinationHypo(theRecombinationHypo),
    forward(theForward), fscore(theFscore) {}

};

/** The Manager class implements a stack decoding algorithm.
 * Hypotheses are organized in stacks. One stack contains all hypothesis that have
 * the same number of foreign words translated.  The data structure for hypothesis
 * stacks is the class HypothesisStack. The data structure for a hypothesis
 * is the class Hypothesis.
 *
 * The main decoder loop in the function ProcessSentence() consists of the steps:
 * - Create the list of possible translation options. In phrase-based decoding
 *   (and also the first mapping step in the factored model) is a phrase translation
 *   from the source to the target. Given a specific input sentence, only a limited
 *   number of phrase translation can be applied. For efficient lookup of the
 *   translation options later, these optuions are first collected in the function
 *   CreateTranslationOption (for more information check the class
 *   TranslationOptionCollection)
 * - Create initial hypothesis: Hypothesis stack 0 contains only one empty hypothesis.
 * - Going through stacks 0 ... (sentence_length-1):
 *   - The stack is pruned to the maximum size
 *   - Going through all hypotheses in the stack
 *     - Each hypothesis is expanded by ProcessOneHypothesis()
 *     - Expansion means applying a translation option to the hypothesis to create
 *       new hypotheses
 *     - What translation options may be applied depends on reordering limits and
 *       overlap with already translated words
 *     - With a applicable translation option and a hypothesis at hand, a new
 *       hypothesis can be created in ExpandHypothesis()
 *     - New hypothesis are either discarded (because they are too bad), added to
 *       the appropriate stack, or re-combined with existing hypotheses
 **/

class Manager
{
  Manager();
  Manager(Manager const&);
  void operator=(Manager const&);
  const TranslationSystem* m_system;
protected:
  // data
//	InputType const& m_source; /**< source sentence to be translated */
  TranslationOptionCollection *m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
  Search *m_search;

  HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
  clock_t m_start; /**< starting time, used for logging */
  size_t interrupted_flag;
  std::auto_ptr<SentenceStats> m_sentenceStats;
  int m_hypoId; //used to number the hypos as they are created.

  void GetConnectedGraph(
    std::map< int, bool >* pConnected,
    std::vector< const Hypothesis* >* pConnectedList) const;
  void GetWinnerConnectedGraph(
    std::map< int, bool >* pConnected,
    std::vector< const Hypothesis* >* pConnectedList) const;


public:
  InputType const& m_source; /**< source sentence to be translated */
  Manager(InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system);
  ~Manager();
  const  TranslationOptionCollection* getSntTranslationOptions();
  const TranslationSystem* GetTranslationSystem() {
    return m_system;
  }

  void ProcessSentence();
  const Hypothesis *GetBestHypothesis() const;
  const Hypothesis *GetActualBestHypothesis() const;
  void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const;
  void CalcLatticeSamples(size_t count, TrellisPathList &ret) const;
  void PrintAllDerivations(long translationId, std::ostream& outputStream) const;
  void printDivergentHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase*> & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
  void printThisHypothesis(long translationId, const Hypothesis* hypo, const std::vector <const TargetPhrase* > & remainingPhrases, float remainingScore , std::ostream& outputStream) const;
  void GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const;
  int GetNextHypoId();
#ifdef HAVE_PROTOBUF
  void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif

  void OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
  void GetSearchGraph(std::vector<SearchGraphNode>& searchGraph) const;
  const InputType& GetSource() const {
    return m_source;
  }

  /***
   * to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
   */
  void CalcDecoderStatistics() const;
  void ResetSentenceStats(const InputType& source);
  SentenceStats& GetSentenceStats() const;

  /***
   *For Lattice MBR
  */
  void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
                                     std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;

};

}
#endif