Welcome to mirror list, hosted at ThFree Co, Russian Federation.

GlobalLexicalModelUnlimited.h « FF « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d3016568ac92e1e25a2cb613b0dd5e37372872cc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#ifndef GLOBALLEXICALMODELUNLIMITED_H_
#define GLOBALLEXICALMODELUNLIMITED_H_

#include <cstddef>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>

#include "StatelessFeatureFunction.h"
#include "moses/Factor.h"
#include "moses/Phrase.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Range.h"
#include "moses/FactorTypeSet.h"
#include "moses/Sentence.h"

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif

namespace Moses
{

// Forward declarations (alphabetical).
class Factor;
class Hypothesis;
class InputType;
class Phrase;

/** Discriminatively trained global lexicon model
 * This is a implementation of Mauser et al., 2009's model that predicts
 * each output word from _all_ the input words. The intuition behind this
 * feature is that it uses context words for disambiguation
 */

class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
{
  typedef std::map< char, short > CharHash;
  typedef std::map< std::string, short > StringHash;

  struct ThreadLocalStorage {
    // const Sentence *input;
    const Sentence *input;
  };

private:
#ifdef WITH_THREADS
  boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
  std::auto_ptr<ThreadLocalStorage> m_local;
#endif

  CharHash m_punctuationHash;

  std::vector< FactorType > m_inputFactors;
  std::vector< FactorType > m_outputFactors;
  bool m_unrestricted;

  bool m_sourceContext;
  bool m_biphrase;
  bool m_bitrigger;

  bool m_biasFeature;
  bool m_ignorePunctuation;

  boost::unordered_set<std::string> m_vocabSource;
  boost::unordered_set<std::string> m_vocabTarget;

public:
  GlobalLexicalModelUnlimited(const std::string &line);

  bool Load(const std::string &filePathSource, const std::string &filePathTarget);

  void InitializeForInput(ttasksptr const& ttask);

  //TODO: This implements the old interface, but cannot be updated because
  //it appears to be stateful
  void EvaluateWhenApplied(const Hypothesis& cur_hypo,
                           ScoreComponentCollection* accumulator) const;

  void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
                           int /* featureID */,
                           ScoreComponentCollection* ) const {
    throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
  }

  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedScores = NULL) const {
  }

  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {
  }

  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedScores) const {
  }

  void AddFeature(ScoreComponentCollection* accumulator,
                  StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
                  StringPiece targetWord) const;
};

}
#endif /* GLOBALLEXICALMODELUNLIMITED_H_ */