moses/FF/FeatureFunction.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

// -*- c++ -*-
#ifndef moses_FeatureFunction_h
#define moses_FeatureFunction_h

#include <vector>
#include <set>
#include <string>
#include "moses/FeatureVector.h"
#include "moses/TypeDef.h"
#include "moses/parameters/AllOptions.h"
#include <boost/shared_ptr.hpp>

namespace Moses
{

// Forward declarations (alphabetical) of the types that the FeatureFunction
// interface below mentions only by reference or pointer.
class AllOptions;
class Bitmap;
class ChartHypothesis;
class DistortionScoreProducer;
class FactorMask;
class Hypothesis;
class InputPath;
class InputType;
class Phrase;
class Range;
class ScoreComponentCollection;
class StackVec;
class TargetPhrase;
class TranslationOption;
class TranslationOptionList;
class TranslationTask;

/** Base class for all feature functions.
 *
 *  Concrete feature functions must implement IsStateless(), IsUseable(),
 *  EvaluateInIsolation(), EvaluateWithSourceContext() and
 *  EvaluateTranslationOptionListWithSourceContext(). The remaining virtual
 *  hooks have default implementations, either inline here or in the
 *  accompanying source file.
 */
class FeatureFunction
{
protected:
  //! all the score producers in this run
  static std::vector<FeatureFunction*> s_staticColl;

  //! description string, returned by GetScoreProducerDescription()
  std::string m_description;
  //! argument line, returned by GetArgLine()
  std::string m_argLine;
  // presumably the parsed arguments of the configuration line -- confirm
  // against ParseLine() in the source file
  std::vector<std::vector<std::string> > m_args;
  //! value reported by the default IsTuneable()
  bool m_tuneable;
  //! value reported by the default RequireSortingAfterSourceContext()
  bool m_requireSortingAfterSourceContext;
  size_t m_verbosity;
  //! number of scores this feature produces, see GetNumScoreComponents()
  size_t m_numScoreComponents;
  size_t m_index; // index into vector covering ALL feature function values
  //! per-component tuneable flags, consulted by IsTuneableComponent()
  std::vector<bool> m_tuneableComponents;
  //! number of tuneable components; nonzero means HasTuneableComponents()
  size_t m_numTuneableComponents;
  //! options handed to Load()
  AllOptions::ptr m_options;
  //In case there's multiple producers with the same description
  static std::multiset<std::string> description_counts;

public:
  static void Register(FeatureFunction* ff);
private:
  // void Initialize(const std::string &line);
  void ParseLine(const std::string &line);

public:
  //! read-only access to the static collection of feature functions
  static const std::vector<FeatureFunction*>& GetFeatureFunctions() {
    return s_staticColl;
  }

  static FeatureFunction &FindFeatureFunction(const std::string& name);
  static void Destroy();

  FeatureFunction(const std::string &line, bool registerNow);
  FeatureFunction(size_t numScoreComponents, const std::string &line, bool registerNow = true);
  virtual bool IsStateless() const = 0;
  virtual ~FeatureFunction();

  //! override to load model files
  //! The base implementation only stores the options pointer; an override
  //! that does not call it leaves m_options as it was.
  virtual void Load(AllOptions::ptr const& opts) {
    m_options = opts;
  }

  //! returns the options pointer stored by Load()
  AllOptions::ptr const&
  options() const {
    return m_options;
  }

  //! empties the static multiset of description counts
  static void ResetDescriptionCounts() {
    description_counts.clear();
  }

  //! returns the number of scores that a subclass produces.
  //! For example, a language model conventionally produces 1, a translation table some arbitrary number, etc
  size_t GetNumScoreComponents() const {
    return m_numScoreComponents;
  }

  //! returns a string description of this producer
  const std::string& GetScoreProducerDescription() const {
    return m_description;
  }

  //! builds a feature name from this producer's description and \p name
  FName GetFeatureName(const std::string& name) const {
    return FName(GetScoreProducerDescription(), name);
  }


  //! if false, then this feature is not displayed in the n-best list.
  // use with care
  virtual bool IsTuneable() const {
    return m_tuneable;
  }

  //! true iff at least one score component is counted as tuneable
  virtual bool HasTuneableComponents() const {
    // explicit comparison instead of a silent size_t -> bool conversion
    return m_numTuneableComponents != 0;
  }

  //! whether score component \p i is tuneable.
  //! When every component is tuneable the per-component vector is not
  //! consulted at all; otherwise \p i indexes m_tuneableComponents directly,
  //! without a bounds check.
  virtual bool IsTuneableComponent(size_t i) const {
    if (m_numTuneableComponents == m_numScoreComponents) {
      return true;
    }
    return m_tuneableComponents[i];
  }

  virtual bool RequireSortingAfterSourceContext() const {
    return m_requireSortingAfterSourceContext;
  }

  virtual std::vector<float> DefaultWeights() const;

  size_t GetIndex() const;
  size_t SetIndex(size_t const idx);

protected:
  // Per-sentence clean-up hook taking the input directly; does nothing by
  // default.
  // NOTE(review): presumably invoked by the ttask overload below -- confirm
  // in the source file.
  virtual void
  CleanUpAfterSentenceProcessing(InputType const& source) { }

public:
  //! Called before search and collecting of translation options
  //! The default implementation does nothing.
  virtual void
  InitializeForInput(ttasksptr const& ttask) { }

  // clean up temporary memory, called after processing each sentence
  virtual void
  CleanUpAfterSentenceProcessing(ttasksptr const& ttask);

  const std::string &
  GetArgLine() const {
    return m_argLine;
  }

  // given a target phrase containing only factors specified in mask
  // return true if the feature function can be evaluated
  virtual bool IsUseable(const FactorMask &mask) const = 0;

  // used by stateless ff and stateful ff. Calculate initial score
  // estimate during loading of phrase table
  //
  // source phrase is the substring that the phrase table uses to look
  // up the target phrase,
  //
  // may have more factors than actually need, but not guaranteed.
  // For SCFG decoding, the source contains non-terminals, NOT the raw
  // source from the input sentence
  virtual void
  EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
                      ScoreComponentCollection& scoreBreakdown,
                      ScoreComponentCollection& estimatedScores) const = 0;

  // for context-dependent processing
  static void SetupAll(TranslationTask const& task);
  // per-feature hook for context-dependent processing; the default does
  // nothing
  virtual void Setup(TranslationTask const& task) const { }

  // This method is called once all the translation options are retrieved from the phrase table, and
  // just before search.
  // 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
  // 'stackVec' is a vector of chart cells that the RHS non-terms cover.
  // It is guaranteed to be in the same order as the non-terms in the source phrase.
  // For pb models, stackvec is NULL.
  // No FF should set estimatedScores in both overloads!
  virtual void EvaluateWithSourceContext(const InputType &input
                                         , const InputPath &inputPath
                                         , const TargetPhrase &targetPhrase
                                         , const StackVec *stackVec
                                         , ScoreComponentCollection &scoreBreakdown
                                         , ScoreComponentCollection *estimatedScores = NULL) const = 0;

  // This method is called once all the translation options are retrieved from the phrase table, and
  // just before search.
  // It receives the complete list of translation options for 'input' in one
  // call. Unlike EvaluateWithSourceContext it is given no inputPath, no
  // stackVec and no score collections.
  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const = 0;

  virtual void SetParameter(const std::string& key, const std::string& value);
  virtual void ReadParameters();
  virtual void SetTuneableComponents(const std::string& value);
};

}

#endif