Welcome to mirror list, hosted at ThFree Co, Russian Federation.

StaticData.h « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ea3d140256bec6b805f05798815fc74ea054d5e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#ifndef moses_StaticData_h
#define moses_StaticData_h

#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#ifdef WITH_THREADS
#include <boost/thread.hpp>
#include <boost/thread/mutex.hpp>
#endif

#include "Parameter.h"
#include "SentenceStats.h"
#include "ScoreComponentCollection.h"
#include "moses/FF/Factory.h"
#include "moses/PP/Factory.h"

#include "moses/parameters/AllOptions.h"
#include "moses/parameters/BookkeepingOptions.h"

namespace Moses
{

// Forward declarations of Moses types that are defined in other headers.
class InputType;
class DecodeGraph;
class DecodeStep;

class DynamicCacheBasedLanguageModel;
class PhraseDictionaryDynamicCacheBased;

// One entry of the unknown-LHS list: a string paired with a float.
// NOTE(review): presumably a left-hand-side label and its score for unknown
// words -- confirm against the code that fills StaticData::m_unknownLHS.
typedef std::pair<std::string, float> UnknownLHSEntry;
// Sequence of UnknownLHSEntry values, exposed by StaticData::GetUnknownLHS().
typedef std::vector<UnknownLHSEntry>  UnknownLHSList;

/** Contains global variables and constants.
 *  Only 1 object of this class should be instantiated.
 *  A const object of this class is accessible by any function during decoding by calling StaticData::Instance();
 */
class StaticData
{
  friend class HyperParameterAsWeight;

private:
  static StaticData s_instance;
protected:
  Parameter *m_parameter;
  boost::shared_ptr<AllOptions> m_options;

  mutable ScoreComponentCollection m_allWeights;

  std::vector<DecodeGraph*> m_decodeGraphs;

  // Initial	= 0 = can be used when creating poss trans
  // Other		= 1 = used to calculate LM score once all steps have been processed
  float
  m_wordDeletionWeight;


  // PhraseTrans, Generation & LanguageModelScore has multiple weights.
  // int				m_maxDistortion;
  // do it differently from old pharaoh
  // -ve	= no limit on distortion
  // 0		= no disortion (monotone in old pharaoh)
  bool m_reorderingConstraint; //! use additional reordering constraints
  BookkeepingOptions m_bookkeeping_options;


  bool m_requireSortingAfterSourceContext;

  mutable size_t m_verboseLevel;

  std::string m_factorDelimiter; //! by default, |, but it can be changed


  size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1)

  std::string m_outputUnknownsFile; //! output unknowns in this file

  // Initial = 0 = can be used when creating poss trans
  // Other = 1 = used to calculate LM score once all steps have been processed
  Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
  SourceLabelOverlap m_sourceLabelOverlap;
  UnknownLHSList m_unknownLHS;

  int m_threadCount;
  // long m_startTranslationId;

  // alternate weight settings
  mutable std::string m_currentWeightSetting;
  std::map< std::string, ScoreComponentCollection* > m_weightSetting; // core weights
  std::map< std::string, std::set< std::string > > m_weightSettingIgnoreFF; // feature function
  std::map< std::string, std::set< size_t > > m_weightSettingIgnoreDP; // decoding path

  bool m_useLegacyPT;
  // bool m_defaultNonTermOnlyForEmptyRange;
  // S2TParsingAlgorithm m_s2tParsingAlgorithm;

  FeatureRegistry m_registry;
  PhrasePropertyFactory m_phrasePropertyFactory;

  StaticData();

  void LoadChartDecodingParameters();
  void LoadNonTerminals();

  //! load decoding steps
  void LoadDecodeGraphs();
  void LoadDecodeGraphsOld(const std::vector<std::string> &mappingVector,
                           const std::vector<size_t> &maxChartSpans);
  void LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector,
                           const std::vector<size_t> &maxChartSpans);

  void NoCache();

  std::string m_binPath;

  // soft NT lookup for chart models
  std::vector<std::vector<Word> > m_softMatchesMap;

  const StatefulFeatureFunction* m_treeStructure;

  void ini_oov_options();
  bool ini_output_options();
  bool ini_performance_options();

  void initialize_features();

  // Coordinate space name map for matching spaces across XML input ("coord"
  // tag) and feature functions that assign or use coordinates on target phrases
  std::map< std::string const, size_t > m_coordSpaceMap;
  size_t m_coordSpaceNextID;

public:

  //! destructor
  ~StaticData();

  //! return static instance for use like global variable
  static const StaticData& Instance() {
    return s_instance;
  }

  //! do NOT call unless you know what you're doing
  static StaticData& InstanceNonConst() {
    return s_instance;
  }

  /** delete current static instance and replace with another.
  	* Used by gui front end
  	*/
#ifdef WIN32
  static void Reset() {
    s_instance = StaticData();
  }
#endif

  //! Load data into static instance. This function is required as
  //  LoadData() is not const
  static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);

  //! Main function to load everything. Also initialize the Parameter object
  bool LoadData(Parameter *parameter);
  void ClearData();

  const Parameter &GetParameter() const {
    return *m_parameter;
  }

  AllOptions::ptr const
    options() const {
    return m_options;
  }

  size_t
  GetVerboseLevel() const {
    return m_verboseLevel;
  }

  void
  SetVerboseLevel(int x) const {
    m_verboseLevel = x;
  }

  const ScoreComponentCollection&
  GetAllWeights() const {
    return m_allWeights;
  }

  void SetAllWeights(const ScoreComponentCollection& weights) {
    m_allWeights = weights;
  }

  //Weight for a single-valued feature
  float GetWeight(const FeatureFunction* sp) const {
    return m_allWeights.GetScoreForProducer(sp);
  }

  //Weight for a single-valued feature
  void SetWeight(const FeatureFunction* sp, float weight) ;


  //Weights for feature with fixed number of values
  std::vector<float> GetWeights(const FeatureFunction* sp) const {
    return m_allWeights.GetScoresForProducer(sp);
  }

  //Weights for feature with fixed number of values
  void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);

  const std::string& GetFactorDelimiter() const {
    return m_factorDelimiter;
  }

  size_t GetLMCacheCleanupThreshold() const {
    return m_lmcache_cleanup_threshold;
  }

  const std::string& GetOutputUnknownsFile() const {
    return m_outputUnknownsFile;
  }

  const UnknownLHSList &GetUnknownLHS() const {
    return m_unknownLHS;
  }

  float GetRuleCountThreshold() const {
    return 999999; /* TODO wtf! */
  }

  void ReLoadBleuScoreFeatureParameter(float weight);

  Parameter* GetParameter() {
    return m_parameter;
  }

  int ThreadCount() const {
    return m_threadCount;
  }

  void SetExecPath(const std::string &path);
  const std::string &GetBinDirectory() const;

  bool NeedAlignmentInfo() const {
    return m_bookkeeping_options.need_alignment_info;
  }

  bool GetHasAlternateWeightSettings() const {
    return m_weightSetting.size() > 0;
  }

  /** Alternate weight settings allow the wholesale ignoring of
      feature functions. This function checks if a feature function
      should be evaluated given the current weight setting */
  bool IsFeatureFunctionIgnored( const FeatureFunction &ff ) const {
    if (!GetHasAlternateWeightSettings()) {
      return false;
    }
    std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
    =  m_weightSettingIgnoreFF.find( m_currentWeightSetting );
    if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
      return false;
    }
    const std::string &ffName = ff.GetScoreProducerDescription();
    const std::set< std::string > &ignoreFF = lookupIgnoreFF->second;
    return ignoreFF.count( ffName );
  }

  /** Alternate weight settings allow the wholesale ignoring of
      decoding graphs (typically a translation table). This function
      checks if a feature function should be evaluated given the
      current weight setting */
  bool IsDecodingGraphIgnored( const size_t id ) const {
    if (!GetHasAlternateWeightSettings()) {
      return false;
    }
    std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
    =  m_weightSettingIgnoreDP.find( m_currentWeightSetting );
    if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
      return false;
    }
    const std::set< size_t > &ignoreDP = lookupIgnoreDP->second;
    return ignoreDP.count( id );
  }

  /** process alternate weight settings
    * (specified with [alternate-weight-setting] in config file) */
  void SetWeightSetting(const std::string &settingName) const {

    // if no change in weight setting, do nothing
    if (m_currentWeightSetting == settingName) {
      return;
    }

    // model must support alternate weight settings
    if (!GetHasAlternateWeightSettings()) {
      std::cerr << "Warning: Input specifies weight setting, but model does not support alternate weight settings.";
      return;
    }

    // find the setting
    m_currentWeightSetting = settingName;
    std::map< std::string, ScoreComponentCollection* >::const_iterator i =
      m_weightSetting.find( settingName );

    // if not found, resort to default
    if (i == m_weightSetting.end()) {
      std::cerr << "Warning: Specified weight setting " << settingName
                << " does not exist in model, using default weight setting instead";
      i = m_weightSetting.find( "default" );
      m_currentWeightSetting = "default";
    }

    // set weights
    m_allWeights = *(i->second);
  }

  float GetWeightWordPenalty() const;

  const std::vector<DecodeGraph*>& GetDecodeGraphs() const {
    return m_decodeGraphs;
  }

  //sentence (and thread) specific initialisationn and cleanup
  void InitializeForInput(ttasksptr const& ttask) const;
  void CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const;

  void LoadFeatureFunctions();
  bool CheckWeights() const;
  void LoadSparseWeightsFromConfig();
  bool LoadWeightSettings();
  bool LoadAlternateWeightSettings();

  std::map<std::string, std::string> OverrideFeatureNames();
  void OverrideFeatures();

  const FeatureRegistry &GetFeatureRegistry() const {
    return m_registry;
  }

  const PhrasePropertyFactory &GetPhrasePropertyFactory() const {
    return m_phrasePropertyFactory;
  }

  /** check whether we should be using the old code to support binary phrase-table.
  ** eventually, we'll stop support the binary phrase-table and delete this legacy code
  **/
  void CheckLEGACYPT();
  bool GetUseLegacyPT() const {
    return m_useLegacyPT;
  }

  void SetSoftMatches(std::vector<std::vector<Word> >& softMatchesMap) {
    m_softMatchesMap = softMatchesMap;
  }

  const std::vector< std::vector<Word> >& GetSoftMatches() const {
    return m_softMatchesMap;
  }

  void ResetWeights(const std::string &denseWeights, const std::string &sparseFile);

  // need global access for output of tree structure
  const StatefulFeatureFunction* GetTreeStructure() const {
    return m_treeStructure;
  }

  void SetTreeStructure(const StatefulFeatureFunction* treeStructure) {
    m_treeStructure = treeStructure;
  }

  bool RequireSortingAfterSourceContext() const {
    return m_requireSortingAfterSourceContext;
  }

  // Coordinate spaces
  size_t GetCoordSpace(std::string space) const;
  size_t MapCoordSpace(std::string space);
};

}
#endif