Welcome to mirror list, hosted at ThFree Co, Russian Federation.

GlobalLexicalModelUnlimited.h « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c05c11be8608e07d121aa989d6d803db7cd5c339 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#ifndef GLOBALLEXICALMODELUNLIMITED_H_
#define GLOBALLEXICALMODELUNLIMITED_H_

#include <string>
#include <vector>
#include "Factor.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Util.h"
#include "WordsRange.h"
#include "ScoreProducer.h"
#include "FeatureFunction.h"
#include "FactorTypeSet.h"
#include "Sentence.h"

#include "FFState.h"

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif

namespace Moses
{

class Factor;
class Phrase;
class Hypothesis;
class InputType;

/** Discriminatively trained global lexicon model
 * This is a implementation of Mauser et al., 2009's model that predicts
 * each output word from _all_ the input words. The intuition behind this
 * feature is that it uses context words for disambiguation
 */

class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
{
	typedef std::map< char, short > CharHash;
	typedef std::map< std::string, short > StringHash;

  struct ThreadLocalStorage
  {
    const Sentence *input;
  };

private:
#ifdef WITH_THREADS
  boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
  std::auto_ptr<ThreadLocalStorage> m_local;
#endif

  CharHash m_punctuationHash;

  std::vector< FactorType > m_inputFactors;
  std::vector< FactorType > m_outputFactors;
  bool m_unrestricted;

  bool m_sourceContext;
  bool m_biphrase;
  bool m_bitrigger;

  bool m_biasFeature;
  float m_sparseProducerWeight;
  bool m_ignorePunctuation;

  std::set<std::string> m_vocabSource;
  std::set<std::string> m_vocabTarget;

public:
  GlobalLexicalModelUnlimited(const std::vector< FactorType >& inFactors, const std::vector< FactorType >& outFactors,
  		bool biasFeature, bool ignorePunctuation, size_t context):
    StatelessFeatureFunction("glm",ScoreProducer::unlimited),
  	m_inputFactors(inFactors),
    m_outputFactors(outFactors),
    m_unrestricted(true),
    m_sourceContext(false),
    m_biphrase(false),
    m_bitrigger(false),
    m_biasFeature(biasFeature),
    m_sparseProducerWeight(1),
    m_ignorePunctuation(ignorePunctuation)
  {
  	std::cerr << "Creating global lexical model unlimited.. ";


		switch(context) {
			case 1:
				m_sourceContext = true;
				std::cerr << "using source context.. ";
				break;
			case 2:
				m_biphrase = true;
				std::cerr << "using biphrases.. ";
				break;
			case 3:
				std::cerr << "using bitriggers.. ";
				m_bitrigger = true;
				break;
		}

  	// compile a list of punctuation characters
  	if (m_ignorePunctuation) {
  		std::cerr << "ignoring punctuation.. ";
  		char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
  		for (size_t i=0; i < sizeof(punctuation)-1; ++i)
  			m_punctuationHash[punctuation[i]] = 1;
  	}
  	std::cerr << "done." << std::endl;
  }

  std::string GetScoreProducerWeightShortName(unsigned) const {
    return "glm";
  };

  bool Load(const std::string &filePathSource, const std::string &filePathTarget);

  void InitializeForInput( Sentence const& in );

  const FFState* EmptyHypothesisState(const InputType &) const {
  	return new DummyState();
  }

  void Evaluate(const Hypothesis& cur_hypo,
  							ScoreComponentCollection* accumulator) const;

  void EvaluateChart(const ChartHypothesis& /* cur_hypo */,
  									 int /* featureID */,
  									 ScoreComponentCollection* ) const {
  	/* Not implemented */
    assert(0);
  }


  void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
  float GetSparseProducerWeight() const { return m_sparseProducerWeight; }

	void AddFeature(ScoreComponentCollection* accumulator, StringHash alreadyScored,
			std::string sourceTrigger, std::string sourceWord, std::string targetTrigger,
			std::string targetWord) const;
};

}
#endif /* GLOBALLEXICALMODELUNLIMITED_H_ */