// WordTranslationFeature.h (src/moses) - github.com/moses-smt/mosesdecoder.git
// blob: 39be24d412a43149a86cfb143044324211bc31d6
#ifndef moses_WordTranslationFeature_h
#define moses_WordTranslationFeature_h

#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>

#include "FeatureFunction.h"
#include "FactorCollection.h"

#include "Sentence.h"
#include "FFState.h"

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif

namespace Moses
{

/**
 * Sets the features for word translation.
 *
 * Derives from StatelessFeatureFunction and passes "wt" and
 * ScoreProducer::unlimited to the base-class constructor. The scoring
 * routines (Evaluate / EvaluateChart) as well as Load() and
 * InitializeForInput() are only declared here; their definitions live
 * outside this header.
 */
class WordTranslationFeature : public StatelessFeatureFunction {

private:
  /** Table keyed by a single char; the constructor stores 1 for every
   *  byte of its punctuation list. */
  typedef std::map< char, short > CharHash;

  /** Data held behind m_local: a pointer to an input sentence. */
  struct ThreadLocalStorage
  {
    const Sentence *input;
  };

  // With WITH_THREADS the storage is a boost::thread_specific_ptr (one
  // slot per thread); otherwise a single std::auto_ptr-held instance.
#ifdef WITH_THREADS
  boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
  std::auto_ptr<ThreadLocalStorage> m_local;
#endif

  // Source / target vocabularies -- presumably filled by Load(); confirm
  // against the implementation file.
  std::set<std::string> m_vocabSource;
  std::set<std::string> m_vocabTarget;

  // Factor types supplied to the constructor for source and target side.
  FactorType m_factorTypeSource;
  FactorType m_factorTypeTarget;

  // Always initialised to true by the constructor; where it is cleared
  // (if anywhere) is not visible in this header.
  bool m_unrestricted;

  // Feature-variant switches, copied verbatim from the constructor
  // arguments of the same name.
  bool m_simple;
  bool m_sourceContext;
  bool m_targetContext;

  // Weight accessible through Set/GetSparseProducerWeight(); starts at 1.
  float m_sparseProducerWeight;

  // When true, the constructor populates m_punctuationHash.
  bool m_ignorePunctuation;
  CharHash m_punctuationHash;

public:
  /**
   * Stores the configuration and reports it on std::cerr.
   *
   * \param factorTypeSource   factor type stored for the source side
   * \param factorTypeTarget   factor type stored for the target side
   * \param simple             stored in m_simple ("simple word translations")
   * \param sourceContext      stored in m_sourceContext
   * \param targetContext      stored in m_targetContext
   * \param ignorePunctuation  if true, m_punctuationHash is filled with the
   *                           bytes of the built-in punctuation list
   */
  WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
      bool simple, bool sourceContext, bool targetContext, bool ignorePunctuation):
    StatelessFeatureFunction("wt", ScoreProducer::unlimited),
    m_factorTypeSource(factorTypeSource),
    m_factorTypeTarget(factorTypeTarget),
    m_unrestricted(true),
    m_simple(simple),
    m_sourceContext(sourceContext),
    m_targetContext(targetContext),
    m_sparseProducerWeight(1),
    m_ignorePunctuation(ignorePunctuation)
  {
    std::cerr << "Initializing word translation feature.. ";
    if (m_simple) std::cerr << "using simple word translations.. ";
    if (m_sourceContext) std::cerr << "using source context.. ";
    if (m_targetContext) std::cerr << "using target context.. ";

    // compile a list of punctuation characters (the list also contains
    // the digits 0-9 and the backslash)
    if (m_ignorePunctuation) {
      std::cerr << "ignoring punctuation for triggers.. ";
      // NOTE(review): the literal contains non-ASCII characters. If this
      // file is stored as UTF-8, each of them spans several bytes and the
      // loop below registers every byte as a separate key -- confirm that
      // the lookups in the implementation file expect this.
      const char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
      // sizeof includes the terminating NUL, hence the -1.
      for (size_t i = 0; i < sizeof(punctuation) - 1; ++i)
        m_punctuationHash[punctuation[i]] = 1;
    }

    std::cerr << "done." << std::endl;
  }

  /** Takes one file path for the source side and one for the target side;
   *  defined outside this header. Presumably reads the two vocabularies --
   *  confirm against the implementation. */
  bool Load(const std::string &filePathSource, const std::string &filePathTarget);

  /** Per-sentence hook receiving the input sentence; defined outside this
   *  header. */
  void InitializeForInput( Sentence const& in );

//  void Evaluate(const TargetPhrase& cur_phrase, ScoreComponentCollection* accumulator) const;

  /** Returns a newly allocated DummyState; the input argument is unused.
   *  NOTE(review): ownership presumably passes to the caller -- confirm. */
  const FFState* EmptyHypothesisState(const InputType &) const {
    return new DummyState();
  }

  /** Scoring entry point taking a Hypothesis and an accumulator; defined
   *  outside this header. */
  void Evaluate(const Hypothesis& cur_hypo,
                ScoreComponentCollection* accumulator) const;

  /** Scoring entry point taking a ChartHypothesis, a feature id and an
   *  accumulator; defined outside this header. */
  void EvaluateChart(const ChartHypothesis& cur_hypo,
                     int featureID,
                     ScoreComponentCollection* accumulator) const;

  // basic properties

  /** Always returns "wt", regardless of the index argument. */
  std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; }

  /** Always returns 0. */
  size_t GetNumInputScores() const { return 0; }

  /** Overwrites the sparse producer weight. */
  void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }

  /** Returns the current sparse producer weight (1 unless changed). */
  float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};

}

#endif // moses_WordTranslationFeature_h