Welcome to mirror list, hosted at ThFree Co, Russian Federation.

PhrasePairFeature.h « src « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
blob: 6dab1a99893ab1ac322ab625cf5891e73fc7aefd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>

#include "Factor.h"
#include "FeatureFunction.h"
#include "Sentence.h"

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif

namespace Moses {

/**
  * Phrase pair feature: complete source/target phrase pair.
  *
  * Stateless feature function registered under the short name "pp" with an
  * unlimited number of score components. Only the constructor and a few
  * trivial accessors are defined in this header; the evaluation logic is
  * implemented elsewhere.
  **/
class PhrasePairFeature: public StatelessFeatureFunction {

  /// Lookup table keyed by a single char (one byte); see BuildPunctuationHash().
  typedef std::map< char, short > CharHash;

  /// Data kept in m_local: a pointer to an input sentence.
  /// NOTE(review): presumably populated by InitializeForInput() -- confirm in
  /// the implementation file.
  struct ThreadLocalStorage
  {
    const Sentence *input;
  };

  private:
    // One storage slot per thread when built WITH_THREADS, a single slot
    // otherwise.
#ifdef WITH_THREADS
    boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
    std::auto_ptr<ThreadLocalStorage> m_local;
#endif

    FactorType m_sourceFactorId;
    FactorType m_targetFactorId;
    bool m_unrestricted;           ///< true until a restricted-feature file is loaded
    bool m_simple;
    bool m_sourceContext;
    float m_sparseProducerWeight;  ///< initialised to 1
    bool m_ignorePunctuation;
    CharHash m_punctuationHash;    ///< filled only when m_ignorePunctuation is set

    std::set<std::string> m_limitedFeatures;  ///< one entry per line of the feature file

    /// Registers every byte of the punctuation/digit literal in
    /// m_punctuationHash.
    ///
    /// NOTE(review): the literal contains non-ASCII characters. In a UTF-8
    /// encoded source file each of them spans several bytes, and every byte is
    /// inserted as its own key, so a lookup by a single char may also match
    /// bytes that belong to unrelated multi-byte characters. Confirm this is
    /// what the code consulting the table expects.
    void BuildPunctuationHash() {
      static const char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
      const size_t numBytes = sizeof(punctuation) - 1;  // skip the trailing NUL
      for (size_t i = 0; i < numBytes; ++i)
        m_punctuationHash[punctuation[i]] = 1;
    }

    /// Reads filePath line by line into m_limitedFeatures and clears
    /// m_unrestricted. Terminates the process with exit status 1 when the
    /// file cannot be opened.
    void LoadLimitedFeatures(const std::string& filePath) {
      std::cerr << "Loading features for restricted training.. ";
      std::ifstream inFile(filePath.c_str());
      if (!inFile) {
        std::cerr << "could not open file " << filePath << std::endl;
        std::exit(1);
      }

      std::string line;
      while (std::getline(inFile, line))
        m_limitedFeatures.insert(line);
      // inFile is closed by its destructor.

      m_unrestricted = false;
      std::cerr << "done." << std::endl;
    }

  public:
    /**
      * Stores the configuration, reports it on stderr and performs the
      * optional set-up steps.
      *
      * \param sourceFactorId     stored in m_sourceFactorId
      * \param targetFactorId     stored in m_targetFactorId
      * \param simple             stored in m_simple
      * \param sourceContext      stored in m_sourceContext
      * \param ignorePunctuation  when true, the punctuation table is built
      * \param filePath           when non-empty, each line of this file is
      *                           loaded into the set of limited features and
      *                           the feature is marked as restricted; the
      *                           process exits with status 1 if the file
      *                           cannot be opened
      */
    PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId,
        bool simple, bool sourceContext, bool ignorePunctuation, const std::string& filePath) :
          StatelessFeatureFunction("pp", ScoreProducer::unlimited),
      m_sourceFactorId(sourceFactorId),
      m_targetFactorId(targetFactorId),
      m_unrestricted(true),
      m_simple(simple),
      m_sourceContext(sourceContext),
      m_sparseProducerWeight(1),
      m_ignorePunctuation(ignorePunctuation) {
      std::cerr << "Creating phrase pair feature.. " << std::endl;
      if (m_simple) std::cerr << "using simple phrase pairs.. ";
      if (m_sourceContext) std::cerr << "using source context.. ";

      // compile a list of punctuation characters
      if (m_ignorePunctuation) {
        std::cerr << "ignoring punctuation for triggers.. ";
        BuildPunctuationHash();
      }

      std::cerr << "done." << std::endl;

      // only temporary: features for restricted training
      if (!filePath.empty())
        LoadLimitedFeatures(filePath);
    }

    void Evaluate(const Hypothesis& cur_hypo,
                  ScoreComponentCollection* accumulator) const;

    /// Always trips CHECK(0): this feature function is not valid in the
    /// chart decoder.
    void EvaluateChart(
      const ChartHypothesis&,
      int /* featureID */,
      ScoreComponentCollection*) const {
      CHECK(0); // feature function not valid in chart decoder
    }

    //NB: Should really precompute this feature, but don't have
    //good hooks to do this.
    bool ComputeValueInTranslationOption() const;

    std::string GetScoreProducerWeightShortName(unsigned) const;
    size_t GetNumInputScores() const;

    void InitializeForInput( Sentence const& in );

    /// Overwrites the sparse producer weight (1 after construction).
    void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
    /// Returns the current sparse producer weight.
    float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};

}


#endif