diff options
author | Hieu Hoang <hieu@hoang.co.uk> | 2013-05-24 21:02:49 +0400 |
---|---|---|
committer | Hieu Hoang <hieu@hoang.co.uk> | 2013-05-24 21:02:49 +0400 |
commit | a80d838b0fa6eda3e45d2403f06fb3fe5559953d (patch) | |
tree | e4eae6ecbb30171e5159b543bc74c5964fe552e3 /moses/FF/GlobalLexicalModelUnlimited.h | |
parent | 6c322ba05ec745cbe97500e2206518e05e92376d (diff) |
move feature functions into moses/FF
Diffstat (limited to 'moses/FF/GlobalLexicalModelUnlimited.h')
-rw-r--r-- | moses/FF/GlobalLexicalModelUnlimited.h | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h new file mode 100644 index 000000000..f0bab7345 --- /dev/null +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -0,0 +1,98 @@ +#ifndef GLOBALLEXICALMODELUNLIMITED_H_ +#define GLOBALLEXICALMODELUNLIMITED_H_ + +#include <stdexcept> +#include <string> +#include <vector> +#include "FeatureFunction.h" +#include "moses/Factor.h" +#include "moses/Phrase.h" +#include "moses/TypeDef.h" +#include "moses/Util.h" +#include "moses/WordsRange.h" +#include "moses/FactorTypeSet.h" +#include "moses/Sentence.h" + +#include "moses/FF/FFState.h" + +#ifdef WITH_THREADS +#include <boost/thread/tss.hpp> +#endif + +namespace Moses +{ + +class Factor; +class Phrase; +class Hypothesis; +class InputType; + +/** Discriminatively trained global lexicon model + * This is a implementation of Mauser et al., 2009's model that predicts + * each output word from _all_ the input words. The intuition behind this + * feature is that it uses context words for disambiguation + */ + +class GlobalLexicalModelUnlimited : public StatelessFeatureFunction +{ + typedef std::map< char, short > CharHash; + typedef std::map< std::string, short > StringHash; + + struct ThreadLocalStorage + { + const Sentence *input; + }; + +private: +#ifdef WITH_THREADS + boost::thread_specific_ptr<ThreadLocalStorage> m_local; +#else + std::auto_ptr<ThreadLocalStorage> m_local; +#endif + + CharHash m_punctuationHash; + + std::vector< FactorType > m_inputFactors; + std::vector< FactorType > m_outputFactors; + bool m_unrestricted; + + bool m_sourceContext; + bool m_biphrase; + bool m_bitrigger; + + bool m_biasFeature; + bool m_ignorePunctuation; + + std::set<std::string> m_vocabSource; + std::set<std::string> m_vocabTarget; + +public: + GlobalLexicalModelUnlimited(const std::string &line); + + bool Load(const std::string &filePathSource, const std::string &filePathTarget); + + void InitializeForInput( Sentence const& in ); + + const FFState* EmptyHypothesisState(const InputType &) const { + return new DummyState(); + } + + //TODO: This implements the old interface, but cannot be updated because + //it appears to be stateful + void Evaluate(const Hypothesis& cur_hypo, + ScoreComponentCollection* accumulator) const; + + void EvaluateChart(const ChartHypothesis& /* cur_hypo */, + int /* featureID */, + ScoreComponentCollection* ) const { + throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet"); + } + + void AddFeature(ScoreComponentCollection* accumulator, + StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger, + StringPiece targetWord) const; + +}; + +} +#endif /* GLOBALLEXICALMODELUNLIMITED_H_ */ |