diff options
Diffstat (limited to 'moses/TranslationModel/UG/mmsapt.h')
-rw-r--r-- | moses/TranslationModel/UG/mmsapt.h | 87 |
1 files changed, 64 insertions, 23 deletions
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index b6be36131..a7ece8fdb 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -19,6 +19,7 @@ #include "moses/TranslationModel/UG/mm/ug_typedefs.h" #include "moses/TranslationModel/UG/mm/tpt_pickler.h" #include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "moses/TranslationModel/UG/mm/ug_phrasepair.h" #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h" #include "moses/InputFileStream.h" @@ -29,7 +30,8 @@ #include <map> #include "moses/TranslationModel/PhraseDictionary.h" -#include "mmsapt_phrase_scorers.h" +#include "mmsapt_phrase_scorers.h" // deprecated +#include "sapt_phrase_scorers.h" // TO DO: // - make lexical phrase scorer take addition to the "dynamic overlay" into account @@ -47,47 +49,68 @@ namespace Moses #endif { friend class Alignment; + map<string,string> param; public: typedef L2R_Token<SimpleWordId> Token; typedef mmBitext<Token> mmbitext; typedef imBitext<Token> imbitext; + typedef Bitext<Token> bitext; typedef TSA<Token> tsa; typedef PhraseScorer<Token> pscorer; + private: + // vector<sptr<bitext> > shards; mmbitext btfix; - sptr<imbitext> btdyn; + sptr<imbitext> btdyn; string bname,extra_data; string L1; string L2; - float m_lbop_parameter; - float m_lex_alpha; + float m_lbop_conf; // confidence level for lbop smoothing + float m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha) // must be > 0 if dynamic size_t m_default_sample_size; size_t m_workers; // number of worker threads for sampling the bitexts - // deprecated! - char m_pfwd_denom; // denominator for computation of fwd phrase score: - // 'r' - divide by raw count - // 's' - divide by sample count - // 'g' - devide by number of "good" (i.e. coherent) samples - // size_t num_features; + // // deprecated! 
+ // char m_pfwd_denom; // denominator for computation of fwd phrase score: + // // 'r' - divide by raw count + // // 's' - divide by sample count + // // 'g' - divide by number of "good" (i.e. coherent) samples + // // size_t num_features; size_t input_factor; size_t output_factor; // we can actually return entire Tokens! - bool withLogCountFeatures; // add logs of counts as features? - bool withCoherence; - string m_pfwd_features; // which pfwd functions to use - string m_pbwd_features; // which pbwd functions to use + // bool withLogCountFeatures; // add logs of counts as features? + // bool withCoherence; + // string m_pfwd_features; // which pfwd functions to use + // string m_pbwd_features; // which pbwd functions to use + + // for display for human inspection (ttable dumps): vector<string> m_feature_names; // names of features activated + vector<bool> m_is_logval; // keeps track of which features are log valued + vector<bool> m_is_integer; // keeps track of which features are integer valued + vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix) vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn) vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (dyn) - size_t - add_corpus_specific_features - (vector<sptr<pscorer > >& ffvec, size_t num_feats); + void + register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry); + + template<typename fftype> + void + check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL); + // add feature function if specified + + template<typename fftype> + void + check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL); + // add feature function if specified + + void + add_corpus_specific_features(vector<sptr<pscorer > >& ffvec); // built-in feature functions // PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn; @@ -140,12 +163,24 @@ namespace Moses mm2dtable_t COOCraw; TargetPhrase* - createTargetPhrase + 
mkTPhrase(Phrase const& src, + Moses::bitext::PhrasePair<Token>* fix, + Moses::bitext::PhrasePair<Token>* dyn, + sptr<Bitext<Token> > const& dynbt) const; + + // template<typename Token> + // void + // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt, + // pstats const& pstats, vector<PhrasePair<Token> >& dest); + +#if 0 + TargetPhrase* + mkTPhrase (Phrase const& src, Bitext<Token> const& bt, - bitext::PhrasePair const& pp + Moses::bitext::PhrasePair const& pp ) const; - +#endif void process_pstats (Phrase const& src, @@ -180,7 +215,7 @@ namespace Moses ) const; void - load_extra_data(string bname); + load_extra_data(string bname, bool locking); mutable size_t m_tpc_ctr; public: @@ -231,8 +266,14 @@ namespace Moses vector<string> const& GetFeatureNames() const; - void - ScorePPfix(bitext::PhrasePair& pp) const; + // void + // ScorePPfix(bitext::PhrasePair& pp) const; + + bool + isLogVal(int i) const; + + bool + isInteger(int i) const; private: }; |