Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel/UG/mmsapt.h')
-rw-r--r--moses/TranslationModel/UG/mmsapt.h87
1 files changed, 64 insertions, 23 deletions
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index b6be36131..a7ece8fdb 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -19,6 +19,7 @@
#include "moses/TranslationModel/UG/mm/ug_typedefs.h"
#include "moses/TranslationModel/UG/mm/tpt_pickler.h"
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"
#include "moses/InputFileStream.h"
@@ -29,7 +30,8 @@
#include <map>
#include "moses/TranslationModel/PhraseDictionary.h"
-#include "mmsapt_phrase_scorers.h"
+#include "mmsapt_phrase_scorers.h" // deprecated
+#include "sapt_phrase_scorers.h"
// TO DO:
// - make lexical phrase scorer take addition to the "dynamic overlay" into account
@@ -47,47 +49,68 @@ namespace Moses
#endif
{
friend class Alignment;
+ map<string,string> param;
public:
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
typedef imBitext<Token> imbitext;
+ typedef Bitext<Token> bitext;
typedef TSA<Token> tsa;
typedef PhraseScorer<Token> pscorer;
+
private:
+ // vector<sptr<bitext> > shards;
mmbitext btfix;
- sptr<imbitext> btdyn;
+ sptr<imbitext> btdyn;
string bname,extra_data;
string L1;
string L2;
- float m_lbop_parameter;
- float m_lex_alpha;
+ float m_lbop_conf; // confidence level for lbop smoothing
+ float m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing
// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
// must be > 0 if dynamic
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
- // deprecated!
- char m_pfwd_denom; // denominator for computation of fwd phrase score:
- // 'r' - divide by raw count
- // 's' - divide by sample count
- // 'g' - devide by number of "good" (i.e. coherent) samples
- // size_t num_features;
+ // // deprecated!
+ // char m_pfwd_denom; // denominator for computation of fwd phrase score:
+ // // 'r' - divide by raw count
+ // // 's' - divide by sample count
+ // // 'g' - divide by number of "good" (i.e. coherent) samples
+ // // size_t num_features;
size_t input_factor;
size_t output_factor; // we can actually return entire Tokens!
- bool withLogCountFeatures; // add logs of counts as features?
- bool withCoherence;
- string m_pfwd_features; // which pfwd functions to use
- string m_pbwd_features; // which pbwd functions to use
+ // bool withLogCountFeatures; // add logs of counts as features?
+ // bool withCoherence;
+ // string m_pfwd_features; // which pfwd functions to use
+ // string m_pbwd_features; // which pbwd functions to use
+
+ // for display for human inspection (ttable dumps):
vector<string> m_feature_names; // names of features activated
+ vector<bool> m_is_logval; // keeps track of which features are log valued
+ vector<bool> m_is_integer; // keeps track of which features are integer valued
+
vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (common)
- size_t
- add_corpus_specific_features
- (vector<sptr<pscorer > >& ffvec, size_t num_feats);
+ void
+ register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);
+
+ template<typename fftype>
+ void
+ check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);
+ // add feature function if specified
+
+ template<typename fftype>
+ void
+ check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);
+ // add feature function if specified
+
+ void
+ add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);
// built-in feature functions
// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
@@ -140,12 +163,24 @@ namespace Moses
mm2dtable_t COOCraw;
TargetPhrase*
- createTargetPhrase
+ mkTPhrase(Phrase const& src,
+ Moses::bitext::PhrasePair<Token>* fix,
+ Moses::bitext::PhrasePair<Token>* dyn,
+ sptr<Bitext<Token> > const& dynbt) const;
+
+ // template<typename Token>
+ // void
+ // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt,
+ // pstats const& pstats, vector<PhrasePair<Token> >& dest);
+
+#if 0
+ TargetPhrase*
+ mkTPhrase
(Phrase const& src,
Bitext<Token> const& bt,
- bitext::PhrasePair const& pp
+ Moses::bitext::PhrasePair const& pp
) const;
-
+#endif
void
process_pstats
(Phrase const& src,
@@ -180,7 +215,7 @@ namespace Moses
) const;
void
- load_extra_data(string bname);
+ load_extra_data(string bname, bool locking);
mutable size_t m_tpc_ctr;
public:
@@ -231,8 +266,14 @@ namespace Moses
vector<string> const&
GetFeatureNames() const;
- void
- ScorePPfix(bitext::PhrasePair& pp) const;
+ // void
+ // ScorePPfix(bitext::PhrasePair& pp) const;
+
+ bool
+ isLogVal(int i) const;
+
+ bool
+ isInteger(int i) const;
private:
};