diff options
author | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-04-21 19:48:48 +0300 |
---|---|---|
committer | Ulrich Germann <Ulrich.Germann@gmail.com> | 2015-04-21 19:48:48 +0300 |
commit | 2c0851099b19362d70972ca298623ff9c1d68817 (patch) | |
tree | ca48506ea9e2c169508aa6f1f8ef2cdda8d2934f /moses/TranslationModel/UG/mm | |
parent | 0d13edae240e4c39c165be1c9f355417d212ad3f (diff) |
Work on integrating hierarchical lexicalized reordering models with sampled phrase tables.
Diffstat (limited to 'moses/TranslationModel/UG/mm')
-rw-r--r-- | moses/TranslationModel/UG/mm/ug_bitext_jstats.h | 3 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mm/ug_lexical_reordering.cc | 24 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mm/ug_phrasepair.h | 119 |
3 files changed, 126 insertions, 20 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
index 13c86e34d..ce2e89438 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
@@ -43,6 +43,9 @@ namespace Moses
       bool valid();
       uint32_t dcnt_fwd(PhraseOrientation const idx) const;
       uint32_t dcnt_bwd(PhraseOrientation const idx) const;
+      void fill_lr_vec(Moses::LRModel::Direction const& dir,
+                       Moses::LRModel::ModelType const& mdl,
+                       vector<float>& v);
     };
   }
 }
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
index 706c042c0..00f499f88 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
@@ -126,11 +126,25 @@ namespace Moses
       T = x = e1; B = a1.size()-1;
       if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
         return Moses::LRModel::S;
-      while (s2 && a2[s2].size() == 0) --s2;
-      if (a2[s2].size() == 0) return po_other;
-      if (a2[s2].back() < s1) return Moses::LRModel::DR;
-      if (a2[s2].front() >= e1) return Moses::LRModel::DL;
-      return po_other;
+      size_t s2x = s2;
+      while (s2 && a2[--s2].size() == 0); // skip empty a2 entries before s2
+      // (a post-decrement test `s2--` would leave s2 one below 0 on exit)
+      Moses::LRModel::ReorderingType ret;
+      ret = (a2[s2].size() == 0 ? po_other :
+             a2[s2].back() < s1 ? Moses::LRModel::DR :
+             a2[s2].front() >= e1 ? Moses::LRModel::DL :
+             po_other);
+#if 0
+      cout << "s1=" << s1 << endl;
+      cout << "s2=" << s2x << "=>" << s2 << endl;
+      cout << "e1=" << e1 << endl;
+      cout << "e2=" << e2 << endl;
+      cout << "a2[s2].size()=" << a2[s2].size() << endl;
+      cout << "a2[s2].back()=" << a2[s2].back() << endl;
+      cout << "a2[s2].front()=" << a2[s2].front() << endl;
+      cout << "RETURNING " << ret << endl;
+#endif
+      return ret;
     }
 
   } // namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
index 28a926587..70d4b0d82 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -3,7 +3,9 @@
 #include <vector>
 #include "ug_typedefs.h"
 #include "ug_bitext_pstats.h"
-
+#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
+#include "boost/format.hpp"
+#include "tpt_tokenindex.h"
 namespace Moses
 {
   namespace bitext
@@ -45,7 +47,15 @@ namespace Moses
       PhrasePair const&
       update(uint64_t const pid2, Token const* x,
              uint32_t const len, jstats const& js);
-
+
+      void
+      fill_lr_vec(LRModel::Direction const& dir,
+                  LRModel::ModelType const& mdl,
+                  vector<float>& v) const;
+      void
+      print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+            LRModel const& LR) const;
+
       class SortByTargetIdSeq
       {
       public:
@@ -98,20 +108,20 @@ namespace Moses
       assert(js.aln().size());
       if (js.aln().size())
         aln = js.aln()[0].second;
-      float total_fwd = 0, total_bwd = 0;
-      for (int i = 0; i <= Moses::LRModel::NONE; i++)
-        {
-          PhraseOrientation po = static_cast<PhraseOrientation>(i);
-          total_fwd += js.dcnt_fwd(po)+1;
-          total_bwd += js.dcnt_bwd(po)+1;
-        }
+      // float total_fwd = 0, total_bwd = 0;
+      // for (int i = 0; i <= Moses::LRModel::NONE; i++)
+      //   {
+      //     PhraseOrientation po = static_cast<PhraseOrientation>(i);
+      //     total_fwd += js.dcnt_fwd(po)+1;
+      //     total_bwd += js.dcnt_bwd(po)+1;
+      //   }
 
       // should we do that here or leave the raw counts?
       for (int i = 0; i <= Moses::LRModel::NONE; i++)
         {
           PhraseOrientation po = static_cast<PhraseOrientation>(i);
-          dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
-          dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
+          dfwd[i] = js.dcnt_fwd(po);
+          dbwd[i] = js.dcnt_bwd(po);
         }
 
       indoc = js.indoc;
@@ -162,6 +172,7 @@ namespace Moses
       joint += o.joint;
       sample1 += o.sample1;
       sample2 += o.sample2;
+      // todo: add distortion counts
       return *this;
     }
 
@@ -226,7 +237,8 @@ namespace Moses
     }
 
     template<typename Token>
-    bool PhrasePair<Token>
+    bool
+    PhrasePair<Token>
     ::SortDescendingByJointCount
     ::operator()(PhrasePair const& a, PhrasePair const& b) const
     {
@@ -234,7 +246,8 @@ namespace Moses
     }
 
     template<typename Token>
-    void PhrasePair<Token>
+    void
+    PhrasePair<Token>
     ::init()
     {
       inverse = false;
@@ -242,5 +255,81 @@ namespace Moses
       start1 = start2 = NULL;
       p1 = p2 = 0;
     }
-  }
-}
+
+    // Helper called by fill_lr_vec() below; only declared in this header.
+    void
+    fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
+                 float const total, float* v);
+    // Size v for mdl and dir; backward scores go to v[0], forward to v[offset].
+    template<typename Token>
+    void
+    PhrasePair<Token>
+    ::fill_lr_vec(LRModel::Direction const& dir,
+                  LRModel::ModelType const& mdl,
+                  vector<float>& v) const
+    {
+      // how many distinct scores do we have?
+      size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
+      size_t offset;
+      if (dir == LRModel::Bidirectional)
+        {
+          offset = num_scores;
+          num_scores *= 2;
+        }
+      else offset = 0;
+
+      v.resize(num_scores);
+
+      // determine the denominator
+      float total = 0;
+      for (size_t i = 0; i <= LRModel::NONE; ++i)
+        total += dfwd[i];
+
+      if (dir != LRModel::Forward) // i.e., Backward or Bidirectional
+        fill_lr_vec2(mdl, dbwd, total, &v[0]);
+      if (dir != LRModel::Backward) // i.e., Forward or Bidirectional
+        fill_lr_vec2(mdl, dfwd, total, &v[offset]);
+    }
+    // Print both phrases, the joint count, the indoc entries and the LR
+    // scores (each written as exp(score) with format "%.2f") to out.
+    template<typename Token>
+    void
+    PhrasePair<Token>
+    ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+            LRModel const& LR) const
+    {
+      out << toString (V1, this->start1, this->len1) << " ::: "
+          << toString (V2, this->start2, this->len2) << " "
+          << this->joint << " [";
+      for (size_t i = 0; i < this->indoc.size(); ++i)
+        {
+          if (i) out << " ";
+          out << this->indoc[i];
+        }
+      out << "] [";
+      vector<float> lrscores;
+      this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
+      for (size_t i = 0; i < lrscores.size(); ++i)
+        {
+          if (i) out << " ";
+          out << boost::format("%.2f") % exp(lrscores[i]);
+        }
+      out << "]" << endl;
+#if 0
+      for (int i = 0; i <= Moses::LRModel::NONE; i++)
+        {
+          // PhraseOrientation po = static_cast<PhraseOrientation>(i);
+          if (i) *log << " ";
+          *log << p.dfwd[i];
+        }
+      *log << "] [";
+      for (int i = 0; i <= Moses::LRModel::NONE; i++)
+        {
+          // PhraseOrientation po = static_cast<PhraseOrientation>(i);
+          if (i) *log << " ";
+          *log << p.dbwd[i];
+        }
+#endif
+    }
+  } // namespace bitext
+} // namespace Moses