Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ulrich Germann <Ulrich.Germann@gmail.com> 2015-04-21 19:48:48 +0300
committer: Ulrich Germann <Ulrich.Germann@gmail.com> 2015-04-21 19:48:48 +0300
commit: 2c0851099b19362d70972ca298623ff9c1d68817 (patch)
tree: ca48506ea9e2c169508aa6f1f8ef2cdda8d2934f /moses/TranslationModel/UG/mm
parent: 0d13edae240e4c39c165be1c9f355417d212ad3f (diff)
Work on integrating hierarchical lexicalized reordering models with sampled phrase tables.
Diffstat (limited to 'moses/TranslationModel/UG/mm')
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext_jstats.h | 3
-rw-r--r-- moses/TranslationModel/UG/mm/ug_lexical_reordering.cc | 24
-rw-r--r-- moses/TranslationModel/UG/mm/ug_phrasepair.h | 119
3 files changed, 126 insertions, 20 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
index 13c86e34d..ce2e89438 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
@@ -43,6 +43,9 @@ namespace Moses
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
+ void fill_lr_vec(Moses::LRModel::Direction const& dir,
+ Moses::LRModel::ModelType const& mdl,
+ vector<float>& v);
};
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
index 706c042c0..00f499f88 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
@@ -126,11 +126,25 @@ namespace Moses
T = x = e1; B = a1.size()-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
- while (s2 && a2[s2].size() == 0) --s2;
- if (a2[s2].size() == 0) return po_other;
- if (a2[s2].back() < s1) return Moses::LRModel::DR;
- if (a2[s2].front() >= e1) return Moses::LRModel::DL;
- return po_other;
+ size_t s2x = s2;
+ while (s2-- && a2[s2].size() == 0);
+
+ Moses::LRModel::ReorderingType ret;
+ ret = (a2[s2].size() == 0 ? po_other :
+ a2[s2].back() < s1 ? Moses::LRModel::DR :
+ a2[s2].front() >= e1 ? Moses::LRModel::DL :
+ po_other);
+#if 0
+ cout << "s1=" << s1 << endl;
+ cout << "s2=" << s2x << "=>" << s2 << endl;
+ cout << "e1=" << e1 << endl;
+ cout << "e2=" << e2 << endl;
+ cout << "a2[s2].size()=" << a2[s2].size() << endl;
+ cout << "a2[s2].back()=" << a2[s2].back() << endl;
+ cout << "a2[s2].front()=" << a2[s2].front() << endl;
+ cout << "RETURNING " << ret << endl;
+#endif
+ return ret;
}
} // namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
index 28a926587..70d4b0d82 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -3,7 +3,9 @@
#include <vector>
#include "ug_typedefs.h"
#include "ug_bitext_pstats.h"
-
+#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
+#include "boost/format.hpp"
+#include "tpt_tokenindex.h"
namespace Moses
{
namespace bitext
@@ -45,7 +47,15 @@ namespace Moses
PhrasePair const&
update(uint64_t const pid2, Token const* x,
uint32_t const len, jstats const& js);
-
+
+ void
+ fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
+ vector<float>& v) const;
+ void
+ print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ LRModel const& LR) const;
+
class SortByTargetIdSeq
{
public:
@@ -98,20 +108,20 @@ namespace Moses
assert(js.aln().size());
if (js.aln().size())
aln = js.aln()[0].second;
- float total_fwd = 0, total_bwd = 0;
- for (int i = 0; i <= Moses::LRModel::NONE; i++)
- {
- PhraseOrientation po = static_cast<PhraseOrientation>(i);
- total_fwd += js.dcnt_fwd(po)+1;
- total_bwd += js.dcnt_bwd(po)+1;
- }
+ // float total_fwd = 0, total_bwd = 0;
+ // for (int i = 0; i <= Moses::LRModel::NONE; i++)
+ // {
+ // PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ // total_fwd += js.dcnt_fwd(po)+1;
+ // total_bwd += js.dcnt_bwd(po)+1;
+ // }
// should we do that here or leave the raw counts?
for (int i = 0; i <= Moses::LRModel::NONE; i++)
{
PhraseOrientation po = static_cast<PhraseOrientation>(i);
- dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
- dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
+ dfwd[i] = js.dcnt_fwd(po);
+ dbwd[i] = js.dcnt_bwd(po);
}
indoc = js.indoc;
@@ -162,6 +172,7 @@ namespace Moses
joint += o.joint;
sample1 += o.sample1;
sample2 += o.sample2;
+ // todo: add distortion counts
return *this;
}
@@ -226,7 +237,8 @@ namespace Moses
}
template<typename Token>
- bool PhrasePair<Token>
+ bool
+ PhrasePair<Token>
::SortDescendingByJointCount
::operator()(PhrasePair const& a, PhrasePair const& b) const
{
@@ -234,7 +246,8 @@ namespace Moses
}
template<typename Token>
- void PhrasePair<Token>
+ void
+ PhrasePair<Token>
::init()
{
inverse = false;
@@ -242,5 +255,81 @@ namespace Moses
start1 = start2 = NULL;
p1 = p2 = 0;
}
- }
-}
+
+
+ void
+ fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
+ float const total, float* v);
+
+ template<typename Token>
+ void
+ PhrasePair<Token>
+ ::fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
+ vector<float>& v) const
+ {
+ // how many distinct scores do we have?
+ size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
+ size_t offset;
+ if (dir == LRModel::Bidirectional)
+ {
+ offset = num_scores;
+ num_scores *= 2;
+ }
+ else offset = 0;
+
+ v.resize(num_scores);
+
+ // determine the denominator
+ float total = 0;
+ for (size_t i = 0; i <= LRModel::NONE; ++i)
+ total += dfwd[i];
+
+ if (dir != LRModel::Forward) // i.e., Backward or Bidirectional
+ fill_lr_vec2(mdl, dbwd, total, &v[0]);
+ if (dir != LRModel::Backward) // i.e., Forward or Bidirectional
+ fill_lr_vec2(mdl, dfwd, total, &v[offset]);
+ }
+
+
+ template<typename Token>
+ void
+ PhrasePair<Token>
+ ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ LRModel const& LR) const
+ {
+ out << toString (V1, this->start1, this->len1) << " ::: "
+ << toString (V2, this->start2, this->len2) << " "
+ << this->joint << " [";
+ for (size_t i = 0; i < this->indoc.size(); ++i)
+ {
+ if (i) out << " ";
+ out << this->indoc[i];
+ }
+ out << "] [";
+ vector<float> lrscores;
+ this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
+ for (size_t i = 0; i < lrscores.size(); ++i)
+ {
+ if (i) out << " ";
+ out << boost::format("%.2f") % exp(lrscores[i]);
+ }
+ out << "]" << endl;
+#if 0
+ for (int i = 0; i <= Moses::LRModel::NONE; i++)
+ {
+ // PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ if (i) *log << " ";
+ *log << p.dfwd[i];
+ }
+ *log << "] [";
+ for (int i = 0; i <= Moses::LRModel::NONE; i++)
+ {
+ // PhraseOrientation po = static_cast<PhraseOrientation>(i);
+ if (i) *log << " ";
+ *log << p.dbwd[i];
+ }
+#endif
+ }
+ } // namespace bitext
+} // namespace Moses