Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-03-10 13:41:41 +0300
committer Ulrich Germann <ugermann@inf.ed.ac.uk> 2015-03-10 13:41:41 +0300
commit 51824355f9f469c186d5376218e3396b92652617 (patch)
tree 6116e3bb624653a873c83b3751e23c5bc02b877c /moses/TranslationModel
parent ccf4cb838c46643b0365417dc553d8724c93440a (diff)
Sampling now keeps track of counts for hierarchical lexicalized reordering.
Diffstat (limited to 'moses/TranslationModel')
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext.cc 64
-rw-r--r-- moses/TranslationModel/UG/mm/ug_bitext.h 44
-rw-r--r-- moses/TranslationModel/UG/mm/ug_lexical_reordering.cc 136
-rw-r--r-- moses/TranslationModel/UG/mm/ug_lexical_reordering.h 26
4 files changed, 178 insertions, 92 deletions
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index ff1bd0c10..657902ba7 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -23,8 +23,8 @@ namespace Moses
, sum_pairs (0)
, in_progress (0)
{
- ofwd[0] = ofwd[1] = ofwd[2] = ofwd[3] = ofwd[4] = ofwd[5] = ofwd[6] = 0;
- obwd[0] = obwd[1] = obwd[2] = obwd[3] = obwd[4] = obwd[5] = obwd[6] = 0;
+ ofwd[0] = ofwd[1] = ofwd[2] = ofwd[3] = ofwd[4] = 0;
+ obwd[0] = obwd[1] = obwd[2] = obwd[3] = obwd[4] = 0;
}
pstats::
@@ -89,7 +89,7 @@ namespace Moses
my_rcnt = other.rcnt();
my_wcnt = other.wcnt();
my_aln = other.aln();
- for (int i = po_first; i <= po_other; i++)
+ for (size_t i = 0; i <= po_other; i++)
{
ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i];
@@ -410,63 +410,5 @@ namespace Moses
cout << string(90,'-') << endl;
}
- PhraseOrientation
- find_po_fwd(vector<vector<ushort> >& a1,
- vector<vector<ushort> >& a2,
- size_t b1, size_t e1,
- size_t b2, size_t e2)
- {
- size_t n2 = e2;
- while (n2 < a2.size() && a2[n2].size() == 0) ++n2;
-
- if (n2 == a2.size())
- return po_last;
-
- ushort ns1,ne1,ne2;
- if (!expand_phrase_pair(a1,a2,n2,b1,e1,ns1,ne1,ne2))
- return po_other;
-
- if (ns1 >= e1)
- {
- for (ushort j = e1; j < ns1; ++j)
- if (a1[j].size())
- return po_jfwd;
- return po_mono;
- }
- else
- {
- for (ushort j = ne1; j < b1; ++j)
- if (a1[j].size()) return po_jbwd;
- return po_swap;
- }
- }
-
-
- PhraseOrientation
- find_po_bwd(vector<vector<ushort> >& a1,
- vector<vector<ushort> >& a2,
- size_t b1, size_t e1,
- size_t b2, size_t e2)
- {
- int p2 = b2-1;
- while (p2 >= 0 && !a2[p2].size()) --p2;
- if (p2 < 0) return po_first;
- ushort ps1,pe1,pe2;
- if (!expand_phrase_pair(a1,a2,p2,b1,e1,ps1,pe1,pe2))
- return po_other;
-
- if (pe1 < b1)
- {
- for (ushort j = pe1; j < b1; ++j)
- if (a1[j].size()) return po_jfwd;
- return po_mono;
- }
- else
- {
- for (ushort j = e1; j < ps1; ++j)
- if (a1[j].size()) return po_jbwd;
- return po_swap;
- }
- }
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index bc7c75c07..9d1d06c6f 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -36,6 +36,7 @@
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
#include "moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h"
+#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#include "moses/Util.h"
#include "moses/StaticData.h"
@@ -53,6 +54,7 @@
#include "ug_lexical_phrase_scorer2.h"
#include "ug_phrasepair.h"
#include "ug_lru_cache.h"
+#include "ug_lexical_reordering.h"
#define PSTATS_CACHE_THRESHOLD 50
@@ -62,35 +64,13 @@ namespace Moses {
class Mmsapt;
namespace bitext
{
+
template<typename TKN> class Bitext;
template<typename TKN> class PhrasePair;
using namespace ugdiss;
template<typename TKN> class Bitext;
- enum PhraseOrientation
- {
- po_first,
- po_mono,
- po_jfwd,
- po_swap,
- po_jbwd,
- po_last,
- po_other
- };
-
- PhraseOrientation
- find_po_fwd(vector<vector<ushort> >& a1,
- vector<vector<ushort> >& a2,
- size_t b1, size_t e1,
- size_t b2, size_t e2);
-
- PhraseOrientation
- find_po_bwd(vector<vector<ushort> >& a1,
- vector<vector<ushort> >& a2,
- size_t b1, size_t e1,
- size_t b2, size_t e2);
-
template<typename sid_t, typename off_t, typename len_t>
void
parse_pid(::uint64_t const pid, sid_t & sid,
@@ -142,7 +122,8 @@ namespace Moses {
static ThreadSafeCounter active;
#endif
boost::mutex lock; // for parallel gathering of stats
- boost::condition_variable ready; // consumers can wait for this data structure to be ready.
+ boost::condition_variable ready; /* consumers can wait for this
+ * data structure to be ready. */
size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
@@ -150,8 +131,9 @@ namespace Moses {
size_t sum_pairs;
size_t in_progress; // keeps track of how many threads are currently working on this
- uint32_t ofwd[po_other+1], obwd[po_other+1];
-
+ // size_t Moses::LRModel::ReorderingType
+ uint32_t ofwd[Moses::LRModel::MAX+1], obwd[Moses::LRModel::MAX+1];
+
// typedef typename boost::unordered_map<typename ::uint64_t, jstats> trg_map_t;
typedef std::map<typename ::uint64_t, jstats> trg_map_t;
trg_map_t trg;
@@ -198,8 +180,8 @@ namespace Moses {
::uint64_t p1, p2;
uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
vector<float> fvals;
- float dfwd[po_other+1]; // distortion counts // counts or probs?
- float dbwd[po_other+1]; // distortion counts
+ float dfwd[Moses::LRModel::MAX+1]; // distortion counts // counts or probs?
+ float dbwd[Moses::LRModel::MAX+1]; // distortion counts
vector<uchar> aln;
float score;
bool inverse;
@@ -284,7 +266,7 @@ namespace Moses {
if (js.aln().size())
aln = js.aln()[0].second;
float total_fwd = 0, total_bwd = 0;
- for (int i = po_first; i <= po_other; i++)
+ for (int i = 0; i <= Moses::LRModel::MAX; i++)
{
PhraseOrientation po = static_cast<PhraseOrientation>(i);
total_fwd += js.dcnt_fwd(po)+1;
@@ -292,13 +274,13 @@ namespace Moses {
}
// should we do that here or leave the raw counts?
- for (int i = po_first; i <= po_other; i++)
+ for (int i = 0; i <= Moses::LRModel::MAX; i++)
{
PhraseOrientation po = static_cast<PhraseOrientation>(i);
dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
}
-
+
return *this;
}
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
new file mode 100644
index 000000000..34007c882
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
@@ -0,0 +1,136 @@
+#include "ug_lexical_reordering.h"
+namespace Moses
+{
+ namespace bitext
+ {
+ using namespace std;
+
+    // Fold alignment row/column v into the current bounds L and R: a bound is
+    // widened to v.front() / v.back() when v reaches beyond it (v is presumably
+    // sorted ascending -- TODO confirm). Returns false if a widened bound crosses
+    // the hard limit LFT / RGT; otherwise adds v.size() to count and returns true.
+    bool
+    check(vector<ushort> const& v, // alignment row/column
+          size_t const LFT, size_t const RGT, // hard limits
+          ushort& L, ushort& R, size_t& count) // current bounds, count
+    {
+      if (v.empty()) return false; // NOTE(review): empty v is a failure -- intended?
+      if (v.front() < L) { L = v.front(); if (L < LFT) return false; }
+      if (v.back() > R) { R = v.back(); if (R > RGT) return false; }
+      count += v.size();
+      return true;
+    }
+
+    /// Grow a box around a seed cell by repeatedly widening it to cover the
+    /// alignment points of every row and column it spans.
+    /// @param row2col,col2row  alignment points per row / per column
+    /// @param top,lft,bot,rgt  if non-NULL, receive the box on a positive return
+    /// @return number of alignment points in box, 0 if none is found from the
+    ///         seed, -1 on failure (seed or box outside the hard limits, or a
+    ///         row/column rejected by check())
+    int
+    expand_block(vector<vector<ushort> > const& row2col,
+                 vector<vector<ushort> > const& col2row,
+                 size_t row, size_t col, // seed coordinates
+                 size_t const TOP, size_t const LFT, // hard limits
+                 size_t const BOT, size_t const RGT, // hard limits
+                 ushort* top = NULL, ushort* lft = NULL,
+                 ushort* bot = NULL, ushort* rgt = NULL) // store results
+    {
+      if (row < TOP || row > BOT || col < LFT || col > RGT) return -1;
+      UTIL_THROW_IF2(row >= row2col.size(), "out of bounds");
+      UTIL_THROW_IF2(col >= col2row.size(), "out of bounds");
+      // tables grow downwards, so TOP is smaller than BOT!
+      // if we start on an empty cell, search for the first alignment point,
+      // walking away from whichever hard limit the seed sits on
+      if (row2col[row].empty() && col2row[col].empty())
+        {
+          if (row == TOP) while (row < BOT && row2col[++row].empty());
+          else if (row == BOT) while (row > TOP && row2col[--row].empty());
+          // scan left down to LFT (the old bound, col > RGT, could never hold here)
+          if (col == LFT) while (col < RGT && col2row[++col].empty());
+          else if (col == RGT) while (col > LFT && col2row[--col].empty());
+          if (row2col[row].empty() && col2row[col].empty())
+            return 0;
+        }
+      // if only one coordinate is aligned, move the other onto its first point
+      if (row2col[row].empty()) row = col2row[col].front();
+      if (col2row[col].empty()) col = row2col[row].front();
+      ushort T, L, B, R; // box dimensions, seeded from the seed's column and row
+      if ((T = col2row[col].front()) < TOP) return -1;
+      if ((B = col2row[col].back()) > BOT) return -1;
+      if ((L = row2col[row].front()) < LFT) return -1;
+      if ((R = row2col[row].back()) > RGT) return -1;
+      int ret = 1; // a box with B == T and R == L is reported as one point
+      if (B != T || R != L)
+        {
+          ushort rs = row, re = row, cs = col, ce = col; // rows/columns covered
+          ret = row2col[row].size();
+          for (size_t tmp = 1; tmp; ret += tmp) // repeat until a pass adds nothing
+            {
+              tmp = 0;
+              while (rs>T) if (!check(row2col[--rs],LFT,RGT,L,R,tmp)) return -1;
+              while (re<B) if (!check(row2col[++re],LFT,RGT,L,R,tmp)) return -1;
+              while (cs>L) if (!check(col2row[--cs],TOP,BOT,T,B,tmp)) return -1;
+              while (ce<R) if (!check(col2row[++ce],TOP,BOT,T,B,tmp)) return -1;
+            }
+        }
+      // reached for the one-point box too, so its result is stored as well
+      if (top) *top = T;
+      if (bot) *bot = B;
+      if (lft) *lft = L;
+      if (rgt) *rgt = R;
+      return ret;
+    }
+
+    /// Forward orientation: how phrase pair [s1,e1) x [s2,e2) sits relative to
+    /// what follows it on the a2 side (a1/a2 serve as row2col/col2row for
+    /// expand_block()). s2 is unused.
+    /// NOTE(review): MAX doubles as "undetermined" here -- confirm that it does
+    /// not coincide with one of the real orientation values in LRModel.
+    Moses::LRModel::ReorderingType
+    find_po_fwd(vector<vector<ushort> >& a1, vector<vector<ushort> >& a2,
+                size_t s1, size_t e1, size_t s2, size_t e2)
+    {
+      size_t const n2 = a2.size();
+      if (e2 == n2) return Moses::LRModel::M; // end of target sentence
+      size_t const col = e2, lft = e2, rgt = n2-1; // columns after the phrase
+      // M: block seeded at (e1,e2), confined to rows e1 .. a1.size()-1
+      if (e1 < a1.size() && expand_block(a1,a2,e1,col,e1,lft,a1.size()-1,rgt) >= 0)
+        return Moses::LRModel::M;
+      // S: block seeded at (s1-1,e2), confined to rows 0 .. s1-1
+      if (s1 && expand_block(a1,a2,s1-1,col,0,lft,s1-1,rgt) >= 0)
+        return Moses::LRModel::S;
+      while (e2 < n2 && a2[e2].empty()) ++e2; // next non-empty column
+      if (e2 == n2) return Moses::LRModel::MAX; // should never happen, actually
+      if (a2[e2].back() < s1) return Moses::LRModel::DL;
+      if (a2[e2].front() >= e1) return Moses::LRModel::DR;
+      return Moses::LRModel::MAX;
+    }
+
+
+    /// Backward orientation: how phrase pair [s1,e1) x [s2,e2) sits relative to
+    /// what precedes it on the a2 side (a1/a2 act as row2col/col2row); e2 unused.
+    Moses::LRModel::ReorderingType
+    find_po_bwd(vector<vector<ushort> >& a1, vector<vector<ushort> >& a2,
+                size_t s1, size_t e1, size_t s2, size_t e2)
+    {
+      if (s1 == 0 && s2 == 0) return Moses::LRModel::M;
+      if (s2 == 0) return Moses::LRModel::DR;
+      if (s1 == 0) return Moses::LRModel::DL;
+      size_t const y = s2-1, L = 0, R = s2-1; // columns before the phrase
+      if (expand_block(a1,a2,s1-1,y,0,L,s1-1,R) >= 0) // seed (s1-1,s2-1), rows 0..s1-1
+        return Moses::LRModel::M;
+      if (expand_block(a1,a2,e1,y,e1,L,a1.size()-1,R) >= 0) // seed (e1,s2-1), rows e1..
+        return Moses::LRModel::S;
+      // scan back from s2-1 (s2 > 0 here), not s2: a2[s2] is the phrase's own column
+      size_t p2 = s2-1;
+      while (p2 && a2[p2].empty()) --p2;
+      if (a2[p2].empty()) return Moses::LRModel::MAX;
+      if (a2[p2].back() < s1) return Moses::LRModel::DR;
+      if (a2[p2].front() >= e1) return Moses::LRModel::DL;
+      return Moses::LRModel::MAX;
+    }
+
+ } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
new file mode 100644
index 000000000..5adf0ea7e
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
@@ -0,0 +1,26 @@
+// -*- c++ -*-
+#pragma once
+#include <vector>
+#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
+
+namespace Moses { namespace bitext {
+
+// Phrase orientations are expressed as LRModel reordering types.
+typedef Moses::LRModel::ReorderingType PhraseOrientation;
+// Inclusive upper bound for loops over orientation counts (see ug_bitext.cc).
+static size_t const po_other = Moses::LRModel::MAX;
+
+// Forward orientation; b1/b2 are called s1/s2 in ug_lexical_reordering.cc.
+PhraseOrientation
+find_po_fwd(std::vector<std::vector<ushort> >& a1,
+ std::vector<std::vector<ushort> >& a2,
+ size_t b1, size_t e1,
+ size_t b2, size_t e2);
+
+// Backward orientation; b1/b2 are called s1/s2 in ug_lexical_reordering.cc.
+PhraseOrientation
+find_po_bwd(std::vector<std::vector<ushort> >& a1,
+ std::vector<std::vector<ushort> >& a2,
+ size_t b1, size_t e1,
+ size_t b2, size_t e2);
+}} // close namespaces