From 3c767fc333f5fe9101a6a4cf67afdc48ddcf8174 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Mon, 8 Jun 2015 13:47:02 +0100 Subject: New field to store cumulative bias scores. --- moses/TranslationModel/UG/mm/ug_bitext_jstats.cc | 7 +++++-- moses/TranslationModel/UG/mm/ug_bitext_jstats.h | 4 +++- moses/TranslationModel/UG/mm/ug_phrasepair.h | 7 +++++-- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'moses/TranslationModel/UG/mm') diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc index bcda9ebf3..9ff8b855a 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc +++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc @@ -6,6 +6,7 @@ namespace Moses uint32_t jstats::rcnt() const { return my_rcnt; } float jstats::wcnt() const { return my_wcnt; } + float jstats::bcnt() const { return my_bcnt; } uint32_t jstats::cnt2() const { return my_cnt2; } // What was that used for again? UG @@ -15,7 +16,7 @@ namespace Moses jstats:: jstats() - : my_rcnt(0), my_cnt2(0), my_wcnt(0) + : my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0) { for (int i = 0; i <= Moses::LRModel::NONE; ++i) ofwd[i] = obwd[i] = 0; @@ -27,6 +28,7 @@ namespace Moses { my_rcnt = other.rcnt(); my_wcnt = other.wcnt(); + my_bcnt = other.bcnt(); my_aln = other.aln(); indoc = other.indoc; for (int i = 0; i <= Moses::LRModel::NONE; i++) @@ -54,13 +56,14 @@ namespace Moses void jstats:: - add(float w, vector const& a, uint32_t const cnt2, + add(float w, float b, vector const& a, uint32_t const cnt2, uint32_t fwd_orient, uint32_t bwd_orient, int const docid) { boost::lock_guard lk(this->lock); my_cnt2 = cnt2; my_rcnt += 1; my_wcnt += w; + my_bcnt += b; if (a.size()) { size_t i = 0; diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h index dade27649..49ba0d810 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h @@ -18,6 +18,7 @@ namespace Moses uint32_t my_rcnt; // unweighted joint count uint32_t my_cnt2; // raw counts L2 float my_wcnt; // weighted joint count + float my_bcnt; // cumulative bias // to do: use a static alignment pattern store that stores each pattern only // once, so that we don't have to store so many alignment vectors @@ -33,9 +34,10 @@ namespace Moses uint32_t rcnt() const; // raw joint counts uint32_t cnt2() const; // raw target phrase occurrence count float wcnt() const; // weighted joint counts + float bcnt() const; // cumulative bias scores vector > > const & aln() const; - void add(float w, vector const& a, uint32_t const cnt2, + void add(float w, float b, vector const& a, uint32_t const cnt2, uint32_t fwd_orient, uint32_t bwd_orient, int const docid); void invalidate(); diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h index 7e565c2df..5247b7f01 100644 --- a/moses/TranslationModel/UG/mm/ug_phrasepair.h +++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h @@ -28,7 +28,7 @@ namespace Moses float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs? float dbwd[Moses::LRModel::NONE+1]; // distortion counts std::vector aln; - float score; + float score, cum_bias; bool inverse; std::vector indoc; PhrasePair() { }; @@ -96,6 +96,7 @@ namespace Moses good2 = 0; sample2 = 0; raw2 = 0; + cum_bias = 0; fvals.resize(numfeats); } @@ -109,6 +110,7 @@ namespace Moses start2 = x; len2 = len; raw2 = js.cnt2(); joint = js.rcnt(); + cum_bias = js.bcnt(); assert(js.aln().size()); if (js.aln().size()) aln = js.aln()[0].second; @@ -176,6 +178,7 @@ namespace Moses joint += o.joint; sample1 += o.sample1; sample2 += o.sample2; + cum_bias += o.cum_bias; // todo: add distortion counts return *this; } @@ -189,7 +192,7 @@ namespace Moses , raw1(o.raw1) , raw2(o.raw2) , sample1(o.sample1) , sample2(o.sample2) , good1(o.good1) , good2(o.good2) - , joint(o.joint) + , joint(o.joint) , cum_bias(o.cum_bias) , fvals(o.fvals) , aln(o.aln) , score(o.score) -- cgit v1.2.3