diff options
Diffstat (limited to 'moses/TranslationModel/UG/sapt_pscore_unaligned.h')
-rw-r--r-- | moses/TranslationModel/UG/sapt_pscore_unaligned.h | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h new file mode 100644 index 000000000..bdd2919b4 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h @@ -0,0 +1,67 @@ +// -*- c++ -*- +// Phrase scorer that counts the number of unaligend words in the phrase +// written by Ulrich Germann + +#include "sapt_pscore_base.h" +#include <boost/dynamic_bitset.hpp> + +namespace Moses { + namespace bitext + { + template<typename Token> + class + PScoreUnaligned : public PhraseScorer<Token> + { + typedef boost::dynamic_bitset<uint64_t> bitvector; + public: + PScoreUnaligned(string const spec) + { + this->m_index = -1; + int f = this->m_num_feats = atoi(spec.c_str()); + UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<<HERE); + this->m_feature_names.resize(f); + if (f == 1) + this->m_feature_names[0] = "unal"; + else + { + this->m_feature_names[0] = "unal-s"; + this->m_feature_names[1] = "unal-t"; + } + } + + bool + isLogVal(int i) const { return false; } + + bool + isIntegerValued(int i) const { return true; } + + void + operator()(Bitext<Token> const& bt, + PhrasePair<Token>& pp, + vector<float> * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0; + // parse_pid(pp.p1, sid1, off1, len1); + // parse_pid(pp.p2, sid2, off2, len2); + bitvector check1(pp.len1),check2(pp.len2); + for (size_t i = 0; i < pp.aln.size(); ) + { + check1.set(pp.aln[i++]); + check2.set(pp.aln.at(i++)); + } + + if (this->m_num_feats == 1) + { + (*dest)[this->m_index] = pp.len1 - check1.count(); + (*dest)[this->m_index] += pp.len2 - check2.count(); + } + else + { + (*dest)[this->m_index] = pp.len1 - check1.count(); + (*dest)[this->m_index+1] = pp.len2 - check2.count(); + } + } + }; + } // namespace bitext +} // namespace Moses |