// -*- c++ -*- // Phrase scorer that counts the number of unaligend words in the phrase // written by Ulrich Germann #include "sapt_pscore_base.h" #include namespace Moses { namespace bitext { template class PScoreUnaligned : public PhraseScorer { typedef boost::dynamic_bitset bitvector; public: PScoreUnaligned(string const spec) { this->m_index = -1; int f = this->m_num_feats = atoi(spec.c_str()); UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<m_feature_names.resize(f); if (f == 1) this->m_feature_names[0] = "unal"; else { this->m_feature_names[0] = "unal-s"; this->m_feature_names[1] = "unal-t"; } } bool isLogVal(int i) const { return false; } bool isIntegerValued(int i) const { return true; } void operator()(Bitext const& bt, PhrasePair& pp, vector * dest = NULL) const { if (!dest) dest = &pp.fvals; // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0; // parse_pid(pp.p1, sid1, off1, len1); // parse_pid(pp.p2, sid2, off2, len2); bitvector check1(pp.len1),check2(pp.len2); for (size_t i = 0; i < pp.aln.size(); ) { check1.set(pp.aln[i++]); check2.set(pp.aln.at(i++)); } if (this->m_num_feats == 1) { (*dest)[this->m_index] = pp.len1 - check1.count(); (*dest)[this->m_index] += pp.len2 - check2.count(); } else { (*dest)[this->m_index] = pp.len1 - check1.count(); (*dest)[this->m_index+1] = pp.len2 - check2.count(); } } }; } // namespace bitext } // namespace Moses