/*
 * Overview of LexicalDecompositionPhraseScorer as far as this copy shows it.
 *
 * NOTE(review): this copy looks damaged. Line breaks are collapsed, so each
 * "//" comment swallows the code that follows it on the same physical line,
 * and text that stood between angle brackets seems to be missing (the two
 * bare #include directives, "std::map total_tgtlen", "static_cast(cnt)",
 * the truncated stream-insertion chains). Restore the file from version
 * control before building -- TODO confirm against the original.
 *
 * create_scorer(argv, argp, reverse, ptf)
 *   Calls usage() when argv[argp] is NULL; otherwise takes argv[argp] as the
 *   lexical weight file name, advances argp, and returns a heap-allocated
 *   LexicalDecompositionPhraseScorer built on ptf.get_phrase_table().
 *
 * Constructor
 *   Stores in black_box_scorer the scorer returned by
 *   AbsoluteDiscountPhraseScorer::create_scorer(argv, argp, reverse, ptf),
 *   then opens the weight file and reads "src tgt weight" records. Both words
 *   are mapped to ids with PhraseText::index_word and the weight is inserted
 *   into weight_map_ under the key (src_id, tgt_id).
 *
 * do_score_phrases()
 *   Runs black_box_scorer->score_phrases(), then walks phrase_table_ reading
 *   the source and target phrase lengths of every pair. Finally it sets
 *   prob_srclen_tgtlen_[src_len][tgt_len] to the stored count divided by
 *   total_tgtlen[tgt_len], i.e. the p(src_len|tgt_len) estimate named in the
 *   in-line comment. The statements that accumulate the counts are not
 *   visible here.
 *
 * get_weight(...) / get_noisy_or_combination(src_word, tgt_phrase)
 *   Only fragments survive. get_noisy_or_combination starts from sc = 1.0 and
 *   loops over the target phrase length; presumably a noisy-or over
 *   weight_map_ entries, as the name suggests -- TODO confirm.
 *
 * do_get_score(...)
 *   Its signature is missing from this copy. The visible part returns-or-builds
 *   ret_value = black_box_scorer->get_score(it)
 *               + lambda * prod * prob_srclen_tgtlen_[src_len][tgt_len],
 *   where prod starts at 1.0 and is updated in a loop over the source length,
 *   and a partly lost expression (presumably the definition of lambda)
 *   multiplies get_discount() by tgt_phrase.get_distinct() and divides by
 *   tgt_phrase.get_count(). The definition runs past the end of this view.
 */
/* * File: lexdecom.cpp * Author: Felipe Sánchez-Martínez, Universitat d'Alacant * * Created on 2010/01/27 */ #include "lexdecom.h" #include "scorer-impl.h" #include #include PhraseScorer* LexicalDecompositionPhraseScorer::create_scorer(const char *argv[], int &argp, bool reverse, const PhraseScorerFactory &ptf) { if(argv[argp] == NULL) usage(); String lwfile(argv[argp++]); return new LexicalDecompositionPhraseScorer(ptf.get_phrase_table(), reverse, lwfile, argv, argp, ptf); } LexicalDecompositionPhraseScorer::LexicalDecompositionPhraseScorer(PhraseTable &pd, bool reverse, const String &weightfile, const char *argv[], int &argp, const PhraseScorerFactory &ptf) : PhraseScorer(pd, reverse) { black_box_scorer = AbsoluteDiscountPhraseScorer::create_scorer(argv, argp, reverse, ptf); std::ifstream wfile(weightfile.c_str()); //Code copied from LexicalWeightPhraseScorer; it should be factored !! -- 2010/01/27 std::cerr<<"Reading lexical weights from '"<> src >> tgt >> weight; Count src_id = PhraseText::index_word(src); Count tgt_id = PhraseText::index_word(tgt); weight_map_.insert(std::make_pair(std::make_pair(src_id, tgt_id), weight)); } wfile.close(); std::cerr<<"done."<second; } void LexicalDecompositionPhraseScorer::do_score_phrases() { //Estimate p(J|I) = p(src_len|tgt_len) black_box_scorer->score_phrases(); std::cerr<<"LexicalDecompositionPhraseScorer::do_score_phrases"< > count_srclen_tgtlen; std::map total_tgtlen; for(PhraseTable::iterator it = phrase_table_.begin(); it != phrase_table_.end(); it++) { const PhrasePairInfo &ppair = *it; PhraseInfo &src = phrase_table_.get_src_phrase(ppair.get_src()); PhraseInfo &tgt = phrase_table_.get_tgt_phrase(ppair.get_tgt()); unsigned src_len = src.get_phrase().size(); unsigned tgt_len = tgt.get_phrase().size(); /*//Debug code for (unsigned i=0; i "< "< >::iterator its; std::map::iterator itt; for (its=count_srclen_tgtlen.begin(); its!=count_srclen_tgtlen.end(); its++) { unsigned src_len=its->first; for(itt=its->second.begin(); 
itt!=its->second.end(); itt++) { unsigned tgt_len=itt->first; Count cnt=itt->second; prob_srclen_tgtlen_[src_len][tgt_len] = static_cast(cnt)/static_cast(total_tgtlen[tgt_len]); } } } Score LexicalDecompositionPhraseScorer::get_noisy_or_combination(Count src_word, PhraseInfo &tgt_phrase) { Score sc=1.0; unsigned tgt_len=tgt_phrase.get_phrase().size(); for(unsigned i=0; iget_src()); PhraseInfo &tgt_phrase = phrase_table_.get_tgt_phrase(it->get_tgt()); unsigned src_len=src_phrase.get_phrase().size(); unsigned tgt_len=tgt_phrase.get_phrase().size(); Score prod=1.0; for(unsigned j=0; j(black_box_scorer->get_discount()) * tgt_phrase.get_distinct() / tgt_phrase.get_count(); Score ret_value = black_box_scorer->get_score(it) + (lambda * prod * prob_srclen_tgtlen_[src_len][tgt_len]); /* //Debug code for (unsigned i=0; i discount: "<get_discount()<<"; black box score: "<get_score(it) <<"; lambda: "<get_count()<<"; score: "<