/* EGYPT Toolkit for Statistical Machine Translation Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "model3.h" #include "utility.h" #include "Globals.h" LogProb model3::prob_of_target_and_alignment_given_source(Vector& A, Vector& Fert, tmodel& tTable, Vector& fs, Vector& es) { LogProb total = 1.0; LogProb temp = 0.0; const LogProb zero = 0.0; WordIndex l = es.size()-1, m = fs.size()-1; WordIndex i, j; total *= pow(double(1-p1), m-2.0 * Fert[0]) * pow(double(p1), double(Fert[0])); if (total == 0) return (zero); for (i = 1; i <= Fert[0]; i++) { // loop caculates m-fert[0] choose fert[0] total *= double(m - Fert[0] - i + 1) / i; if (total == 0) return (zero); } for (i = 1; i <= l; i++) { // this loop calculates fertilities term total *= double(nTable.getValue(es[i], Fert[i])) * (LogProb) factorial(Fert[i]); if (total == 0) return (zero); } for (j = 1; j <= m; j++) { // temp = tTable.getValue(es[A[j]], fs[j]) ; temp = double(tTable.getProb(es[A[j]], fs[j])); total *= temp; if (0 != A[j]) total *= double(dTable.getValue(j, A[j], l, m)); if (total == 0) return (zero); } return (total); } LogProb model3::prob_of_target_given_source(tmodel& tTable, Vector& fs, Vector& es) { WordIndex x, y; LogProb total = 0; // WordIndex l = es.size(), m = fs.size(); WordIndex l = es.size()-1, m = fs.size()-1; Vector A(fs.size(),/*-1*/0); Vector Fert(es.size(),0); WordIndex i, j; for (x = 0; x < pow(l+1.0, double(m)) ; x++) { // For all possible alignmets A y = x; // for (j = 1 ; j < m ; j++){ for (j = 1; j <= m; j++) { A[j] = y % (l+1); y /= (l+1); } // for(i = 0 ; i < l ; i++) for (i = 0; i <= l; i++) Fert[i] = 0; // for (j = 1 ; j < m ; j++) for (j = 1; j <= m; j++) Fert[A[j]]++; // if (2 * Fert[0] < m){ if (2 * Fert[0] <= m) { /* consider alignments that has Fert[0] less than half the length of french sentence */ total += prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es); } } return (total); } LogProb model3::scoreOfMove(Vector& es, Vector& fs, Vector& A, Vector& Fert, tmodel& tTable, WordIndex j, WordIndex i) // returns the scaling factor of the original score if A[j] is linked to // i, no change is really made to A // but the score is calculated if the move is to be taken (i.e. // no side effects on Alignment A nor its Fertility Fert // If the value of the scaling factor is: // 1: then the score of the new alignment if the move is taken will // not change. // 0.5: the new score is half the score of the original alignment. // 2.0: the new score will be twice as much. // { // LogProb score; LogProb change; WordIndex m, l; m = fs.size() - 1; l = es.size() - 1; if (A[j] == i) // return(original_score); return (1); else if (A[j] == 0) { // a move from position zero to something else change = double(p0*p0)/p1 * (double((Fert[0]*(m-Fert[0]+1))) / ((m-2*Fert[0]+1)*(m-2*Fert[0] +2))) * (Fert[i]+1) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m)); } else if (i == 0) { // a move to position zero change= ((double(p1) / (p0*p0)) * (double((m-2*Fert[0])*(m-2*Fert[0]-1))/((Fert[0]+1)*(m-Fert[0]))) * (double(1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]]))* double(tTable.getProb(es[i], fs[j])) / double(tTable.getProb(es[A[j]], fs[j])) * 1.0 / double(dTable.getValue(j, A[j], l, m))); } else { // a move that does not involve position zero change = ((double(Fert[i]+1)/Fert[A[j]]) * double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) / double(nTable.getValue(es[A[j]], Fert[A[j]])) * double(nTable.getValue(es[i], Fert[i]+1)) / double(nTable.getValue(es[i], Fert[i])) * double(tTable.getProb(es[i], fs[j]))/ double(tTable.getProb(es[A[j]], fs[j])) * double(dTable.getValue(j, i, l, m))/ double(dTable.getValue(j, A[j], l, m))); } return (change); } LogProb model3::scoreOfSwap(Vector& es, Vector& fs, Vector& A, tmodel& tTable, int j1, int j2) // returns the scaling factor of the original score if the swap to // take place, // No side effects here (none of the parameters passed is changed! // (i.e. the alignment A is not really changed) // If the value of the scaling factor is: // 1: then the score of the new alignment if the move is taken will // not change. // 0.5: the new score is half the score of the original alignment. // 2.0: the new score will be twice as much. // { LogProb score; WordIndex i1, i2, m, l; m = fs.size() - 1; l = es.size() - 1; if (j1 == j2 || A[j1] == A[j2]) // if swapping same position return ratio 1 return (1); else { i1 = A[j1]; i2 = A[j2]; score = double(tTable.getProb(es[i2], fs[j1]))/double(tTable.getProb(es[i1], fs[j1])) * double(tTable.getProb(es[i1], fs[j2]))/double(tTable.getProb(es[i2], fs[j2])); if (i1 != 0) { score *= double(dTable.getValue(j2, i1, l, m))/double(dTable.getValue(j1, i1, l, m)); } if (i2 != 0) { score *= double(dTable.getValue(j1, i2, l, m))/double(dTable.getValue(j2, i2, l, m)); } return (score); } } void model3::hillClimb(Vector& es, Vector& fs, Vector& A, Vector& Fert, LogProb& best_score, tmodel& tTable, int = -1, int j_peg = -1) // Hill climbing given alignment A . // Alignment A will be updated and also best_score // if no pegging is needed i_peg == -1, and j_peg == -1 { WordIndex i, j, l, m, j1, old_i; LogProb change; bool local_minima; int level = 0; LogProb best_change_so_far, best_change; Vector A_so_far; Vector Fert_so_far; l = es.size() - 1; m = fs.size() - 1; best_change = 1; // overall scaling factor (i.e. from the begining of climb do { best_change_so_far = 1; // best scaling factor of this level of hill climb local_minima = true; for (j = 1; j <= m; j++) { if (int(j) != j_peg) { // make sure not to change the pegged link for (j1 = j + 1; j1 <= m; j1++) { // for all possible swaps // make sure you are not swapping at same position if ((A[j] != A[j1]) && (int(j1) != j_peg)) { // change = scoreOfSwap(es, fs, A, best_score, tTable, j, j1); change = scoreOfSwap(es, fs, A, tTable, j, j1); if (change > best_change_so_far) { // if better alignment found, keep it local_minima = false; best_change_so_far = change; A_so_far = A; Fert_so_far = Fert; old_i = A_so_far[j]; A_so_far[j] = A_so_far[j1]; A_so_far[j1] = old_i; } // end of if (change > best_change_so_far) } // end of if (A[j] != A[j1] ..) } // of for (j1 = j+1 ....) // for (i = 0 ; i < l ; i++){ // all possible moves for (i = 0; i <= l; i++) { // all possible moves if (i != A[j]) { // make sure not to move to same position if (i != 0 || (m >= 2 * (Fert[0]+1))) { // if moving to NULL word // (pos 0), make sure not to violate the fertility restriction // i.e. NULL can not take more than half the target words // change = scoreOfMove(es, fs, A, Fert, best_score, tTable, j, i); change = scoreOfMove(es, fs, A, Fert, tTable, j, i); if (change > best_change_so_far) { // if better alignment found, keep it best_change_so_far = change; local_minima = false; A_so_far = A; Fert_so_far = Fert; old_i = A_so_far[j]; A_so_far[j] = i; Fert_so_far[old_i]--; Fert_so_far[i]++; } // end of if (change > best_change_so_far) } // end of if ((i!=0) ... } // end of if (i != A[j] ) } // end of for (i = 0 ; ....) } // end of if(j != j_peg) } // end of for (j = 1 ; ...) level++; if (!local_minima) { if (best_change_so_far > 1) { // if current chage is improving A = A_so_far; Fert = Fert_so_far; best_change *= best_change_so_far; } else { local_minima = true; } } // end of if(!local_minima) if (level> 15) cerr << "."; } while (local_minima == false); if (level > 15) cerr << "\nHill Climb Level: " << level << " score: scaling old: " <<(best_score*best_change); best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es); if (level>15) cerr << " using new calc: " << best_score << '\n'; } void model3::findBestAlignment(Vector& es, Vector& fs, Vector& A, Vector& Fert, LogProb& best_score, /*tmodel& tTable, amodel& aTable, */ int i_peg = -1, int j_peg = -1) // This finds the best Model2 alignment (i.e. no fertilities stuff) in A // for the given sentence pair. Its score is returned in A. Its fertility // info in Fert. // if j_peg == -1 && i_peg == -1 then No pegging is performed. { WordIndex i, j, l, m, best_i=0; LogProb temp, score, ss; l = es.size() - 1; m = fs.size() - 1; for (i=0; i <= l; i++) Fert[i] = 0; ss = 1; if ((j_peg != -1) && (i_peg != -1)) { // if you're doing pegging A[j_peg] = i_peg; Fert[i_peg] = 1; ss *= double(tTable.getProb(es[i_peg], fs[j_peg])) * double(aTable.getValue(i_peg, j_peg, l, m)); } for (j = 1; j <= m; j++) { if (int(j) != j_peg) { score = 0; for (i = 0; i <= l; i++) { // first make sure that connecting target word at pos j to source word // at pos i will not lead to a violation on Fertility restrictions // (e.g. maximum fertility for a word, max fertility for NULL word, etc) if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0] +1))) || (i != 0))) { temp = double(tTable.getProb(es[i], fs[j])) * double(aTable.getValue(i, j, l, m)); if (temp > score) { best_i = i; score = temp; } // end of if (temp > score) } // end of if (((i == 0 ...) } // end of for (i= 0 ...) if (score == 0) { cerr << "WARNING: In searching for model2 best alignment\n "; cerr << "Nothing was set for target token " << fs[j] << "at position j: " << j << "\n"; for (i = 0; i <= l; i++) { cerr << "i: " << i << "ttable("<= 2 *(Fert[0]+1))) || (i != 0))) cerr <<"Passed fertility condition \n"; else cerr <<"Failed fertility condition \n"; } } // end of if (score == 0) else { Fert[best_i]++; A[j] = best_i; } ss *= score; } // end of if (j != j_peg) } // end of for (j == 1 ; ...) if (ss <= 0) { cerr << "WARNING: Model2 viterbi alignment has zero score for sentence pair:\n"; printSentencePair(es, fs, cerr); } best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es); } void model3::collectCountsOverAlignement(const Vector& es, const Vector& fs, const Vector& A, LogProb score, float count) { WordIndex j, i, l, m; Vector Fert(es.size(),0); l = es.size() - 1; m = fs.size() - 1; score *= LogProb(count); COUNT temp = COUNT(score) ; for (i=0; i <= l; i++) Fert[i] = 0; for (j = 1; j <= m; j++) { Fert[A[j]]++; tTable.incCount(es[A[j]], fs[j], temp); // tCountTable.getRef(es[A[j]], fs[j])+=score; if (A[j]) dCountTable.addValue(j, A[j], l, m, temp); aCountTable.addValue(A[j], j, l, m, temp); } for (i = 0; i <= l; i++) nCountTable.addValue(es[i], Fert[i], temp); // p1_count += score * (LogProb) (Fert[0]) ; // p0_count += score * (LogProb) ((m - 2 * Fert[0])) ; p1_count += temp * (Fert[0]); p0_count += temp * ((m - 2 * Fert[0])); } void model3::findAlignmentsNeighborhood(Vector& es, Vector& fs, LogProb&align_total_count, alignmodel&neighborhood, int i_peg = -1, int j_peg = -1) // Finding the Neigborhood of a best viterbi alignment after hill climbing // if (i_peg == -1 and j_peg == -1, then No Pegging is done. { LogProb best_score, score; WordIndex i, j, l, m, old_i, j1; Vector A(fs.size(),0); Vector Fert(es.size(),0); time_t it_st; best_score = 0; l = es.size() - 1; m = fs.size() - 1; findBestAlignment(es, fs, A, Fert, best_score, /*tTable, aTable,*/i_peg, j_peg); if (best_score == 0) { cerr << "WARNING: viterbi alignment score is zero for the following pair\n"; printSentencePair(es, fs, cerr); } hillClimb(es, fs, A, Fert, best_score, tTable, i_peg, j_peg); if (best_score <= 0) { cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n"; printSentencePair(es, fs, cerr); } else { // best_score > 0 // if (2 * Fert[0] < m ){ if (2*Fert[0] <= m) { /* consider alignments that has Fert[0] less than half the number of words in French sentence */ if (neighborhood.insert(A, best_score)) { align_total_count += best_score; } } else { // else part is added for debugging / Yaser cerr << "WARNING:Best Alignment found violates Fertility requiremnets !!\n"; for (i = 0; i <= l; i++) cerr << "Fert["< 0) { /* consider alignments that has Fert[0] less than half the number of words in French sentence */ old_i = A[j]; A[j] = A[j1]; A[j1] = old_i; if (neighborhood.insert(A, score)) { align_total_count += score; } // restore original alignment old_i = A[j]; A[j] = A[j1]; A[j1] = old_i; } } } for (i = 0; i <= l; i++) { // all possible moves if (i != A[j]) { // make sure not to move to same position if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2 *(Fert[0]+1))) || (i != 0))) { // consider legal alignments only score = best_score * scoreOfMove(es, fs, A, Fert, tTable, j, i); // ADD A and its score to list of alig. to collect counts over if (score > 0) { old_i = A[j]; A[j] = i; Fert[old_i]--; Fert[i]++; // add to list of alignemts here ****************** if (neighborhood.insert(A, score)) { align_total_count += score; } // now resotre alignment and fertilities to previoud values A[j] = old_i; Fert[old_i]++; Fert[i]--; } // end of if (score > 0) } // end of if (i == 0 ...) } // end of if (i != A[j]) }// end of for(i = 0 ; ...) }// end of for (j = 1 ; ...) } // of else best_score <= 0 } void model3::viterbi_loop(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1, bool dump_files, const char* alignfile, bool collect_counts, string model) { WordIndex i, j, l, m; ofstream of2; int pair_no; LogProb temp; if (dump_files) of2.open(alignfile); pair_no = 0; // sentence pair number // for each sentence pair in the corpus perp.clear() ; // clears cross_entrop & perplexity viterbiPerp.clear(); sentPair sent; while (sHandler1.getNextSentence(sent)) { Vector& es = sent.eSent; Vector& fs = sent.fSent; const float count = sent.getCount(); if ((sent.sentenceNo % 1000) == 0) cerr < viterbi_alignment; LogProb viterbi_score; alignmodel neighborhood; neighborhood.clear(); align_total_count = 0; findAlignmentsNeighborhood( /*tTable, aTable,*//*p1_count, p0_count,*/es, fs, align_total_count, neighborhood) ; if (Peg) { for (i = 0; i <= l; i++) for (j = 1; j <= m; j++) { if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH) && (aTable.getValue(i, j, l, m) > PROB_SMOOTH) && (dTable.getValue(j, i, l, m) > PROB_SMOOTH)) findAlignmentsNeighborhood(/*tTable, aTable,*//*p1_count, p0_count, */es, fs, align_total_count, neighborhood, i, j); } } // Now Collect counts over saved neighborhoods viterbi_score = 0; if (Verbose) cerr << "\nCollecting counts over found alignments, total prob: " << align_total_count << "\n"; hash_map, LogProb, hashmyalignment, equal_to_myalignment >::iterator align; int acount = 0; if (align_total_count == 0) { cerr << " WARNINIG: For the following sentence pair : \n"; printSentencePair(es, fs, cerr); cerr << "The collection of alignments found have 0 probability!!\n"; cerr << "No counts will be collected of it \n"; } else { if (collect_counts) { for (align = neighborhood.begin(); align != neighborhood.end(); align++) { temp = (*align).second/align_total_count; collectCountsOverAlignement(/*tTable, aCountTable, */es, fs, /*p1_count, p0_count ,*/((*align).first), temp, count); acount++; if (viterbi_score < temp) { viterbi_alignment = ((*align).first); viterbi_score = temp; } } } // end of if (collect_counts) perp.addFactor(log(double(align_total_count)), count, l, m, 0); viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m, 0); if (Verbose) { cerr << "Collected counts over "<