/*
 * terCalc implementation (namespace TERCpp): word-level edit distance
 * (WERCalculation) and the shift-based alignment search (TER, MinEditDist,
 * GatherAllPossShifts, PerformShift). The block comments placed in front of
 * each definition below summarise what the visible code does; items marked
 * NOTE(review) are observations that should be confirmed against tercalc.h
 * and the callers.
 */
// // C++ Implementation: tercalc // // Description: // // // Author: <>, (C) 2010 // // Copyright: See COPYING file that comes with this distribution // // #include "tercalc.h" using namespace std; using namespace Tools; namespace TERCpp { /* Default constructor: sets every edit cost to 1.0 except match_cost (0.0), caps shifts at MAX_SHIFT_SIZE = 50 and MAX_SHIFT_DIST = 50, sets BEAM_WIDTH = 20 and INF = 999999.0, zeroes the three statistics counters and turns debug printing off. */ terCalc::terCalc() { MAX_SHIFT_SIZE = 50; INF = 999999.0; shift_cost = 1.0; insert_cost = 1.0; delete_cost = 1.0; substitute_cost = 1.0; match_cost = 0.0; NUM_SEGMENTS_SCORED = 0; NUM_SHIFTS_CONSIDERED = 0; NUM_BEAM_SEARCH_CALLS = 0; BEAM_WIDTH = 20; MAX_SHIFT_DIST = 50; PRINT_DEBUG = false; } // terCalc::~terCalc() // { // } // size_t* terCalc::hashVec ( vector s ) // { // size_t retour[ ( int ) s.size() ]; // int i=0; // for ( i=0; i< ( int ) s.size(); i++ ) // { // boost::hash hasher; // retour[i]=hasher ( s.at ( i ) ); // } // return retour; // } /* WERCalculation (hashed tokens): fills an edit-distance table over ref and hyp, compared element by element, writes a trace of the table to cerr and returns the bottom-right cell. A cell takes the deletion or the insertion cost only when that cost is strictly the smallest of the three; every tie falls through to substitution. NOTE(review): REFSize and HYPSize are derived from sizeof applied to a size_t pointer, which yields the size of the pointer and not the number of elements, so the table dimensions do not follow the input length - confirm with callers or pass the lengths explicitly. */ int terCalc::WERCalculation ( size_t * ref, size_t * hyp ) { int retour; int REFSize = sizeof ( ref ) + 1; int HYPSize = sizeof ( hyp ) + 1; int WER[REFSize][HYPSize]; int i = 0; int j = 0; for ( i = 0; i < REFSize; i++ ) { WER[i][0] = ( int ) i; } for ( j = 0; j < HYPSize; j++ ) { WER[0][j] = ( int ) j; } for ( j = 1; j < HYPSize; j++ ) { for ( i = 1; i < REFSize; i++ ) { if ( i == 1 ) { cerr << endl; } if ( ref[i-1] == hyp[j-1] ) { WER[i][j] = WER[i-1][j-1]; cerr << "- "; cerr << WER[i][j] << "-\t"; } else { if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) { WER[i][j] = ( WER[i-1][j] + 1 ); // cerr << "D "; cerr << WER[i][j] << "D\t"; } else { if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) { WER[i][j] = ( WER[i][j-1] + 1 ); // cerr << "I "; cerr << WER[i][j] << "I\t"; } else { WER[i][j] = ( WER[i-1][j-1] + 1 ); // cerr << "S "; cerr << WER[i][j] << "S\t"; } } } } } cerr << endl; retour = WER[i-1][j-1]; cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl; return retour; } /* WERCalculation (integer tokens): writes each sequence into a space separated string, splits it again with stringToVector and returns the result of the overload that accepts those split sequences (presumably the string-token overload below - confirm in tercalc.h). */ int terCalc::WERCalculation ( std::vector< int > ref, std::vector< int > hyp ) { stringstream s; 
s.str ( "" ); string stringRef ( "" ); string stringHyp ( "" ); for ( vector::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) { if ( l_it == ref.begin() ) { s << ( *l_it ); } else { s << " " << ( *l_it ); } } stringRef = s.str(); s.str ( "" ); for ( vector::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) { if ( l_itHyp == hyp.begin() ) { s << ( *l_itHyp ); } else { s << " " << ( *l_itHyp ); } } stringHyp = s.str(); s.str ( "" ); return WERCalculation ( stringToVector ( stringRef, " " ), stringToVector ( stringHyp , " " ) ); } /* TER (integer tokens): same integer to string conversion as the integer WERCalculation overload, then forwards to another TER overload. NOTE(review): this function takes (hyp, ref) but forwards (stringRef, stringHyp), while the TER overload later in this file names its parameters (hyp, ref) - confirm the swap is intended. */ terAlignment terCalc::TER ( std::vector< int > hyp, std::vector< int > ref ) { stringstream s; s.str ( "" ); string stringRef ( "" ); string stringHyp ( "" ); for ( vector::iterator l_it = ref.begin(); l_it != ref.end(); l_it++ ) { if ( l_it == ref.begin() ) { s << ( *l_it ); } else { s << " " << ( *l_it ); } } stringRef = s.str(); s.str ( "" ); for ( vector::iterator l_itHyp = hyp.begin(); l_itHyp != hyp.end(); l_itHyp++ ) { if ( l_itHyp == hyp.begin() ) { s << ( *l_itHyp ); } else { s << " " << ( *l_itHyp ); } } stringHyp = s.str(); s.str ( "" ); return TER ( stringToVector ( stringRef , " " ), stringToVector ( stringHyp , " " ) ); } /* WERCalculation (string tokens): fills the cost table WER and a parallel table WERchar holding one operation letter per cell (dash for match, D, I, S), walks back from the bottom-right cell collecting (cost, letter) pairs into local, and dumps both tables to cerr. Ties are resolved toward substitution exactly as in the size_t overload. NOTE(review): row 0 and column 0 of WERchar are never assigned by the fill loops but can be read during the walk back, where the indices i-1 or j-1 reach 0. */ int terCalc::WERCalculation ( vector ref, vector hyp ) { int retour; int REFSize = ( int ) ref.size() + 1; int HYPSize = ( int ) hyp.size() + 1; int WER[REFSize][HYPSize]; char WERchar[REFSize][HYPSize]; int i = 0; int j = 0; for ( i = 0; i < REFSize; i++ ) { WER[i][0] = ( int ) i; } for ( j = 0; j < HYPSize; j++ ) { WER[0][j] = ( int ) j; } for ( j = 1; j < HYPSize; j++ ) { for ( i = 1; i < REFSize; i++ ) { // if (i==1) // { // cerr << endl; // } if ( ref[i-1] == hyp[j-1] ) { WER[i][j] = WER[i-1][j-1]; // cerr << "- "; // cerr << WER[i][j]<< "-\t"; WERchar[i][j] = '-'; } else { if ( ( ( WER[i-1][ j] + 1 ) < ( WER[i][j-1] + 1 ) ) && ( ( WER[i-1][j] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) { WER[i][j] = ( WER[i-1][j] + 1 ); // cerr << "D "; // cerr << WER[i][j]<< "D\t"; WERchar[i][j] = 'D'; } 
else { if ( ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j] + 1 ) ) && ( ( WER[i][j-1] + 1 ) < ( WER[i-1][j-1] + 1 ) ) ) { WER[i][j] = ( WER[i][j-1] + 1 ); // cerr << "I "; // cerr << WER[i][j]<< "I\t"; WERchar[i][j] = 'I'; } else { WER[i][j] = ( WER[i-1][j-1] + 1 ); // cerr << "S "; // cerr << WER[i][j]<< "S\t"; WERchar[i][j] = 'S'; } } } } } cerr << endl; retour = WER[REFSize-1][HYPSize-1]; cerr << "i : " << i - 1 << "\tj : " << j - 1 << endl; j = HYPSize - 1; i = REFSize - 1; int k; stringstream s; // WERalignment local[HYPSize]; if ( HYPSize > REFSize ) { k = HYPSize; } else { k = REFSize; } WERalignment local; /* Walk back from the last cell while both indices are positive: the start cell itself is recorded on the first pass, then the diagonal neighbour is preferred, then the j-1 neighbour, otherwise the i-1 neighbour. */ while ( j > 0 && i > 0 ) { cerr << "indice i : " << i << "\t"; cerr << "indice j : " << j << endl; if ( ( j == HYPSize - 1 ) && ( i == REFSize - 1 ) ) { alignmentElement localInfos; s << WER[i][j]; localInfos.push_back ( s.str() ); s.str ( "" ); s << WERchar[i][j]; localInfos.push_back ( s.str() ); s.str ( "" ); local.push_back ( localInfos ); // // i--; // j--; } // else { if ( ( ( WER[i-1][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i-1][j-1] ) <= ( WER[i][j-1] ) ) ) { alignmentElement localInfos; s << WER[i-1][j-1]; localInfos.push_back ( s.str() ); s.str ( "" ); s << WERchar[i-1][j-1]; localInfos.push_back ( s.str() ); s.str ( "" ); local.push_back ( localInfos ); i--; j--; } else { if ( ( ( WER[i][j-1] ) <= ( WER[i-1][j] ) ) && ( ( WER[i][j-1] ) <= ( WER[i-1][j-1] ) ) ) { alignmentElement localInfos; s << WER[i][j-1]; localInfos.push_back ( s.str() ); s.str ( "" ); s << WERchar[i][j-1]; localInfos.push_back ( s.str() ); s.str ( "" ); local.push_back ( localInfos ); j--; } else { alignmentElement localInfos; s << WER[i-1][j]; localInfos.push_back ( s.str() ); s.str ( "" ); s << WERchar[i-1][j]; localInfos.push_back ( s.str() ); s.str ( "" ); local.push_back ( localInfos ); i--; } } } } for ( j = 1; j < HYPSize; j++ ) { for ( i = 1; i < REFSize; i++ ) { cerr << WERchar[i][j] << " "; } cerr << endl; } cerr << endl; for ( j = 1; j < HYPSize; j++ ) { for ( i = 1; i < 
REFSize; i++ ) { cerr << WER[i][j] << " "; } cerr << endl; } cerr << "=================" << endl; // k=local.size()-1; // while (k>0) // { // alignmentElement localInfos; // localInfos=local.at(k-1); // l_WERalignment.push_back(localInfos); // cerr << (string)localInfos.at(1)+"\t"; k--; // } // cerr< 0 ) { alignmentElement localInfos; localInfos = local.at ( k - 1 ); if ( ( int ) ( localInfos.at ( 1 ).compare ( "D" ) ) == 0 || l > HYPSize - 1 ) { localInfos.push_back ( "***" ); } else { localInfos.push_back ( hyp.at ( l ) ); l++; } if ( ( int ) ( localInfos.at ( 1 ).compare ( "I" ) ) == 0 || m > REFSize - 1 ) { localInfos.push_back ( "***" ); } else { localInfos.push_back ( ref.at ( m ) ); m++; } // cerr << vectorToString(localInfos)<0) { alignmentElement localInfos; localInfos=local.at(k-1); // l_WERalignment.push_back(localInfos); cerr << (string)localInfos.at(0)+"\t"; k--; } cerr<0) { alignmentElement localInfos; localInfos=local.at(k-1); // l_WERalignment.push_back(localInfos); if ((int)(localInfos.at(1).compare("D"))==0) { cerr << "***\t"; } else { cerr << hyp.at(l) << "\t"; l++; } k--; } cerr< vec) // { // string retour(""); // for (vector::iterator vecIter=vec.begin();vecIter!=vec.end(); vecIter++) // { // retour+=(*vecIter)+"\t"; // } // return retour; // } // vector terCalc::subVector(vector vec, int start, int end) // { // if (start>end) // { // cerr << "ERREUR : terCalc::subVector : end > start"< retour; // for (int i=start; ((i hyp, vector ref ) { /* NOTE(review): the header of this function is not intact in this copy; the body matches the BuildWordMatches ( hyp, ref ) call made from TER further down. It flags every ref word that also occurs in hyp, then for each run of flagged ref words beginning at start (run length limited through end - start <= MAX_SHIFT_SIZE) appends start to the position list stored in retour under the key vectorToString of that run. */ hashMap tempHash; hashMapInfos retour; for ( int i = 0; i < ( int ) hyp.size(); i++ ) { tempHash.addHasher ( hyp.at ( i ), "" ); } bool cor[ref.size() ]; for ( int i = 0; i < ( int ) ref.size(); i++ ) { if ( tempHash.trouve ( ( string ) ref.at ( i ) ) ) { cor[i] = true; } else { cor[i] = false; } } for ( int start = 0; start < ( int ) ref.size(); start++ ) { if ( cor[start] ) { for ( int end = start; ( ( end < ( int ) ref.size() ) && ( end - start <= MAX_SHIFT_SIZE ) && ( cor[end] ) ); end++ ) { vector ajouter 
= subVector ( ref, start, end + 1 ); string ajouterString = vectorToString ( ajouter ); vector values = retour.getValue ( ajouterString ); values.push_back ( start ); if ( values.size() > 1 ) { retour.setValue ( ajouterString, values ); } else { retour.addValue ( ajouterString, values ); } } } } return retour; } /* spanIntersection: true when the closed ranges [refSpan.at(0), refSpan.at(1)] and [hypSpan.at(0), hypSpan.at(1)] share at least one position. */ bool terCalc::spanIntersection ( vecInt refSpan, vecInt hypSpan ) { if ( ( refSpan.at ( 1 ) >= hypSpan.at ( 0 ) ) && ( refSpan.at ( 0 ) <= hypSpan.at ( 1 ) ) ) { return true; } return false; } /* MinEditDist: pruned dynamic-programming alignment of hyp against ref using the tables S (cost, -1.0 meaning unset) and P (back-pointer letter: blank for match, S, D, I). For columns before the last, cells whose score exceeds last_best + BEAM_WIDTH are skipped, and when both refSpans and hypSpans are non-empty a match or substitution is only tried where spanIntersection accepts the pair. Returns a terAlignment with numWords = ref.size(), alignment = the operation path and numEdits = S[ref.size()][hyp.size()]; increments NUM_BEAM_SEARCH_CALLS and terminates the process via exit ( -1 ) on an unrecognised back-pointer. NOTE(review): S and P are not declared in this file and the code that once resized them is commented out - confirm they are large enough for ref.size()+1 by hyp.size()+1. */ terAlignment terCalc::MinEditDist ( vector hyp, vector ref, vector curHypSpans ) { double current_best = INF; double last_best = INF; int first_good = 0; int current_first_good = 0; int last_good = -1; int cur_last_good = 0; int last_peak = 0; int cur_last_peak = 0; int i, j; double cost, icost, dcost; double score; // int hwsize = hyp.size()-1; // int rwsize = ref.size()-1; NUM_BEAM_SEARCH_CALLS++; // if ((ref.size()+1 > sizeof(S)) || (hyp.size()+1 > sizeof(S))) // { // int max = ref.size(); // if (hyp.size() > ref.size()) max = hyp.size(); // max += 26; // we only need a +1 here, but let's pad for future use // S = new double[max][max]; // P = new char[max][max]; // } for ( i = 0; i <= ( int ) ref.size(); i++ ) { for ( j = 0; j <= ( int ) hyp.size(); j++ ) { S[i][j] = -1.0; P[i][j] = '0'; } } S[0][0] = 0.0; for ( j = 0; j <= ( int ) hyp.size(); j++ ) { last_best = current_best; current_best = INF; first_good = current_first_good; current_first_good = -1; last_good = cur_last_good; cur_last_good = -1; last_peak = cur_last_peak; cur_last_peak = 0; for ( i = first_good; i <= ( int ) ref.size(); i++ ) { if ( i > last_good ) { break; } if ( S[i][j] < 0 ) { continue; } score = S[i][j]; if ( ( j < ( int ) hyp.size() ) && ( score > last_best + BEAM_WIDTH ) ) { continue; } if ( current_first_good == -1 ) { current_first_good = i ; } if ( ( i < ( int ) ref.size() ) && ( j < ( int ) hyp.size() ) ) { if ( ( int ) refSpans.size() == 0 || ( int ) hypSpans.size() == 0 || 
spanIntersection ( refSpans.at ( i ), curHypSpans.at ( j ) ) ) { if ( ( int ) ( ref.at ( i ).compare ( hyp.at ( j ) ) ) == 0 ) { cost = match_cost + score; if ( ( S[i+1][j+1] == -1 ) || ( cost < S[i+1][j+1] ) ) { S[i+1][j+1] = cost; P[i+1][j+1] = ' '; } if ( cost < current_best ) { current_best = cost; } if ( current_best == cost ) { cur_last_peak = i + 1; } } else { cost = substitute_cost + score; if ( ( S[i+1][j+1] < 0 ) || ( cost < S[i+1][j+1] ) ) { S[i+1][j+1] = cost; P[i+1][j+1] = 'S'; if ( cost < current_best ) { current_best = cost; } if ( current_best == cost ) { cur_last_peak = i + 1 ; } } } } } cur_last_good = i + 1; if ( j < ( int ) hyp.size() ) { icost = score + insert_cost; if ( ( S[i][j+1] < 0 ) || ( S[i][j+1] > icost ) ) { S[i][j+1] = icost; P[i][j+1] = 'I'; if ( ( cur_last_peak < i ) && ( current_best == icost ) ) { cur_last_peak = i; } } } if ( i < ( int ) ref.size() ) { dcost = score + delete_cost; if ( ( S[ i+1][ j] < 0.0 ) || ( S[i+1][j] > dcost ) ) { S[i+1][j] = dcost; P[i+1][j] = 'D'; if ( i >= last_good ) { last_good = i + 1 ; } } } } } /* The first pass over the back-pointers counts the path length; the second pass fills path starting from its end. */ int tracelength = 0; i = ref.size(); j = hyp.size(); while ( ( i > 0 ) || ( j > 0 ) ) { tracelength++; if ( P[i][j] == ' ' ) { i--; j--; } else if ( P[i][j] == 'S' ) { i--; j--; } else if ( P[i][j] == 'D' ) { i--; } else if ( P[i][j] == 'I' ) { j--; } else { cerr << "ERROR : terCalc::MinEditDist : Invalid path : " << P[i][j] << endl; exit ( -1 ); } } vector path ( tracelength ); i = ref.size(); j = hyp.size(); while ( ( i > 0 ) || ( j > 0 ) ) { path[--tracelength] = P[i][j]; if ( P[i][j] == ' ' ) { i--; j--; } else if ( P[i][j] == 'S' ) { i--; j--; } else if ( P[i][j] == 'D' ) { i--; } else if ( P[i][j] == 'I' ) { j--; } } terAlignment to_return; to_return.numWords = ref.size(); to_return.alignment = path; to_return.numEdits = S[ref.size() ][hyp.size() ]; if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::MinEditDist : to_return :" << endl << to_return.toString() << endl << "END DEBUG" << endl; } return 
to_return; } /* TER (string tokens): starts by indexing ref phrases with BuildWordMatches and computing an unshifted alignment with MinEditDist ( hyp, ref, hypSpans ). NOTE(review): the remainder of this function and the header of the next one are not intact in this copy. */ terAlignment terCalc::TER ( vector hyp, vector ref ) { hashMapInfos rloc = BuildWordMatches ( hyp, ref ); terAlignment cur_align = MinEditDist ( hyp, ref, hypSpans ); vector cur = hyp; cur_align.hyp = hyp; cur_align.ref = ref; cur_align.aftershift = hyp; double edits = 0; // int numshifts = 0; vector allshifts; // cerr << "Initial Alignment:" << endl << cur_align.toString() < cur, vector hyp, vector ref, hashMapInfos rloc, terAlignment med_align ) { /* Body of the shift search (its debug output names it terCalc::CalcBestShift). Tries the candidate shifts from GatherAllPossShifts starting with the longest group, applies each with PerformShift, re-aligns with MinEditDist and keeps a shift when its gain is positive, or zero while cur_best_shift_cost is still 0. The search stops early once the improvement already achieved exceeds maxfix = 2 * ( 1 + i ), or equals it with a non-zero cur_best_shift_cost. to_return.m_empty is true when nothing was kept. */ bestShiftStruct to_return; bool anygain = false; bool herr[ ( int ) hyp.size() ]; bool rerr[ ( int ) ref.size() ]; int ralign[ ( int ) ref.size() ]; FindAlignErr ( med_align, herr, rerr, ralign ); vector poss_shifts; poss_shifts = GatherAllPossShifts ( cur, ref, rloc, med_align, herr, rerr, ralign ); double curerr = med_align.numEdits; if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl; cerr << "Possible Shifts:" << endl; for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) { for ( int j = 0; j < ( int ) ( poss_shifts.at ( i ) ).size(); j++ ) { cerr << " [" << i << "] " << ( ( poss_shifts.at ( i ) ).at ( j ) ).toString() << endl; } } cerr << endl; cerr << "END DEBUG " << endl; } double cur_best_shift_cost = 0.0; terAlignment cur_best_align = med_align; terShift cur_best_shift; for ( int i = ( int ) poss_shifts.size() - 1; i >= 0; i-- ) { if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl; cerr << "Considering shift of length " << i << " (" << ( poss_shifts.at ( i ) ).size() << ")" << endl; cerr << "END DEBUG " << endl; } /* Consider shifts of length i+1 */ double curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); double maxfix = ( 2 * ( 1 + i ) ); if ( ( curfix > maxfix ) || ( ( cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { break; } for ( int s = 0; s < ( int ) ( poss_shifts.at ( i ) ).size(); s++ ) { curfix = curerr - ( cur_best_shift_cost + cur_best_align.numEdits ); if ( ( curfix > maxfix ) || ( ( 
cur_best_shift_cost != 0 ) && ( curfix == maxfix ) ) ) { break; } terShift curshift = ( poss_shifts.at ( i ) ).at ( s ); alignmentStruct shiftReturns = PerformShift ( cur, curshift ); vector shiftarr = shiftReturns.nwords; vector curHypSpans = shiftReturns.aftershift; terAlignment curalign = MinEditDist ( shiftarr, ref, curHypSpans ); curalign.hyp = hyp; curalign.ref = ref; curalign.aftershift = shiftarr; double gain = ( cur_best_align.numEdits + cur_best_shift_cost ) - ( curalign.numEdits + curshift.cost ); // if (DEBUG) { // string testeuh=terAlignment join(" ", shiftarr); if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl; cerr << "Gain for " << curshift.toString() << " is " << gain << ". (result: [" << curalign.join ( " ", shiftarr ) << "]" << endl; cerr << "" << curalign.toString() << "\n" << endl; cerr << "END DEBUG " << endl; } // } // if ( ( gain > 0 ) || ( ( cur_best_shift_cost == 0 ) && ( gain == 0 ) ) ) { anygain = true; cur_best_shift = curshift; cur_best_shift_cost = curshift.cost; cur_best_align = curalign; // if (DEBUG) if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::CalcBestShift :" << endl; cerr << "Tmp Choosing shift: " << cur_best_shift.toString() << " gives:\n" << cur_best_align.toString() << "\n" << endl; cerr << "END DEBUG " << endl; } } } } if ( anygain ) { to_return.m_best_shift = cur_best_shift; to_return.m_best_align = cur_best_align; to_return.m_empty = false; } else { to_return.m_empty = true; } return to_return; } /* FindAlignErr: replays align.alignment and fills the caller supplied arrays: herr[h] and rerr[r] flag hyp and ref positions touched by a substitution, insertion or deletion, and ralign[r] stores the hyp position reached when ref position r was consumed (for a deletion this is the most recent hyp position, -1 if none yet). Terminates the process via exit ( -1 ) on an unknown symbol. */ void terCalc::FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign ) { int hpos = -1; int rpos = -1; if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::FindAlignErr : " << endl << align.toString() << endl; cerr << "END DEBUG " << endl; } for ( int i = 0; i < ( int ) align.alignment.size(); i++ ) { char sym = align.alignment[i]; if ( sym == ' ' ) { hpos++; rpos++; herr[hpos] = false; rerr[rpos] = false; ralign[rpos] = hpos; } else if ( sym == 'S' ) { hpos++; rpos++; herr[hpos] 
= true; rerr[rpos] = true; ralign[rpos] = hpos; } else if ( sym == 'I' ) { hpos++; herr[hpos] = true; } else if ( sym == 'D' ) { rpos++; rerr[rpos] = true; ralign[rpos] = hpos; } else { cerr << "ERROR : terCalc::FindAlignErr : Invalid mini align sequence " << sym << " at pos " << i << endl; exit ( -1 ); } } } /* GatherAllPossShifts: collects candidate shifts of hyp word ranges [start, end] whose text also occurs in ref according to rloc. A range yields candidates only if it contains a hyp error, the hyp position aligned to a matching ref occurrence lies outside the range and within MAX_SHIFT_DIST of start, and that ref occurrence contains a ref error. Results are grouped by end - start, giving MAX_SHIFT_SIZE + 1 groups; an empty vector is returned when MAX_SHIFT_SIZE or MAX_SHIFT_DIST is not positive. */ vector terCalc::GatherAllPossShifts ( vector hyp, vector ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ) { vector to_return; // Don't even bother to look if shifts can't be done if ( ( MAX_SHIFT_SIZE <= 0 ) || ( MAX_SHIFT_DIST <= 0 ) ) { // terShift[][] to_return = new terShift[0][]; return to_return; } vector allshifts ( MAX_SHIFT_SIZE + 1 ); // ArrayList[] allshifts = new ArrayList[MAX_SHIFT_SIZE+1]; // for (int i = 0; i < allshifts.length; i++) // { // allshifts[i] = new ArrayList(); // } // List hyplist = Arrays.asList(hyp); for ( int start = 0; start < ( int ) hyp.size(); start++ ) { string subVectorHypString = vectorToString ( subVector ( hyp, start, start + 1 ) ); if ( ! rloc.trouve ( subVectorHypString ) ) { continue; } bool ok = false; vector mtiVec = rloc.getValue ( subVectorHypString ); vector::iterator mti = mtiVec.begin(); while ( mti != mtiVec.end() && ( ! ok ) ) { int moveto = ( *mti ); mti++; if ( ( start != ralign[moveto] ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] - 1 ) <= MAX_SHIFT_DIST ) ) { ok = true; } } if ( ! ok ) { continue; } ok = true; for ( int end = start; ( ok && ( end < ( int ) hyp.size() ) && ( end < start + MAX_SHIFT_SIZE ) ); end++ ) { /* check if cand is good if so, add it */ vector cand = subVector ( hyp, start, end + 1 ); ok = false; if ( ! ( rloc.trouve ( vectorToString ( cand ) ) ) ) { continue; } bool any_herr = false; for ( int i = 0; ( ( i <= ( end - start ) ) && ( ! 
any_herr ) ); i++ ) { if ( herr[start+i] ) { any_herr = true; } } if ( any_herr == false ) { ok = true; continue; } vector movetoitVec; movetoitVec = rloc.getValue ( ( string ) vectorToString ( cand ) ); vector::iterator movetoit = movetoitVec.begin(); while ( movetoit != movetoitVec.end() ) { int moveto = ( *movetoit ); movetoit++; if ( ! ( ( ralign[moveto] != start ) && ( ( ralign[moveto] < start ) || ( ralign[moveto] > end ) ) && ( ( ralign[moveto] - start ) <= MAX_SHIFT_DIST ) && ( ( start - ralign[moveto] ) <= MAX_SHIFT_DIST ) ) ) { continue; } ok = true; /* check to see if there are any errors in either string (only move if this is the case!) */ bool any_rerr = false; for ( int i = 0; ( i <= end - start ) && ( ! any_rerr ); i++ ) { if ( rerr[moveto+i] ) { any_rerr = true; } } if ( ! any_rerr ) { continue; } for ( int roff = -1; roff <= ( end - start ); roff++ ) { terShift topush; bool topushNull = true; if ( ( roff == -1 ) && ( moveto == 0 ) ) { if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 01 : " << endl << "Consider making " << start << "..." << end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: -1" << endl << "END DEBUG" << endl; } terShift t01 ( start, end, -1, -1 ); topush = t01; topushNull = false; } else if ( ( start != ralign[moveto+roff] ) && ( ( roff == 0 ) || ( ralign[moveto+roff] != ralign[moveto] ) ) ) { int newloc = ralign[moveto+roff]; if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl << "Consider making " << start << "..." 
<< end << " moveto: " << moveto << " roff: " << roff << " ralign[mt+roff]: " << newloc << endl << "END DEBUG" << endl; } terShift t02 ( start, end, moveto + roff, newloc ); topush = t02; topushNull = false; } if ( !topushNull ) { topush.shifted = cand; topush.cost = shift_cost; if ( PRINT_DEBUG ) { cerr << "BEGIN DEBUG : terCalc::GatherAllPossShifts 02 : " << endl; cerr << "start : " << start << endl; cerr << "end : " << end << endl; cerr << "end - start : " << end - start << endl; cerr << "END DEBUG " << endl; } ( allshifts.at ( end - start ) ).push_back ( topush ); } } } } } // vector to_return; to_return.clear(); // terShift[][] to_return = new terShift[MAX_SHIFT_SIZE+1][]; for ( int i = 0; i < MAX_SHIFT_SIZE + 1; i++ ) { // to_return[i] = (terShift[]) allshifts[i].toArray(new terShift[0]); to_return.push_back ( ( vecTerShift ) allshifts.at ( i ) ); } return to_return; } /* PerformShift (terShift): unpacks s.start, s.end and s.newloc and forwards to the four-argument overload. */ alignmentStruct terCalc::PerformShift ( vector words, terShift s ) { return PerformShift ( words, s.start, s.end, s.newloc ); } /* PerformShift: returns a copy of words in which words[start..end] has been moved to sit directly after position newloc (newloc == -1 moves it to the front; when newloc falls inside the range, the range is instead moved right by newloc - start positions). When hypSpans is non-empty the same reordering is applied to a copy of it. The reordered words go to toreturn.nwords and the reordered spans to toreturn.aftershift. Increments NUM_SHIFTS_CONSIDERED. */ alignmentStruct terCalc::PerformShift ( vector words, int start, int end, int newloc ) { int c = 0; vector nwords ( words ); vector spans ( ( int ) hypSpans.size() ); alignmentStruct toreturn; // ON EST ICI // if((int)hypSpans.size()>0) spans = new TERintpair[(int)hypSpans.size()]; // if(DEBUG) { if ( PRINT_DEBUG ) { if ( ( int ) hypSpans.size() > 0 ) { cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: " << ( int ) hypSpans.size() << endl << "END DEBUG " << endl; } else { cerr << "BEGIN DEBUG : terCalc::PerformShift :" << endl << "word length: " << ( int ) words.size() << " span length: null" << endl << "END DEBUG " << endl; } } // } if ( newloc == -1 ) { for ( int i = start; i <= end; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = 0; i <= start - 1; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) 
hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = end + 1; i < ( int ) words.size(); i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } } else { if ( newloc < start ) { for ( int i = 0; i <= newloc; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = start; i <= end; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = newloc + 1; i <= start - 1; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = end + 1; i < ( int ) words.size(); i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } } else { if ( newloc > end ) { for ( int i = 0; i <= start - 1; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = end + 1; i <= newloc; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = start; i <= end; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = newloc + 1; i < ( int ) words.size(); i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } } else { // we are moving inside of ourselves for ( int i = 0; i <= start - 1; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = end + 1; ( i < ( int ) words.size() ) && ( i <= ( end + ( newloc - start ) ) ); i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( 
c - 1 ) = hypSpans.at ( i ); } } for ( int i = start; i <= end; i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } for ( int i = ( end + ( newloc - start ) + 1 ); i < ( int ) words.size(); i++ ) { nwords.at ( c++ ) = words.at ( i ); if ( ( int ) hypSpans.size() > 0 ) { spans.at ( c - 1 ) = hypSpans.at ( i ); } } } } } NUM_SHIFTS_CONSIDERED++; toreturn.nwords = nwords; toreturn.aftershift = spans; return toreturn; } /* setDebugMode: enables or disables the PRINT_DEBUG traces written to cerr. */ void terCalc::setDebugMode ( bool b ) { PRINT_DEBUG = b; } }