diff options
Diffstat (limited to 'mgizapp/src/ATables.cpp')
-rw-r--r-- | mgizapp/src/ATables.cpp | 283 |
1 files changed, 144 insertions, 139 deletions
diff --git a/mgizapp/src/ATables.cpp b/mgizapp/src/ATables.cpp index 8cc4ad2..7552e95 100644 --- a/mgizapp/src/ATables.cpp +++ b/mgizapp/src/ATables.cpp @@ -8,18 +8,18 @@ modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. -This program is distributed in the hope that it will be useful, +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include "ATables.h" +#include "ATables.h" #include "Globals.h" #include "myassert.h" #include "Parameter.h" @@ -28,185 +28,190 @@ GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional ali GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0); template <class VALTYPE> -void amodel<VALTYPE>::printTable(const char *filename) const{ - // print amodel to file with the name filename (it'll be created or overwritten - // format : for a table : - // aj j l m val - // where aj is source word pos, j target word pos, l source sentence length, +void amodel<VALTYPE>::printTable(const char *filename) const +{ + // print amodel to file with the name filename (it'll be created or overwritten + // format : for a table : + // aj j l m val + // where aj is source word pos, j target word pos, l source sentence length, // m is target sentence length. - // + // //return; - if (is_distortion) - cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n'; - else - cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n'; - - ofstream of(filename); - double ssum=0.0; - for(WordIndex l=0; l < MaxSentLength; l++){ - for(WordIndex m=0;m<MaxSentLength;m++){ - if( CompactADTable && l!=m ) - continue; - unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; - unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; - if( is_distortion==0 ){ - for(WordIndex j=1;j<=M; j++){ - double sum=0.0; - for(WordIndex i=0;i<=L; i++){ - VALTYPE x=getValue(i, j, L, M); - if( x>PROB_SMOOTH ){ - of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; - sum+=x; - } - } - ssum+=sum; - } - }else{ - for(WordIndex i=0;i<=L;i++){ - double sum=0.0; - for(WordIndex j=1;j<=M;j++){ - VALTYPE x=getValue(j, i, L, M); - if( x>PROB_SMOOTH ){ - of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; - sum+=x; - } - } - ssum+=sum; - } + if (is_distortion) + cout << "Dumping pruned distortion table (d) to file:" << filename <<'\n'; + else + cout << "Dumping pruned alignment table (a) to file:" << filename <<'\n'; + + ofstream of(filename); + double ssum=0.0; + for(WordIndex l=0; l < MaxSentLength; l++) { + for(WordIndex m=0; m<MaxSentLength; m++) { + if( CompactADTable && l!=m ) + continue; + unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; + unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; + if( is_distortion==0 ) { + for(WordIndex j=1; j<=M; j++) { + double sum=0.0; + for(WordIndex i=0; i<=L; i++) { + VALTYPE x=getValue(i, j, L, M); + if( x>PROB_SMOOTH ) { + of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; + sum+=x; + } + } + ssum+=sum; + } + } else { + for(WordIndex i=0; i<=L; i++) { + double sum=0.0; + for(WordIndex j=1; j<=M; j++) { + VALTYPE x=getValue(j, i, L, M); + if( x>PROB_SMOOTH ) { + of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; + sum+=x; } + } + ssum+=sum; } + } } + } } template <class VALTYPE> -void amodel<VALTYPE>::printRealTable(const char *filename) const{ - // print amodel to file with the name filename (it'll be created or overwritten - // format : for a table : - // aj j l m val - // where aj is source word pos, j target word pos, l source sentence length, +void amodel<VALTYPE>::printRealTable(const char *filename) const +{ + // print amodel to file with the name filename (it'll be created or overwritten + // format : for a table : + // aj j l m val + // where aj is source word pos, j target word pos, l source sentence length, // m is target sentence length. - // + // //return; - if (is_distortion) - cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n'; - else - cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n'; - - ofstream of(filename); - for(WordIndex l=0; l < MaxSentLength; l++){ - for(WordIndex m=0;m<MaxSentLength;m++){ - if( CompactADTable && l!=m ) - continue; - unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; - unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; - if( is_distortion==0 ){ - for(WordIndex j=1;j<=M; j++){ - for(WordIndex i=0;i<=L; i++){ - VALTYPE x=getValue(i, j, L, M); - if( x>MINCOUNTINCREASE ) - of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; - } - } - }else{ - for(WordIndex i=0;i<=L;i++){ - for(WordIndex j=1;j<=M;j++){ - VALTYPE x=getValue(j, i, L, M); - if( x>MINCOUNTINCREASE ) - of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; - } - } - } + if (is_distortion) + cout << "Dumping not pruned distortion table (d) to file:" << filename <<'\n'; + else + cout << "Dumping not pruned alignment table (a) to file:" << filename <<'\n'; + + ofstream of(filename); + for(WordIndex l=0; l < MaxSentLength; l++) { + for(WordIndex m=0; m<MaxSentLength; m++) { + if( CompactADTable && l!=m ) + continue; + unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; + unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; + if( is_distortion==0 ) { + for(WordIndex j=1; j<=M; j++) { + for(WordIndex i=0; i<=L; i++) { + VALTYPE x=getValue(i, j, L, M); + if( x>MINCOUNTINCREASE ) + of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n'; + } + } + } else { + for(WordIndex i=0; i<=L; i++) { + for(WordIndex j=1; j<=M; j++) { + VALTYPE x=getValue(j, i, L, M); + if( x>MINCOUNTINCREASE ) + of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n'; + } } + } } + } } extern short NoEmptyWord; template <class VALTYPE> -bool amodel<VALTYPE>::readTable(const char *filename){ +bool amodel<VALTYPE>::readTable(const char *filename) +{ /* This function reads the a table from a file. Each line is of the format: aj j l m val - where aj is the source word position, j the target word position, + where aj is the source word position, j the target word position, l the source sentence length, and m the target sentence length - + This function also works for a d table, where the positions of aj and i are swapped. Both the a and d tables are 4 dimensional hashes; this function will simply read in the four values and keep them in that order when hashing the fifth value. NAS, 7/11/99 */ - ifstream inf(filename); - cout << "Reading a/d table from " << filename << "\n"; - if(!inf){ - cerr << "\nERROR: Cannot open " << filename<<"\n"; - return false; - } - WordIndex w, x, l, m; - VALTYPE prob; - while(inf >> w >> x >> l >> m >> prob ) - // the NULL word is added to the length - // of the sentence in the tables, but discount it when you write the tables. - setValue(w, x, l, m, prob); - return true; + ifstream inf(filename); + cout << "Reading a/d table from " << filename << "\n"; + if(!inf) { + cerr << "\nERROR: Cannot open " << filename<<"\n"; + return false; + } + WordIndex w, x, l, m; + VALTYPE prob; + while(inf >> w >> x >> l >> m >> prob ) + // the NULL word is added to the length + // of the sentence in the tables, but discount it when you write the tables. + setValue(w, x, l, m, prob); + return true; } template <class VALTYPE> -bool amodel<VALTYPE>::readAugTable(const char *filename){ +bool amodel<VALTYPE>::readAugTable(const char *filename) +{ /* This function reads the a table from a file. Each line is of the format: aj j l m val - where aj is the source word position, j the target word position, + where aj is the source word position, j the target word position, l the source sentence length, and m the target sentence length - + This function also works for a d table, where the positions of aj and i are swapped. Both the a and d tables are 4 dimensional hashes; this function will simply read in the four values and keep them in that order when hashing the fifth value. NAS, 7/11/99 */ - ifstream inf(filename); - cout << "Reading a/d table from " << filename << "\n"; - if(!inf){ - cerr << "\nERROR: Cannot open " << filename<<"\n"; - return false; - } - WordIndex w, x, l, m; - VALTYPE prob; - while(inf >> w >> x >> l >> m >> prob ) - // the NULL word is added to the length - // of the sentence in the tables, but discount it when you write the tables. - addValue(w, x, l, m, prob); - return true; + ifstream inf(filename); + cout << "Reading a/d table from " << filename << "\n"; + if(!inf) { + cerr << "\nERROR: Cannot open " << filename<<"\n"; + return false; + } + WordIndex w, x, l, m; + VALTYPE prob; + while(inf >> w >> x >> l >> m >> prob ) + // the NULL word is added to the length + // of the sentence in the tables, but discount it when you write the tables. + addValue(w, x, l, m, prob); + return true; } template <class VALTYPE> -bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am){ - cout << "start merging " <<"\n"; - for(WordIndex l=0; l < MaxSentLength; l++){ - for(WordIndex m=0;m<MaxSentLength;m++){ - if( CompactADTable && l!=m ) - continue; - unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; - unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; - if( is_distortion==0 ){ - for(WordIndex j=1;j<=M; j++){ - for(WordIndex i=0;i<=L; i++){ - VALTYPE x=am.getValue(i, j, L, M); - addValue(i,j,L,M,x); - } - } - }else{ - for(WordIndex i=0;i<=L;i++){ - for(WordIndex j=1;j<=M;j++){ - VALTYPE x=am.getValue(j, i, L, M); - addValue(j,i,L,M,x); - } - } - } +bool amodel<VALTYPE>::merge(amodel<VALTYPE>& am) +{ + cout << "start merging " <<"\n"; + for(WordIndex l=0; l < MaxSentLength; l++) { + for(WordIndex m=0; m<MaxSentLength; m++) { + if( CompactADTable && l!=m ) + continue; + unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1; + unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1; + if( is_distortion==0 ) { + for(WordIndex j=1; j<=M; j++) { + for(WordIndex i=0; i<=L; i++) { + VALTYPE x=am.getValue(i, j, L, M); + addValue(i,j,L,M,x); + } + } + } else { + for(WordIndex i=0; i<=L; i++) { + for(WordIndex j=1; j<=M; j++) { + VALTYPE x=am.getValue(j, i, L, M); + addValue(j,i,L,M,x); + } } + } } - return true; + } + return true; } -template class amodel<COUNT> ; -//template class amodel<PROB> ; +template class amodel<COUNT> ; +//template class amodel<PROB> ; |