diff options
Diffstat (limited to 'mgizapp/src/TTables.cpp')
-rw-r--r-- | mgizapp/src/TTables.cpp | 177 |
1 files changed, 177 insertions, 0 deletions
diff --git a/mgizapp/src/TTables.cpp b/mgizapp/src/TTables.cpp new file mode 100644 index 0000000..1e4f3b6 --- /dev/null +++ b/mgizapp/src/TTables.cpp @@ -0,0 +1,177 @@ +/* + +EGYPT Toolkit for Statistical Machine Translation +Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +USA. + +*/ +#include "TTables.h" +#include "Parameter.h" +#include<iostream> +#include <fstream> + +GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7); +GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6); + + +/* ------------------ Method Definiotns for Class tmodel --------------------*/ + + +// To output to STDOUT, submit filename as NULL +template <class COUNT, class PROB> +void tmodel<COUNT, PROB>::printCountTable(const char *filename, + const Vector<WordEntry>& evlist, + const Vector<WordEntry>& fvlist, + const bool actual) const +{ + ostream *tof; + + if(filename) + tof = new ofstream(filename); + else + tof = & cout; + + ostream &of = *tof; + /* for(unsigned int i=0;i<es.size()-1;++i) + for(unsigned int j=es[i];j<es[i+1];++j) + { + const CPPair&x=fs[j].second; + WordIndex e=i,f=fs[j].first; + if( actual ) + of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n'; + else + of << e << ' ' << f << ' ' << x.prob << '\n'; + }*/ + for(unsigned int i=0;i<lexmat.size();++i){ + if( lexmat[i] ){ + for(unsigned int j=0;j<lexmat[i]->size();++j) + { + const CPPair&x=(*lexmat[i])[j].second; + WordIndex e=i,f=(*lexmat[i])[j].first; + if( x.prob>MINCOUNTINCREASE ){ + if( actual ){ + of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n'; + }else{ + of << e << ' ' << f << ' ' << x.count << '\n'; + } + } + } + } + } + + if(filename){ + ((ofstream*)tof)->close(); + delete tof; + } +} + +template <class COUNT, class PROB> +void tmodel<COUNT, PROB>::printProbTable(const char *filename, + const Vector<WordEntry>& evlist, + const Vector<WordEntry>& fvlist, + const bool actual) const +{ + ofstream of(filename); + /* for(unsigned int i=0;i<es.size()-1;++i) + for(unsigned int j=es[i];j<es[i+1];++j) + { + const CPPair&x=fs[j].second; + WordIndex e=i,f=fs[j].first; + if( actual ) + of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n'; + else + of << e << ' ' << f << ' ' << x.prob << '\n'; + }*/ + for(unsigned int i=0;i<lexmat.size();++i){ + if( lexmat[i] ){ + for(unsigned int j=0;j<lexmat[i]->size();++j) + { + const CPPair&x=(*lexmat[i])[j].second; + WordIndex e=i,f=(*lexmat[i])[j].first; + if( x.prob>PROB_SMOOTH ){ + if( actual ){ + of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n'; + }else{ + of << e << ' ' << f << ' ' << x.prob << '\n'; + } + } + } + } + } +} + +template <class COUNT, class PROB> +void tmodel<COUNT, PROB>::printProbTableInverse(const char *, + const Vector<WordEntry>&, + const Vector<WordEntry>&, + const double, + const double, + const bool ) const +{ +} +template <class COUNT, class PROB> +void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int) +{ + for(unsigned int i=0;i<lexmat.size();++i){ + double c=0.0; + if( lexmat[i] ){ + unsigned int lSize=lexmat[i]->size(); + for(unsigned int j=0;j<lSize;++j) + c+=(*lexmat[i])[j].second.count; + for(unsigned int j=0;j<lSize;++j) { + if( c==0 ) + (*lexmat[i])[j].second.prob=1.0/(lSize); + else + (*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c; + (*lexmat[i])[j].second.count=0; + } + } + } +} + +template <class COUNT, class PROB> +bool tmodel<COUNT, PROB>::readProbTable(const char *filename){ + /* This function reads the t table from a file. + Each line is of the format: source_word_id target_word_id p(target_word|source_word) + This is the inverse operation of the printTable function. + NAS, 7/11/99 + */ + ifstream inf(filename); + cerr << "Reading t prob. table from " << filename << "\n"; + if (!inf) { + cerr << "\nERROR: Cannot open " << filename << "\n"; + return false; + } + WordIndex src_id, trg_id; + PROB prob; + int nEntry=0; + while (inf >> src_id >> trg_id >> prob) { + insert(src_id, trg_id, 0.0, prob); + nEntry++; + } + cerr << "Read " << nEntry << " entries in prob. table.\n"; + return true; +} + + + +template class tmodel<COUNT,PROB> ; + +/* ---------------- End of Method Definitions of class tmodel ---------------*/ + + + |