Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/src/TTables.cpp')
-rw-r--r--mgizapp/src/TTables.cpp177
1 files changed, 177 insertions, 0 deletions
diff --git a/mgizapp/src/TTables.cpp b/mgizapp/src/TTables.cpp
new file mode 100644
index 0000000..1e4f3b6
--- /dev/null
+++ b/mgizapp/src/TTables.cpp
@@ -0,0 +1,177 @@
+/*
+
+EGYPT Toolkit for Statistical Machine Translation
+Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+#include "TTables.h"
+#include "Parameter.h"
+#include<iostream>
+#include <fstream>
+
+// Tunable cutoffs registered through the GLOBAL_PARAMETER macros.
+// NOTE(review): the macros are not defined in this file (presumably Parameter.h);
+// the remarks below only restate the string literals passed to them.
+//
+// PROB_CUTOFF: float, described as "Probability cutoff threshold for lexicon
+// probabilities"; the trailing 1e-7 is presumably the default value -- confirm.
+GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
+// COUNTINCREASE_CUTOFF: float, described as "Counts increment cutoff threshold";
+// "countCutoff" is presumably a second accepted name and 1e-6 the default -- confirm.
+GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
+
+
+/* ------------------ Method Definitions for Class tmodel --------------------*/
+
+
+// Write the count column of the lexicon table as text, one entry per line.
+//
+// Each emitted line is "<e> <f> <count>\n", where <e> is the row index into
+// lexmat and <f> is the index stored with the entry. When `actual` is true the
+// two indices are replaced by evlist[e].word and fvlist[f].word. Only entries
+// whose prob exceeds MINCOUNTINCREASE are emitted.
+//
+//   filename  output path; to output to STDOUT, submit filename as NULL
+//   evlist    word entries looked up by <e> when `actual` is true
+//   fvlist    word entries looked up by <f> when `actual` is true
+//   actual    true: print word strings; false: print numeric indices
+//
+// If a filename is given but the file cannot be opened, an error is reported
+// on cerr and nothing is written.
+template <class COUNT, class PROB>
+void tmodel<COUNT, PROB>::printCountTable(const char *filename,
+                                          const Vector<WordEntry>& evlist,
+                                          const Vector<WordEntry>& fvlist,
+                                          const bool actual) const
+{
+  // The stream object owns the file, so it is closed on every exit path
+  // without a manual new/delete pair or a downcast to call close().
+  ofstream file;
+  if( filename ){
+    file.open(filename);
+    if( !file ){
+      cerr << "\nERROR: Cannot open " << filename << " for writing\n";
+      return;
+    }
+  }
+  ostream &of = filename ? static_cast<ostream&>(file) : cout;
+
+  for(unsigned int i=0;i<lexmat.size();++i){
+    if( !lexmat[i] )
+      continue; // null row pointer: nothing to print for this row
+    for(unsigned int j=0;j<lexmat[i]->size();++j){
+      const CPPair &x=(*lexmat[i])[j].second;
+      const WordIndex e=i, f=(*lexmat[i])[j].first;
+      // NOTE(review): the filter tests x.prob although x.count is what gets
+      // printed; kept exactly as before -- confirm this is intended.
+      if( x.prob>MINCOUNTINCREASE ){
+        if( actual ){
+          of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.count << '\n';
+        }else{
+          of << e << ' ' << f << ' ' << x.count << '\n';
+        }
+      }
+    }
+  }
+}
+
+// Write the probability column of the lexicon table to a text file, one entry
+// per line.
+//
+// Each emitted line is "<e> <f> <prob>\n", where <e> is the row index into
+// lexmat and <f> is the index stored with the entry. When `actual` is true the
+// two indices are replaced by evlist[e].word and fvlist[f].word. Only entries
+// whose prob exceeds PROB_SMOOTH are emitted.
+//
+//   filename  output path; must not be NULL
+//   evlist    word entries looked up by <e> when `actual` is true
+//   fvlist    word entries looked up by <f> when `actual` is true
+//   actual    true: print word strings; false: print numeric indices
+//
+// If filename is NULL or the file cannot be opened, an error is reported on
+// cerr and nothing is written.
+template <class COUNT, class PROB>
+void tmodel<COUNT, PROB>::printProbTable(const char *filename,
+                                         const Vector<WordEntry>& evlist,
+                                         const Vector<WordEntry>& fvlist,
+                                         const bool actual) const
+{
+  // Constructing an ofstream from a null pointer is not valid, so reject it
+  // before touching the stream.
+  if( !filename ){
+    cerr << "\nERROR: Cannot write t prob. table: no file name given\n";
+    return;
+  }
+  ofstream of(filename);
+  if( !of ){
+    // Without this check a bad path would silently produce no table at all.
+    cerr << "\nERROR: Cannot open " << filename << " for writing\n";
+    return;
+  }
+
+  for(unsigned int i=0;i<lexmat.size();++i){
+    if( !lexmat[i] )
+      continue; // null row pointer: nothing to print for this row
+    for(unsigned int j=0;j<lexmat[i]->size();++j){
+      const CPPair &x=(*lexmat[i])[j].second;
+      const WordIndex e=i, f=(*lexmat[i])[j].first;
+      if( x.prob>PROB_SMOOTH ){
+        if( actual ){
+          of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
+        }else{
+          of << e << ' ' << f << ' ' << x.prob << '\n';
+        }
+      }
+    }
+  }
+}
+
+// Stub: the body is empty, so calling this function writes nothing.
+// All parameters are unnamed and ignored.
+template <class COUNT, class PROB>
+void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
+ const Vector<WordEntry>&,
+ const Vector<WordEntry>&,
+ const double,
+ const double,
+ const bool ) const
+{
+ // intentionally empty
+}
+// Turn the accumulated counts of every row of lexmat into probabilities.
+//
+// For each non-null row the counts are summed; each entry's prob then becomes
+// count/sum, or 1/(number of entries in the row) when the sum is zero.
+// Every count is reset to 0 afterwards. The three parameters are unnamed and
+// unused.
+template <class COUNT, class PROB>
+void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
+{
+  for(unsigned int row=0; row<lexmat.size(); ++row){
+    if( !lexmat[row] )
+      continue; // null row pointer: nothing to normalize
+
+    const unsigned int nEntries=lexmat[row]->size();
+
+    // First pass: total count mass of this row.
+    double total=0.0;
+    for(unsigned int k=0; k<nEntries; ++k)
+      total+=(*lexmat[row])[k].second.count;
+
+    // Second pass: write probabilities and clear the counts.
+    for(unsigned int k=0; k<nEntries; ++k){
+      CPPair &cell=(*lexmat[row])[k].second;
+      if( total==0 )
+        cell.prob=1.0/(nEntries); // no mass collected: spread uniformly
+      else
+        cell.prob=cell.count/total;
+      cell.count=0;
+    }
+  }
+}
+
+// Read the t table from a text file.
+//
+// Each line is of the format:
+//   source_word_id target_word_id p(target_word|source_word)
+// Every record is stored through insert() with a count of 0.0. This is the
+// inverse operation of the printTable function (original note: NAS, 7/11/99).
+//
+//   filename  path of the table to read; must not be NULL
+//
+// Returns true when the whole file was consumed. Returns false, after
+// reporting on cerr, when filename is NULL, the file cannot be opened, or
+// reading stopped before the end of the file (malformed record or I/O error).
+// Records read before such a failure remain inserted.
+template <class COUNT, class PROB>
+bool tmodel<COUNT, PROB>::readProbTable(const char *filename){
+  // A null pointer is valid neither for ifstream nor for operator<<.
+  if( !filename ){
+    cerr << "\nERROR: Cannot read t prob. table: no file name given\n";
+    return false;
+  }
+  ifstream inf(filename);
+  cerr << "Reading t prob. table from " << filename << "\n";
+  if (!inf) {
+    cerr << "\nERROR: Cannot open " << filename << "\n";
+    return false;
+  }
+  WordIndex src_id, trg_id;
+  PROB prob;
+  int nEntry=0;
+  while (inf >> src_id >> trg_id >> prob) {
+    insert(src_id, trg_id, 0.0, prob);
+    nEntry++;
+  }
+  cerr << "Read " << nEntry << " entries in prob. table.\n";
+  // The loop also ends on the first record that fails to parse. If that
+  // happened before the end of the file, the table is truncated and must not
+  // be reported as a successful read.
+  if( !inf.eof() ){
+    cerr << "\nERROR: Malformed entry in " << filename << " after " << nEntry << " entries\n";
+    return false;
+  }
+  return true;
+}
+
+
+
+// Explicit instantiation: the member functions above are defined in this .cpp
+// file, so tmodel<COUNT,PROB> is instantiated here to emit their code in this
+// translation unit.
+// NOTE(review): COUNT and PROB here name types declared outside this file.
+template class tmodel<COUNT,PROB> ;
+
+/* ---------------- End of Method Definitions of class tmodel ---------------*/
+
+
+