1 files changed, 205 insertions, 0 deletions
diff --git a/experimental/bidirectional/src/model1.h b/experimental/bidirectional/src/model1.h
new file mode 100644
index 0000000..cadc1e3
--- /dev/null
+++ b/experimental/bidirectional/src/model1.h
@@ -0,0 +1,205 @@
+/*
+
+EGYPT Toolkit for Statistical Machine Translation
+Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, 
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
+USA.
+
+*/
+#ifndef _model1_h
+#define _model1_h 1
+
+#include <assert.h>
+ 
+#include <iostream>
+#include <strstream>
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <set>
+#include <utility>
+
+#if __GNUC__>2
+#include <ext/hash_map>
+using __gnu_cxx::hash_map;
+#else
+#include <hash_map>
+#endif
+#include <time.h>
+#include <fstream>
+#include <math.h>
+#include <stdio.h>
+
+#include "Vector.h"
+#include "vocab.h"
+#include "TTables.h"
+#include "getSentence.h"
+#include "Perplexity.h"
+#include "vocab.h"
+#include "Dictionary.h"
+#include "ttableDiff.hpp"
+#include "syncObj.h"
+
+extern int NumberOfVALIalignments; // model1::addAL(): pairs numbered <= this value feed the VALI counters, later pairs the TEST counters
+
+// Bundles the Perplexity and sentenceHandler objects shared with model1; holds only references and raw pointers to them.
+class report_info{
+ public:
+  Mutex alLock;  // taken by model1::addAL() while it updates the alignment-error counters
+  Perplexity& perp;
+  sentenceHandler& sHandler1;
+  Perplexity* testPerp;
+  sentenceHandler* testHandler;
+  Perplexity& trainViterbiPerp;
+  Perplexity* testViterbiPerp;
+  // Binds the references and stores the pointers as given; alLock is default-constructed.
+  report_info(Perplexity& _perp, sentenceHandler& _sHandler1,
+              Perplexity* _testPerp, sentenceHandler* _testHandler,
+              Perplexity& _trainViterbiPerp, Perplexity* _testViterbiPerp)
+    : perp(_perp), sHandler1(_sHandler1), testPerp(_testPerp), testHandler(_testHandler),
+      trainViterbiPerp(_trainViterbiPerp), testViterbiPerp(_testViterbiPerp)
+  {}
+
+  // A copy aliases the same objects but gets its own alLock; initializers follow declaration order.
+  report_info(const report_info& rp)
+    : perp(rp.perp), sHandler1(rp.sHandler1), testPerp(rp.testPerp), testHandler(rp.testHandler),
+      trainViterbiPerp(rp.trainViterbiPerp), testViterbiPerp(rp.testViterbiPerp)
+  {}
+};
+
+
+class model1 : public report_info{
+public:
+    string efFilename;
+    vcbList&  Elist ;
+    vcbList&  Flist ;
+    double eTotalWCount ; // size of source copus in number of words
+    double fTotalWCount ; // size of target corpus in number of words 
+    int noEnglishWords;
+    int noFrenchWords;
+    tmodel<COUNT, PROB>&tTable;
+    Vector<WordEntry>& evlist ;
+    Vector<WordEntry>& fvlist ;
+    int threadID;
+public:
+    int ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch;
+    int ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI;
+    int ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST;
+    model1 (const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
+	    sentenceHandler& _sHandler1,
+	    Perplexity* _testPerp,
+	    sentenceHandler* _testHandler,
+	    Perplexity& _trainViterbiPerp,
+	    Perplexity* _testViterbiPerp);
+    
+    model1 (const model1& m1, int _threadID=0);
+    void initialize_table_uniformly(sentenceHandler& sHandler1);
+    
+    int em_with_tricks(int noIterations, 
+        bool seedModel1, Dictionary& dictionary, bool useDict, bool dumpCount = false, 
+	    const char* dumpCountName = NULL, bool useString = false);
+    int em_thread(int noIterations, int thread,Dictionary& dictionary, bool useDict);
+    bool load_table(const char* tname);
+    void readVocabFile(const char* fname, Vector<WordEntry>& vlist, int& vsize, 
+    int& total);
+    inline Vector<WordEntry>& getEnglishVocabList(void)const {return Elist.getVocabList();};
+    inline Vector<WordEntry>& getFrenchVocabList(void)const  {return Flist.getVocabList();};
+    inline double getETotalWCount(void) const {return eTotalWCount;};
+    inline double getFTotalWCount(void) const {return fTotalWCount;};
+    inline int getNoEnglishWords(void) const  {return noEnglishWords;};
+    inline int getNoFrenchWords(void)  const {return noFrenchWords;};
+    inline tmodel<COUNT, PROB>& getTTable(void) {return tTable;};
+    inline string& getEFFilename(void) {return efFilename;};
+        
+////////////////////////////////////////////////////////////////
+// Added by Qin Gao To Enable Parallel Training
+////////////////////////////////////////////////////////////////
+
+    CTTableDiff<COUNT,PROB>* one_step_em(int it ,bool seedModel1, Dictionary& dictionary,
+                    bool useDict);
+    
+    void recombine();
+    
+    void combine_one(CTTableDiff<COUNT,PROB>* cb);
+    
+    void save_table(const char* tname);
+    
+    
+    
+    
+////////////////////////////////////////////////////////////////
+//      END OF QIN GAO's CODE
+////////////////////////////////////////////////////////////////
+private:
+    void em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict, 
+	       Perplexity& viterbiperp, bool=false);
+    void em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1, bool , const char*, Dictionary& dictionary, bool useDict, 
+	       Perplexity& viterbiperp, bool=false);
+    friend class model2;
+    friend class hmm;
+public:
+    void addAL(const Vector<WordIndex>& viterbi_alignment,int pair_no,int l){
+        alLock.lock();
+        if( pair_no<=int(ReferenceAlignment.size()) ){
+            //cerr << "AL: " << viterbi_alignment << " " << pair_no << endl;
+            ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
+            if( pair_no<=NumberOfVALIalignments ){
+                ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingVALI,ALtoomuchVALI,ALeventsMissingVALI,ALeventsToomuchVALI,pair_no);
+            }
+            if( pair_no>NumberOfVALIalignments ){
+                ErrorsInAlignment(ReferenceAlignment[pair_no-1],viterbi_alignment,l,ALmissingTEST,ALtoomuchTEST,ALeventsMissingTEST,ALeventsToomuchTEST,pair_no);
+            }
+        }
+        alLock.unlock();
+    }
+    void initAL(){ALmissingVALI=ALtoomuchVALI=ALeventsMissingVALI=ALeventsToomuchVALI=ALmissingTEST=ALtoomuchTEST=ALeventsMissingTEST=ALeventsToomuchTEST=ALmissing=ALtoomuch=ALeventsMissing=ALeventsToomuch=0;}
+    double errorsAL()const{
+        if( ALeventsMissingVALI+ALeventsToomuchVALI ){
+            return (ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI);
+        }else{
+            return 0.0;
+        }
+    }
+    void errorReportAL(ostream&out,string m)const{
+        if( ALeventsMissing+ALeventsToomuch ){
+            out << "alignmentErrors (" << m << "): " 
+            << 100.0*(ALmissing+ALtoomuch)/double(ALeventsMissing+ALeventsToomuch) 
+            << " recall: " << 100.0*(1.0-ALmissing/double(ALeventsMissing))
+            << " precision: " << 100.0*(1.0-ALtoomuch/double(ALeventsToomuch))
+            << " (missing:" << ALmissing << "/" << ALeventsMissing << " " << ALtoomuch 
+            << " " << ALeventsToomuch << ")\n";
+        }
+        if( ALeventsMissingVALI+ALeventsToomuchVALI ){
+            out << "alignmentErrors VALI (" << m << "): " 
+            << 100.0*(ALmissingVALI+ALtoomuchVALI)/double(ALeventsMissingVALI+ALeventsToomuchVALI) 
+            << " recall: " << 100.0*(1.0-ALmissingVALI/double(ALeventsMissingVALI))
+            << " precision: " << 100.0*(1.0-ALtoomuchVALI/double(ALeventsToomuchVALI))
+            << " (missing:" << ALmissingVALI << "/" << ALeventsMissingVALI << " " << ALtoomuchVALI 
+            << " " << ALeventsToomuchVALI << ")\n";
+        }
+        if( ALeventsMissingTEST+ALeventsToomuchTEST ){
+            out << "alignmentErrors TEST(" << m << "): " 
+            << 100.0*(ALmissingTEST+ALtoomuchTEST)/double(ALeventsMissingTEST+ALeventsToomuchTEST) 
+            << " recall: " << 100.0*(1.0-ALmissingTEST/double(ALeventsMissingTEST)) 
+            << " precision: " << 100.0*(1.0-ALtoomuchTEST/double(ALeventsToomuchTEST))
+            << " (missing:" << ALmissingTEST << "/" << ALeventsMissingTEST << " " << ALtoomuchTEST 
+            << " " << ALeventsToomuchTEST << ")\n";
+        }
+    }
+};
+
+#endif