github.com/moses-smt/mgiza.git
Diffstat (limited to 'mgizapp/src/model1.cpp'):
 -rw-r--r--  mgizapp/src/model1.cpp  601
 1 file changed, 601 insertions(+), 0 deletions(-)
diff --git a/mgizapp/src/model1.cpp b/mgizapp/src/model1.cpp
new file mode 100644
index 0000000..e649f8d
--- /dev/null
+++ b/mgizapp/src/model1.cpp
@@ -0,0 +1,601 @@
+/*
+
+EGYPT Toolkit for Statistical Machine Translation
+Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+USA.
+
+*/
+#include "model1.h"
+#include "Globals.h"
+#include "utility.h"
+#include "Parameter.h"
+
+extern short NoEmptyWord;
+extern int VerboseSentence;
+
+extern short NCPUS;
+
+GLOBAL_PARAMETER2(int,Model1_Dump_Freq,"MODEL 1 DUMP FREQUENCY","t1","dump frequency of Model 1",PARLEV_OUTPUT,0);
+int NumberOfVALIalignments=100;
+
+model1::model1(const char* efname, vcbList& evcblist, vcbList& fvcblist,tmodel<COUNT, PROB>&_tTable,Perplexity& _perp,
+ sentenceHandler& _sHandler1,
+ Perplexity* _testPerp,
+ sentenceHandler* _testHandler,
+ Perplexity& _trainViterbiPerp,
+ Perplexity* _testViterbiPerp):
+ report_info(_perp,_sHandler1,_testPerp,_testHandler,_trainViterbiPerp,_testViterbiPerp),
+ efFilename(efname), Elist(evcblist), Flist(fvcblist),
+ eTotalWCount(Elist.totalVocab()), fTotalWCount(Flist.totalVocab()),
+ noEnglishWords(Elist.size()), noFrenchWords(Flist.size()), tTable(_tTable),
+ evlist(Elist.getVocabList()), fvlist(Flist.getVocabList())
+{}
+
+model1::model1 (const model1& m1, int _threadID):
+report_info(m1),efFilename(m1.efFilename),
+Elist(m1.Elist),Flist(m1.Flist),eTotalWCount(m1.eTotalWCount),fTotalWCount(m1.fTotalWCount),
+noEnglishWords(m1.noEnglishWords),noFrenchWords(m1.noFrenchWords),tTable(m1.tTable),
+evlist(m1.evlist),fvlist(m1.fvlist)
+{}
+
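+// Seed the t-table with a per-sentence-pair uniform distribution: every
+// co-occurring pair (e_i, f_j) gets t(f_j | e_i) = 1/|es|, where |es|
+// includes the NULL source word at position 0; target position 0 is
+// skipped.  Model 1's likelihood has no local maxima besides the global
+// one, so the uniform seed is the standard choice and the exact value
+// matters little: normalizeTable() rescales after each iteration.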
+void model1::initialize_table_uniformly(sentenceHandler& sHandler1){
+ WordIndex i, j;
+
+ cout << "Initialize tTable\n";
+
+ sentPair sent ;
+ sHandler1.rewind();
+ while(sHandler1.getNextSentence(sent)){
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ PROB uniform = 1.0/es.size() ;
+ for( i=0; i < es.size(); i++)
+ for(j=1; j < fs.size(); j++)
+ tTable.insert(es[i],fs[j],0,uniform);
+ }
+}
+
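+// Per-thread work descriptor handed to pthread_create via exe_emloop:
+// the model1 instance to run, the iteration number, the worker index,
+// dictionary options, em_thread's return value, the pthread handle, and
+// the pthread_create status in 'valid'.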
+struct em_loop_t{
+ model1 *m1;
+ int it;
+ int nthread;
+ Dictionary *dict;
+ bool useDict;
+ int result;
+ pthread_t thread;
+ int valid ;
+};
+
+void* exe_emloop(void *arg){
+ em_loop_t* em =(em_loop_t *) arg;
+ em->result = em->m1->em_thread(em->it,em->nthread,*em->dict,em->useDict);
+ return arg;
+}
+
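+// Worker-thread body for one EM iteration.  All workers pull sentence
+// pairs from the shared sHandler1, so each pair is counted exactly once;
+// each worker writes its Viterbi alignments to its own ".partN" file,
+// where N is the single-digit thread index built in b[].  Note that
+// workers always call em_loop with seedModel1=false.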
+int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenceHandler& sHandler1, */
+ Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
+ Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */ )
+{
+ double minErrors=1.0;int minIter=0;
+ string modelName="Model1",shortModelName="1";
+ char b[2];
+ b[1] = '\0';
+  b[0] = '0' + nthread;  // single-digit worker suffix (assumes nthread <= 9)
+ time_t st, it_st, fn, it_fn;
+ string tfile, number, alignfile, test_alignfile;
+ int pair_no;
+ bool dump_files = false ;
+  st = time(NULL);
+  cout << "==========================================================\n";
+  cout << modelName << " Training Started at: "<< ctime(&st) << "\n";
+ int it = noIterations;
+ pair_no = 0 ;
+ it_st = time(NULL);
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
+// dump_files = true;
+ number = "";
+ int n = it;
+  do{  // render the iteration number as a decimal string
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ;
+ alignfile = alignfile + b;
+
+ em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ return minIter;
+}
+
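+// Iteration driver: for each of the noIterations EM passes it spawns
+// NCPUS-1 em_thread workers, runs em_loop itself as worker 0, joins
+// everyone, optionally dumps tables, and finally renormalizes the
+// accumulated counts into probabilities with tTable.normalizeTable.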
+int model1::em_with_tricks(int noIterations, /*Perplexity& perp, sentenceHandler& sHandler1, */
+ bool seedModel1, Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
+ Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */
+, bool dumpCount , const char* dumpCountName, bool useString) // if dumpCount is set, dump the raw count table during the last iteration, before normalization
+{
+ double minErrors=1.0;int minIter=0;
+ string modelName="Model1",shortModelName="1";
+ time_t st, it_st, fn, it_fn;
+ string tfile, number, alignfile, test_alignfile;
+ int pair_no;
+ bool dump_files = false ;
+ st = time(NULL);
+ sHandler1.rewind();
+ cout << "==========================================================\n";
+ cout << modelName << " Training Started at: "<< ctime(&st) << "\n";
+ for(int it = 1; it <= noIterations; it++){
+ pair_no = 0 ;
+ it_st = time(NULL);
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
+ //dump_files = true;
+ number = "";
+ int n = it;
+ do{
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A" + shortModelName + "." + number+".part0" ;
+ test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
+ initAL();
+ threadID = 0;
+ int th;
+ vector<em_loop_t> ths;
+ ths.resize(NCPUS);
+ sHandler1.rewind();
+ for (th=1;th<NCPUS;th++){
+ ths[th].m1=this;
+ ths[th].it = it;
+ ths[th].nthread = th;
+ ths[th].dict = & dictionary;
+ ths[th].useDict = useDict;
+ ths[th].result = 0;
+ ths[th].valid = pthread_create(&(ths[th].thread),NULL,exe_emloop,&(ths[th]));
+ if(ths[th].valid){
+ cerr << "Error starting thread " << th << endl;
+ }
+ }
+ em_loop(it,perp, sHandler1, seedModel1, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ perp.record("Model1");
+ trainViterbiPerp.record("Model1");
+ errorReportAL(cout, "IBM-1");
+
+ cerr << "Main thread done, waiting" << endl;;
+ for (th=1;th<NCPUS;th++){
+ pthread_join((ths[th].thread),NULL);
+ cerr << "Thread " << th << "done" << endl;
+ }
+ if (testPerp && testHandler) // calculate test perplexity
+ em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ //if (dump_files){
+ // if( OutputInAachenFormat==1 )
+ // tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+ //}
+ cerr << "Normalizing T " << endl;
+
+    /**
+     If dumping the count table was requested, dump it after the final
+     iteration's counts are collected (before normalization).
+     */
+ if(dumpCount && it == noIterations){
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ }
+
+ tTable.normalizeTable(Elist, Flist);
+ //cout << tTable.getProb(2,2) << endl;
+ cerr << " DONE Normalizing " << endl;
+ cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
+ << " PERPLEXITY " << (*testPerp).perplexity()
+ << '\n';
+ cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<
+ it<<") VITERBI TEST CROSS-ENTROPY "
+ << (*testViterbiPerp).cross_entropy()
+ << " PERPLEXITY " << (*testViterbiPerp).perplexity()
+ << '\n';
+ if (dump_files){
+ if( OutputInAachenFormat==0 )
+ tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),
+ Flist.getVocabList(),OutputInAachenFormat);
+ }
+ it_fn = time(NULL);
+ cout << "Model 1 Iteration: " << it<< " took: " << difftime(it_fn, it_st) << " seconds\n";
+
+
+ }
+ fn = time(NULL) ;
+ cout << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ return minIter;
+}
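+// A minimal sketch of how a trainer might drive this class (hypothetical
+// wiring; the real call sites live in the mgiza training binary):
+//
+//   model1 m1(corpusFile, eList, fList, tTable, trainPerp, corpus,
+//             &testPerp, testHandler, trainViterbiPerp, &testViterbiPerp);
+//   m1.initialize_table_uniformly(corpus);
+//   m1.em_with_tricks(Model1_Iterations, /*seedModel1=*/false, *dict,
+//                     useDict, /*dumpCount=*/false, "", /*useString=*/false);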
+
+bool model1::load_table(const char* tname){
+ /* This function loads the t table from the given file; use it
+ when you want to load results from previous t training
+ without doing any new training.
+ NAS, 7/11/99
+ */
+ cout << "Model1: loading t table \n" ;
+ return tTable.readProbTable(tname);
+}
+
+
+extern float MINCOUNTINCREASE;
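+// The loop below is the standard IBM Model 1 E-step.  For each target
+// position j:
+//   denom = sum_{i=0..l} t(f_j | e_i)        (e_0 is the NULL word)
+//   p(a_j = i | f, e) = t(f_j | e_i) / denom
+// and each pair (e_i, f_j) receives the fractional count
+//   x = e * val = t(f_j | e_i) * so / denom,
+// where so is the sentence-pair weight.  The sentence contributes
+// sum_j log(denom) to the log-likelihood; the constant alignment prior
+// m*log(l+1) is subtracted when calling perp.addFactor below.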
+void model1::em_loop(int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
+ bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test)
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS)
+ of2.open(alignfile);
+ PROB uniform = 1.0/noFrenchWords ;
+ sentPair sent ;
+
+ while(sHandler1.getNextSentence(sent)){
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+ double viterbi_score = 1 ;
+
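+    // Dictionary indicators, built only on iteration 1 when a seed
+    // dictionary is used: eindict[i]/findict[j] mark words listed in the
+    // dictionary with *some* partner; indict[j][i] marks listed pairs.
+    // They feed the counting constraint documented further down.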
+ bool eindict[l + 1];
+ bool findict[m + 1];
+ bool indict[m + 1][l + 1];
+ if(it == 1 && useDict){
+ for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
+ for(unsigned int dummy = 0; dummy <= m; dummy++){
+ findict[dummy] = false;
+ for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
+ indict[dummy][dummy2] = false;
+ }
+ for(j = 0; j <= m; j++)
+ for(i = 0; i <= l; i++)
+ if(dict.indict(fs[j], es[i])){
+ eindict[i] = findict[j] = indict[j][i] = true;
+ }
+ }
+
+ for(j=1; j <= m; j++){
+ // entries that map fs to all possible ei in this sentence.
+ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
+ LpPair<COUNT,PROB> **sPtrCachePtr;
+
+ PROB denom = 0.0;
+      WordIndex best_i = 0 ; // i for which fj is best mapped to ei
+ PROB word_best_score = 0 ; // score for the best mapping of fj
+ if (it == 1 && !seedModel1){
+ denom = uniform * es.size() ;
+ word_best_score = uniform ;
+ }
+ else
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
+ PROB e(0.0) ;
+ (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
+ if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ denom += e ;
+ if (e > word_best_score){
+ word_best_score = e ;
+ best_i = i ;
+ }
+ }
+ viterbi_alignment[j] = best_i ;
+ viterbi_score *= word_best_score ; /// denom ;
+ if (denom == 0){
+ if (test)
+ cerr << "WARNING: denom is zero (TEST)\n";
+ else
+ cerr << "WARNING: denom is zero (TRAIN)\n";
+ }
+ cross_entropy += log(denom) ;
+ if (!test){
+ if(denom > 0){
+ COUNT val = COUNT(so) / (COUNT) double(denom) ;
+	    /* this block implements a constraint on counting:
+	       count(es[i], fs[j]) is incremented if and only if
+	       es[i] and fs[j] occur together in the dictionary,
+	       OR
+	       es[i] does not occur in the dictionary with any fs[x] and
+	       fs[j] does not occur in the dictionary with any es[y]
+	    */
+ if(it == 1 && useDict){
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
+ if(indict[j][i] || (!findict[j] && !eindict[i])){
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ COUNT x=e*val;
+ if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE )
+ /* if ((*sPtrCachePtr) != 0)
+ (*((*sPtrCachePtr))).count += x;
+ else */
+ tTable.incCount(es[i], fs[j], x);
+ } /* end of if */
+ } /* end of for i */
+ } /* end of it == 1 */
+	  // standard counting path (no dictionary constraint):
+ else{
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
+ //for(i=0; i <= l; i++) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ //if( !(i==0) )
+ //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
+ COUNT x=e*val;
+ if( pair_no==VerboseSentence )
+ cout << i << "(" << evlist[es[i]].word << ")," << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
+ if( (it==1 && !seedModel1)||x>MINCOUNTINCREASE ){
+ /*if( NoEmptyWord==0 || i!=0 )
+ if ((*sPtrCachePtr) != 0)
+ (*((*sPtrCachePtr))).count += x;
+ else */
+ //cerr << i << " " << j << " (+) " << endl;
+ //cerr.flush();
+ //cerr << es[i] << " " << fs[j] << " (=) "<< endl;
+ //cerr.flush();
+ tTable.incCount(es[i], fs[j], x);
+ //cerr << es[i] << " " << fs[j] << " (-) "<< endl;
+ //cerr.flush();
+ }
+ } /* end of for i */
+ } // end of else
+ } // end of if (denom > 0)
+ }// if (!test)
+ } // end of for (j) ;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
+ perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
+ if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
+ printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
+ addAL(viterbi_alignment,sent.sentenceNo,l);
+ pair_no++;
+ } /* of while */
+}
+
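+// Single-iteration variant: runs one EM pass and returns a CTTableDiff
+// recording every count increment applied to tTable, so that counts
+// collected elsewhere (e.g. in another process) can be exchanged and
+// folded back in via combine_one() and renormalized via recombine().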
+CTTableDiff<COUNT,PROB>* model1::one_step_em(int it, bool seedModel1,
+ Dictionary& dictionary, bool useDict){
+ CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
+ double minErrors=1.0;int minIter=0;
+ string modelName="Model1",shortModelName="1";
+ time_t st, it_st, fn, it_fn;
+ string tfile, number, alignfile, test_alignfile;
+ int pair_no;
+ bool dump_files = false ;
+ st = time(NULL);
+ sHandler1.rewind();
+ cout << "==========================================================\n";
+ cout << modelName << " Training Started at: "<< ctime(&st) << "\n";
+ pair_no = 0 ;
+ it_st = time(NULL);
+ cout << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (Model1_Dump_Freq != 0) && ((it % Model1_Dump_Freq) == 0) && !NODUMPS ;
+ number = "";
+ int n = it;
+ do{
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A1" ;
+ test_alignfile = Prefix +".tst.A" + shortModelName + "." + number ;
+ initAL();
+ em_loop_1(diff,it,perp, sHandler1, seedModel1,
+ dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
+ //if (testPerp && testHandler) // calculate test perplexity
+ // em_loop(it,*testPerp, *testHandler, seedModel1, dump_files, test_alignfile.c_str(), dictionary, useDict, *testViterbiPerp, true);
+ if( errorsAL()<minErrors ){
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ fn = time(NULL) ;
+ cout << "Partial " << modelName << " Training took: " << difftime(fn, it_st) << " seconds\n";
+ return diff;
+ }
+
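+// Fold a count diff produced by one_step_em (possibly in another
+// process) into the shared t-table; call recombine() once all diffs
+// have been applied to turn the merged counts back into probabilities.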
+ void model1::combine_one(CTTableDiff<COUNT,PROB>* cb){
+ cb->AugmentTTable(tTable);
+ }
+
+ void model1::recombine(){
+ tTable.normalizeTable(Elist, Flist);
+ }
+
+ void model1::save_table(const char* tname){
+/* if (dump_files){
+ * if( OutputInAachenFormat==0 )
+ * tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
+ */
+
+ }
+
+
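+// em_loop_1 mirrors em_loop with one difference: every count increment
+// applied to tTable is also recorded in *diff via diff->incCount, so
+// one_step_em can ship the increments elsewhere.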
+void model1::em_loop_1(CTTableDiff<COUNT,PROB> *diff,int it,Perplexity& perp, sentenceHandler& sHandler1, bool seedModel1,
+ bool dump_alignment, const char* alignfile, Dictionary& dict, bool useDict, Perplexity& viterbi_perp, bool test) {
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS)
+ of2.open(alignfile);
+ PROB uniform = 1.0/noFrenchWords ;
+ sentPair sent ;
+ sHandler1.rewind();
+ while(sHandler1.getNextSentence(sent)){
+ Vector<WordIndex>& es = sent.eSent;
+ Vector<WordIndex>& fs = sent.fSent;
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+ double viterbi_score = 1 ;
+
+ bool eindict[l + 1];
+ bool findict[m + 1];
+ bool indict[m + 1][l + 1];
+ if(it == 1 && useDict){
+ for(unsigned int dummy = 0; dummy <= l; dummy++) eindict[dummy] = false;
+ for(unsigned int dummy = 0; dummy <= m; dummy++){
+ findict[dummy] = false;
+ for(unsigned int dummy2 = 0; dummy2 <= l; dummy2++)
+ indict[dummy][dummy2] = false;
+ }
+ for(j = 0; j <= m; j++)
+ for(i = 0; i <= l; i++)
+ if(dict.indict(fs[j], es[i])){
+ eindict[i] = findict[j] = indict[j][i] = true;
+ }
+ }
+
+ for(j=1; j <= m; j++){
+ // entries that map fs to all possible ei in this sentence.
+ Vector<LpPair<COUNT,PROB> *> sPtrCache(es.size(),0); // cache pointers to table
+ //Vector<COUNT *> sPtrCacheDif(es.size(),0); // cache pointers to table
+ LpPair<COUNT,PROB> **sPtrCachePtr;
+ //COUNT **sPtrCachePtrDif;
+
+ PROB denom = 0.0;
+      WordIndex best_i = 0 ; // i for which fj is best mapped to ei
+ PROB word_best_score = 0 ; // score for the best mapping of fj
+ if (it == 1 && !seedModel1){
+ denom = uniform * es.size() ;
+ word_best_score = uniform ;
+ }
+ else {
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
+ PROB e(0.0) ;
+ (*sPtrCachePtr) = tTable.getPtr(es[i], fs[j]) ;
+ //(*sPtrCachePtrDif) = diff->GetPtr(es[i], fs[j]) ;
+ if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ denom += e ;
+ if (e > word_best_score){
+ word_best_score = e ;
+ best_i = i ;
+ }
+ }
+ }
+ viterbi_alignment[j] = best_i ;
+ viterbi_score *= word_best_score ; /// denom ;
+ if (denom == 0){
+ if (test)
+ cerr << "WARNING: denom is zero (TEST)\n";
+ else
+ cerr << "WARNING: denom is zero (TRAIN)\n";
+ }
+ cross_entropy += log(denom) ;
+ if (!test){
+ if(denom > 0){
+ COUNT val = COUNT(so) / (COUNT) double(denom) ;
+	  /* this block implements a constraint on counting:
+	     count(es[i], fs[j]) is incremented if and only if
+	     es[i] and fs[j] occur together in the dictionary,
+	     OR
+	     es[i] does not occur in the dictionary with any fs[x] and
+	     fs[j] does not occur in the dictionary with any es[y]
+	  */
+ if(it == 1 && useDict){
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]);
+ i <= l; i++,sPtrCachePtr++){
+ if(indict[j][i] || (!findict[j] && !eindict[i])){
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ COUNT x=e*val;
+ if( it==1||x>MINCOUNTINCREASE ){
+ /*if ((*sPtrCachePtr) != 0){
+ (*((*sPtrCachePtr))).count += x;
+ } else {*/
+ tTable.incCount(es[i], fs[j], x);
+ //}
+ diff->incCount(es[i], fs[j], x);
+ }
+ } /* end of if */
+ } /* end of for i */
+ } /* end of it == 1 */
+	  // standard counting path (no dictionary constraint):
+ else{
+ for((i=0),(sPtrCachePtr=&sPtrCache[0]); i <= l; i++,sPtrCachePtr++){
+ //for(i=0; i <= l; i++) {
+ PROB e(0.0) ;
+ if (it == 1 && !seedModel1)
+ e = uniform ;
+ else if ((*sPtrCachePtr) != 0 && (*((*sPtrCachePtr))).prob > PROB_SMOOTH)
+ e = (*((*sPtrCachePtr))).prob;
+ else e = PROB_SMOOTH ;
+ //if( !(i==0) )
+ //cout << "COUNT(e): " << e << " " << MINCOUNTINCREASE << endl;
+ COUNT x=e*val;
+ if( pair_no==VerboseSentence )
+ cout << i << "(" << evlist[es[i]].word << "),"
+ << j << "(" << fvlist[fs[j]].word << ")=" << x << endl;
+ if( it==1||x>MINCOUNTINCREASE )
+	    if( NoEmptyWord==0 || i!=0 ){
+ /*if ((*sPtrCachePtr) != 0){
+ (*((*sPtrCachePtr))).count += x;
+ } else */
+ tTable.incCount(es[i], fs[j], x);
+ diff->incCount(es[i], fs[j], x);
+ }
+ } /* end of for i */
+ } // end of else
+ } // end of if (denom > 0)
+ }// if (!test)
+ } // end of for (j) ;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cerr << sent << "CE: " << cross_entropy << " " << so << endl;
+ perp.addFactor(cross_entropy-m*log(l+1.0), so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)-m*log(l+1.0), so, l, m,1);
+ if (dump_alignment||(FEWDUMPS&&sent.sentenceNo<1000))
+ printAlignToFile(es, fs, evlist, fvlist, of2, viterbi_alignment, sent.sentenceNo, viterbi_score);
+ addAL(viterbi_alignment,sent.sentenceNo,l);
+ pair_no++;
+ } /* of while */
+ sHandler1.rewind();
+ perp.record("Model1");
+ viterbi_perp.record("Model1");
+ errorReportAL(cout, "IBM-1");
+ }