Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mgizapp/src/hmm.cpp')
-rw-r--r--mgizapp/src/hmm.cpp1903
1 file changed, 961 insertions, 942 deletions
diff --git a/mgizapp/src/hmm.cpp b/mgizapp/src/hmm.cpp
index 7ae635b..8e28287 100644
--- a/mgizapp/src/hmm.cpp
+++ b/mgizapp/src/hmm.cpp
@@ -9,21 +9,21 @@ modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
-This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
-#include "hmm.h"
+#include "hmm.h"
#include "Globals.h"
#include "utility.h"
-#include "HMMTables.h"
+#include "HMMTables.h"
#include "ForwardBackward.h"
#include "Parameter.h"
#include <iostream>
@@ -38,34 +38,34 @@ short UniformEntryExit=3;
short HMMTrainingSpecialFlags=0;
GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,
- "HMM DUMP FREQUENCY","th",
- "dump frequency of HMM",
- PARLEV_OUTPUT,0);
+ "HMM DUMP FREQUENCY","th",
+ "dump frequency of HMM",
+ PARLEV_OUTPUT,0);
GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
- "lextrain: dependencies in the HMM alignment model. "
- " &1: sentence length; &2: previous class; &4: previous position; "
- " &8: French position; &16: French class"
- ,PARLEV_MODELS,2);
+ "lextrain: dependencies in the HMM alignment model. "
+ " &1: sentence length; &2: previous class; &4: previous position; "
+ " &8: French position; &16: French class"
+ ,PARLEV_MODELS,2);
GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,
- "emProbForEmpty","f-b-trn: probability for empty word",
- PARLEV_MODELS,0.4);
+ "emProbForEmpty","f-b-trn: probability for empty word",
+ PARLEV_MODELS,0.4);
GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
- "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
- PARLEV_SPECIAL,2);
+ "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
+ PARLEV_SPECIAL,2);
GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
- "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
- PARLEV_SMOOTH,0.2);
+ "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
+ PARLEV_SMOOTH,0.2);
/*template<class T>
void smooth_standard(T*a,T*b,double p)
{
int n=b-a;
- if( n==0 )
+ if( n==0 )
return;
double pp=p/n;
for(T*i=a;i!=b;++i)
@@ -74,170 +74,174 @@ void smooth_standard(T*a,T*b,double p)
hmm::hmm(model2&m2,WordClasses &e, WordClasses& f)
-: ewordclasses(e), fwordclasses(f),model2(m2),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
-probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
-{
+ : ewordclasses(e), fwordclasses(f),model2(m2),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
+ probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
+{
}
-void hmm::initialize_table_uniformly(sentenceHandler&){}
+void hmm::initialize_table_uniformly(sentenceHandler&) {}
-struct hmm_em_loop_t{
- hmm *m;
- int done;
- int valid;
- string alignfile;
- int it;
- bool dump_files;
- bool resume;
- pthread_t thread;
- hmm_em_loop_t():m(0),done(0),valid(0){};
+struct hmm_em_loop_t {
+ hmm *m;
+ int done;
+ int valid;
+ string alignfile;
+ int it;
+ bool dump_files;
+ bool resume;
+ pthread_t thread;
+ hmm_em_loop_t():m(0),done(0),valid(0) {};
};
-
-void* hmm_exe_emloop(void *arg){
- hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
- em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
- em->done = -1;
- return arg;
-}
-
-void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume){
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
+
+void* hmm_exe_emloop(void *arg)
+{
+ hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
+ em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
+ em->done = -1;
+ return arg;
+}
+
+void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume)
+{
+ em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
}
extern short NCPUS;
-int hmm::em_with_tricks(int noIterations,bool dumpCount,
- const char* dumpCountName, bool useString ,bool resume){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
+int hmm::em_with_tricks(int noIterations,bool dumpCount,
+ const char* dumpCountName, bool useString ,bool resume)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ bool dump_files = false ;
+ ofstream of2 ;
+ st = time(NULL) ;
+ sHandler1.rewind();
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+ vector<hmm_em_loop_t> th;
+ th.resize(NCPUS);
+ for(int it=1; it <= noIterations ; it++) {
+ it_st = time(NULL) ;
+ cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;
+
+ cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
+ //dump_files = true;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ // acfile = Prefix + ".ac" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".A" + shortModelName + "." + number ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- vector<hmm_em_loop_t> th;
- th.resize(NCPUS);
- for(int it=1; it <= noIterations ; it++){
- it_st = time(NULL) ;
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;
-
- cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
- //dump_files = true;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- // acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- sHandler1.rewind();
- int k;
- char node[2] ;
- node[1] = '\0';
- for (k=1 ; k< NCPUS ; k++){
- th[k].m = this;
- th[k].done = 0;
- th[k].valid = 0;
- th[k].it = it;
- th[k].resume = resume;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
- th[k].dump_files = dump_files;
- th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
- if(th[k].valid){
- cerr << "Error starting thread " << k << endl;
- }
- }
- node[0] = '0';
- alignfile += ".part";
- alignfile += node;
- em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
- for (k=1;k<NCPUS;k++){
- pthread_join((th[k].thread),NULL);
- cerr << "Thread " << k << "done" << endl;
- }
- perp.record("HMM");
- trainViterbiPerp.record("HMM");
- errorReportAL(cout,"HMM");
-
- sHandler1.rewind();
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- if (testPerp && testHandler){
- testHandler->rewind();
- em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1 && (!resume),it);
- testHandler->rewind();
- }
- if (dump_files&&OutputInAachenFormat==1)
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
-
- if(dumpCount && it == noIterations){
- string realTableName = dumpCountName;
- realTableName += ".t.count";
- tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
- string realATableName = dumpCountName;
- realATableName += ".a.count";
- aCountTable.printRealTable(realATableName.c_str());
- string realHTableName = dumpCountName;
- realHTableName += ".h.count";
- string fnamealpha = realHTableName;
- string fnamebeta = realHTableName;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
-
- }
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- probs=counts;
- cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity()
- << '\n';
- cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << testViterbiPerp->perplexity()
- << '\n';
- if (dump_files){
- if( OutputInAachenFormat==0)
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- string fnamealpha = afileh;
- string fnamebeta = afileh;
- fnamealpha += ".alpha";
- fnamebeta += ".beta";
- probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
+ int k;
+ char node[2] ;
+ node[1] = '\0';
+ for (k=1 ; k< NCPUS ; k++) {
+ th[k].m = this;
+ th[k].done = 0;
+ th[k].valid = 0;
+ th[k].it = it;
+ th[k].resume = resume;
+ th[k].alignfile = alignfile + ".part";
+ node[0] = '0' + k;
+ th[k].alignfile += node;
+ th[k].dump_files = dump_files;
+ th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
+ if(th[k].valid) {
+ cerr << "Error starting thread " << k << endl;
+ }
+ }
+ node[0] = '0';
+ alignfile += ".part";
+ alignfile += node;
+ em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
+ for (k=1; k<NCPUS; k++) {
+ pthread_join((th[k].thread),NULL);
+ cerr << "Thread " << k << "done" << endl;
+ }
+ perp.record("HMM");
+ trainViterbiPerp.record("HMM");
+ errorReportAL(cout,"HMM");
+
+ sHandler1.rewind();
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ if (testPerp && testHandler) {
+ testHandler->rewind();
+ em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1 && (!resume),it);
+ testHandler->rewind();
+ }
+ if (dump_files&&OutputInAachenFormat==1)
+ tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
+
+ if(dumpCount && it == noIterations) {
+ string realTableName = dumpCountName;
+ realTableName += ".t.count";
+ tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
+ string realATableName = dumpCountName;
+ realATableName += ".a.count";
+ aCountTable.printRealTable(realATableName.c_str());
+ string realHTableName = dumpCountName;
+ realHTableName += ".h.count";
+ string fnamealpha = realHTableName;
+ string fnamebeta = realHTableName;
+ fnamealpha += ".alpha";
+ fnamebeta += ".beta";
+ counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
+
+ }
+ tTable.normalizeTable(Elist, Flist);
+ aCountTable.normalize(aTable);
+ probs=counts;
+ cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
+ << " PERPLEXITY " << perp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
+ << " PERPLEXITY " << (*testPerp).perplexity()
+ << '\n';
+ cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
+ if (testPerp && testHandler)
+ cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
+ << " PERPLEXITY " << testViterbiPerp->perplexity()
+ << '\n';
+ if (dump_files) {
+ if( OutputInAachenFormat==0)
+ tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
+ // ofstream afilestream(afileh.c_str());
+ string fnamealpha = afileh;
+ string fnamebeta = afileh;
+ fnamealpha += ".alpha";
+ fnamebeta += ".beta";
+ probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
// aCountTable.printTable(acfile.c_str());
- aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
- } // end of iterations
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return minIter;
+ aTable.printTable(afile.c_str());
+ }
+ it_fn = time(NULL) ;
+ cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
+ difftime(it_fn, it_st) << " seconds\n";
+ } // end of iterations
+ fn = time(NULL) ;
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ //cout << "tTable contains " << tTable.getHash().bucket_count()
+ // << " buckets and " << tTable.getHash().size() << " entries." ;
+ cout << "==========================================================\n";
+ return minIter;
}
/*template<class T>
@@ -258,627 +262,638 @@ T normalize_if_possible_with_increment(T*a,T*b,int increment)
return sum;
}*/
-void hmm::load_table(const char* aname){
- cout << "Hmm: loading a table not implemented.\n";
- abort();
- ifstream anamefile(aname);
- probs.readJumps(anamefile);
+void hmm::load_table(const char* aname)
+{
+ cout << "Hmm: loading a table not implemented.\n";
+ abort();
+ ifstream anamefile(aname);
+ probs.readJumps(anamefile);
}
HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const
{
- unsigned int i,j;
- unsigned int l = es.size() - 1;
- unsigned int m = fs.size() - 1;
- unsigned int I=2*l,J=m;
- int IJ=I*J;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net = new HMMNetwork(I,J);
- fill(net->alphainit.begin(),net->alphainit.end(),0.0);
- fill(net->betainit.begin(),net->betainit.end(),0.0);
- for(j=1;j<=m;j++){
- for(i=1;i<=l;i++){
- // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
- net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
- }
- double emptyContribution=0;
- emptyContribution=tTable.getProb(es[0],fs[j]) ;
- for(i=1;i<=l;i++)
- net->n(i+l-1,j-1)=emptyContribution;
- net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
+ unsigned int i,j;
+ unsigned int l = es.size() - 1;
+ unsigned int m = fs.size() - 1;
+ unsigned int I=2*l,J=m;
+ int IJ=I*J;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ HMMNetwork *net = new HMMNetwork(I,J);
+ fill(net->alphainit.begin(),net->alphainit.end(),0.0);
+ fill(net->betainit.begin(),net->betainit.end(),0.0);
+ for(j=1; j<=m; j++) {
+ for(i=1; i<=l; i++) {
+ // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
+ net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
}
- if( DependencyOfJ )
- net->e.resize(m-1);
- else
- net->e.resize(J>1);
- for(j=0;j<net->e.size();j++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
- net->e[j].resize(I,I,0);
- for(unsigned int i1=0;i1<I;++i1) {
- Array<double> al(l);
- CLASSIFY2(i1,i1real);
- for(unsigned int i2=0;i2<l;i2++)
- al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
- ,j+1);
- normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
- if( SmoothHMM&2 )
- smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
- for(unsigned int i2=0;i2<I;i2++) {
- CLASSIFY(i2,empty_i2,i2real);
- net->e[j](i1,i2) = al[i2real];
-
- if( empty_i2 )
- if(i1real!=i2real) {
- net->e[j](i1,i2)=0;
- } else{
- net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
- }
- }
- normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
- }
+ double emptyContribution=0;
+ emptyContribution=tTable.getProb(es[0],fs[j]) ;
+ for(i=1; i<=l; i++)
+ net->n(i+l-1,j-1)=emptyContribution;
+ net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
+ }
+ if( DependencyOfJ )
+ net->e.resize(m-1);
+ else
+ net->e.resize(J>1);
+ for(j=0; j<net->e.size(); j++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
+ net->e[j].resize(I,I,0);
+ for(unsigned int i1=0; i1<I; ++i1) {
+ Array<double> al(l);
+ CLASSIFY2(i1,i1real);
+ for(unsigned int i2=0; i2<l; i2++)
+ al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
+ ,j+1);
+ normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
+ if( SmoothHMM&2 )
+ smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
+ for(unsigned int i2=0; i2<I; i2++) {
+ CLASSIFY(i2,empty_i2,i2real);
+ net->e[j](i1,i2) = al[i2real];
+
+ if( empty_i2 )
+ if(i1real!=i2real) {
+ net->e[j](i1,i2)=0;
+ } else {
+ net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
+ }
+ }
+ normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
}
- if( doInit ){
- for(unsigned int i=0;i<I;++i)
- {
- net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
- net->betainit[i]=1.0;
- }
- }else{
- if( DependencyOfPrevAJ==0 ){
- for(i=0;i<I;i++){
- CLASSIFY2(i,ireal);
- net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
- }
- }else{
- if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
- if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
- }
+ }
+ if( doInit ) {
+ for(unsigned int i=0; i<I; ++i) {
+ net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
+ net->betainit[i]=1.0;
+ }
+ } else {
+ if( DependencyOfPrevAJ==0 ) {
+ for(i=0; i<I; i++) {
+ CLASSIFY2(i,ireal);
+ net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
+ }
+ } else {
+ if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
+ if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
}
- massert( net->alphainit.size()==I );massert( net->betainit.size()==I );
- normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
- normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
- transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
- return net;
+ }
+ massert( net->alphainit.size()==I );
+ massert( net->betainit.size()==I );
+ normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
+ normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
+ transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
+ return net;
}
extern float MINCOUNTINCREASE;
-void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
-
- while(sHandler1.getNextSentence(sent)){
- const Vector<WordIndex>& es = sent.get_eSent();// #
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());// #
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
-
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
-
- if( !test ){
+void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int
+ )
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+
+ while(sHandler1.getNextSentence(sent)) {
+ const Vector<WordIndex>& es = sent.get_eSent();// #
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());// #
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+
+ if( !test ) {
#ifdef WIN32
- double *gp=const_cast<double*>(&(gamma[0]));
+ double *gp=const_cast<double*>(&(gamma[0]));
#else
- double *gp=conv<double>(gamma.begin());
+ double *gp=conv<double>(gamma.begin());
#endif
-
- for(unsigned int i2=0;i2<J;i2++)
- for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.addValue(0,i2+1,l,m,add);
- //aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.addValue(1+i1,1+i2,l,m,add);
- //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
+
+ for(unsigned int i2=0; i2<J; i2++)
+ for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.addValue(0,i2+1,l,m,add);
+ //aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.addValue(1+i1,1+i2,l,m,add);
+ //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty )
- p0c+=*ep * mult;
- else{
- int v = ewordclasses.getClass(es[1+i_befreal]);
- //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
- counts.addAlCount(i_befreal,ireal,l,m,v,
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
+ }
+ }
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ double *ep=epsilon[jj].begin();
+ if( ep ) {
+ //for(i=0;i<I;i++)
+ // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
+ // for(i=0;i<I*I;++i)
+ // ep[i] *= I;
+ //if( DependencyOfJ )
+ // if( J-1 )
+ // for(i=0;i<I*I;++i)
+ // ep[i] /= (J-1);
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ for(i=0; i<I; i++) {
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ CLASSIFY(i,i_empty,ireal);
+ CLASSIFY2(i_bef,i_befreal);
+ if( i_empty )
+ p0c+=*ep * mult;
+ else {
+ int v = ewordclasses.getClass(es[1+i_befreal]);
+ //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
+ counts.addAlCount(i_befreal,ireal,l,m,v,
+ frenchClass ,jj+1,*ep * mult,0.0);
+ np0c+=*ep * mult;
+ }
+ massert( &epsilon[jj](i,i_bef)== ep);
}
+ }
+ }
+ }
#ifdef WIN32
- double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
+ double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
#else
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
#endif
- hmmentry_type&ai0=counts.doGetAlphaInit(I);
- Array<double>&ai = ai0.first;
- hmmentry_type&bi0=counts.doGetBetaInit(I);
- Array<double>&bi = bi0.first;
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ hmmentry_type&ai0=counts.doGetAlphaInit(I);
+ Array<double>&ai = ai0.first;
+ hmmentry_type&bi0=counts.doGetBetaInit(I);
+ Array<double>&bi = bi0.first;
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
#ifdef WIN32
- ai0.second->lock();
+ ai0.second->lock();
#else
- ai0.second.lock();
+ ai0.second.lock();
#endif
- for(i=0;i<I;i++,gp1++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- //bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
- }
+ for(i=0; i<I; i++,gp1++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ //bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
+ }
+ }
#ifdef WIN32
- ai0.second->unlock();
- bi0.second->lock();
+ ai0.second->unlock();
+ bi0.second->lock();
#else
- ai0.second.unlock();
- bi0.second.lock();
+ ai0.second.unlock();
+ bi0.second.lock();
#endif
- for(i=0;i<I;i++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- bi[i]+= *gp2;
- }
-#ifdef WIN32
- bi0.second->unlock();
-#else
- bi0.second.unlock();
-#endif
-
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
- }
+ for(i=0; i<I; i++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ bi[i]+= *gp2;
+ }
+#ifdef WIN32
+ bi0.second->unlock();
+#else
+ bi0.second.unlock();
+#endif
+
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+
+ delete net;
+ net=0;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+
+ pair_no++;
+ } /* of while */
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
- }
- sHandler1.setProbOfSentence(sent,cross_entropy);
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
-
- delete net;net=0;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
-
- pair_no++;
- } /* of while */
-
}
-void hmm::clearCountTable(){counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);}
+void hmm::clearCountTable()
+{
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+}
#if 0
-CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int
-){
- CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
- //diff->incCount(1,1,0);
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- nnn ++;
- cout << nnn << endl;
- cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
- cout << 2 << endl;
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- cout << 2.5 << endl;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- cout << 3 << endl;
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- cout << 4 << endl;
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- diff->incCount(es[0],fs[1+i2],add);
- //tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- diff->incCount(es[1+i1],fs[1+i2],add);
- //tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- cout << 5 << endl;
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- if (nnn==7779) cout << 1 << endl;
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- if (nnn==7779) cout << 2 << endl;
- double *ep=epsilon[jj].begin();
- if (nnn==7779) cout << 3 << endl;
- if( ep ){
- //for(i=0;i<I;i++)
- // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
- // for(i=0;i<I*I;++i)
- // ep[i] *= I;
- //if( DependencyOfJ )
- // if( J-1 )
- // for(i=0;i<I*I;++i)
- // ep[i] /= (J-1);
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- if (nnn==7779) cout << 4 << ":" << I << endl;
- for(i=0;i<I;i++){
- if (nnn==7779) cout << "i:" << i << endl;
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- if (nnn==7779) cout << " CL 1" << endl;
- CLASSIFY(i,i_empty,ireal);
- if (nnn==7779) cout << " CL 2 : " << i_bef << " " << (size_t)ep << endl;
- CLASSIFY2(i_bef,i_befreal);
- if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()){
- continue;
- }
- if( i_empty )
- p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
- else{
- if (nnn==7779) cout << "ELSE" << endl;
- if (nnn==7779){
- cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
- np0c+=epsilon[jj](i,i_bef)*mult;
- cout <<"..."<<endl;
- cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
- cout <<"......"<<endl;
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,0,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- else{
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
- np0c+=epsilon[jj](i,i_bef)*mult;
- }
- }
- if (nnn==7779) cout << "FI" << endl;
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- if (nnn==7779) cout << 5 << endl;
- }
+CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int
+ )
+{
+ CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
+ //diff->incCount(1,1,0);
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+ sHandler1.rewind();
+ int nnn = 0;
+ while(sHandler1.getNextSentence(sent)) {
+ nnn ++;
+ cout << nnn << endl;
+ cout << 1 << endl;
+ const Vector<WordIndex>& es = sent.get_eSent();
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+ cout << 2 << endl;
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ cout << 2.5 << endl;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+ cout << 3 << endl;
+ if( !test ) {
+ double *gp=conv<double>(gamma.begin());
+ cout << 4 << endl;
+ for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ diff->incCount(es[0],fs[1+i2],add);
+ //tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ diff->incCount(es[1+i1],fs[1+i2],add);
+ //tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- // cout << 6 << endl;
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
+ }
+ }
+ cout << 5 << endl;
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ if (nnn==7779) cout << 1 << endl;
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ if (nnn==7779) cout << 2 << endl;
+ double *ep=epsilon[jj].begin();
+ if (nnn==7779) cout << 3 << endl;
+ if( ep ) {
+ //for(i=0;i<I;i++)
+ // normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
+ // for(i=0;i<I*I;++i)
+ // ep[i] *= I;
+ //if( DependencyOfJ )
+ // if( J-1 )
+ // for(i=0;i<I*I;++i)
+ // ep[i] /= (J-1);
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ if (nnn==7779) cout << 4 << ":" << I << endl;
+ for(i=0; i<I; i++) {
+ if (nnn==7779) cout << "i:" << i << endl;
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ if (nnn==7779) cout << " CL 1" << endl;
+ CLASSIFY(i,i_empty,ireal);
+ if (nnn==7779) cout << " CL 2 : " << i_bef << " " << (size_t)ep << endl;
+ CLASSIFY2(i_bef,i_befreal);
+ if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()) {
+ continue;
+ }
+ if( i_empty )
+ p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
+ else {
+ if (nnn==7779) cout << "ELSE" << endl;
+ if (nnn==7779) {
+ cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
+ np0c+=epsilon[jj](i,i_bef)*mult;
+ cout <<"..."<<endl;
+ cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
+ cout <<"......"<<endl;
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,0,0.0);
+ np0c+=epsilon[jj](i,i_bef)*mult;
+ } else {
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
+ np0c+=epsilon[jj](i,i_bef)*mult;
}
+ }
+ if (nnn==7779) cout << "FI" << endl;
+ massert( &epsilon[jj](i,i_bef)== ep);
}
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ if (nnn==7779) cout << 5 << endl;
}
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
+ }
+ // cout << 6 << endl;
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
+ Array<double>&bi=counts.doGetBetaInit(I);
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ for(i=0; i<I; i++,gp1++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
}
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
- sHandler1.rewind();
- perp.record("HMM");
- viterbi_perp.record("HMM");
- errorReportAL(cout,"HMM");
- return diff;
+ }
+ // cout << 7 << endl;
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ //cout << 8 << endl;
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ //cout << 9 << endl;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ //cout << 10 << endl;
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ //cout << 11 << endl;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cout << 12 << endl;
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+ delete net;
+ net=0;
+ //cout << 13 << endl;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ //cout << 14 << endl;
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+ pair_no++;
+ } /* of while */
+ sHandler1.rewind();
+ perp.record("HMM");
+ viterbi_perp.record("HMM");
+ errorReportAL(cout,"HMM");
+ return diff;
}
#endif
Mutex mu;
#if 0
-void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
- bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
- bool test,bool doInit,int part
-){
- WordIndex i, j, l, m ;
- double cross_entropy;
- int pair_no=0 ;
- perp.clear();
- viterbi_perp.clear();
- ofstream of2;
- // for each sentence pair in the corpus
- if (dump_alignment||FEWDUMPS )
- of2.open(alignfile);
- sentPair sent ;
- //sHandler1.rewind();
- int nnn = 0;
- while(sHandler1.getNextSentence(sent)){
- //nnn ++;
- //cout << nnn << endl;
- //cout << 1 << endl;
- const Vector<WordIndex>& es = sent.get_eSent();
- const Vector<WordIndex>& fs = sent.get_fSent();
- const float so = sent.getCount();
- l = es.size() - 1;
- m = fs.size() - 1;
- cross_entropy = log(1.0);
- Vector<WordIndex> viterbi_alignment(fs.size());
-
- unsigned int I=2*l,J=m;
- bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
- bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
-
- HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
- Array<double> gamma;
- Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
- double trainProb;
- trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
- if( !test ){
- double *gp=conv<double>(gamma.begin());
- for(unsigned int i2=0;i2<J;i2++)for(unsigned int i1=0;i1<I;++i1,++gp){
- if( *gp>MINCOUNTINCREASE ) {
- COUNT add= *gp*so;
- if( i1>=l ){
- //diff->incCount(es[0],fs[1+i2],add);
- tTable.incCount(es[0],fs[1+i2],add);
- aCountTable.getRef(0,i2+1,l,m)+=add;
- } else {
- //diff->incCount(es[1+i1],fs[1+i2],add);
- tTable.incCount(es[1+i1],fs[1+i2],add);
- aCountTable.getRef(1+i1,1+i2,l,m)+=add;
- }
- }
- }
- double p0c=0.0,np0c=0.0;
- for(unsigned int jj=0;jj<epsilon.size();jj++){
- int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
- double *ep=epsilon[jj].begin();
- if( ep ){
- double mult=1.0;
- mult*=l;
- //if( DependencyOfJ && J-1)
- // mult/=(J-1);
- for(i=0;i<I;i++){
- for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++){
- CLASSIFY(i,i_empty,ireal);
- CLASSIFY2(i_bef,i_befreal);
- if( i_empty ){
- p0c+=*ep * mult;
- }else{
- //mu.lock();
- //cout<<"\rP "<<part<<" ";
- //cout<<epsilon.size()<<" "<<jj<<" ";
- //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
- //cout<<i<<" "<<i_bef<<" ";
- //cout<<I<<" "<<J<<" ";
-
- cout.flush();
- counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
- frenchClass ,jj+1,*ep * mult,0.0);
- np0c+=*ep * mult;
- //mu.unlock();
- }
- massert( &epsilon[jj](i,i_bef)== ep);
- }
- }
- }
+void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
+ bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
+ bool test,bool doInit,int part
+ )
+{
+ WordIndex i, j, l, m ;
+ double cross_entropy;
+ int pair_no=0 ;
+ perp.clear();
+ viterbi_perp.clear();
+ ofstream of2;
+ // for each sentence pair in the corpus
+ if (dump_alignment||FEWDUMPS )
+ of2.open(alignfile);
+ sentPair sent ;
+ //sHandler1.rewind();
+ int nnn = 0;
+ while(sHandler1.getNextSentence(sent)) {
+ //nnn ++;
+ //cout << nnn << endl;
+ //cout << 1 << endl;
+ const Vector<WordIndex>& es = sent.get_eSent();
+ const Vector<WordIndex>& fs = sent.get_fSent();
+ const float so = sent.getCount();
+ l = es.size() - 1;
+ m = fs.size() - 1;
+ cross_entropy = log(1.0);
+ Vector<WordIndex> viterbi_alignment(fs.size());
+
+ unsigned int I=2*l,J=m;
+ bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
+ bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
+
+ HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
+ Array<double> gamma;
+ Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
+ double trainProb;
+ trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
+ if( !test ) {
+ double *gp=conv<double>(gamma.begin());
+ for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
+ if( *gp>MINCOUNTINCREASE ) {
+ COUNT add= *gp*so;
+ if( i1>=l ) {
+ //diff->incCount(es[0],fs[1+i2],add);
+ tTable.incCount(es[0],fs[1+i2],add);
+ aCountTable.getRef(0,i2+1,l,m)+=add;
+ } else {
+ //diff->incCount(es[1+i1],fs[1+i2],add);
+ tTable.incCount(es[1+i1],fs[1+i2],add);
+ aCountTable.getRef(1+i1,1+i2,l,m)+=add;
}
- double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
- Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
- Array<double>&bi=counts.doGetBetaInit(I);
- int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
- for(i=0;i<I;i++,gp1++,gp2++){
- CLASSIFY(i,i_empty,ireal);
- ai[i]+= *gp1;
- bi[i]+= *gp2;
- if( DependencyOfPrevAJ==0 ){
- if( i_empty )
- p0c+=*gp1;
- else{
- counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
- np0c+=*gp1;
- }
- }
+ }
+ }
+ double p0c=0.0,np0c=0.0;
+ for(unsigned int jj=0; jj<epsilon.size(); jj++) {
+ int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
+ double *ep=epsilon[jj].begin();
+ if( ep ) {
+ double mult=1.0;
+ mult*=l;
+ //if( DependencyOfJ && J-1)
+ // mult/=(J-1);
+ for(i=0; i<I; i++) {
+ for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
+ CLASSIFY(i,i_empty,ireal);
+ CLASSIFY2(i_bef,i_befreal);
+ if( i_empty ) {
+ p0c+=*ep * mult;
+ } else {
+ //mu.lock();
+ //cout<<"\rP "<<part<<" ";
+ //cout<<epsilon.size()<<" "<<jj<<" ";
+ //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
+ //cout<<i<<" "<<i_bef<<" ";
+ //cout<<I<<" "<<J<<" ";
+
+ cout.flush();
+ counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
+ frenchClass ,jj+1,*ep * mult,0.0);
+ np0c+=*ep * mult;
+ //mu.unlock();
+ }
+ massert( &epsilon[jj](i,i_bef)== ep);
}
- // cout << 7 << endl;
- if( Verbose )
- cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
}
- //cout << 8 << endl;
- cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
- Array<int>vit;
- double viterbi_score=1.0;
- //cout << 9 << endl;
- if( (HMMTrainingSpecialFlags&1) )
- HMMViterbi(*net,gamma,vit);
- else
- viterbi_score=HMMRealViterbi(*net,vit);
- //cout << 10 << endl;
- for(j=1;j<=m;j++){
- viterbi_alignment[j]=vit[j-1]+1;
- if( viterbi_alignment[j]>l)
- viterbi_alignment[j]=0;
+ }
+ double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
+ Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
+ Array<double>&bi=counts.doGetBetaInit(I);
+ int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
+ for(i=0; i<I; i++,gp1++,gp2++) {
+ CLASSIFY(i,i_empty,ireal);
+ ai[i]+= *gp1;
+ bi[i]+= *gp2;
+ if( DependencyOfPrevAJ==0 ) {
+ if( i_empty )
+ p0c+=*gp1;
+ else {
+ counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
+ np0c+=*gp1;
+ }
}
- //cout << 11 << endl;
- sHandler1.setProbOfSentence(sent,cross_entropy);
- //cout << 12 << endl;
- perp.addFactor(cross_entropy, so, l, m,1);
- viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
- if( Verbose )
- cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
- delete net;net=0;
- //cout << 13 << endl;
- if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
- //cout << 14 << endl;
- addAL(viterbi_alignment,sent.getSentenceNo(),l);
- pair_no++;
- } /* of while */
-
-
- return ;
+ }
+ // cout << 7 << endl;
+ if( Verbose )
+ cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
+ }
+ //cout << 8 << endl;
+ cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
+ Array<int>vit;
+ double viterbi_score=1.0;
+ //cout << 9 << endl;
+ if( (HMMTrainingSpecialFlags&1) )
+ HMMViterbi(*net,gamma,vit);
+ else
+ viterbi_score=HMMRealViterbi(*net,vit);
+ //cout << 10 << endl;
+ for(j=1; j<=m; j++) {
+ viterbi_alignment[j]=vit[j-1]+1;
+ if( viterbi_alignment[j]>l)
+ viterbi_alignment[j]=0;
+ }
+ //cout << 11 << endl;
+ sHandler1.setProbOfSentence(sent,cross_entropy);
+ //cout << 12 << endl;
+ perp.addFactor(cross_entropy, so, l, m,1);
+ viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
+ if( Verbose )
+ cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
+ delete net;
+ net=0;
+ //cout << 13 << endl;
+ if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
+ printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
+ //cout << 14 << endl;
+ addAL(viterbi_alignment,sent.getSentenceNo(),l);
+ pair_no++;
+ } /* of while */
+
+
+ return ;
}
-CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
- st = time(NULL) ;
- sHandler1.rewind();
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
- pair_no = 0;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".AH" ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- // if (testPerp && testHandler)
-// em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
+CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ int pair_no = 0;
+ bool dump_files = false ;
+ ofstream of2 ;
+ st = time(NULL) ;
+ sHandler1.rewind();
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+ pair_no = 0;
+
+ cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
+ dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".AH" ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
+ CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
+
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ // if (testPerp && testHandler)
+// em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
// if (dump_files&&OutputInAachenFormat==1)
// tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
// tTable.normalizeTable(Elist, Flist);
@@ -888,231 +903,235 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it){
// << " PERPLEXITY " << perp.perplexity() << '\n';
// if (testPerp && testHandler)
// cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
-// << " PERPLEXITY " << (*testPerp).perplexity()
+// << " PERPLEXITY " << (*testPerp).perplexity()
// << '\n';
// cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
// << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
// if (testPerp && testHandler)
// cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
-// << " PERPLEXITY " << testViterbiPerp->perplexity()
+// << " PERPLEXITY " << testViterbiPerp->perplexity()
// << '\n';
// if (dump_files){
// if( OutputInAachenFormat==0)
/// tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- // ofstream afilestream(afileh.c_str());
- // probs.writeJumps(afilestream);
- // aCountTable.printTable(afile.c_str());
-
- fn = time(NULL) ;
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- //cout << "tTable contains " << tTable.getHash().bucket_count()
- // << " buckets and " << tTable.getHash().size() << " entries." ;
- cout << "==========================================================\n";
- return diff;
+// ofstream afilestream(afileh.c_str());
+// probs.writeJumps(afilestream);
+// aCountTable.printTable(afile.c_str());
+
+ fn = time(NULL) ;
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ //cout << "tTable contains " << tTable.getHash().bucket_count()
+ // << " buckets and " << tTable.getHash().size() << " entries." ;
+ cout << "==========================================================\n";
+ return diff;
}
-void hmm::em_one_step_2(int it,int part){
- double minErrors=1.0;int minIter=0;
- string modelName="Hmm",shortModelName="hmm";
- int dumpFreq=ModelH_Dump_Freq;
- time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
- int pair_no = 0;
- bool dump_files = false ;
- ofstream of2 ;
-
- pair_no = 0;
+void hmm::em_one_step_2(int it,int part)
+{
+ double minErrors=1.0;
+ int minIter=0;
+ string modelName="Hmm",shortModelName="hmm";
+ int dumpFreq=ModelH_Dump_Freq;
+ time_t it_st, st, it_fn, fn;
+ string tfile, afile,afileh, number, alignfile, test_alignfile;
+ int pair_no = 0;
+ bool dump_files = false ;
+ ofstream of2 ;
+ pair_no = 0;
- dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".Ahmm." ;
- char v[2];
- v[1] = 0;
- v[0] = '0' + it;
- alignfile += v;
- alignfile += ".part";
- v[0] = '0' + part;
- alignfile += v;
-
- counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
- aCountTable.clear();
- initAL();
- em_loop_2(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,part);
-
- if( errorsAL()<minErrors ){
- minErrors=errorsAL();
- minIter=it;
- }
- return ;
+
+ dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
+ number = "";
+ int n = it;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+ alignfile = Prefix + ".Ahmm." ;
+ char v[2];
+ v[1] = 0;
+ v[0] = '0' + it;
+ alignfile += v;
+ alignfile += ".part";
+ v[0] = '0' + part;
+ alignfile += v;
+
+ counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
+ aCountTable.clear();
+ initAL();
+ em_loop_2(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,part);
+
+ if( errorsAL()<minErrors ) {
+ minErrors=errorsAL();
+ minIter=it;
+ }
+ return ;
}
-struct hmm_align_struct{
- hmm *h;
- int part;
- int iter;
- int valid;
- pthread_t thread;
- int done;
+struct hmm_align_struct {
+ hmm *h;
+ int part;
+ int iter;
+ int valid;
+ pthread_t thread;
+ int done;
};
-void* em_thread(void *arg){
- hmm_align_struct * hm = (hmm_align_struct*) arg;
- hm->h->em_one_step_2(hm->iter,hm->part);
- hm->done = 1;
- return hm;
+void* em_thread(void *arg)
+{
+ hmm_align_struct * hm = (hmm_align_struct*) arg;
+ hm->h->em_one_step_2(hm->iter,hm->part);
+ hm->done = 1;
+ return hm;
}
-int multi_thread_em(int noIter, int noThread, hmm* base){
- // First, do one-step EM
- int i;
- int j;
- time_t it_st, st, it_fn, fn;
- fn = time(NULL);
- int dumpFreq=ModelH_Dump_Freq;
- bool dump_files = false ;
- string modelName = "HMM",shortModelName="hmm";
- string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
- vector<amodel<COUNT> > counts;
- vector<model2 *> m2;
- counts.resize(noThread);
- m2.resize(noThread);
- for(j=1;j<noThread;j++){
- m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
+int multi_thread_em(int noIter, int noThread, hmm* base)
+{
+ // First, do one-step EM
+ int i;
+ int j;
+ time_t it_st, st, it_fn, fn;
+ fn = time(NULL);
+ int dumpFreq=ModelH_Dump_Freq;
+ bool dump_files = false ;
+ string modelName = "HMM",shortModelName="hmm";
+ string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
+ vector<amodel<COUNT> > counts;
+ vector<model2 *> m2;
+ counts.resize(noThread);
+ m2.resize(noThread);
+ for(j=1; j<noThread; j++) {
+ m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
+ }
+ st = time(NULL);
+ cout << "\n==========================================================\n";
+ cout << modelName << " Training Started at: " << my_ctime(&st);
+
+ for(i=1; i<=noIter; i++) {
+ base->perp.clear();
+ base->trainViterbiPerp.clear();
+ if (base->testPerp && base->testHandler) {
+ base->testHandler->rewind();
+ base->testPerp->clear();
+ base->testViterbiPerp->clear();
}
- st = time(NULL);
- cout << "\n==========================================================\n";
- cout << modelName << " Training Started at: " << my_ctime(&st);
-
- for(i=1;i<=noIter;i++){
- base->perp.clear();
- base->trainViterbiPerp.clear();
- if (base->testPerp && base->testHandler){
- base->testHandler->rewind();
- base->testPerp->clear();
- base->testViterbiPerp->clear();
- }
-
- it_st = time(NULL) ;
-
- cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
- dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
- dump_files = true;
- string number = "";
- int n = i;
- do{
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t" + shortModelName + "." + number ;
- afile = Prefix + ".a" + shortModelName + "." + number ;
- acfile = Prefix + ".ac" + shortModelName + "." + number ;
- afileh = Prefix + ".h" + shortModelName + "." + number ;
-
- alignfile = Prefix + ".A" + shortModelName + "." + number ;
- test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
- base->initAL();
- // except the current thread
- vector<hmm_align_struct> args;
- base->sHandler1.rewind();
- args.resize(noThread);
- for(j=1;j<noThread;j++){
- args[j].iter = i;
- args[j].part = j;
- args[j].done = 0;
- counts[j].clear();
- args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
- args[j].h->probs = base->probs;
- args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
- if(args[j].valid){
- cerr << "Error starting thread " << j << endl;
- }
- }
- base->em_one_step_2(i,0);
- //ofstream afilestream(afileh.c_str());
- while(1){
- bool done = true;
- for (j=1;j<noThread;j++){
- //pthread_join((args[j].thread),NULL);
- // Start normalization as soon as possible
- if(args[j].done==1){
- args[j].done = 2;
- base->aCountTable.merge(args[j].h->aCountTable);
- //afilestream << "BEFORE MERGE"<<endl;
- //base->counts.writeJumps(afilestream);
- //afilestream << "MERGING"<<endl;
- //args[j].h->counts.writeJumps(afilestream);
- //afilestream << "MERGED"<<endl;
- base->counts.merge(args[j].h->counts);
- //base->counts.writeJumps(afilestream);
- delete args[j].h;
- args[j].h = 0;
- }else if(args[j].done==2){
- // Nothing
- }else if(args[j].done==0){
- done = false;
- }
- }
- if(done) break;
+
+ it_st = time(NULL) ;
+
+ cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
+ dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
+ dump_files = true;
+ string number = "";
+ int n = i;
+ do {
+ number.insert((size_t)0, 1, (char)(n % 10 + '0'));
+ } while((n /= 10) > 0);
+ tfile = Prefix + ".t" + shortModelName + "." + number ;
+ afile = Prefix + ".a" + shortModelName + "." + number ;
+ acfile = Prefix + ".ac" + shortModelName + "." + number ;
+ afileh = Prefix + ".h" + shortModelName + "." + number ;
+
+ alignfile = Prefix + ".A" + shortModelName + "." + number ;
+ test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
+ base->initAL();
+ // except the current thread
+ vector<hmm_align_struct> args;
+ base->sHandler1.rewind();
+ args.resize(noThread);
+ for(j=1; j<noThread; j++) {
+ args[j].iter = i;
+ args[j].part = j;
+ args[j].done = 0;
+ counts[j].clear();
+ args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
+ args[j].h->probs = base->probs;
+ args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
+ if(args[j].valid) {
+ cerr << "Error starting thread " << j << endl;
+ }
+ }
+ base->em_one_step_2(i,0);
+ //ofstream afilestream(afileh.c_str());
+ while(1) {
+ bool done = true;
+ for (j=1; j<noThread; j++) {
+ //pthread_join((args[j].thread),NULL);
+ // Start normalization as soon as possible
+ if(args[j].done==1) {
+ args[j].done = 2;
+ base->aCountTable.merge(args[j].h->aCountTable);
+ //afilestream << "BEFORE MERGE"<<endl;
+ //base->counts.writeJumps(afilestream);
+ //afilestream << "MERGING"<<endl;
+ //args[j].h->counts.writeJumps(afilestream);
+ //afilestream << "MERGED"<<endl;
+ base->counts.merge(args[j].h->counts);
+ //base->counts.writeJumps(afilestream);
+ delete args[j].h;
+ args[j].h = 0;
+ } else if(args[j].done==2) {
+ // Nothing
+ } else if(args[j].done==0) {
+ done = false;
}
- base->perp.record("HMM");
- base->trainViterbiPerp.record("HMM");
- base->errorReportAL(cout,"HMM");
-
- // Normalize
+ }
+ if(done) break;
+ }
+ base->perp.record("HMM");
+ base->trainViterbiPerp.record("HMM");
+ base->errorReportAL(cout,"HMM");
+
+ // Normalize
// cout <<" Writing " << afileh <<"\n";
- base->probs = base->counts;
+ base->probs = base->counts;
// cout <<" Writing " << afileh <<"\n";
// ofstream afilestream(afileh.c_str());
// base->probs.writeJumps(afilestream);
- base->tTable.normalizeTable(base->Elist, base->Flist);
- base->aCountTable.normalize(base->aTable);
- base->aCountTable.clear();
- if (base->testPerp && base->testHandler)
- base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp, true,i==1,i);
- if (dump_files&&OutputInAachenFormat==1)
- base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
- cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
- << " PERPLEXITY " << base->perp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
- << " PERPLEXITY " << base->testPerp->perplexity()
- << '\n';
- cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
- if (base->testPerp && base->testHandler)
- cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
- << " PERPLEXITY " << base->testViterbiPerp->perplexity()
- << '\n';
- dump_files = true;
- if (dump_files){
- if( OutputInAachenFormat==0)
- base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
- ofstream afilestream(afileh.c_str());
- base->counts.writeJumps(afilestream);
- //base->counts.clear();
- base->aCountTable.printTable(acfile.c_str());
- base->aTable.printTable(afile.c_str());
- }
- it_fn = time(NULL) ;
-
- cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
-
- }
- for(j=1;j<noThread;j++){
- delete m2[j];
+ base->tTable.normalizeTable(base->Elist, base->Flist);
+ base->aCountTable.normalize(base->aTable);
+ base->aCountTable.clear();
+ if (base->testPerp && base->testHandler)
+ base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp, true,i==1,i);
+ if (dump_files&&OutputInAachenFormat==1)
+ base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
+ cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
+ << " PERPLEXITY " << base->perp.perplexity() << '\n';
+ if (base->testPerp && base->testHandler)
+ cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
+ << " PERPLEXITY " << base->testPerp->perplexity()
+ << '\n';
+ cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
+ << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
+ if (base->testPerp && base->testHandler)
+ cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
+ << " PERPLEXITY " << base->testViterbiPerp->perplexity()
+ << '\n';
+ dump_files = true;
+ if (dump_files) {
+ if( OutputInAachenFormat==0)
+ base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
+ ofstream afilestream(afileh.c_str());
+ base->counts.writeJumps(afilestream);
+ //base->counts.clear();
+ base->aCountTable.printTable(acfile.c_str());
+ base->aTable.printTable(afile.c_str());
}
- cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
- return 1;
+ it_fn = time(NULL) ;
+
+ cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
+ difftime(it_fn, it_st) << " seconds\n";
+
+ }
+ for(j=1; j<noThread; j++) {
+ delete m2[j];
+ }
+ cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
+ return 1;
}
@@ -1120,4 +1139,4 @@ int multi_thread_em(int noIter, int noThread, hmm* base){
#endif
#include "HMMTables.cpp"
template class HMMTables<int,WordClasses>;
-
+