diff options
author | dowobeha <dowobeha@9a26d1b7-1c8f-445c-8fdd-6576f508279d> | 2012-08-23 21:51:50 +0400 |
---|---|---|
committer | dowobeha <dowobeha@9a26d1b7-1c8f-445c-8fdd-6576f508279d> | 2012-08-23 21:51:50 +0400 |
commit | 991e235b0475779665be29e03f32e89211018529 (patch) | |
tree | 4ce7fd801cdd4cea2b68a5bd06f2b27af04c9f38 | |
parent | 645304b798b1d7ec114f2b2654c1067d7cabbc58 (diff) |
Added flags to mgiza to explicitly specify the input *.classes files.
The new flags are -sourcevocabularyclasses and -targetvocabularyclasses.
-rw-r--r-- | mgizapp/src/Globals.h | 1 | ||||
-rw-r--r-- | mgizapp/src/d4norm.cxx | 11 | ||||
-rw-r--r-- | mgizapp/src/hmmnorm.cxx | 11 | ||||
-rw-r--r-- | mgizapp/src/main.cpp | 33 | ||||
-rw-r--r-- | mgizapp/src/model3.cpp | 4 |
5 files changed, 42 insertions, 18 deletions
diff --git a/mgizapp/src/Globals.h b/mgizapp/src/Globals.h index 3fa17d9..693a117 100644 --- a/mgizapp/src/Globals.h +++ b/mgizapp/src/Globals.h @@ -32,6 +32,7 @@ extern float PROB_SMOOTH,MINCOUNTINCREASE; extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ; extern string Prefix, LogFilename, OPath, SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename, + SourceVocabClassesFilename, TargetVocabClassesFilename, t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename; extern ofstream logmsg ; extern Mutex logmsg_lock; diff --git a/mgizapp/src/d4norm.cxx b/mgizapp/src/d4norm.cxx index 7a1da31..a790a62 100644 --- a/mgizapp/src/d4norm.cxx +++ b/mgizapp/src/d4norm.cxx @@ -56,6 +56,7 @@ GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY", using namespace std; string Prefix, LogFilename, OPath, Usage, SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename, + SourceVocabClassesFilename, TargetVocabClassesFilename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename; @@ -74,11 +75,11 @@ int main(int argc, char* argv[]){ fTrainVcbList.setName(argv[2]); eTrainVcbList.readVocabList(); fTrainVcbList.readVocabList(); - string evcbcls = argv[1]; - string fvcbcls = argv[2]; - evcbcls += ".classes"; - fvcbcls += ".classes"; - d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, evcbcls.c_str(), fvcbcls.c_str(),eTrainVcbList,fTrainVcbList); + SourceVocabClassesFilename = argv[1]; + TargetVocabClassesFilename = argv[2]; + SourceVocabClassesFilename += ".classes"; + TargetVocabClassesFilename += ".classes"; + d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str(),eTrainVcbList,fTrainVcbList); // Start iteration: for(int i =4; i< argc ; i++){ string name = argv[i]; diff --git a/mgizapp/src/hmmnorm.cxx b/mgizapp/src/hmmnorm.cxx index 9d737c5..2643102 100644 --- a/mgizapp/src/hmmnorm.cxx +++ 
b/mgizapp/src/hmmnorm.cxx @@ -55,6 +55,7 @@ GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY", using namespace std; string Prefix, LogFilename, OPath, Usage, SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename, + SourceVocabClassesFilename, TargetVocabClassesFilename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename; @@ -84,11 +85,11 @@ int main(int argc, char* argv[]){ model2 m2(m1, aTable, aCountTable); WordClasses french,english; hmm h(m2,english,french); - string evcbcls = argv[1]; - string fvcbcls = argv[2]; - evcbcls += ".classes"; - fvcbcls += ".classes"; - h.makeWordClasses(m1.Elist, m1.Flist, evcbcls.c_str(), fvcbcls.c_str()); + SourceVocabClassesFilename = argv[1]; + TargetVocabClassesFilename = argv[2]; + SourceVocabClassesFilename += ".classes"; + TargetVocabClassesFilename += ".classes"; + h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str()); string base = argv[4]; string baseA = base+".alpha"; string baseB = base+".beta"; diff --git a/mgizapp/src/main.cpp b/mgizapp/src/main.cpp index cd44526..d3a23c9 100644 --- a/mgizapp/src/main.cpp +++ b/mgizapp/src/main.cpp @@ -98,6 +98,7 @@ Vector<map< pair<int,int>,char > > ReferenceAlignment; bool useDict = false; string CoocurrenceFile; string Prefix, LogFilename, OPath, Usage, SourceVocabFilename, + SourceVocabClassesFilename(""), TargetVocabClassesFilename(""), TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename; @@ -250,8 +251,8 @@ void printDecoderConfigFile() { decoder << "Target.vcb = " << TargetVocabFilename << '\n'; // decoder << "Source.classes = " << SourceVocabFilename + ".classes" << '\n'; // decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n'; - decoder << "Source.classes = " << SourceVocabFilename+".classes" << '\n'; - decoder << "Target.classes = " << 
TargetVocabFilename + ".classes" <<'\n'; + decoder << "Source.classes = " << SourceVocabClassesFilename << '\n'; + decoder << "Target.classes = " << TargetVocabClassesFilename <<'\n'; p=Prefix + ".fe0_"+ /*lastModelName*/"3" + ".final"; decoder << "FZeroWords = " <<stripPath(p.c_str()) << '\n'; @@ -836,8 +837,8 @@ double StartTraining(int&result) { if (HMM_Iterations > 0 && (restart < 2 || restart == 4 || restart == 5 || restart == 6)) { cout << "NOTE: I am doing iterations with the HMM model!\n"; - h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabFilename - +".classes", TargetVocabFilename+".classes"); + h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename + , TargetVocabClassesFilename); if(restart != 6) h.initialize_table_uniformly(*corpus); if(Model3_Iterations == 0 && Model4_Iterations == 0 && @@ -873,8 +874,8 @@ double StartTraining(int&result) { errors=m3.errorsAL(); } if(restart >= 7 && hmmvalid){ - h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabFilename - +".classes", TargetVocabFilename+".classes"); + h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename + , TargetVocabClassesFilename); } if (HMM_Iterations>0 || restart == 7) m3.setHMM(&h); @@ -960,6 +961,18 @@ int main(int argc, char* argv[]) { "target vocabulary file name", TargetVocabFilename,-1)); getGlobalParSet().insert(new Parameter<string>( + "Source Vocabulary Classes", + ParameterChangedFlag, + "source vocabulary classes file name", + SourceVocabClassesFilename, + PARLEV_INPUT)); + getGlobalParSet().insert(new Parameter<string>( + "Target Vocabulary Classes", + ParameterChangedFlag, + "target vocabulary classes file name", + TargetVocabClassesFilename, + PARLEV_INPUT)); + getGlobalParSet().insert(new Parameter<string>( "C", ParameterChangedFlag, "training corpus file name", @@ -1092,6 +1105,14 @@ int main(int argc, char* argv[]) { // parseArguments(argc, argv); + if (SourceVocabClassesFilename=="") { + 
makeSetCommand("sourcevocabularyclasses",SourceVocabFilename+".classes",getGlobalParSet(),2); + } + + if (TargetVocabClassesFilename=="") { + makeSetCommand("targetvocabularyclasses",TargetVocabFilename+".classes",getGlobalParSet(),2); + } + // Determine number of threads if(NCPUS == 0){ diff --git a/mgizapp/src/model3.cpp b/mgizapp/src/model3.cpp index 797dd58..ec3c701 100644 --- a/mgizapp/src/model3.cpp +++ b/mgizapp/src/model3.cpp @@ -366,8 +366,8 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4, d4m.readProbTable(previous_d4model.c_str(),previous_d4model_1.c_str()); } if(h==NULL) - d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes", - TargetVocabFilename+".classes",Elist,Flist); + d4m.makeWordClasses(Elist, Flist, SourceVocabClassesFilename, + TargetVocabClassesFilename,Elist,Flist); d5model d5m(d4m); //d5m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes", |