Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author dowobeha <dowobeha@9a26d1b7-1c8f-445c-8fdd-6576f508279d> 2012-08-23 21:51:50 +0400
committer dowobeha <dowobeha@9a26d1b7-1c8f-445c-8fdd-6576f508279d> 2012-08-23 21:51:50 +0400
commit 991e235b0475779665be29e03f32e89211018529 (patch)
tree 4ce7fd801cdd4cea2b68a5bd06f2b27af04c9f38
parent 645304b798b1d7ec114f2b2654c1067d7cabbc58 (diff)
Added flags to mgiza to explicitly specify input *.classes files.
The new flags are -sourcevocabularyclasses and -targetvocabularyclasses
-rw-r--r-- mgizapp/src/Globals.h 1
-rw-r--r-- mgizapp/src/d4norm.cxx 11
-rw-r--r-- mgizapp/src/hmmnorm.cxx 11
-rw-r--r-- mgizapp/src/main.cpp 33
-rw-r--r-- mgizapp/src/model3.cpp 4
5 files changed, 42 insertions, 18 deletions
diff --git a/mgizapp/src/Globals.h b/mgizapp/src/Globals.h
index 3fa17d9..693a117 100644
--- a/mgizapp/src/Globals.h
+++ b/mgizapp/src/Globals.h
@@ -32,6 +32,7 @@ extern float PROB_SMOOTH,MINCOUNTINCREASE;
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
extern string Prefix, LogFilename, OPath,
SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
extern ofstream logmsg ;
extern Mutex logmsg_lock;
diff --git a/mgizapp/src/d4norm.cxx b/mgizapp/src/d4norm.cxx
index 7a1da31..a790a62 100644
--- a/mgizapp/src/d4norm.cxx
+++ b/mgizapp/src/d4norm.cxx
@@ -56,6 +56,7 @@ GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
using namespace std;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
@@ -74,11 +75,11 @@ int main(int argc, char* argv[]){
fTrainVcbList.setName(argv[2]);
eTrainVcbList.readVocabList();
fTrainVcbList.readVocabList();
- string evcbcls = argv[1];
- string fvcbcls = argv[2];
- evcbcls += ".classes";
- fvcbcls += ".classes";
- d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, evcbcls.c_str(), fvcbcls.c_str(),eTrainVcbList,fTrainVcbList);
+ SourceVocabClassesFilename = argv[1];
+ TargetVocabClassesFilename = argv[2];
+ SourceVocabClassesFilename += ".classes";
+ TargetVocabClassesFilename += ".classes";
+ d4m.makeWordClasses(eTrainVcbList, fTrainVcbList, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str(),eTrainVcbList,fTrainVcbList);
// Start iteration:
for(int i =4; i< argc ; i++){
string name = argv[i];
diff --git a/mgizapp/src/hmmnorm.cxx b/mgizapp/src/hmmnorm.cxx
index 9d737c5..2643102 100644
--- a/mgizapp/src/hmmnorm.cxx
+++ b/mgizapp/src/hmmnorm.cxx
@@ -55,6 +55,7 @@ GLOBAL_PARAMETER(WordIndex, MAX_FERTILITY, "MAX_FERTILITY",
using namespace std;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
+ SourceVocabClassesFilename, TargetVocabClassesFilename,
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
@@ -84,11 +85,11 @@ int main(int argc, char* argv[]){
model2 m2(m1, aTable, aCountTable);
WordClasses french,english;
hmm h(m2,english,french);
- string evcbcls = argv[1];
- string fvcbcls = argv[2];
- evcbcls += ".classes";
- fvcbcls += ".classes";
- h.makeWordClasses(m1.Elist, m1.Flist, evcbcls.c_str(), fvcbcls.c_str());
+ SourceVocabClassesFilename = argv[1];
+ TargetVocabClassesFilename = argv[2];
+ SourceVocabClassesFilename += ".classes";
+ TargetVocabClassesFilename += ".classes";
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename.c_str(), TargetVocabClassesFilename.c_str());
string base = argv[4];
string baseA = base+".alpha";
string baseB = base+".beta";
diff --git a/mgizapp/src/main.cpp b/mgizapp/src/main.cpp
index cd44526..d3a23c9 100644
--- a/mgizapp/src/main.cpp
+++ b/mgizapp/src/main.cpp
@@ -98,6 +98,7 @@ Vector<map< pair<int,int>,char > > ReferenceAlignment;
bool useDict = false;
string CoocurrenceFile;
string Prefix, LogFilename, OPath, Usage, SourceVocabFilename,
+ SourceVocabClassesFilename(""), TargetVocabClassesFilename(""),
TargetVocabFilename, CorpusFilename, TestCorpusFilename, t_Filename,
a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
@@ -250,8 +251,8 @@ void printDecoderConfigFile() {
decoder << "Target.vcb = " << TargetVocabFilename << '\n';
// decoder << "Source.classes = " << SourceVocabFilename + ".classes" << '\n';
// decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n';
- decoder << "Source.classes = " << SourceVocabFilename+".classes" << '\n';
- decoder << "Target.classes = " << TargetVocabFilename + ".classes" <<'\n';
+ decoder << "Source.classes = " << SourceVocabClassesFilename << '\n';
+ decoder << "Target.classes = " << TargetVocabClassesFilename <<'\n';
p=Prefix + ".fe0_"+ /*lastModelName*/"3" + ".final";
decoder << "FZeroWords = " <<stripPath(p.c_str()) << '\n';
@@ -836,8 +837,8 @@ double StartTraining(int&result) {
if (HMM_Iterations > 0 && (restart < 2 || restart == 4 || restart == 5 || restart == 6)) {
cout << "NOTE: I am doing iterations with the HMM model!\n";
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabFilename
- +".classes", TargetVocabFilename+".classes");
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
+ , TargetVocabClassesFilename);
if(restart != 6) h.initialize_table_uniformly(*corpus);
if(Model3_Iterations == 0 && Model4_Iterations == 0 &&
@@ -873,8 +874,8 @@ double StartTraining(int&result) {
errors=m3.errorsAL();
}
if(restart >= 7 && hmmvalid){
- h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabFilename
- +".classes", TargetVocabFilename+".classes");
+ h.makeWordClasses(m1.Elist, m1.Flist, SourceVocabClassesFilename
+ , TargetVocabClassesFilename);
}
if (HMM_Iterations>0 || restart == 7)
m3.setHMM(&h);
@@ -960,6 +961,18 @@ int main(int argc, char* argv[]) {
"target vocabulary file name",
TargetVocabFilename,-1));
getGlobalParSet().insert(new Parameter<string>(
+ "Source Vocabulary Classes",
+ ParameterChangedFlag,
+ "source vocabulary classes file name",
+ SourceVocabClassesFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
+ "Target Vocabulary Classes",
+ ParameterChangedFlag,
+ "target vocabulary classes file name",
+ TargetVocabClassesFilename,
+ PARLEV_INPUT));
+ getGlobalParSet().insert(new Parameter<string>(
"C",
ParameterChangedFlag,
"training corpus file name",
@@ -1092,6 +1105,14 @@ int main(int argc, char* argv[]) {
//
parseArguments(argc, argv);
+ if (SourceVocabClassesFilename=="") {
+ makeSetCommand("sourcevocabularyclasses",SourceVocabFilename+".classes",getGlobalParSet(),2);
+ }
+
+ if (TargetVocabClassesFilename=="") {
+ makeSetCommand("targetvocabularyclasses",TargetVocabFilename+".classes",getGlobalParSet(),2);
+ }
+
// Determine number of threads
if(NCPUS == 0){
diff --git a/mgizapp/src/model3.cpp b/mgizapp/src/model3.cpp
index 797dd58..ec3c701 100644
--- a/mgizapp/src/model3.cpp
+++ b/mgizapp/src/model3.cpp
@@ -366,8 +366,8 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
d4m.readProbTable(previous_d4model.c_str(),previous_d4model_1.c_str());
}
if(h==NULL)
- d4m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",
- TargetVocabFilename+".classes",Elist,Flist);
+ d4m.makeWordClasses(Elist, Flist, SourceVocabClassesFilename,
+ TargetVocabClassesFilename,Elist,Flist);
d5model d5m(d4m);
//d5m.makeWordClasses(Elist, Flist, SourceVocabFilename+".classes",