Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: skyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230> 2010-04-21 18:23:33 +0400
committer: skyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230> 2010-04-21 18:23:33 +0400
commit: 14155c0068826630f06b859f85b9e3f57e65d838 (patch)
tree: 0a1d586ed638305a57167f292b701d233fc5c5cb
parent: c86dcc67633211cdbae918b5cf8122615503e99f (diff)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/DPR_MOSES@3191 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r-- DPR_model/smt_configulation.cpp 2
-rw-r--r-- DPR_model/smt_mainProcess_generatePhraseOption.cpp 143
-rw-r--r-- DPR_model/template_configulationFile 2
3 files changed, 84 insertions, 63 deletions
diff --git a/DPR_model/smt_configulation.cpp b/DPR_model/smt_configulation.cpp
index 0678a365f..e91fdaffb 100644
--- a/DPR_model/smt_configulation.cpp
+++ b/DPR_model/smt_configulation.cpp
@@ -108,6 +108,8 @@ int main(int argc, char *argv[])
confFile<<"tableFilterLabel = 1 \n";
confFile<<"Directory + name of the (output) weight parameter matrix (e.g. ./data/weightMatrix).\n";
confFile<<"weightMatrixFile = \n";
+ confFile<<"Need to train the weight matrix (If you have trained the weight matrix, then use 0)? 1. Yes, 0. No. \n";
+ confFile<<"weightMatrixTrainLabel = 1\n";
confFile<<"Directory + name of the (output) sentence phrase options table (e.g. ./data/phraseOption).\n";
confFile<<"phraseOptionFile = \n";
confFile<<"Directory + name of the test corpus (Used for extracting the phrase options).\n";
diff --git a/DPR_model/smt_mainProcess_generatePhraseOption.cpp b/DPR_model/smt_mainProcess_generatePhraseOption.cpp
index 87fff920c..62af8cd44 100644
--- a/DPR_model/smt_mainProcess_generatePhraseOption.cpp
+++ b/DPR_model/smt_mainProcess_generatePhraseOption.cpp
@@ -41,9 +41,10 @@ Input:
0: collect phrase options for one sentence and output, use less memory but slower
19. maxTranslation --- the maximum number of translation for each source phrase, if 0, use all translations
20. minTrainingExample --- the minimum number of training examples required
+21. weightMatrixTrainLabel --- 1: train the weight matrix; 0: use the existing weight matrix
Output:
-21. fout_weightMatrix (weightMatrixFile) --- the output file for the weight matrix;
-22. fout_phraseOptionDB (phraseOptionFile) --- the phrase option database.
+22. fout_weightMatrix (weightMatrixFile) --- the output file for the weight matrix;
+23. fout_phraseOptionDB (phraseOptionFile) --- the phrase option database.
****************************************************************************************************************/
#include <cstdlib>
@@ -81,6 +82,7 @@ int main(int argc, char *argv[])
bool batchLabel; //the batch output label
int maxTranslations; //the maximum number of translations
int minTrainingExample; //the minimum number of training examples
+ bool weightMatrixTrainLabel; //1 if need train the weight matrix, 0 otherwise
//1.1 Process the arguments
@@ -361,7 +363,16 @@ int main(int argc, char *argv[])
istringstream temp(directoryName);
temp>>batchLabel;
successFlag[21]=1;
- }
+ }
+
+ //configuration 23. weightMatrixTrainLabel
+ else if (strcmp(fileName,"weightMatrixTrainLabel")==0)
+ {
+ //cout<<"20\n";
+ istringstream temp(directoryName);
+ temp>>weightMatrixTrainLabel;
+ successFlag[22]=1;
+ }
//cout<<fileName<<'\n';
//cout<<directoryName<<'\n';
@@ -439,6 +450,9 @@ int main(int argc, char *argv[])
else if (!successFlag[21])
{cerr<<"Error in smt_mainProcess_generatePhraseOption: missing the batch output label (batchOutputLabel) for outputing the sentence phrase options.\n";
exit(1);}
+ else if (!successFlag[22])
+ {cerr<<"Error in smt_mainProcess_generatePhraseOption: missing the weight matrix train label (weightMatrixTrainLabel) for the DPR model.\n";
+ exit(1);}
//else check the open state of input files
else
{
@@ -571,6 +585,7 @@ int main(int argc, char *argv[])
cout<<"eTol = "<<eTol<<'\n';
cout<<"\nFor outputing the sentence phrase options:\n";
cout<<"batchOutputLabel = "<<batchLabel<<'\n';
+ cout<<"weightMatrixTrainLabel = "<<weightMatrixTrainLabel<<'\n';
cout<<"----------------------------------------------------------------\n\n";
//system("PAUSE");
}
@@ -585,79 +600,81 @@ int main(int argc, char *argv[])
//********************************************************************************************************
//2.read the phrase pair extraction table
- time_prev=time(NULL);
- cout<<"Step 1. Read the phrase pair extraction table (might take a bit long time, please be patient).\n";
- sourceReorderingTable* trainingPhraseTable = new sourceReorderingTable(phraseDBFile,classSetup,distCut);
- time_next=time(NULL);
- cout<<"----------------------------------------------------------------\n";
- cout<<"Processed time: "<<time_next-time_prev<<" seconds.\n";
- cout<<"----------------------------------------------------------------\n\n";
- //********************************************************************************************************
+ weightMatrixW* weightMatrix;
+ if (weightMatrixTrainLabel)
+ {
+ time_prev=time(NULL);
+ cout<<"Step 1. Read the phrase pair extraction table (might take a bit long time, please be patient).\n";
+ sourceReorderingTable* trainingPhraseTable = new sourceReorderingTable(phraseDBFile,classSetup,distCut);
+ time_next=time(NULL);
+ cout<<"----------------------------------------------------------------\n";
+ cout<<"Processed time: "<<time_next-time_prev<<" seconds.\n";
+ cout<<"----------------------------------------------------------------\n\n";
+ //********************************************************************************************************
- //********************************************************************************************************
- //3.train the weight clusters
+ //********************************************************************************************************
+ //3.train the weight clusters
- time_prev=time(NULL);
- cout<<"Step 2. train the weight clusters.\n";
- vector<string> clusterNames = trainingPhraseTable->getClusterNames();
- int numClusters = trainingPhraseTable->getNumCluster();
- int processClusters=0; //store the number of clusters processed
- weightMatrixW* weightMatrix = new weightMatrixW();
- ifstream phraseTableFile(phraseDBFile,ios::binary); //re-open the phraseDBFile to get the features
- ofstream weightMatrixFile(weightMatrixFileName,ios::out); //output the weight matrix
-
- for (int i=0; i<numClusters; i++)
- {
- //3.1 For each cluster
- string sourcePhrase=clusterNames[i];
- int numberExample=trainingPhraseTable->getClusterMember(sourcePhrase);
-
- if (numberExample>=minTrainingExample)
+ time_prev=time(NULL);
+ cout<<"Step 2. train the weight clusters.\n";
+ vector<string> clusterNames = trainingPhraseTable->getClusterNames();
+ int numClusters = trainingPhraseTable->getNumCluster();
+ int processClusters=0; //store the number of clusters processed
+ weightMatrix = new weightMatrixW();
+ ifstream phraseTableFile(phraseDBFile,ios::binary); //re-open the phraseDBFile to get the features
+ ofstream weightMatrixFile(weightMatrixFileName,ios::out); //output the weight matrix
+
+ for (int i=0; i<numClusters; i++)
{
- processClusters++;
- //3.2 Get the training examples
- vector<vector<int> > trainingTable=trainingPhraseTable->getExamples(sourcePhrase,phraseTableFile);
+ //3.1 For each cluster
+ string sourcePhrase=clusterNames[i];
+ int numberExample=trainingPhraseTable->getClusterMember(sourcePhrase);
- //3.3 Train the weight cluster
- weightClusterW weightCluster(sourcePhrase, classSetup);
- weightCluster.structureLearningW(trainingTable, maxRound, step, eTol);
+ if (numberExample>=minTrainingExample)
+ {
+ processClusters++;
+ //3.2 Get the training examples
+ vector<vector<int> > trainingTable=trainingPhraseTable->getExamples(sourcePhrase,phraseTableFile);
- //3.4 write the weight cluster and update the weight matrix
- unsigned long long startPos=weightCluster.writeWeightCluster(weightMatrixFile);
- weightMatrix->insertWeightCluster(sourcePhrase,startPos);
- }
+ //3.3 Train the weight cluster
+ weightClusterW weightCluster(sourcePhrase, classSetup);
+ weightCluster.structureLearningW(trainingTable, maxRound, step, eTol);
+ //3.4 write the weight cluster and update the weight matrix
+ unsigned long long startPos=weightCluster.writeWeightCluster(weightMatrixFile);
+ weightMatrix->insertWeightCluster(sourcePhrase,startPos);
+ }
- //3.5 Notice
- if ((i+1)%100==0)
- {
- cout<<".";
- if ((i+1)%1000==0)
- cout<<'\n';
- }
+
+ //3.5 Notice
+ if ((i+1)%100==0)
+ {
+ cout<<".";
+ if ((i+1)%1000==0)
+ cout<<'\n';
+ }
- }
- cout<<"\nThe number of clusters been trained: "<<processClusters<<".\n";
+ }
+ cout<<"\nThe number of clusters been trained: "<<processClusters<<".\n";
- //3.5 Output the weight matrix
-
- weightMatrix->writeWeightMatrix(weightMatrixFilePosName);
- weightMatrixFile.close();
- phraseTableFile.close();
- delete trainingPhraseTable;
+ //3.5 Output the weight matrix
- time_next=time(NULL);
- cout<<'\n';
- cout<<"----------------------------------------------------------------\n";
- cout<<"Processed time: "<<time_next-time_prev<<" seconds.\n";
- cout<<"----------------------------------------------------------------\n\n";
+ weightMatrix->writeWeightMatrix(weightMatrixFilePosName);
+ weightMatrixFile.close();
+ phraseTableFile.close();
+ delete trainingPhraseTable;
- /*
- If the weight matrix has already trained, then can comment all the procedures in 3. but use the following*/
- //------------------------------------------------------------------------
- //weightMatrixW* weightMatrix = new weightMatrixW(weightMatrixFilePosName);
+ time_next=time(NULL);
+ cout<<'\n';
+ cout<<"----------------------------------------------------------------\n";
+ cout<<"Processed time: "<<time_next-time_prev<<" seconds.\n";
+ cout<<"----------------------------------------------------------------\n\n";
+ }
+ else {
+ weightMatrix = new weightMatrixW(weightMatrixFilePosName);
+ }
//-------------------------------------------------------------------------
diff --git a/DPR_model/template_configulationFile b/DPR_model/template_configulationFile
index ae34ca442..885675d16 100644
--- a/DPR_model/template_configulationFile
+++ b/DPR_model/template_configulationFile
@@ -37,6 +37,8 @@ If the phrase translation table is already filtered, fill 1 and 0 otherwise.
tableFilterLabel = 1
Directory + name of the (output) weight parameter matrix (e.g. ./data/weightMatrix).
weightMatrixFile =
+Need to train the weight matrix (If you have trained the weight matrix, then use 0)? 1. Yes, 0. No.
+weightMatrixTrainLabel = 1
Directory + name of the (output) sentence phrase options table (e.g. ./data/phraseOption).
phraseOptionFile =
Directory + name of the test corpus (Used for extracting the phrase options).