Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- phrase-extract/PhraseExtractionOptions.h 12
-rw-r--r-- phrase-extract/SentenceAlignment.cpp 3
-rw-r--r-- phrase-extract/SentenceAlignment.h 3
-rw-r--r-- phrase-extract/extract-main.cpp 31
-rw-r--r-- phrase-extract/extract-rules-main.cpp 2
5 files changed, 44 insertions, 7 deletions
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 6c7966736..2daeaf0ca 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -46,6 +46,7 @@ class PhraseExtractionOptions {
bool includeSentenceIdFlag; //include sentence id in extract file
bool onlyOutputSpanInfo;
bool gzOutput;
+ std::string instanceWeightsFile; //weights for each sentence
public:
PhraseExtractionOptions(const int initmaxPhraseLength):
@@ -99,7 +100,11 @@ public:
}
void initGzOutput (const bool initgzOutput){
gzOutput= initgzOutput;
- }
+ }
+ void initInstanceWeightsFile(const char* initInstanceWeightsFile) {
+ instanceWeightsFile = std::string(initInstanceWeightsFile);
+ }
+
// functions for getting values
bool isAllModelsOutputFlag() const {
return allModelsOutputFlag;
@@ -136,7 +141,10 @@ public:
}
bool isGzOutput () const {
return gzOutput;
- }
+ }
+ std::string getInstanceWeightsFile() const {
+ return instanceWeightsFile;
+ }
};
}
diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp
index af1cfa953..96ef02865 100644
--- a/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@@ -54,10 +54,11 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo
return true;
}
-bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], int sentenceID, bool boundaryRules)
+bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], char weightString[], int sentenceID, bool boundaryRules)
{
using namespace std;
this->sentenceID = sentenceID;
+ this->weightString = std::string(weightString);
// process sentence strings and store in target and source members.
if (!processTargetSentence(targetString, sentenceID, boundaryRules)) {
diff --git a/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h
index 7c2988780..76cf950d4 100644
--- a/phrase-extract/SentenceAlignment.h
+++ b/phrase-extract/SentenceAlignment.h
@@ -35,6 +35,7 @@ public:
std::vector<int> alignedCountS;
std::vector<std::vector<int> > alignedToT;
int sentenceID;
+ std::string weightString;
virtual ~SentenceAlignment();
@@ -43,7 +44,7 @@ public:
virtual bool processSourceSentence(const char *, int, bool boundaryRules);
bool create(char targetString[], char sourceString[],
- char alignmentString[], int sentenceID, bool boundaryRules);
+ char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
};
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index 8749f5059..92c8a470e 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -114,7 +114,7 @@ int main(int argc, char* argv[])
if (argc < 6) {
cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
- cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n ]\n";
+ cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ]\n";
exit(1);
}
@@ -144,6 +144,12 @@ int main(int argc, char* argv[])
sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i], "--GZOutput") == 0) {
options.initGzOutput(true);
+ } else if (strcmp(argv[i], "--InstanceWeights") == 0) {
+ if (i+1 >= argc) {
+ cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
+ exit(1);
+ }
+ options.initInstanceWeightsFile(argv[++i]);
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@@ -220,6 +226,13 @@ int main(int argc, char* argv[])
istream *fFileP = &fFile;
istream *aFileP = &aFile;
+ istream *iwFileP = NULL;
+ auto_ptr<Moses::InputFileStream> instanceWeightsFile;
+ if (options.getInstanceWeightsFile().length()) {
+ instanceWeightsFile.reset(new Moses::InputFileStream(options.getInstanceWeightsFile()));
+ iwFileP = instanceWeightsFile.get();
+ }
+
// open output files
if (options.isTranslationFlag()) {
string fileNameExtractInv = fileNameExtract + ".inv" + (options.isGzOutput()?".gz":"");
@@ -238,10 +251,14 @@ int main(int argc, char* argv[])
char englishString[LINE_MAX_LENGTH];
char foreignString[LINE_MAX_LENGTH];
char alignmentString[LINE_MAX_LENGTH];
+ char weightString[LINE_MAX_LENGTH];
SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
if (eFileP->eof()) break;
SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+ if (iwFileP) {
+ SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
+ }
SentenceAlignment sentence;
// cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alingment line
@@ -251,7 +268,7 @@ int main(int argc, char* argv[])
cout << "LOG: ALT: " << alignmentString << endl;
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence.create( englishString, foreignString, alignmentString, i, false)) {
+ if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
task->Run();
delete task;
@@ -695,6 +712,16 @@ for(int fi=startF; fi<=endF; fi++) {
if (m_options.isOrientationFlag())
outextractstrOrientation << orientationInfo;
+ if (m_options.getInstanceWeightsFile().length()) {
+ if (m_options.isTranslationFlag()) {
+ outextractstr << " ||| " << sentence.weightString;
+ outextractstrInv << " ||| " << sentence.weightString;
+ }
+ if (m_options.isOrientationFlag()) {
+ outextractstrOrientation << " ||| " << sentence.weightString;
+ }
+ }
+
if (m_options.isIncludeSentenceIdFlag()) {
outextractstr << " ||| " << sentence.sentenceID;
}
diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp
index 974e575b9..0ab678dbd 100644
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@@ -337,7 +337,7 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence.create(targetString, sourceString, alignmentString, i, options.boundaryRules)) {
+ if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) {
if (options.unknownWordLabelFlag) {
collectWordLabelCounts(sentence);
}