Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRico Sennrich <rico.sennrich@gmx.ch>2013-09-08 01:04:01 +0400
committerRico Sennrich <rico.sennrich@gmx.ch>2013-09-08 01:04:01 +0400
commit7138056b8fd60b4572f016f038360dd5fb19a39c (patch)
treebb14de73775788233ff8aeb800da016365358826 /phrase-extract/extract-main.cpp
parentb27d140619ff7340d7cac26b9ab8fb368c460af7 (diff)
flexibility scores
Diffstat (limited to 'phrase-extract/extract-main.cpp')
-rw-r--r--phrase-extract/extract-main.cpp96
1 files changed, 93 insertions, 3 deletions
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index 4804f83b8..5bc018173 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -80,6 +80,8 @@ int sentenceOffset = 0;
std::vector<std::string> Tokenize(const std::string& str,
const std::string& delimiters = " \t");
+bool flexScoreFlag = false;
+
}
namespace MosesTraining
@@ -88,18 +90,22 @@ namespace MosesTraining
class ExtractTask
{
public:
- ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFile, Moses::OutputFileStream &extractFileInv,Moses::OutputFileStream &extractFileOrientation):
+ ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFile, Moses::OutputFileStream &extractFileInv,Moses::OutputFileStream &extractFileOrientation, Moses::OutputFileStream &extractFileContext, Moses::OutputFileStream &extractFileContextInv):
m_sentence(sentence),
m_options(initoptions),
m_extractFile(extractFile),
m_extractFileInv(extractFileInv),
- m_extractFileOrientation(extractFileOrientation) {}
+ m_extractFileOrientation(extractFileOrientation),
+ m_extractFileContext(extractFileContext),
+ m_extractFileContextInv(extractFileContextInv) {}
void Run();
private:
vector< string > m_extractedPhrases;
vector< string > m_extractedPhrasesInv;
vector< string > m_extractedPhrasesOri;
vector< string > m_extractedPhrasesSid;
+ vector< string > m_extractedPhrasesContext;
+ vector< string > m_extractedPhrasesContextInv;
void extractBase(SentenceAlignment &);
void extract(SentenceAlignment &);
void addPhrase(SentenceAlignment &, int, int, int, int, string &);
@@ -112,6 +118,8 @@ private:
Moses::OutputFileStream &m_extractFile;
Moses::OutputFileStream &m_extractFileInv;
Moses::OutputFileStream &m_extractFileOrientation;
+ Moses::OutputFileStream &m_extractFileContext;
+ Moses::OutputFileStream &m_extractFileContextInv;
};
}
@@ -129,6 +137,8 @@ int main(int argc, char* argv[])
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
Moses::OutputFileStream extractFileOrientation;
+ Moses::OutputFileStream extractFileContext;
+ Moses::OutputFileStream extractFileContextInv;
const char* const &fileNameE = argv[1];
const char* const &fileNameF = argv[2];
const char* const &fileNameA = argv[3];
@@ -140,6 +150,8 @@ int main(int argc, char* argv[])
options.initOnlyOutputSpanInfo(true);
} else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
options.initOrientationFlag(true);
+ } else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
+ options.initFlexScoreFlag(true);
} else if (strcmp(argv[i],"--NoTTable") == 0) {
options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
@@ -254,8 +266,15 @@ int main(int argc, char* argv[])
string fileNameExtractOrientation = fileNameExtract + ".o" + (options.isGzOutput()?".gz":"");
extractFileOrientation.Open(fileNameExtractOrientation.c_str());
}
+ if (options.isFlexScoreFlag()) {
+ string fileNameExtractContext = fileNameExtract + ".context" + (options.isGzOutput()?".gz":"");
+ string fileNameExtractContextInv = fileNameExtract + ".context.inv" + (options.isGzOutput()?".gz":"");
+ extractFileContext.Open(fileNameExtractContext.c_str());
+ extractFileContextInv.Open(fileNameExtractContextInv.c_str());
+ }
int i = sentenceOffset;
+
while(true) {
i++;
if (i%10000 == 0) cerr << "." << flush;
@@ -280,7 +299,7 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
- ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation);
+ ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation, extractFileContext, extractFileContextInv);
task->Run();
delete task;
@@ -302,6 +321,11 @@ int main(int argc, char* argv[])
if (options.isOrientationFlag()) {
extractFileOrientation.Close();
}
+
+ if (options.isFlexScoreFlag()) {
+ extractFileContext.Close();
+ extractFileContextInv.Close();
+ }
}
}
@@ -315,6 +339,8 @@ void ExtractTask::Run()
m_extractedPhrasesInv.clear();
m_extractedPhrasesOri.clear();
m_extractedPhrasesSid.clear();
+ m_extractedPhrasesContext.clear();
+ m_extractedPhrasesContextInv.clear();
}
@@ -680,6 +706,8 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
ostringstream outextractstr;
ostringstream outextractstrInv;
ostringstream outextractstrOrientation;
+ ostringstream outextractstrContext;
+ ostringstream outextractstrContextInv;
if (m_options.isOnlyOutputSpanInfo()) {
cout << startF << " " << endF << " " << startE << " " << endE << endl;
@@ -693,19 +721,25 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
for(int fi=startF; fi<=endF; fi++) {
if (m_options.isTranslationFlag()) outextractstr << sentence.source[fi] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.source[fi] << " ";
+ if (m_options.isFlexScoreFlag()) outextractstrContext << sentence.source[fi] << " ";
}
if (m_options.isTranslationFlag()) outextractstr << "||| ";
if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
+ if (m_options.isFlexScoreFlag()) outextractstrContext << "||| ";
// target
for(int ei=startE; ei<=endE; ei++) {
if (m_options.isTranslationFlag()) outextractstr << sentence.target[ei] << " ";
if (m_options.isTranslationFlag()) outextractstrInv << sentence.target[ei] << " ";
if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.target[ei] << " ";
+ if (m_options.isFlexScoreFlag()) outextractstrContext << sentence.target[ei] << " ";
+ if (m_options.isFlexScoreFlag()) outextractstrContextInv << sentence.target[ei] << " ";
}
if (m_options.isTranslationFlag()) outextractstr << "|||";
if (m_options.isTranslationFlag()) outextractstrInv << "||| ";
if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
+ if (m_options.isFlexScoreFlag()) outextractstrContext << "||| ";
+ if (m_options.isFlexScoreFlag()) outextractstrContextInv << "||| ";
// source (for inverse)
@@ -714,6 +748,12 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
outextractstrInv << sentence.source[fi] << " ";
outextractstrInv << "|||";
}
+ if (m_options.isFlexScoreFlag()) {
+ for(int fi=startF; fi<=endF; fi++)
+ outextractstrContextInv << sentence.source[fi] << " ";
+ outextractstrContextInv << "|||";
+ }
+
// alignment
if (m_options.isTranslationFlag()) {
for(int ei=startE; ei<=endE; ei++) {
@@ -743,6 +783,46 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
}
+
+ // generate two lines for every extracted phrase:
+ // once with left, once with right context
+ if (m_options.isFlexScoreFlag()) {
+
+ string strContext = outextractstrContext.str();
+ string strContextInv = outextractstrContextInv.str();
+
+ ostringstream outextractstrContextRight(strContext, ostringstream::app);
+ ostringstream outextractstrContextRightInv(strContextInv, ostringstream::app);
+
+ // write context to left
+ outextractstrContext << "< ";
+ if (startF == 0) outextractstrContext << "<s>";
+ else outextractstrContext << sentence.source[startF-1];
+
+ outextractstrContextInv << " < ";
+ if (startE == 0) outextractstrContextInv << "<s>";
+ else outextractstrContextInv << sentence.target[startE-1];
+
+ // write context to right
+ outextractstrContextRight << "> ";
+ if (endF+1 == sentence.source.size()) outextractstrContextRight << "<s>";
+ else outextractstrContextRight << sentence.source[endF+1];
+
+ outextractstrContextRightInv << " > ";
+ if (endE+1 == sentence.target.size()) outextractstrContextRightInv << "<s>";
+ else outextractstrContextRightInv << sentence.target[endE+1];
+
+ outextractstrContext << "\n";
+ outextractstrContextInv << "\n";
+ outextractstrContextRight << "\n";
+ outextractstrContextRightInv << "\n";
+
+ m_extractedPhrasesContext.push_back(outextractstrContext.str());
+ m_extractedPhrasesContextInv.push_back(outextractstrContextInv.str());
+ m_extractedPhrasesContext.push_back(outextractstrContextRight.str());
+ m_extractedPhrasesContextInv.push_back(outextractstrContextRightInv.str());
+ }
+
if (m_options.isTranslationFlag()) outextractstr << "\n";
if (m_options.isTranslationFlag()) outextractstrInv << "\n";
if (m_options.isOrientationFlag()) outextractstrOrientation << "\n";
@@ -760,6 +840,8 @@ void ExtractTask::writePhrasesToFile()
ostringstream outextractFile;
ostringstream outextractFileInv;
ostringstream outextractFileOrientation;
+ ostringstream outextractFileContext;
+ ostringstream outextractFileContextInv;
for(vector<string>::const_iterator phrase=m_extractedPhrases.begin(); phrase!=m_extractedPhrases.end(); phrase++) {
outextractFile<<phrase->data();
@@ -770,10 +852,18 @@ void ExtractTask::writePhrasesToFile()
for(vector<string>::const_iterator phrase=m_extractedPhrasesOri.begin(); phrase!=m_extractedPhrasesOri.end(); phrase++) {
outextractFileOrientation<<phrase->data();
}
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesContext.begin();phrase!=m_extractedPhrasesContext.end();phrase++){
+ outextractFileContext<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesContextInv.begin();phrase!=m_extractedPhrasesContextInv.end();phrase++){
+ outextractFileContextInv<<phrase->data();
+ }
m_extractFile << outextractFile.str();
m_extractFileInv << outextractFileInv.str();
m_extractFileOrientation << outextractFileOrientation.str();
+ m_extractFileContext << outextractFileContext.str();
+ m_extractFileContextInv << outextractFileContextInv.str();
}
// if proper conditioning, we need the number of times a source phrase occured