diff options
author | phikoehn <pkoehn@inf.ed.ac.uk> | 2012-09-03 10:24:07 +0400 |
---|---|---|
committer | phikoehn <pkoehn@inf.ed.ac.uk> | 2012-09-03 10:24:07 +0400 |
commit | e072a7f9a76f8c6a6589451ee186256cb4dabc5e (patch) | |
tree | 5b441dcd2bc068cac7c614fefb08fb78b254e4fd /phrase-extract | |
parent | 0e783dc5297841b663149f558646d82881151c40 (diff) |
merge issues
Diffstat (limited to 'phrase-extract')
-rw-r--r-- | phrase-extract/PhraseExtractionOptions.h | 10 | ||||
-rw-r--r-- | phrase-extract/extract.cpp | 67 | ||||
-rw-r--r-- | phrase-extract/score.cpp | 4 |
3 files changed, 16 insertions, 65 deletions
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h index d541144b7..400b587ed 100644 --- a/phrase-extract/PhraseExtractionOptions.h +++ b/phrase-extract/PhraseExtractionOptions.h @@ -44,6 +44,7 @@ class PhraseExtractionOptions { bool orientationFlag; bool translationFlag; bool sentenceIdFlag; //create extract file with sentence id + bool includeSentenceIdFlag; //include sentence id in extract file bool onlyOutputSpanInfo; bool gzOutput; @@ -60,10 +61,9 @@ public: orientationFlag(false), translationFlag(true), sentenceIdFlag(false), + includeSentenceIdFlag(false), onlyOutputSpanInfo(false), gzOutput(false){} - - //functions for initialization of options void initAllModelsOutputFlag(const bool initallModelsOutputFlag){ @@ -96,6 +96,9 @@ public: void initSentenceIdFlag(const bool initsentenceIdFlag){ sentenceIdFlag=initsentenceIdFlag; } + void initIncludeSentenceIdFlag(const bool initincludeSentenceIdFlag){ + sentenceIdFlag=initincludeSentenceIdFlag; + } void initOnlyOutputSpanInfo(const bool initonlyOutputSpanInfo){ onlyOutputSpanInfo= initonlyOutputSpanInfo; } @@ -133,6 +136,9 @@ public: bool isSentenceIdFlag() const { return sentenceIdFlag; } + bool isIncludeSentenceIdFlag() const { + return includeSentenceIdFlag; + } bool isOnlyOutputSpanInfo() const { return onlyOutputSpanInfo; } diff --git a/phrase-extract/extract.cpp b/phrase-extract/extract.cpp index b6ea97c6e..58eb4b2f3 100644 --- a/phrase-extract/extract.cpp +++ b/phrase-extract/extract.cpp @@ -66,41 +66,6 @@ typedef map <int, set<int> > HSentenceVertices; void insertVertex(HSentenceVertices &, int, int); void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, int, int, int, int); -<<<<<<< HEAD -string getOrientString(REO_POS, REO_MODEL_TYPE); - -bool ge(int, int); -bool le(int, int); -bool lt(int, int); - -void extractBase(SentenceAlignment &); -void extract(SentenceAlignment &); -void addPhrase(SentenceAlignment &, int, int, int, int, string &); -bool isAligned (SentenceAlignment &, int, int); - -bool allModelsOutputFlag = false; - -bool wordModel = false; -REO_MODEL_TYPE wordType = REO_MSD; -bool phraseModel = false; -REO_MODEL_TYPE phraseType = REO_MSD; -bool hierModel = false; -REO_MODEL_TYPE hierType = REO_MSD; - - -Moses::OutputFileStream extractFile; -Moses::OutputFileStream extractFileInv; -Moses::OutputFileStream extractFileOrientation; -Moses::OutputFileStream extractFileSentenceId; -int maxPhraseLength; -bool orientationFlag = false; -bool translationFlag = true; -bool sentenceIdFlag = false; //create extract file with sentence id -int sentenceOffset = 0; -bool includeSentenceIdFlag = false; //include sentence id in extract file -bool onlyOutputSpanInfo = false; -bool gzOutput = false; -======= string getOrientString(REO_POS, REO_MODEL_TYPE); bool ge(int, int); @@ -108,8 +73,7 @@ bool gzOutput = false; bool lt(int, int); bool isAligned (SentenceAlignment &, int, int); - ->>>>>>> b317522563feb4ca7ff978a0de661ec2189934ea + int sentenceOffset = 0; } @@ -150,14 +114,9 @@ int main(int argc, char* argv[]) cerr << "PhraseExtract v1.4, written by Philipp Koehn\n" << "phrase extraction from an aligned parallel corpus\n"; -<<<<<<< HEAD - if (argc < 6) { - cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] | --OnlyOutputSpanInfo | --NoTTable | --SentenceId | --IncludeSentenceId | --SentenceOffset n ]\n"; -======= if (argc < 6) { cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] "; - cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --SentenceId | --GZOutput ]\n"; ->>>>>>> b317522563feb4ca7ff978a0de661ec2189934ea + cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --SentenceId | --GZOutput | --IncludeSentenceId | --SentenceOffset n ]\n"; exit(1); } @@ -179,19 +138,15 @@ int main(int argc, char* argv[]) } else if (strcmp(argv[i],"--NoTTable") == 0) { options.initTranslationFlag(false); } else if (strcmp(argv[i], "--SentenceId") == 0) { -<<<<<<< HEAD - sentenceIdFlag = true; + options.initSentenceIdFlag(true); } else if (strcmp(argv[i], "--IncludeSentenceId") == 0) { - includeSentenceIdFlag = true; + options.initIncludeSentenceIdFlag(true); } else if (strcmp(argv[i], "--SentenceOffset") == 0) { if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') { cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl; exit(1); } sentenceOffset = atoi(argv[++i]); -======= - options.initSentenceIdFlag(true); ->>>>>>> b317522563feb4ca7ff978a0de661ec2189934ea } else if (strcmp(argv[i], "--GZOutput") == 0) { options.initGzOutput(true); } else if(strcmp(argv[i],"--model") == 0) { @@ -758,20 +713,14 @@ for(int fi=startF; fi<=endF; fi++) { if (m_options.isOrientationFlag()) outextractstrOrientation << orientationInfo; -<<<<<<< HEAD - if (sentenceIdFlag) - extractFileSentenceId << sentence.sentenceID; - - if (includeSentenceIdFlag) - extractFile << " ||| " << sentence.sentenceID; -======= if (m_options.isSentenceIdFlag()) { outextractstrSentenceId << sentence.sentenceID; } ->>>>>>> b317522563feb4ca7ff978a0de661ec2189934ea - + if (m_options.isIncludeSentenceIdFlag()) { + outextractstr << " ||| " << sentence.sentenceID; + } - if (m_options.isTranslationFlag()) outextractstr << "\n"; + if (m_options.isTranslationFlag()) outextractstr << "\n"; if (m_options.isTranslationFlag()) outextractstrInv << "\n"; if (m_options.isOrientationFlag()) outextractstrOrientation << "\n"; if (m_options.isSentenceIdFlag()) outextractstrSentenceId << "\n"; diff --git a/phrase-extract/score.cpp b/phrase-extract/score.cpp index 9ec976f46..8348a44bc 100644 --- a/phrase-extract/score.cpp +++ b/phrase-extract/score.cpp @@ -104,12 +104,8 @@ int main(int argc, char* argv[]) string fileNameLex = argv[2]; string fileNamePhraseTable = argv[3]; string fileNameCountOfCounts; -<<<<<<< HEAD char* fileNameFunctionWords = NULL; char* fileNameDomain = NULL; -======= - string fileNameFunctionWords; ->>>>>>> b317522563feb4ca7ff978a0de661ec2189934ea for(int i=4; i<argc; i++) { if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) { |