Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieu@hoang.co.uk> 2014-06-08 19:23:14 +0400
committer: Hieu Hoang <hieu@hoang.co.uk> 2014-06-08 19:23:14 +0400
commit: cb94a3181bd00c74bf0b2b81fea4aee2195dc121 (patch)
tree: 3bedb39ba6be32a408583c965ace9e68f23861a0
parent: 23ba0de2247e84db69759445a41c4c4f04840460 (diff)
use standard c++ getline instead of old Moses SAFE_GETLINE
-rw-r--r--  phrase-extract/DomainFeature.cpp  11
-rw-r--r--  phrase-extract/consolidate-direct-main.cpp  24
-rw-r--r--  phrase-extract/consolidate-reverse-main.cpp  22
-rw-r--r--  phrase-extract/extract-ordering-main.cpp  28
-rw-r--r--  phrase-extract/extract-rules-main.cpp  20
-rw-r--r--  phrase-extract/relax-parse-main.cpp  8
-rw-r--r--  phrase-extract/statistics-main.cpp  28
7 files changed, 56 insertions, 85 deletions
diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp
index 2f99a8709..337364b1d 100644
--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@@ -4,8 +4,6 @@
#include "InputFileStream.h"
#include "SafeGetline.h"
-#define TABLE_LINE_MAX_LENGTH 1000
-
using namespace std;
namespace MosesTraining
@@ -16,12 +14,11 @@ void Domain::load( const std::string &domainFileName )
{
Moses::InputFileStream fileS( domainFileName );
istream *fileP = &fileS;
- while(true) {
- char line[TABLE_LINE_MAX_LENGTH];
- SAFE_GETLINE((*fileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
- if (fileP->eof()) break;
+
+ string line;
+ while(getline(*fileP, line)) {
// read
- vector< string > domainSpecLine = tokenize( line );
+ vector< string > domainSpecLine = tokenize( line.c_str() );
int lineNumber;
if (domainSpecLine.size() != 2 ||
! sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) {
diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp
index 3b38f741c..40e0e35d4 100644
--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@@ -26,16 +26,9 @@
#include "InputFileStream.h"
#include "OutputFileStream.h"
-#include "SafeGetline.h"
-
-#define LINE_MAX_LENGTH 10000
-
using namespace std;
-char line[LINE_MAX_LENGTH];
-
-
-vector< string > splitLine()
+vector< string > splitLine(const char *line)
{
vector< string > item;
int start=0;
@@ -61,14 +54,15 @@ bool getLine( istream &fileP, vector< string > &item )
{
if (fileP.eof())
return false;
-
- SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
- if (fileP.eof())
+
+ string line;
+ if (getline(fileP, line)) {
+ item = splitLine(line.c_str());
return false;
-
- item = splitLine();
-
- return true;
+ }
+ else {
+ return false;
+ }
}
diff --git a/phrase-extract/consolidate-reverse-main.cpp b/phrase-extract/consolidate-reverse-main.cpp
index 6843bf3aa..891773418 100644
--- a/phrase-extract/consolidate-reverse-main.cpp
+++ b/phrase-extract/consolidate-reverse-main.cpp
@@ -30,20 +30,17 @@
#include "SafeGetline.h"
#include "InputFileStream.h"
-#define LINE_MAX_LENGTH 10000
-
using namespace std;
bool hierarchicalFlag = false;
bool onlyDirectFlag = false;
bool phraseCountFlag = true;
bool logProbFlag = false;
-char line[LINE_MAX_LENGTH];
void processFiles( char*, char*, char* );
bool getLine( istream &fileP, vector< string > &item );
string reverseAlignment(const string &alignments);
-vector< string > splitLine();
+vector< string > splitLine(const char *lin);
inline void Tokenize(std::vector<std::string> &output
, const std::string& str
@@ -190,17 +187,18 @@ bool getLine( istream &fileP, vector< string > &item )
{
if (fileP.eof())
return false;
-
- SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
- if (fileP.eof())
+
+ string line;
+ if (getline(fileP, line)) {
+ item = splitLine(line.c_str());
return false;
-
- item = splitLine();
-
- return true;
+ }
+ else {
+ return false;
+ }
}
-vector< string > splitLine()
+vector< string > splitLine(const char *line)
{
vector< string > item;
bool betweenWords = true;
diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp
index 104457b01..78132d4fd 100644
--- a/phrase-extract/extract-ordering-main.cpp
+++ b/phrase-extract/extract-ordering-main.cpp
@@ -32,10 +32,6 @@ using namespace MosesTraining;
namespace MosesTraining
{
-
-const long int LINE_MAX_LENGTH = 500000 ;
-
-
// HPhraseVertex represents a point in the alignment matrix
typedef pair <int, int> HPhraseVertex;
@@ -246,20 +242,20 @@ int main(int argc, char* argv[])
int i = sentenceOffset;
- while(true) {
+ string englishString, foreignString, alignmentString, weightString;
+
+ while(getline(*eFileP, englishString)) {
i++;
- if (i%10000 == 0) cerr << "." << flush;
- char englishString[LINE_MAX_LENGTH];
- char foreignString[LINE_MAX_LENGTH];
- char alignmentString[LINE_MAX_LENGTH];
- char weightString[LINE_MAX_LENGTH];
- SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
- if (eFileP->eof()) break;
- SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
- SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+
+ getline(*eFileP, englishString);
+ getline(*fFileP, foreignString);
+ getline(*aFileP, alignmentString);
if (iwFileP) {
- SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
+ getline(*iwFileP, weightString);
}
+
+ if (i%10000 == 0) cerr << "." << flush;
+
SentenceAlignment sentence;
// cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alingment line
@@ -269,7 +265,7 @@ int main(int argc, char* argv[])
cout << "LOG: ALT: " << alignmentString << endl;
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
+ if (sentence.create( englishString.c_str(), foreignString.c_str(), alignmentString.c_str(), weightString.c_str(), i, false)) {
ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFileOrientation);
task->Run();
delete task;
diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp
index f5f44316e..30963f32b 100644
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@@ -47,8 +47,6 @@
#include "InputFileStream.h"
#include "OutputFileStream.h"
-#define LINE_MAX_LENGTH 500000
-
using namespace std;
using namespace MosesTraining;
@@ -326,17 +324,15 @@ int main(int argc, char* argv[])
// loop through all sentence pairs
size_t i=sentenceOffset;
- while(true) {
+ string targetString, sourceString, alignmentString;
+
+ while(getline(*tFileP, targetString)) {
i++;
- if (i%1000 == 0) cerr << i << " " << flush;
- char targetString[LINE_MAX_LENGTH];
- char sourceString[LINE_MAX_LENGTH];
- char alignmentString[LINE_MAX_LENGTH];
- SAFE_GETLINE((*tFileP), targetString, LINE_MAX_LENGTH, '\n', __FILE__);
- if (tFileP->eof()) break;
- SAFE_GETLINE((*sFileP), sourceString, LINE_MAX_LENGTH, '\n', __FILE__);
- SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+ getline(*sFileP, sourceString);
+ getline(*aFileP, alignmentString);
+
+ if (i%1000 == 0) cerr << i << " " << flush;
SentenceAlignmentWithSyntax sentence
(targetLabelCollection, sourceLabelCollection,
@@ -349,7 +345,7 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) {
+ if (sentence.create(targetString.c_str(), sourceString.c_str(), alignmentString.c_str(),"", i, options.boundaryRules)) {
if (options.unknownWordLabelFlag) {
collectWordLabelCounts(sentence);
}
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index a58d4d97f..c04cae85b 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -33,17 +33,13 @@ int main(int argc, char* argv[])
// loop through all sentences
int i=0;
- char inBuffer[LINE_MAX_LENGTH];
- while(true) {
+ string inBuffer;
+ while(getline(cin, inBuffer)) {
i++;
if (i%1000 == 0) cerr << "." << flush;
if (i%10000 == 0) cerr << ":" << flush;
if (i%100000 == 0) cerr << "!" << flush;
- // get line from stdin
- SAFE_GETLINE( cin, inBuffer, LINE_MAX_LENGTH, '\n', __FILE__);
- if (cin.eof()) break;
-
// process into syntax tree representation
string inBufferString = string( inBuffer );
set< string > labelCollection; // set of labels, not used
diff --git a/phrase-extract/statistics-main.cpp b/phrase-extract/statistics-main.cpp
index 67373ec93..f1563dc05 100644
--- a/phrase-extract/statistics-main.cpp
+++ b/phrase-extract/statistics-main.cpp
@@ -19,8 +19,6 @@
using namespace std;
using namespace MosesTraining;
-#define LINE_MAX_LENGTH 10000
-
namespace MosesTraining
{
@@ -31,7 +29,7 @@ public:
vector< vector<size_t> > alignedToE;
vector< vector<size_t> > alignedToF;
- bool create( char*, int );
+ bool create( const char*, int );
void clear();
bool equals( const PhraseAlignment& );
};
@@ -106,16 +104,14 @@ int main(int argc, char* argv[])
vector< PhraseAlignment > phrasePairsWithSameF;
int i=0;
int fileCount = 0;
- while(true) {
+
+ string line;
+ while(getline(extractFileP, line)) {
if (extractFileP.eof()) break;
if (++i % 100000 == 0) cerr << "." << flush;
- char line[LINE_MAX_LENGTH];
- SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
- // if (fileCount>0)
- if (extractFileP.eof())
- break;
+
PhraseAlignment phrasePair;
- bool isPhrasePair = phrasePair.create( line, i );
+ bool isPhrasePair = phrasePair.create( line.c_str(), i );
if (lastForeign >= 0 && lastForeign != phrasePair.foreign) {
processPhrasePairs( phrasePairsWithSameF );
for(size_t j=0; j<phrasePairsWithSameF.size(); j++)
@@ -124,7 +120,7 @@ int main(int argc, char* argv[])
phraseTableE.clear();
phraseTableF.clear();
phrasePair.clear(); // process line again, since phrase tables flushed
- phrasePair.create( line, i );
+ phrasePair.create( line.c_str(), i );
phrasePairBase = 0;
}
lastForeign = phrasePair.foreign;
@@ -242,7 +238,7 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
}
}
-bool PhraseAlignment::create( char line[], int lineID )
+bool PhraseAlignment::create(const char line[], int lineID )
{
vector< string > token = tokenize( line );
int item = 1;
@@ -321,16 +317,14 @@ void LexicalTable::load( const string &filePath )
}
istream *inFileP = &inFile;
- char line[LINE_MAX_LENGTH];
+ string line;
int i=0;
- while(true) {
+ while(getline(*inFileP, line)) {
i++;
if (i%100000 == 0) cerr << "." << flush;
- SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
- if (inFileP->eof()) break;
- vector<string> token = tokenize( line );
+ vector<string> token = tokenize( line.c_str() );
if (token.size() != 3) {
cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
token.size() << " " << token[0] << " " << line << endl;