Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen Vermeulen <jtv@precisiontranslationtools.com>2015-05-25 09:15:29 +0300
committerJeroen Vermeulen <jtv@precisiontranslationtools.com>2015-05-25 09:15:29 +0300
commite85751929300bef48286680a5648c29830fe7cd6 (patch)
tree483060b73cc73a1b7474b7048758def00deaa8b7
parentbdf9bd6103e44b8e6b750e7851681545e53a18fd (diff)
Unify int-to-string conversion.
The codebase was full of repeated patterns for: 1. Representing a nonnegative number as a string. 2. Representing a small nonnegative number as a single digit. 3. Functions for representing numbers as strings, mostly unused. This replaces all of them with a single new inline function. In the cases where the output was meant to be one digit, the number could actually overflow, resulting in weird characters. In particular, this could cause crashes on Windows machines with more than 10 CPU cores: you'd get a filename with ':' in it, which doesn't work. Such cases are replaced with zero-padded three-digit strings, to keep sort orders sane.
-rw-r--r--mgizapp/src/d4norm.cxx8
-rw-r--r--mgizapp/src/hmm.cpp50
-rw-r--r--mgizapp/src/hmmnorm.cxx8
-rw-r--r--mgizapp/src/main.cpp11
-rw-r--r--mgizapp/src/model1.cpp7
-rw-r--r--mgizapp/src/model2.cpp8
-rw-r--r--mgizapp/src/model3.cpp109
-rw-r--r--mgizapp/src/utility.h12
8 files changed, 45 insertions, 168 deletions
diff --git a/mgizapp/src/d4norm.cxx b/mgizapp/src/d4norm.cxx
index b061416..bc6e163 100644
--- a/mgizapp/src/d4norm.cxx
+++ b/mgizapp/src/d4norm.cxx
@@ -107,14 +107,6 @@ int main(int argc, char* argv[])
// Some utility functions to get it compile..
ofstream logmsg;
-const string str2Num(int n)
-{
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
double LAMBDA=1.09;
Vector<map< pair<int,int>,char > > ReferenceAlignment;
diff --git a/mgizapp/src/hmm.cpp b/mgizapp/src/hmm.cpp
index 8e28287..66a6bda 100644
--- a/mgizapp/src/hmm.cpp
+++ b/mgizapp/src/hmm.cpp
@@ -115,7 +115,7 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount,
string modelName="Hmm",shortModelName="hmm";
int dumpFreq=ModelH_Dump_Freq;
time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
+ string tfile, afile,afileh, alignfile, test_alignfile;
bool dump_files = false ;
ofstream of2 ;
st = time(NULL) ;
@@ -131,11 +131,7 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount,
cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
//dump_files = true;
- number = "";
- int n = it;
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
+ const string number = represent_number(it);
tfile = Prefix + ".t" + shortModelName + "." + number ;
afile = Prefix + ".a" + shortModelName + "." + number ;
// acfile = Prefix + ".ac" + shortModelName + "." + number ;
@@ -147,26 +143,20 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount,
initAL();
sHandler1.rewind();
int k;
- char node[2] ;
- node[1] = '\0';
for (k=1 ; k< NCPUS ; k++) {
th[k].m = this;
th[k].done = 0;
th[k].valid = 0;
th[k].it = it;
th[k].resume = resume;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
+ th[k].alignfile = alignfile + ".part" + represent_number(k, 3);
th[k].dump_files = dump_files;
th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
if(th[k].valid) {
cerr << "Error starting thread " << k << endl;
}
}
- node[0] = '0';
- alignfile += ".part";
- alignfile += node;
+ alignfile += ".part" + represent_number(0, 3);
em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
for (k=1; k<NCPUS; k++) {
pthread_join((th[k].thread),NULL);
@@ -861,7 +851,7 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it)
string modelName="Hmm",shortModelName="hmm";
int dumpFreq=ModelH_Dump_Freq;
time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
+ string tfile, afile,afileh, alignfile, test_alignfile;
int pair_no = 0;
bool dump_files = false ;
ofstream of2 ;
@@ -873,11 +863,7 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it)
cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
+ const string number = represent_number(it);
tfile = Prefix + ".t" + shortModelName + "." + number ;
afile = Prefix + ".a" + shortModelName + "." + number ;
afileh = Prefix + ".h" + shortModelName + "." + number ;
@@ -934,7 +920,7 @@ void hmm::em_one_step_2(int it,int part)
string modelName="Hmm",shortModelName="hmm";
int dumpFreq=ModelH_Dump_Freq;
time_t it_st, st, it_fn, fn;
- string tfile, afile,afileh, number, alignfile, test_alignfile;
+ string tfile, afile,afileh, alignfile, test_alignfile;
int pair_no = 0;
bool dump_files = false ;
ofstream of2 ;
@@ -943,22 +929,12 @@ void hmm::em_one_step_2(int it,int part)
dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
+ const string number = represent_number(it, 3);
tfile = Prefix + ".t" + shortModelName + "." + number ;
afile = Prefix + ".a" + shortModelName + "." + number ;
afileh = Prefix + ".h" + shortModelName + "." + number ;
- alignfile = Prefix + ".Ahmm." ;
- char v[2];
- v[1] = 0;
- v[0] = '0' + it;
- alignfile += v;
- alignfile += ".part";
- v[0] = '0' + part;
- alignfile += v;
+ alignfile = Prefix + ".Ahmm." + represent_number(it, 3) ;
+ alignfile += ".part" + represent_number(part, 3);
counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
aCountTable.clear();
@@ -1026,11 +1002,7 @@ int multi_thread_em(int noIter, int noThread, hmm* base)
cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
dump_files = true;
- string number = "";
- int n = i;
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
+ const string number = represent_number(i);
tfile = Prefix + ".t" + shortModelName + "." + number ;
afile = Prefix + ".a" + shortModelName + "." + number ;
acfile = Prefix + ".ac" + shortModelName + "." + number ;
diff --git a/mgizapp/src/hmmnorm.cxx b/mgizapp/src/hmmnorm.cxx
index d0c9015..2528ea9 100644
--- a/mgizapp/src/hmmnorm.cxx
+++ b/mgizapp/src/hmmnorm.cxx
@@ -116,14 +116,6 @@ int main(int argc, char* argv[])
// Some utility functions to get it compile..
ofstream logmsg;
-const string str2Num(int n)
-{
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
double LAMBDA=1.09;
Vector<map< pair<int,int>,char > > ReferenceAlignment;
diff --git a/mgizapp/src/main.cpp b/mgizapp/src/main.cpp
index f06c8e8..3dc0b90 100644
--- a/mgizapp/src/main.cpp
+++ b/mgizapp/src/main.cpp
@@ -112,15 +112,6 @@ string countPrefix;
Mutex logmsg_lock;
ofstream logmsg;
-const string str2Num(int n)
-{
- string number = "";
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- return (number);
-}
-
double LAMBDA=1.09;
sentenceHandler *testCorpus=0, *corpus=0;
Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp;
@@ -237,7 +228,7 @@ void printDecoderConfigFile()
lastmodel = 2;
else
lastmodel = 1;
- string lastModelName = str2Num(lastmodel);
+ string lastModelName = represent_number(lastmodel);
string p=Prefix + ".t" + /*lastModelName*/"3" +".final";
decoder << "TTable = " << stripPath(p.c_str()) << '\n';
p = Prefix + ".ti.final";
diff --git a/mgizapp/src/model1.cpp b/mgizapp/src/model1.cpp
index 74d3331..e2336cb 100644
--- a/mgizapp/src/model1.cpp
+++ b/mgizapp/src/model1.cpp
@@ -105,10 +105,7 @@ int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenc
Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler,
Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */ )
{
- string modelName="Model1",shortModelName="1";
- char b[2];
- b[1] = '\0';
- b[0] = '0' + nthread;
+ string modelName="Model1", shortModelName="1";
time_t st = time(NULL);
string tfile, number, alignfile, test_alignfile;
bool dump_files = false ;
@@ -124,7 +121,7 @@ int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenc
number.insert((size_t)0, 1, (char)(n % 10 + '0'));
} while((n /= 10) > 0);
alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ;
- alignfile = alignfile + b;
+ alignfile = alignfile + represent_number(nthread, 3);
em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp);
return 0;
diff --git a/mgizapp/src/model2.cpp b/mgizapp/src/model2.cpp
index c0e6c4f..82420f2 100644
--- a/mgizapp/src/model2.cpp
+++ b/mgizapp/src/model2.cpp
@@ -62,7 +62,7 @@ int model2::em_with_tricks(int noIterations,bool dumpCount,
int minIter=0;
string modelName="Model2",shortModelName="2";
time_t it_st, st, it_fn, fn;
- string tfile, afile, number, alignfile, test_alignfile;
+ string tfile, afile, alignfile, test_alignfile;
int pair_no = 0;
bool dump_files = false ;
ofstream of2 ;
@@ -75,11 +75,7 @@ int model2::em_with_tricks(int noIterations,bool dumpCount,
it_st = time(NULL) ;
cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
dump_files = (Model2_Dump_Freq != 0) && ((it % Model2_Dump_Freq) == 0) && !NODUMPS;
- number = "";
- int n = it;
- do {
- number.insert((size_t)0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
+ const string number = represent_number(it);
tfile = Prefix + ".t" + shortModelName + "." + number ;
afile = Prefix + ".a" + shortModelName + "." + number ;
alignfile = Prefix + ".A" + shortModelName + "." + number ;
diff --git a/mgizapp/src/model3.cpp b/mgizapp/src/model3.cpp
index 39d5514..ec693ec 100644
--- a/mgizapp/src/model3.cpp
+++ b/mgizapp/src/model3.cpp
@@ -106,12 +106,7 @@ void model3::em(int noIterations, sentenceHandler& sHandler1)
cout << "\n" << "Model3: Iteration " << it;
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
+ const string number = represent_number(it);
tfile = Prefix + ".t3." + number;
afile = Prefix + ".a3." + number;
nfile = Prefix + ".n3." + number;
@@ -435,14 +430,7 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
+ const string number = (final ? "final" : represent_number(it));
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
@@ -470,17 +458,13 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
if (testPerp && testHandler)
(*testHandler).rewind();
- char node[2] ;
- node[1] = '\0';
for (k=1 ; k< NCPUS ; k++) {
th[k].m = this;
th[k].done = 0;
th[k].valid = 0;
th[k].it = it;
th[k].final = final;
- th[k].alignfile = alignfile + ".part";
- node[0] = '0' + k;
- th[k].alignfile += node;
+ th[k].alignfile = alignfile + ".part" + represent_number(k, 3);
th[k].dump_files = dump_files;
th[k].fromModel = fromModel;
th[k].toModel = toModel;
@@ -490,9 +474,7 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4,
cerr << "Error starting thread " << k << endl;
}
}
- node[0] = '0';
- alignfile = alignfile + ".part";
- alignfile += node;
+ alignfile = alignfile + ".part" + represent_number(0, 3);
#ifdef TRICKY_IBM3_TRAINING
@@ -723,14 +705,7 @@ int model3::viterbi_hto3()
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
+ const string number = (final ? "final" : represent_number(it));
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
@@ -741,14 +716,8 @@ int model3::viterbi_hto3()
d4file2 = Prefix + ".D4." + number;
d5file = Prefix + ".d5." + number;
alignfile = Prefix + ".AH3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
+ alignfile += represent_number(iter, 3);
+ alignfile += ".part" + represent_number(part, 3);
test_alignfile = Prefix + ".tst.A3." + number;
test_alignfile = Prefix + ".tst.A3." + number;
p0file = Prefix + ".p0_3." + number;
@@ -805,14 +774,7 @@ int model3::viterbi_3to3()
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
+ const string number = (final ? "final" : represent_number(it));
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
@@ -823,14 +785,8 @@ int model3::viterbi_3to3()
d4file2 = Prefix + ".D4." + number;
d5file = Prefix + ".d5." + number;
alignfile = Prefix + ".A3_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
+ alignfile += represent_number(iter, 3);
+ alignfile += ".part" + represent_number(part, 3);
test_alignfile = Prefix + ".tst.A3." + number;
p0file = Prefix + ".p0_3." + number;
}
@@ -894,14 +850,7 @@ d4model* model3::viterbi_3to4()
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
+ const string number = (final ? "final" : represent_number(it));
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
@@ -912,14 +861,8 @@ d4model* model3::viterbi_3to4()
d4file2 = Prefix + ".D4." + number;
d5file = Prefix + ".d5." + number;
alignfile = Prefix + ".A34_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
+ alignfile += represent_number(iter, 3);
+ alignfile += ".part" + represent_number(part, 3);
test_alignfile = Prefix + ".tst.A3." + number;
p0file = Prefix + ".p0_3." + number;
}
@@ -975,12 +918,7 @@ int model3::viterbi_4to4(d4model& d4m)
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
+ const string number = represent_number(it);
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
@@ -991,14 +929,8 @@ int model3::viterbi_4to4(d4model& d4m)
d4file2 = Prefix + ".D4." + number;
d5file = Prefix + ".d5." + number;
alignfile = Prefix + ".A4_";
- char _p[2];
- _p[1] = 0;
- _p[0] = iter + '0';
- alignfile += _p;
- alignfile += ".part";
- _p[1] = 0;
- _p[0] = part + '0';
- alignfile += _p;
+ alignfile += represent_number(iter, 3);
+ alignfile += ".part" + represent_number(part, 3);
test_alignfile = Prefix + ".tst.A3." + number;
p0file = Prefix + ".p0_3." + number;
}
@@ -1136,14 +1068,7 @@ void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations,
string d4file2;
{
// set up the names of the files where the tables will be printed
- int n = i;
- number = "";
- do {
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while ((n /= 10) > 0);
- if (final)
- number="final";
+ const string number = (final ? "final": represent_number(i));
tfile = Prefix + ".t3." + number;
tfile_actual = Prefix + ".actual.t3." + number;
afile = Prefix + ".a3." + number;
diff --git a/mgizapp/src/utility.h b/mgizapp/src/utility.h
index 2eeed8c..32a5a17 100644
--- a/mgizapp/src/utility.h
+++ b/mgizapp/src/utility.h
@@ -55,5 +55,17 @@ extern double factorial(int) ;
string my_ctime(const time_t* t);
/** Return a nonnegative number as a human-readable decimal string.
 *
 * The string is not localized.  It is at least `digits` characters long: a
 * value with fewer digits is zero-padded on the left, and a value with more
 * digits is never truncated.  The result always contains at least one digit,
 * so zero is rendered as "0" even when `digits` is zero or negative.
 *
 * @param value  The number to represent.
 * @param digits Minimum length of the result; anything below 1 is treated as 1.
 * @return The decimal representation of `value`, zero-padded to `digits`.
 */
inline std::string represent_number(unsigned int value, int digits=1) {
  // Clamp before converting to an unsigned size: a negative `digits` would
  // otherwise wrap around to an enormous padding length.
  const std::string::size_type min_length =
    (digits > 1) ? static_cast<std::string::size_type>(digits) : 1;

  // Collect the digits least-significant first; appending avoids shifting the
  // whole string on every step, as inserting at the front would.
  std::string reversed;
  for (unsigned int remainder = value; remainder > 0; remainder /= 10)
    reversed.push_back(static_cast<char>('0' + remainder % 10));

  // Padding is appended here, so it ends up in front once the string is
  // reversed.  This also covers value == 0, where the loop emits nothing.
  if (reversed.size() < min_length)
    reversed.append(min_length - reversed.size(), '0');

  return std::string(reversed.rbegin(), reversed.rend());
}
#endif