diff options
author | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-05-25 09:15:29 +0300 |
---|---|---|
committer | Jeroen Vermeulen <jtv@precisiontranslationtools.com> | 2015-05-25 09:15:29 +0300 |
commit | e85751929300bef48286680a5648c29830fe7cd6 (patch) | |
tree | 483060b73cc73a1b7474b7048758def00deaa8b7 | |
parent | bdf9bd6103e44b8e6b750e7851681545e53a18fd (diff) |
Unify int-to-string conversion.
The codebase was full of repeated patterns for:
1. Representing a nonnegative number as a string.
2. Representing a small nonnegative number as a digit.
3. Helper functions for representing numbers as strings, mostly unused.
This replaces all of them with a single new inline function. Where the
output was meant to be a single digit, the number could actually exceed 9,
producing a non-digit character. In particular, this could cause crashes
on Windows machines with more than 10 CPU cores: part number 10 yields
':' ('0' + 10), which is not allowed in a Windows filename. Such cases are
replaced with zero-padded three-digit strings, to keep sort orders sane.
-rw-r--r-- | mgizapp/src/d4norm.cxx | 8 | ||||
-rw-r--r-- | mgizapp/src/hmm.cpp | 50 | ||||
-rw-r--r-- | mgizapp/src/hmmnorm.cxx | 8 | ||||
-rw-r--r-- | mgizapp/src/main.cpp | 11 | ||||
-rw-r--r-- | mgizapp/src/model1.cpp | 7 | ||||
-rw-r--r-- | mgizapp/src/model2.cpp | 8 | ||||
-rw-r--r-- | mgizapp/src/model3.cpp | 109 | ||||
-rw-r--r-- | mgizapp/src/utility.h | 12 |
8 files changed, 45 insertions, 168 deletions
diff --git a/mgizapp/src/d4norm.cxx b/mgizapp/src/d4norm.cxx index b061416..bc6e163 100644 --- a/mgizapp/src/d4norm.cxx +++ b/mgizapp/src/d4norm.cxx @@ -107,14 +107,6 @@ int main(int argc, char* argv[]) // Some utility functions to get it compile.. ofstream logmsg; -const string str2Num(int n) -{ - string number = ""; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - return (number); -} double LAMBDA=1.09; Vector<map< pair<int,int>,char > > ReferenceAlignment; diff --git a/mgizapp/src/hmm.cpp b/mgizapp/src/hmm.cpp index 8e28287..66a6bda 100644 --- a/mgizapp/src/hmm.cpp +++ b/mgizapp/src/hmm.cpp @@ -115,7 +115,7 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount, string modelName="Hmm",shortModelName="hmm"; int dumpFreq=ModelH_Dump_Freq; time_t it_st, st, it_fn, fn; - string tfile, afile,afileh, number, alignfile, test_alignfile; + string tfile, afile,afileh, alignfile, test_alignfile; bool dump_files = false ; ofstream of2 ; st = time(NULL) ; @@ -131,11 +131,7 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount, cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl; //dump_files = true; - number = ""; - int n = it; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while((n /= 10) > 0); + const string number = represent_number(it); tfile = Prefix + ".t" + shortModelName + "." + number ; afile = Prefix + ".a" + shortModelName + "." + number ; // acfile = Prefix + ".ac" + shortModelName + "." 
+ number ; @@ -147,26 +143,20 @@ int hmm::em_with_tricks(int noIterations,bool dumpCount, initAL(); sHandler1.rewind(); int k; - char node[2] ; - node[1] = '\0'; for (k=1 ; k< NCPUS ; k++) { th[k].m = this; th[k].done = 0; th[k].valid = 0; th[k].it = it; th[k].resume = resume; - th[k].alignfile = alignfile + ".part"; - node[0] = '0' + k; - th[k].alignfile += node; + th[k].alignfile = alignfile + ".part" + represent_number(k, 3); th[k].dump_files = dump_files; th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k])); if(th[k].valid) { cerr << "Error starting thread " << k << endl; } } - node[0] = '0'; - alignfile += ".part"; - alignfile += node; + alignfile += ".part" + represent_number(0, 3); em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it); for (k=1; k<NCPUS; k++) { pthread_join((th[k].thread),NULL); @@ -861,7 +851,7 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it) string modelName="Hmm",shortModelName="hmm"; int dumpFreq=ModelH_Dump_Freq; time_t it_st, st, it_fn, fn; - string tfile, afile,afileh, number, alignfile, test_alignfile; + string tfile, afile,afileh, alignfile, test_alignfile; int pair_no = 0; bool dump_files = false ; ofstream of2 ; @@ -873,11 +863,7 @@ CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it) cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n'; dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS; - number = ""; - int n = it; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while((n /= 10) > 0); + const string number = represent_number(it); tfile = Prefix + ".t" + shortModelName + "." + number ; afile = Prefix + ".a" + shortModelName + "." + number ; afileh = Prefix + ".h" + shortModelName + "." 
+ number ; @@ -934,7 +920,7 @@ void hmm::em_one_step_2(int it,int part) string modelName="Hmm",shortModelName="hmm"; int dumpFreq=ModelH_Dump_Freq; time_t it_st, st, it_fn, fn; - string tfile, afile,afileh, number, alignfile, test_alignfile; + string tfile, afile,afileh, alignfile, test_alignfile; int pair_no = 0; bool dump_files = false ; ofstream of2 ; @@ -943,22 +929,12 @@ void hmm::em_one_step_2(int it,int part) dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS; - number = ""; - int n = it; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while((n /= 10) > 0); + const string number = represent_number(it, 3); tfile = Prefix + ".t" + shortModelName + "." + number ; afile = Prefix + ".a" + shortModelName + "." + number ; afileh = Prefix + ".h" + shortModelName + "." + number ; - alignfile = Prefix + ".Ahmm." ; - char v[2]; - v[1] = 0; - v[0] = '0' + it; - alignfile += v; - alignfile += ".part"; - v[0] = '0' + part; - alignfile += v; + alignfile = Prefix + ".Ahmm." + represent_number(it, 3) ; + alignfile += ".part" + represent_number(part, 3); counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses); aCountTable.clear(); @@ -1026,11 +1002,7 @@ int multi_thread_em(int noIter, int noThread, hmm* base) cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n'; dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS; dump_files = true; - string number = ""; - int n = i; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while((n /= 10) > 0); + const string number = represent_number(i); tfile = Prefix + ".t" + shortModelName + "." + number ; afile = Prefix + ".a" + shortModelName + "." + number ; acfile = Prefix + ".ac" + shortModelName + "." 
+ number ; diff --git a/mgizapp/src/hmmnorm.cxx b/mgizapp/src/hmmnorm.cxx index d0c9015..2528ea9 100644 --- a/mgizapp/src/hmmnorm.cxx +++ b/mgizapp/src/hmmnorm.cxx @@ -116,14 +116,6 @@ int main(int argc, char* argv[]) // Some utility functions to get it compile.. ofstream logmsg; -const string str2Num(int n) -{ - string number = ""; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - return (number); -} double LAMBDA=1.09; Vector<map< pair<int,int>,char > > ReferenceAlignment; diff --git a/mgizapp/src/main.cpp b/mgizapp/src/main.cpp index f06c8e8..3dc0b90 100644 --- a/mgizapp/src/main.cpp +++ b/mgizapp/src/main.cpp @@ -112,15 +112,6 @@ string countPrefix; Mutex logmsg_lock; ofstream logmsg; -const string str2Num(int n) -{ - string number = ""; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - return (number); -} - double LAMBDA=1.09; sentenceHandler *testCorpus=0, *corpus=0; Perplexity trainPerp, testPerp, trainViterbiPerp, testViterbiPerp; @@ -237,7 +228,7 @@ void printDecoderConfigFile() lastmodel = 2; else lastmodel = 1; - string lastModelName = str2Num(lastmodel); + string lastModelName = represent_number(lastmodel); string p=Prefix + ".t" + /*lastModelName*/"3" +".final"; decoder << "TTable = " << stripPath(p.c_str()) << '\n'; p = Prefix + ".ti.final"; diff --git a/mgizapp/src/model1.cpp b/mgizapp/src/model1.cpp index 74d3331..e2336cb 100644 --- a/mgizapp/src/model1.cpp +++ b/mgizapp/src/model1.cpp @@ -105,10 +105,7 @@ int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenc Dictionary& dictionary, bool useDict /*Perplexity* testPerp, sentenceHandler* testHandler, Perplexity& trainViterbiPerp, Perplexity* testViterbiPerp */ ) { - string modelName="Model1",shortModelName="1"; - char b[2]; - b[1] = '\0'; - b[0] = '0' + nthread; + string modelName="Model1", shortModelName="1"; time_t st = time(NULL); string tfile, number, alignfile, test_alignfile; bool dump_files 
= false ; @@ -124,7 +121,7 @@ int model1::em_thread(int noIterations, int nthread, /*Perplexity& perp, sentenc number.insert((size_t)0, 1, (char)(n % 10 + '0')); } while((n /= 10) > 0); alignfile = Prefix + ".A" + shortModelName + "." + number + ".part" ; - alignfile = alignfile + b; + alignfile = alignfile + represent_number(nthread, 3); em_loop(it,perp, sHandler1, false, dump_files, alignfile.c_str(), dictionary, useDict, trainViterbiPerp); return 0; diff --git a/mgizapp/src/model2.cpp b/mgizapp/src/model2.cpp index c0e6c4f..82420f2 100644 --- a/mgizapp/src/model2.cpp +++ b/mgizapp/src/model2.cpp @@ -62,7 +62,7 @@ int model2::em_with_tricks(int noIterations,bool dumpCount, int minIter=0; string modelName="Model2",shortModelName="2"; time_t it_st, st, it_fn, fn; - string tfile, afile, number, alignfile, test_alignfile; + string tfile, afile, alignfile, test_alignfile; int pair_no = 0; bool dump_files = false ; ofstream of2 ; @@ -75,11 +75,7 @@ int model2::em_with_tricks(int noIterations,bool dumpCount, it_st = time(NULL) ; cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n'; dump_files = (Model2_Dump_Freq != 0) && ((it % Model2_Dump_Freq) == 0) && !NODUMPS; - number = ""; - int n = it; - do { - number.insert((size_t)0, 1, (char)(n % 10 + '0')); - } while((n /= 10) > 0); + const string number = represent_number(it); tfile = Prefix + ".t" + shortModelName + "." + number ; afile = Prefix + ".a" + shortModelName + "." + number ; alignfile = Prefix + ".A" + shortModelName + "." 
+ number ; diff --git a/mgizapp/src/model3.cpp b/mgizapp/src/model3.cpp index 39d5514..ec693ec 100644 --- a/mgizapp/src/model3.cpp +++ b/mgizapp/src/model3.cpp @@ -106,12 +106,7 @@ void model3::em(int noIterations, sentenceHandler& sHandler1) cout << "\n" << "Model3: Iteration " << it; // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); + const string number = represent_number(it); tfile = Prefix + ".t3." + number; afile = Prefix + ".a3." + number; nfile = Prefix + ".n3." + number; @@ -435,14 +430,7 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4, string d4file2; { // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - if (final) - number="final"; + const string number = (final ? "final" : represent_number(it)); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." 
+ number; @@ -470,17 +458,13 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4, if (testPerp && testHandler) (*testHandler).rewind(); - char node[2] ; - node[1] = '\0'; for (k=1 ; k< NCPUS ; k++) { th[k].m = this; th[k].done = 0; th[k].valid = 0; th[k].it = it; th[k].final = final; - th[k].alignfile = alignfile + ".part"; - node[0] = '0' + k; - th[k].alignfile += node; + th[k].alignfile = alignfile + ".part" + represent_number(k, 3); th[k].dump_files = dump_files; th[k].fromModel = fromModel; th[k].toModel = toModel; @@ -490,9 +474,7 @@ int model3::viterbi(int noIterationsModel3, int noIterationsModel4, cerr << "Error starting thread " << k << endl; } } - node[0] = '0'; - alignfile = alignfile + ".part"; - alignfile += node; + alignfile = alignfile + ".part" + represent_number(0, 3); #ifdef TRICKY_IBM3_TRAINING @@ -723,14 +705,7 @@ int model3::viterbi_hto3() string d4file2; { // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - if (final) - number="final"; + const string number = (final ? "final" : represent_number(it)); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." + number; @@ -741,14 +716,8 @@ int model3::viterbi_hto3() d4file2 = Prefix + ".D4." + number; d5file = Prefix + ".d5." + number; alignfile = Prefix + ".AH3_"; - char _p[2]; - _p[1] = 0; - _p[0] = iter + '0'; - alignfile += _p; - alignfile += ".part"; - _p[1] = 0; - _p[0] = part + '0'; - alignfile += _p; + alignfile += represent_number(iter, 3); + alignfile += ".part" + represent_number(part, 3); test_alignfile = Prefix + ".tst.A3." + number; test_alignfile = Prefix + ".tst.A3." + number; p0file = Prefix + ".p0_3." 
+ number; @@ -805,14 +774,7 @@ int model3::viterbi_3to3() string d4file2; { // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - if (final) - number="final"; + const string number = (final ? "final" : represent_number(it)); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." + number; @@ -823,14 +785,8 @@ int model3::viterbi_3to3() d4file2 = Prefix + ".D4." + number; d5file = Prefix + ".d5." + number; alignfile = Prefix + ".A3_"; - char _p[2]; - _p[1] = 0; - _p[0] = iter + '0'; - alignfile += _p; - alignfile += ".part"; - _p[1] = 0; - _p[0] = part + '0'; - alignfile += _p; + alignfile += represent_number(iter, 3); + alignfile += ".part" + represent_number(part, 3); test_alignfile = Prefix + ".tst.A3." + number; p0file = Prefix + ".p0_3." + number; } @@ -894,14 +850,7 @@ d4model* model3::viterbi_3to4() string d4file2; { // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - if (final) - number="final"; + const string number = (final ? "final" : represent_number(it)); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." + number; @@ -912,14 +861,8 @@ d4model* model3::viterbi_3to4() d4file2 = Prefix + ".D4." + number; d5file = Prefix + ".d5." + number; alignfile = Prefix + ".A34_"; - char _p[2]; - _p[1] = 0; - _p[0] = iter + '0'; - alignfile += _p; - alignfile += ".part"; - _p[1] = 0; - _p[0] = part + '0'; - alignfile += _p; + alignfile += represent_number(iter, 3); + alignfile += ".part" + represent_number(part, 3); test_alignfile = Prefix + ".tst.A3." + number; p0file = Prefix + ".p0_3." 
+ number; } @@ -975,12 +918,7 @@ int model3::viterbi_4to4(d4model& d4m) string d4file2; { // set up the names of the files where the tables will be printed - int n = it; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); + const string number = represent_number(it); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." + number; @@ -991,14 +929,8 @@ int model3::viterbi_4to4(d4model& d4m) d4file2 = Prefix + ".D4." + number; d5file = Prefix + ".d5." + number; alignfile = Prefix + ".A4_"; - char _p[2]; - _p[1] = 0; - _p[0] = iter + '0'; - alignfile += _p; - alignfile += ".part"; - _p[1] = 0; - _p[0] = part + '0'; - alignfile += _p; + alignfile += represent_number(iter, 3); + alignfile += ".part" + represent_number(part, 3); test_alignfile = Prefix + ".tst.A3." + number; p0file = Prefix + ".p0_3." + number; } @@ -1136,14 +1068,7 @@ void multi_thread_m34_em(model3& m3, int ncpu, int Model3_Iterations, string d4file2; { // set up the names of the files where the tables will be printed - int n = i; - number = ""; - do { - //mj changed next line - number.insert((size_t) 0, 1, (char)(n % 10 + '0')); - } while ((n /= 10) > 0); - if (final) - number="final"; + const string number = (final ? "final": represent_number(i)); tfile = Prefix + ".t3." + number; tfile_actual = Prefix + ".actual.t3." + number; afile = Prefix + ".a3." + number; diff --git a/mgizapp/src/utility.h b/mgizapp/src/utility.h index 2eeed8c..32a5a17 100644 --- a/mgizapp/src/utility.h +++ b/mgizapp/src/utility.h @@ -55,5 +55,17 @@ extern double factorial(int) ; string my_ctime(const time_t* t); +/** Return a nonnegative number as a human-readable string of the given length. + * + * The string is not localized, and will be at least the specified number of + * digits long. If the value is not large enough, it will be zero-padded. 
+ */ +inline std::string represent_number(unsigned int value, int digits=1) { + std::string result; + for (unsigned int remainder = value ; remainder > 0; remainder /= 10) + result.insert(0, 1, '0' + remainder % 10); + if (result.size() < digits) result.insert(0, digits - result.size(), '0'); + return result; +} #endif |