diff options
author | Nicola Bertoldi <bertoldi@fbk.eu> | 2014-01-15 19:49:57 +0400 |
---|---|---|
committer | Nicola Bertoldi <bertoldi@fbk.eu> | 2014-01-15 19:49:57 +0400 |
commit | e452a13062400b09ff95af971c1424ac34cf1930 (patch) | |
tree | 11b52a2f1228f4e64fb31d061d7e564970140fd7 | |
parent | bd83999264407dd7970736ee3e70a6b39fb19014 (diff) |
beautify
45 files changed, 918 insertions, 787 deletions
diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp index 0120802ac..12adffd03 100644 --- a/OnDiskPt/OnDiskWrapper.cpp +++ b/OnDiskPt/OnDiskWrapper.cpp @@ -59,28 +59,28 @@ bool OnDiskWrapper::OpenForLoad(const std::string &filePath) { m_fileSource.open((filePath + "/Source.dat").c_str(), ios::in | ios::binary); UTIL_THROW_IF(!m_fileSource.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Source.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Source.dat"); m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::in | ios::binary); UTIL_THROW_IF(!m_fileTargetInd.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/TargetInd.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetInd.dat"); m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::in | ios::binary); UTIL_THROW_IF(!m_fileTargetColl.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/TargetColl.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetColl.dat"); m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::in); UTIL_THROW_IF(!m_fileVocab.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Vocab.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Vocab.dat"); m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::in); UTIL_THROW_IF(!m_fileMisc.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Misc.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Misc.dat"); // set up root node LoadMisc(); @@ -124,46 +124,46 @@ void OnDiskWrapper::BeginSave(const std::string &filePath m_fileSource.open((filePath + "/Source.dat").c_str(), ios::out | ios::in | ios::binary | ios::ate | ios::trunc); UTIL_THROW_IF(!m_fileSource.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Source.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Source.dat"); m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc); UTIL_THROW_IF(!m_fileTargetInd.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/TargetInd.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetInd.dat"); m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc); UTIL_THROW_IF(!m_fileTargetColl.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/TargetColl.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/TargetColl.dat"); m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::out | ios::ate | ios::trunc); UTIL_THROW_IF(!m_fileVocab.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Vocab.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Vocab.dat"); m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::out | ios::ate | ios::trunc); UTIL_THROW_IF(!m_fileMisc.is_open(), - util::FileOpenException, - "Couldn't open file " << filePath << "/Misc.dat"); + util::FileOpenException, + "Couldn't open file " << filePath << "/Misc.dat"); // offset by 1. 0 offset is reserved char c = 0xff; m_fileSource.write(&c, 1); UTIL_THROW_IF2(1 != m_fileSource.tellp(), - "Couldn't write to stream m_fileSource"); + "Couldn't write to stream m_fileSource"); m_fileTargetInd.write(&c, 1); UTIL_THROW_IF2(1 != m_fileTargetInd.tellp(), - "Couldn't write to stream m_fileTargetInd"); + "Couldn't write to stream m_fileTargetInd"); m_fileTargetColl.write(&c, 1); UTIL_THROW_IF2(1 != m_fileTargetColl.tellp(), - "Couldn't write to stream m_fileTargetColl"); + "Couldn't write to stream m_fileTargetColl"); // set up root node UTIL_THROW_IF2(GetNumCounts() != 1, - "Not sure what this is..."); + "Not sure what this is..."); vector<float> counts(GetNumCounts()); counts[0] = DEFAULT_COUNT; @@ -212,8 +212,8 @@ UINT64 OnDiskWrapper::GetMisc(const std::string &key) const std::map<std::string, UINT64>::const_iterator iter; iter = m_miscInfo.find(key); UTIL_THROW_IF2(iter == m_miscInfo.end() - , "Couldn't find value for key " << key - ); + , "Couldn't find value for key " << key + ); return iter->second; } @@ -238,7 +238,7 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &fact break; } UTIL_THROW_IF2(factor == NULL, - "Expecting factor " << factorType << " at position " << ind); + "Expecting factor " << factorType << " at position " << ind); strme << "|" << factor->GetString(); } // for (size_t factorType diff --git a/OnDiskPt/OnDiskWrapper.h b/OnDiskPt/OnDiskWrapper.h index a31c473fa..5b7cdfe01 100644 --- a/OnDiskPt/OnDiskWrapper.h +++ b/OnDiskPt/OnDiskWrapper.h @@ -61,10 +61,12 @@ public: , int numSourceFactors, int numTargetFactors, int numScores); void EndSave(); - Vocab &GetVocab() - { return m_vocab; } - const Vocab &GetVocab() const - { return m_vocab; } + Vocab &GetVocab() { + return m_vocab; + } + const Vocab &GetVocab() const { + return m_vocab; + } size_t GetSourceWordSize() const; size_t GetTargetWordSize() const; diff --git a/OnDiskPt/Phrase.cpp b/OnDiskPt/Phrase.cpp index a09c99fe5..c1933338b 100644 --- a/OnDiskPt/Phrase.cpp +++ b/OnDiskPt/Phrase.cpp @@ -35,8 +35,8 @@ void Phrase::AddWord(WordPtr word) void Phrase::AddWord(WordPtr word, size_t pos) { - UTIL_THROW_IF2(!(pos < m_words.size()), - "Trying to get word " << pos << " when phrase size is " << m_words.size()); + UTIL_THROW_IF2(!(pos < m_words.size()), + "Trying to get word " << pos << " when phrase size is " << m_words.size()); m_words.insert(m_words.begin() + pos + 1, word); } diff --git a/mert/Data.cpp b/mert/Data.cpp index 095b26041..b93c3b6c0 100644 --- a/mert/Data.cpp +++ b/mert/Data.cpp @@ -263,13 +263,13 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor { UTIL_THROW_IF(shard_count == 0, util::Exception, "Must have at least 1 shard"); UTIL_THROW_IF(shard_size < 0 || shard_size > 1, - util::Exception, - "Shard size must be between 0 and 1, inclusive. Currently " << shard_size); + util::Exception, + "Shard size must be between 0 and 1, inclusive. Currently " << shard_size); size_t data_size = m_score_data->size(); UTIL_THROW_IF(data_size != m_feature_data->size(), - util::Exception, - "Error"); + util::Exception, + "Error"); shard_size *= data_size; const float coeff = static_cast<float>(data_size) / shard_count; diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp index 3f5aa48a6..5da32363f 100644 --- a/mert/Optimizer.cpp +++ b/mert/Optimizer.cpp @@ -168,8 +168,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, // The rightmost bestindex is the one with the highest slope. // They should be equal but there might be. - UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001, - util::Exception, "Error"); + UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001, + util::Exception, "Error"); // A small difference due to rounding error break; } @@ -191,8 +191,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, if (tit == previnserted) { // The threshold is the same as before can happen if 2 candidates are the same for example. UTIL_THROW_IF(previnserted->second.back().first != newd.first, - util::Exception, - "Error"); + util::Exception, + "Error"); previnserted->second.back()=newd; // just replace the 1 best for sentence S // previnsert doesn't change } else { @@ -207,8 +207,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, // We append the diffs in previnsert to tit before destroying previnsert. tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end()); UTIL_THROW_IF(tit->second.back().first != newd.first, - util::Exception, - "Error"); + util::Exception, + "Error"); tit->second.back()=newd; // change diff for sentence S thresholdmap.erase(previnserted); // erase old previnsert previnserted = tit; // point previnsert to the new threshold @@ -216,8 +216,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, } UTIL_THROW_IF(previnserted == thresholdmap.end(), - util::Exception, - "Error"); + util::Exception, + "Error"); } else { //normal insertion process previnserted = AddThreshold(thresholdmap, leftmostx, newd); } @@ -254,8 +254,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, // We skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best. UTIL_THROW_IF(scores.size() != thresholdmap.size(), - util::Exception, - "Error"); + util::Exception, + "Error"); for (unsigned int sc = 0; sc != scores.size(); sc++) { //cerr << "x=" << thrit->first << " => " << scores[sc] << endl; diff --git a/mert/Point.cpp b/mert/Point.cpp index 1db59ce66..55dc6a6b2 100644 --- a/mert/Point.cpp +++ b/mert/Point.cpp @@ -40,8 +40,8 @@ Point::Point(const vector<parameter_t>& init, m_max[i] = max[i]; } } else { - UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error"); - UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error"); + UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error"); + UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error"); for (unsigned int i = 0; i < Point::m_dim; i++) { operator[](i) = init[m_opt_indices[i]]; m_min[i] = min[m_opt_indices[i]]; diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp index ed3ff2458..ffaf03be4 100644 --- a/mert/Scorer.cpp +++ b/mert/Scorer.cpp @@ -25,9 +25,9 @@ const int kUnknownToken = -1; Scorer::Scorer(const string& name, const string& config) : m_name(name), m_vocab(mert::VocabularyFactory::GetVocabulary()), - #if defined(__GLIBCXX__) || defined(__GLIBCPP__) +#if defined(__GLIBCXX__) || defined(__GLIBCPP__) m_filter(NULL), - #endif +#endif m_score_data(NULL), m_enable_preserve_case(true) { diff --git a/mira/Main.cpp b/mira/Main.cpp index 73daf5a99..9de8f9cdb 100644 --- a/mira/Main.cpp +++ b/mira/Main.cpp @@ -1238,15 +1238,15 @@ int main(int argc, char** argv) cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically( - weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0], - bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0], - modelScoresFear[0][0], learning_rate, rank, epoch); + weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0], + bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0], + modelScoresFear[0][0], learning_rate, rank, epoch); } else { cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, - featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, - modelScoresFear, learning_rate, rank, epoch); + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); } } else { // model_hope_fear diff --git a/misc/1-1-Extraction.cpp b/misc/1-1-Extraction.cpp index 459fa4900..cf3817abf 100644 --- a/misc/1-1-Extraction.cpp +++ b/misc/1-1-Extraction.cpp @@ -105,13 +105,12 @@ void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, s int tgt; ceptsInPhrase.clear(); int res; - - for (int j=0; j<alignment.size(); j+=1) - { - res = alignment[j].find("-"); - mAlign.push_back(alignment[j].substr(0,res)); - mAlign.push_back(alignment[j].substr(res+1)); - } + + for (int j=0; j<alignment.size(); j+=1) { + res = alignment[j].find("-"); + mAlign.push_back(alignment[j].substr(0,res)); + mAlign.push_back(alignment[j].substr(res+1)); + } for (int j=0; j<mAlign.size(); j+=2) { align.push_back(stringToInteger(mAlign[j+1])); @@ -166,29 +165,26 @@ void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, s void getOneToOne(vector < pair < set <int> , set <int> > > & ceptsInPhrase , vector <string> & currF , vector <string> & currE, set <string> & one) { - string temp; - - for (int i = 0; i< ceptsInPhrase.size(); i++) - { - if (ceptsInPhrase[i].first.size() == 1 && ceptsInPhrase[i].second.size() == 1) - { - temp = currF[(*ceptsInPhrase[i].second.begin())] + "\t" + currE[(*ceptsInPhrase[i].first.begin())]; - - if (one.find(temp) == one.end()) - one.insert(temp); - } - } - + string temp; + + for (int i = 0; i< ceptsInPhrase.size(); i++) { + if (ceptsInPhrase[i].first.size() == 1 && ceptsInPhrase[i].second.size() == 1) { + temp = currF[(*ceptsInPhrase[i].second.begin())] + "\t" + currE[(*ceptsInPhrase[i].first.begin())]; + + if (one.find(temp) == one.end()) + one.insert(temp); + } + } + } void printOneToOne ( set <string> & one) { - set <string> :: iterator iter; + set <string> :: iterator iter; - for (iter = one.begin(); iter != one.end(); iter++) - { - cout<<*iter<<endl; - } + for (iter = one.begin(); iter != one.end(); iter++) { + cout<<*iter<<endl; + } } int main(int argc, char * argv[]) @@ -219,11 +215,11 @@ int main(int argc, char * argv[]) getWords(e[i],currE); getWords(f[i],currF); getWords(a[i],currA); - + cerr<<"Processing "<<i<<endl; constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size()); getOneToOne(ceptsInPhrase , currF , currE, one); - + /* cout<<"________________________________________"<<endl; @@ -234,7 +230,7 @@ int main(int argc, char * argv[]) } - printOneToOne(one); + printOneToOne(one); return 0; diff --git a/misc/TransliterationMining.cpp b/misc/TransliterationMining.cpp index ec272b93a..8c2291864 100644 --- a/misc/TransliterationMining.cpp +++ b/misc/TransliterationMining.cpp @@ -1,12 +1,12 @@ - /* +/* ######################################################################################## - Transliteration Mining - A Program to Extract Transliteration Pairs from - a bilingual word list - Source Contributor: Nadir Durrani +Transliteration Mining - A Program to Extract Transliteration Pairs from +a bilingual word list +Source Contributor: Nadir Durrani ######################################################################################## - + */ #include <cstdlib> @@ -25,419 +25,392 @@ using namespace std; double initTransitionProb; double LAMBDA; -double addLogProbs(double A , double B) // this function adds probabilities ... +double addLogProbs(double A , double B) // this function adds probabilities ... { - - if (A == B) - return (A + log10(2.0)); - - if (A > B) - { - if (A - B > 6) // A is a lot bigger ... - return A; - else - return (A + log10(1+pow(10,(B-A)))); - } - - else // B > A - { - if (B - A > 6) - return B; - else - return (B + log10(1+pow(10,(A-B)))); - } - + + if (A == B) + return (A + log10(2.0)); + + if (A > B) { + if (A - B > 6) // A is a lot bigger ... + return A; + else + return (A + log10(1+pow(10,(B-A)))); + } + + else { // B > A + if (B - A > 6) + return B; + else + return (B + log10(1+pow(10,(A-B)))); + } + } class NodeStructure { - public: - - NodeStructure(){}; - NodeStructure(vector <string> & s , vector <string> & t); - double getPosterior(){return PPR;} - void computeFwdBckProbs(map <string , double> & gammas, map <string, double> & alignmentCounts); - void computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams); - void print(); - - vector <string> source; - vector <string> target; - ~NodeStructure(){}; - - private: - - double NTR; // Non-transliteration probability of a sentence pair ... - double PPR; // Posterior Probability ... - double ALPHA; - double BETA; - - void computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts); - double computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents); - double FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents); - double BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & chidren); - double computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children); - void getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges); - void getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges); - double getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge); - void updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta); - void computePosteriorProb(); - double scaleGamma(double g); - void getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3); - +public: + + NodeStructure() {}; + NodeStructure(vector <string> & s , vector <string> & t); + double getPosterior() { + return PPR; + } + void computeFwdBckProbs(map <string , double> & gammas, map <string, double> & alignmentCounts); + void computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams); + void print(); + + vector <string> source; + vector <string> target; + ~NodeStructure() {}; + +private: + + double NTR; // Non-transliteration probability of a sentence pair ... + double PPR; // Posterior Probability ... + double ALPHA; + double BETA; + + void computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts); + double computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents); + double FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents); + double BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & chidren); + double computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children); + void getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges); + void getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges); + double getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge); + void updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta); + void computePosteriorProb(); + double scaleGamma(double g); + void getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3); + }; void NodeStructure :: print() { - - for (int i = 0; i < source.size(); i++) - cout<<source[i]; - - cout<<"\t"; - for (int i = 0; i < target.size(); i++) - cout<<target[i]; + for (int i = 0; i < source.size(); i++) + cout<<source[i]; + + cout<<"\t"; + + for (int i = 0; i < target.size(); i++) + cout<<target[i]; - cout<<"\t"<<pow(10,PPR)<<endl; + cout<<"\t"<<pow(10,PPR)<<endl; } NodeStructure :: NodeStructure(vector <string> & s , vector <string> & t) { - source = s; - target = t; + source = s; + target = t; } void NodeStructure :: getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3) { - if (v2.first - v1.first == 0) - v3.first = -1; - else - v3.first = v2.first; - - if (v2.second - v1.second == 0) - v3.second = -1; - else - v3.second = v2.second; + if (v2.first - v1.first == 0) + v3.first = -1; + else + v3.first = v2.first; + + if (v2.second - v1.second == 0) + v3.second = -1; + else + v3.second = v2.second; } void NodeStructure :: computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts) { - vector < pair < int , int> > incomingEdges; - map < pair <int , int> , double > :: iterator cIter; - map < pair <int , int> , double > :: iterator pIter; - pair <int , int> ST = make_pair (-1,-1); - pair <int , int> edge; - - children.erase(ST); - double tProb; - double alpha; - double beta; - - for (cIter = children.begin(); cIter != children.end(); cIter++) - { - ST = cIter->first; - - getIncomingEdges (ST , incomingEdges); - beta = cIter->second; - - for (int i = 0; i< incomingEdges.size(); i++) - { - pIter = parents.find(incomingEdges[i]); - - alpha = pIter->second; - getEdge (incomingEdges[i] , ST , edge); - - updateAlignmentCount(transitionProbs, alignmentCounts , edge , alpha , beta); - } - } + vector < pair < int , int> > incomingEdges; + map < pair <int , int> , double > :: iterator cIter; + map < pair <int , int> , double > :: iterator pIter; + pair <int , int> ST = make_pair (-1,-1); + pair <int , int> edge; + + children.erase(ST); + double tProb; + double alpha; + double beta; + + for (cIter = children.begin(); cIter != children.end(); cIter++) { + ST = cIter->first; + + getIncomingEdges (ST , incomingEdges); + beta = cIter->second; + + for (int i = 0; i< incomingEdges.size(); i++) { + pIter = parents.find(incomingEdges[i]); + + alpha = pIter->second; + getEdge (incomingEdges[i] , ST , edge); + + updateAlignmentCount(transitionProbs, alignmentCounts , edge , alpha , beta); + } + } } -void NodeStructure :: computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams) +void NodeStructure :: computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams) { - - NTR = 0.0; - - for (int i = 0; i < source.size(); i++) - { - NTR += sourceUnigrams[source[i]]; - } - - for (int i = 0; i < target.size(); i++) - { - - NTR += targetUnigrams[target[i]]; - } + + NTR = 0.0; + + for (int i = 0; i < source.size(); i++) { + NTR += sourceUnigrams[source[i]]; + } + + for (int i = 0; i < target.size(); i++) { + + NTR += targetUnigrams[target[i]]; + } } double NodeStructure :: scaleGamma(double g) { - double translit = log10 (1 - pow (10, PPR)); - return g + translit; + double translit = log10 (1 - pow (10, PPR)); + return g + translit; } void NodeStructure :: computePosteriorProb() { - double LAMBDA2 = log10(1 - pow(10, LAMBDA)); - double transliterate = LAMBDA2 + ALPHA; // Transliteration Prob ... - double translate = LAMBDA + NTR; // Translation Prob ... - double trans = transliterate - translate; - //cout<<LAMBDA<<" "<<LAMBDA2<<endl; - //cout<<transliterate<<" "<<translate<<" "<<trans<<endl; - //cout<<pow(10 , trans)<<endl; - double prob = 1/(1+ pow(10 , trans)); - PPR = log10(prob); - - //cout<<"Posterior Prob "<<PPR<<endl; + double LAMBDA2 = log10(1 - pow(10, LAMBDA)); + double transliterate = LAMBDA2 + ALPHA; // Transliteration Prob ... + double translate = LAMBDA + NTR; // Translation Prob ... + double trans = transliterate - translate; + //cout<<LAMBDA<<" "<<LAMBDA2<<endl; + //cout<<transliterate<<" "<<translate<<" "<<trans<<endl; + //cout<<pow(10 , trans)<<endl; + double prob = 1/(1+ pow(10 , trans)); + PPR = log10(prob); + + //cout<<"Posterior Prob "<<PPR<<endl; } - + void NodeStructure :: computeFwdBckProbs(map <string , double> & gammas , map <string, double> & alignmentCounts) { - pair <int , int> START = make_pair (source.size()-1 , target.size()-1); - pair <int , int> END = make_pair (-1 , -1); + pair <int , int> START = make_pair (source.size()-1 , target.size()-1); + pair <int , int> END = make_pair (-1 , -1); - map < pair <int , int> , double > parents; - parents[make_pair(-1,-1)] = 0.0; - map < pair <int , int> , double > children; - children[make_pair(source.size()-1,target.size()-1)] = 0.0; + map < pair <int , int> , double > parents; + parents[make_pair(-1,-1)] = 0.0; + map < pair <int , int> , double > children; + children[make_pair(source.size()-1,target.size()-1)] = 0.0; - ALPHA = computeFwdProbs(START , gammas, parents); - BETA = computeBckProbs(END , gammas, children); - - computePosteriorProb(); - //cout<<"Alpha "<<ALPHA<<" Beta "<<BETA<<endl; - computeGammaForEdges(parents , children , gammas , alignmentCounts); + ALPHA = computeFwdProbs(START , gammas, parents); + BETA = computeBckProbs(END , gammas, children); + + computePosteriorProb(); + //cout<<"Alpha "<<ALPHA<<" Beta "<<BETA<<endl; + computeGammaForEdges(parents , children , gammas , alignmentCounts); } void NodeStructure :: getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges) { - incomingEdges.clear(); - - if (ST.first == -1) // Source is NULL .. - { - incomingEdges.push_back(make_pair(ST.first , ST.second-1)); - } - else if (ST.second == -1) // Target is NULL ... - { - incomingEdges.push_back(make_pair(ST.first-1 , ST.second)); - } - else - { - incomingEdges.push_back(make_pair(ST.first , ST.second-1)); - incomingEdges.push_back(make_pair(ST.first-1 , ST.second)); - incomingEdges.push_back(make_pair(ST.first-1 , ST.second-1)); - } + incomingEdges.clear(); + + if (ST.first == -1) { // Source is NULL .. + incomingEdges.push_back(make_pair(ST.first , ST.second-1)); + } else if (ST.second == -1) { // Target is NULL ... + incomingEdges.push_back(make_pair(ST.first-1 , ST.second)); + } else { + incomingEdges.push_back(make_pair(ST.first , ST.second-1)); + incomingEdges.push_back(make_pair(ST.first-1 , ST.second)); + incomingEdges.push_back(make_pair(ST.first-1 , ST.second-1)); + } } void NodeStructure :: getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges) { - if (ST.first == source.size()-1) // Source is END .. - { - outgoingEdges.push_back(make_pair(ST.first , ST.second+1)); - } - else if (ST.second == target.size()-1) // Target is END ... - { - outgoingEdges.push_back(make_pair(ST.first+1 , ST.second)); - } - else - { - outgoingEdges.push_back(make_pair(ST.first , ST.second+1)); - outgoingEdges.push_back(make_pair(ST.first+1 , ST.second)); - outgoingEdges.push_back(make_pair(ST.first+1 , ST.second+1)); - } + if (ST.first == source.size()-1) { // Source is END .. + outgoingEdges.push_back(make_pair(ST.first , ST.second+1)); + } else if (ST.second == target.size()-1) { // Target is END ... + outgoingEdges.push_back(make_pair(ST.first+1 , ST.second)); + } else { + outgoingEdges.push_back(make_pair(ST.first , ST.second+1)); + outgoingEdges.push_back(make_pair(ST.first+1 , ST.second)); + outgoingEdges.push_back(make_pair(ST.first+1 , ST.second+1)); + } } void NodeStructure :: updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta) { - double tProb; - double tgamma; - double gamma; - map <string , double> :: iterator aCounts; - string query; - - if (edge.first == -1) - query = "NULL"; - else - query = source[edge.first]; - - query += "-"; - - if (edge.second == -1) - query += "NULL"; - else - query += target[edge.second]; - - //cout<<" Query "<<query<<endl; - if (transitionProbs.size() == 0) - tProb = initTransitionProb; - else - tProb = transitionProbs[query]; - - - tgamma = alpha + tProb + beta - ALPHA; - gamma = scaleGamma(tgamma); - //cout<<alpha<<" "<<beta<<" "<<gamma<<endl; - //cout<<tProb<<" "<<ALPHA<<endl; - - aCounts = alignmentCounts.find(query); - - if (aCounts == alignmentCounts.end()) - { - alignmentCounts[query] = gamma; - } - else - { - double temp = aCounts->second; - aCounts->second = addLogProbs(temp , gamma); - } - + double tProb; + double tgamma; + double gamma; + map <string , double> :: iterator aCounts; + string query; + + if (edge.first == -1) + query = "NULL"; + else + query = source[edge.first]; + + query += "-"; + + if (edge.second == -1) + query += "NULL"; + else + query += target[edge.second]; + + //cout<<" Query "<<query<<endl; + if (transitionProbs.size() == 0) + tProb = initTransitionProb; + else + tProb = transitionProbs[query]; + + + tgamma = alpha + tProb + beta - ALPHA; + gamma = scaleGamma(tgamma); + //cout<<alpha<<" "<<beta<<" "<<gamma<<endl; + //cout<<tProb<<" "<<ALPHA<<endl; + + aCounts = alignmentCounts.find(query); + + if (aCounts == alignmentCounts.end()) { + alignmentCounts[query] = gamma; + } else { + double temp = aCounts->second; + aCounts->second = addLogProbs(temp , gamma); + } + } double NodeStructure :: getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge) { - if (transitionProbs.size() == 0) - return initTransitionProb; - - string query; - - if (edge.first == -1) - query = "NULL"; - else - query = source[edge.first]; - - query += "-"; - - if (edge.second == -1) - query += "NULL"; - else - query += target[edge.second]; - - //cout<<" Query "<<query<<endl; - return transitionProbs[query]; + if (transitionProbs.size() == 0) + return initTransitionProb; + + string query; + + if (edge.first == -1) + query = "NULL"; + else + query = source[edge.first]; + + query += "-"; + + if (edge.second == -1) + query += "NULL"; + else + query += target[edge.second]; + + //cout<<" Query "<<query<<endl; + return transitionProbs[query]; } double NodeStructure :: FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents) { - double thisAlpha; - double alpha = -2000; - vector < pair < int , int> > incomingEdges; - pair <int , int> edge; - - - getIncomingEdges (TS , incomingEdges); - - for (int k = 0; k < incomingEdges.size(); k++) - { - thisAlpha = parents[incomingEdges[k]]; - getEdge (incomingEdges[k], TS , edge); - thisAlpha += getTransitionProb(gammas , edge); // Get Transition Prob ... - double temp = alpha; - alpha = addLogProbs(temp , thisAlpha); // Sum of all parents * transition prob .. - // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl; - } - - return alpha; + double thisAlpha; + double alpha = -2000; + vector < pair < int , int> > incomingEdges; + pair <int , int> edge; + + + getIncomingEdges (TS , incomingEdges); + + for (int k = 0; k < incomingEdges.size(); k++) { + thisAlpha = parents[incomingEdges[k]]; + getEdge (incomingEdges[k], TS , edge); + thisAlpha += getTransitionProb(gammas , edge); // Get Transition Prob ... + double temp = alpha; + alpha = addLogProbs(temp , thisAlpha); // Sum of all parents * transition prob .. + // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl; + } + + return alpha; } double NodeStructure :: computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents) { - - pair <int , int> TS; - double alpha; - - for (int i = 0; i < source.size(); i++) - { - TS = make_pair (i , -1); - alpha = FwdProb (TS, gammas, parents); - parents[TS] = alpha; - } - - for (int i = 0; i < target.size(); i++) - { - TS = make_pair (-1 , i); - alpha = FwdProb (TS, gammas, parents); - parents[TS] = alpha; - } - - for (int i = 0; i < source.size(); i++) - { - for (int j = 0; j < target.size(); j++) - { - TS = make_pair (i , j); - alpha = FwdProb (TS, gammas, parents); - parents[TS] = alpha; - } - } - - return parents[ST]; + + pair <int , int> TS; + double alpha; + + for (int i = 0; i < source.size(); i++) { + TS = make_pair (i , -1); + alpha = FwdProb (TS, gammas, parents); + parents[TS] = alpha; + } + + for (int i = 0; i < target.size(); i++) { + TS = make_pair (-1 , i); + alpha = FwdProb (TS, gammas, parents); + parents[TS] = alpha; + } + + for (int i = 0; i < source.size(); i++) { + for (int j = 0; j < target.size(); j++) { + TS = make_pair (i , j); + alpha = FwdProb (TS, gammas, parents); + parents[TS] = alpha; + } + } + + return parents[ST]; } double NodeStructure :: BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & children) { - double thisBeta; - double beta = -2000; - vector < pair < int , int> > outgoingEdges; - pair <int , int> edge; - - getOutgoingEdges (TS , outgoingEdges); - - for (int k = 0; k < outgoingEdges.size(); k++) - { - thisBeta = children[outgoingEdges[k]]; - getEdge (TS , outgoingEdges[k], edge); - thisBeta += getTransitionProb(gammas , edge); // Get Transition Prob ... - double temp = beta; - beta = addLogProbs(temp , thisBeta); // Sum of all parents * transition prob .. - // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl; - } - - return beta; + double thisBeta; + double beta = -2000; + vector < pair < int , int> > outgoingEdges; + pair <int , int> edge; + + getOutgoingEdges (TS , outgoingEdges); + + for (int k = 0; k < outgoingEdges.size(); k++) { + thisBeta = children[outgoingEdges[k]]; + getEdge (TS , outgoingEdges[k], edge); + thisBeta += getTransitionProb(gammas , edge); // Get Transition Prob ... + double temp = beta; + beta = addLogProbs(temp , thisBeta); // Sum of all parents * transition prob .. + // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl; + } + + return beta; } double NodeStructure :: computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children) { - - pair <int , int> TS; - double beta; - - for (int i = source.size()-2; i >= -1; i--) - { - TS = make_pair (i , target.size()-1); - beta = BckProb (TS, gammas, children); - children[TS] = beta; - } - - for (int i = target.size()-2; i >=-1; i--) - { - TS = make_pair (source.size()-1 , i); - beta = BckProb (TS, gammas, children); - children[TS] = beta; - } - - for (int i = source.size()-2 ; i >= -1 ; i--) - { - for (int j = target.size()-2 ; j >= -1; j--) - { - TS = make_pair (i , j); - beta = BckProb (TS, gammas, children); - children[TS] = beta; - } - } - - return children[ST]; + + pair <int , int> TS; + double beta; + + for (int i = source.size()-2; i >= -1; i--) { + TS = make_pair (i , target.size()-1); + beta = BckProb (TS, gammas, children); + children[TS] = beta; + } + + for (int i = target.size()-2; i >=-1; i--) { + TS = make_pair (source.size()-1 , i); + beta = BckProb (TS, gammas, children); + children[TS] = beta; + } + + for (int i = source.size()-2 ; i >= -1 ; i--) { + for (int j = target.size()-2 ; j >= -1; j--) { + TS = make_pair (i , j); + beta = BckProb (TS, gammas, children); + children[TS] = beta; + } + } + + return children[ST]; } @@ -445,204 +418,188 @@ double NodeStructure :: computeBckProbs(pair <int , int> & ST, map <string , dou void loadInput(const char * fileName, vector <string> & input) { - /* This function loads a file into a vector of strings */ - - ifstream sr (fileName); - string line; - - if(sr.is_open()) - { - while(getline(sr , line )) - { - input.push_back(line); - } - - sr.close(); - } - else - { - cout<<"Unable to read "<<fileName<<endl; - exit(1); - } + /* This function loads a file into a vector of strings */ + + ifstream sr (fileName); + string line; + + if(sr.is_open()) { + while(getline(sr , line )) { + input.push_back(line); + } + + sr.close(); + } else { + cout<<"Unable to read "<<fileName<<endl; + exit(1); + } } void printGammas(map <string, double> & alignmentCounts) { - map <string , double> :: iterator aCounts; + map <string , double> :: iterator aCounts; - for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) - { - cout<<aCounts->first<<" "<<aCounts->second<<endl; - } + for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) { + cout<<aCounts->first<<" "<<aCounts->second<<endl; + } } void getWords(string s, vector <string> & currInput) { - /* This function splits a string into vector of strings using space character as a delimiter */ + /* This function splits a string into vector of strings using space character as a delimiter */ - istringstream iss(s); - currInput.clear(); - do - { - string sub; - iss >> sub; - currInput.push_back(sub); + istringstream iss(s); + currInput.clear(); + do { + string sub; + iss >> sub; + currInput.push_back(sub); - } while (iss); + } while (iss); - currInput.pop_back(); + currInput.pop_back(); } double getInitTransitionProb(int sourceToken, int targetToken) { - double prod = sourceToken * targetToken; - return log10(1/prod); + double prod = sourceToken * targetToken; + return log10(1/prod); } void runIteration(map <int , NodeStructure> & graph , map <string , double> & gammas , int size) { - map <string, double> alignmentCounts; - map <int , NodeStructure> :: iterator i; - map <string , double> :: iterator aCounts; - double sum = -2000.0; - double tPPR = -2000.0; - - for (i = graph.begin(); i != graph.end(); i++) - { - - i->second.computeFwdBckProbs(gammas , alignmentCounts); - double temp = tPPR; - - tPPR = addLogProbs(graph[i->first].getPosterior() , temp); - - } - - for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) - { - double temp = sum; - sum = addLogProbs(aCounts->second, temp); - } - - - for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) // Normalizing ... - { - aCounts->second = aCounts->second - sum; - } - - gammas.clear(); - gammas = alignmentCounts; - - LAMBDA = tPPR - log10(size); + map <string, double> alignmentCounts; + map <int , NodeStructure> :: iterator i; + map <string , double> :: iterator aCounts; + double sum = -2000.0; + double tPPR = -2000.0; + + for (i = graph.begin(); i != graph.end(); i++) { + + i->second.computeFwdBckProbs(gammas , alignmentCounts); + double temp = tPPR; + + tPPR = addLogProbs(graph[i->first].getPosterior() , temp); + + } + + for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) { + double temp = sum; + sum = addLogProbs(aCounts->second, temp); + } + + + for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) { // Normalizing ... + aCounts->second = aCounts->second - sum; + } + + gammas.clear(); + gammas = alignmentCounts; + + LAMBDA = tPPR - log10(size); } void setNTRProbabilities(map <int , NodeStructure> & graph , map <string , double> & sourceTypes , map <string , double > & targetTypes, double sourceTokens, double targetTokens) { - - map <string , double> :: iterator i; - map <int , NodeStructure> :: iterator j; - - for (i = sourceTypes.begin(); i!= sourceTypes.end(); i++) - { - i->second = log10(i->second/sourceTokens); - } + map <string , double> :: iterator i; + map <int , NodeStructure> :: iterator j; + + + for (i = sourceTypes.begin(); i!= sourceTypes.end(); i++) { + i->second = log10(i->second/sourceTokens); + } - for (i = targetTypes.begin(); i!= targetTypes.end(); i++) - { - i->second = log10(i->second/targetTokens); - } + for (i = targetTypes.begin(); i!= targetTypes.end(); i++) { + i->second = log10(i->second/targetTokens); + } - for (j = graph.begin(); j != graph.end(); j++) - { - j->second.computeNonTransliterationProb(sourceTypes , targetTypes); - } + for (j = graph.begin(); j != graph.end(); j++) { + j->second.computeNonTransliterationProb(sourceTypes , targetTypes); + } } void printPosterior(map <int , NodeStructure> & graph) { - map <int , NodeStructure> :: iterator i; + map <int , NodeStructure> :: iterator i; - for (i = graph.begin(); i != graph.end(); i++) - graph[i->first].print(); + for (i = graph.begin(); i != graph.end(); i++) + graph[i->first].print(); } int main(int argc, char * argv[]) { - vector <string> input; - vector <string> source; - vector <string> target; - map <string , double> sourceTypes; - map <string , double> targetTypes; - set < vector <string> > tgt; - set < vector <string> > src; - double sourceTokens = 0; - double targetTokens = 0; - map <int , NodeStructure> graph; - map <string , double> gammas; - - loadInput(argv[1],input); - - cerr<<"Constructing Graph "<<endl; - - for(int i=0; i<input.size(); i+=2) - { - - //cerr<<input[i]<<endl; - //cerr<<input[i+1]<<endl; - - - getWords(input[i],source); - getWords(input[i+1],target); - - if (src.find(source) == src.end()) - { - for (int j = 0; j< source.size(); j++) - sourceTypes[source[j]]++; - src.insert(source); - sourceTokens += source.size(); - } - - if (tgt.find(target) == tgt.end()) - { - for (int j = 0; j< target.size(); j++) - targetTypes[target[j]]++; - - tgt.insert(target); - targetTokens += target.size(); - } - - NodeStructure obj (source,target); - graph[i] = obj; - - } - - setNTRProbabilities(graph, sourceTypes, targetTypes, sourceTokens, targetTokens); - initTransitionProb = getInitTransitionProb(sourceTypes.size()+1, targetTypes.size()+1); - - LAMBDA = log10(0.5); - - - for (int i = 0; i< 10; i++) - { - - cerr<<"Computing Probs : iteration "<<i+1<<endl; - runIteration(graph , gammas , input.size()/2); - - } - - printPosterior(graph); - cerr<<"Finished..."<<endl; - - return 0; + vector <string> input; + vector <string> source; + vector <string> target; + map <string , double> sourceTypes; + map <string , double> targetTypes; + set < vector <string> > tgt; + set < vector <string> > src; + double sourceTokens = 0; + double targetTokens = 0; + map <int , NodeStructure> graph; + map <string , double> gammas; + + loadInput(argv[1],input); + + cerr<<"Constructing Graph "<<endl; + + for(int i=0; i<input.size(); i+=2) { + + //cerr<<input[i]<<endl; + //cerr<<input[i+1]<<endl; + + + getWords(input[i],source); + getWords(input[i+1],target); + + if (src.find(source) == src.end()) { + for (int j = 0; j< source.size(); j++) + sourceTypes[source[j]]++; + src.insert(source); + sourceTokens += source.size(); + } + + if (tgt.find(target) == tgt.end()) { + for (int j = 0; j< target.size(); j++) + targetTypes[target[j]]++; + + tgt.insert(target); + targetTokens += target.size(); + } + + NodeStructure obj (source,target); + graph[i] = obj; + + } + + setNTRProbabilities(graph, sourceTypes, targetTypes, sourceTokens, targetTokens); + initTransitionProb = getInitTransitionProb(sourceTypes.size()+1, targetTypes.size()+1); + + LAMBDA = log10(0.5); + + + for (int i = 0; i< 10; i++) { + + cerr<<"Computing Probs : iteration "<<i+1<<endl; + runIteration(graph , gammas , input.size()/2); + + } + + printPosterior(graph); + cerr<<"Finished..."<<endl; + + return 0; } diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp index 2716e1d53..c19363442 100644 --- a/moses-chart-cmd/IOWrapper.cpp +++ b/moses-chart-cmd/IOWrapper.cpp @@ -129,7 +129,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str()); m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream); UTIL_THROW_IF2(!m_alignmentInfoStream->good(), - "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile()); + "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile()); } } @@ -175,7 +175,7 @@ InputType*IOWrapper::GetInput(InputType* inputType) void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors) { UTIL_THROW_IF2(outputFactorOrder.size() == 0, - "Cannot be empty phrase"); + "Cannot be empty phrase"); if (reportAllFactors == true) { out << phrase; } else { @@ -184,12 +184,12 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); out << *factor; UTIL_THROW_IF2(factor == NULL, - "Empty factor 0 at position " << pos); + "Empty factor 0 at position " << pos); for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); UTIL_THROW_IF2(factor == NULL, - "Empty factor " << i << " at position " << pos); + "Empty factor " << i << " at position " << pos); out << "|" << *factor; } @@ -233,7 +233,7 @@ void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, l for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) { const Factor *factor = mbrBestHypo[i]; UTIL_THROW_IF(factor == NULL, util::Exception, - "No factor at position " << i ); + "No factor at position " << i ); cout << *factor << " "; } @@ -375,7 +375,7 @@ void IOWrapper::OutputDetailedTranslationReport( OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId); UTIL_THROW_IF2(m_detailOutputCollector == NULL, - "No ouput file for detailed reports specified"); + "No ouput file for detailed reports specified"); m_detailOutputCollector->Write(translationId, out.str()); } @@ -392,7 +392,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport( OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId); UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL, - "No output file for tree fragments specified"); + "No output file for tree fragments specified"); m_detailTreeFragmentsOutputCollector->Write(translationId, out.str()); } @@ -426,7 +426,7 @@ void IOWrapper::OutputDetailedAllTranslationReport( } } UTIL_THROW_IF2(m_detailAllOutputCollector == NULL, - "No output file for details specified"); + "No output file for details specified"); m_detailAllOutputCollector->Write(translationId, out.str()); } @@ -454,7 +454,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId) // delete 1st & last UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); @@ -486,7 +486,7 @@ void IOWrapper::OutputBestHypo(search::Applied applied, long translationId) Incremental::ToPhrase(applied, outPhrase); // delete 1st & last UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); @@ -575,7 +575,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran // delete 1st & last UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); @@ -651,7 +651,7 @@ void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long Incremental::PhraseAndFeatures(*i, outputPhrase, features); // <s> and </s> UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); @@ -817,7 +817,7 @@ size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypoth size_t targetInd = 0; for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { if (tp.GetWord(targetPos).IsNonTerminal()) { - UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); + UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); size_t sourceInd = targetPos2SourceInd[targetPos]; size_t sourcePos = sourceInd2pos[sourceInd]; diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp index da2589acd..3e55f96e9 100644 --- a/moses-cmd/IOWrapper.cpp +++ b/moses-cmd/IOWrapper.cpp @@ -168,16 +168,16 @@ void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder const std::string &path = staticData.GetDetailedTranslationReportingFilePath(); m_detailedTranslationReportingStream = new std::ofstream(path.c_str()); UTIL_THROW_IF(!m_detailedTranslationReportingStream->good(), - util::FileOpenException, - "File for output of detailed translation report could not be open"); + util::FileOpenException, + "File for output of detailed translation report could not be open"); } // sentence alignment output if (! staticData.GetAlignmentOutputFile().empty()) { m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str()); UTIL_THROW_IF(!m_alignmentOutputStream->good(), - util::FileOpenException, - "File for output of word alignment could not be open"); + util::FileOpenException, + "File for output of word alignment could not be open"); } } @@ -208,7 +208,7 @@ std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorTy if (factor) { std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); UTIL_THROW_IF2(targetPos.size() != 1, - "Placeholder should be aligned to 1, and only 1, word"); + "Placeholder should be aligned to 1, and only 1, word"); ret[*targetPos.begin()] = factor; } } @@ -223,7 +223,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector< char reportSegmentation, bool reportAllFactors) { UTIL_THROW_IF2(outputFactorOrder.size() == 0, - "Must specific at least 1 output factor"); + "Must specific at least 1 output factor"); const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { @@ -250,7 +250,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector< } UTIL_THROW_IF2(factor == NULL, - "No factor 0 at position " << pos); + "No factor 0 at position " << pos); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); @@ -263,7 +263,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector< for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); UTIL_THROW_IF2(factor == NULL, - "No factor " << i << " at position " << pos); + "No factor " << i << " at position " << pos); out << "|" << *factor; } @@ -399,7 +399,7 @@ void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId* for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) { const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]); UTIL_THROW_IF2(factor == NULL, - "No factor 0 at position " << i); + "No factor 0 at position " << i); if (i>0) out << " " << *factor; else out << *factor; } diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp index 4c6b421fc..39d88f34d 100644 --- a/moses-cmd/LatticeMBRGrid.cpp +++ b/moses-cmd/LatticeMBRGrid.cpp @@ -70,7 +70,7 @@ public: void addParam(gridkey key, const string& arg, float defaultValue) { m_args[arg] = key; UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(), - "Couldn't find value for key " << (int) key); + "Couldn't find value for key " << (int) key); m_grid[key].push_back(defaultValue); } diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index eb4f20e0d..220b72a94 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -307,9 +307,9 @@ public: out << m_source->GetTranslationId() << " "; } - if (staticData.GetReportSegmentation() == 2) { - manager.GetOutputLanguageModelOrder(out, bestHypo); - } + if (staticData.GetReportSegmentation() == 2) { + manager.GetOutputLanguageModelOrder(out, bestHypo); + } OutputBestSurface( out, bestHypo, diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index aa29a4a12..e3d3da453 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -126,7 +126,7 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption { // don't call this on a bidirectional object UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward, - "Unknown direction: " << m_direction); + "Unknown direction: " << m_direction); const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ? topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : m_prevScore; diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp index d934e67d7..c1a295b94 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp @@ -268,7 +268,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co } if(m_FactorsC.empty()) { - UTIL_THROW_IF2(1 != cands.size(), "Error"); + UTIL_THROW_IF2(1 != cands.size(), "Error"); return cands[0].GetScore(0); } else { score = auxFindScoreForContext(cands, c); @@ -283,7 +283,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context) { if(m_FactorsC.empty()) { - UTIL_THROW_IF2(cands.size() > 1, "Error"); + UTIL_THROW_IF2(cands.size() > 1, "Error"); return (1 == cands.size())?(cands[0].GetScore(0)):(Scores()); } else { @@ -384,7 +384,7 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile, } else { //sanity check ALL lines must have same number of tokens UTIL_THROW_IF2(numTokens != tokens.size(), - "Lines do not have the same number of tokens"); + "Lines do not have the same number of tokens"); } size_t phrase = 0; for(; phrase < numKeyTokens; ++phrase) { diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index dfa380a77..0bb7aed95 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -21,7 +21,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line) OpSequenceModel::~OpSequenceModel() { - delete OSM; + delete OSM; } void OpSequenceModel :: readLanguageModel(const char *lmFile) @@ -199,7 +199,7 @@ FFState* OpSequenceModel::EvaluateChart( int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const { - UTIL_THROW2("Chart decoding not support by UTIL_THROW2"); + UTIL_THROW2("Chart decoding not support by UTIL_THROW2"); } diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp index 64d3d87b4..ad6773fc8 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp @@ -42,7 +42,7 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory( , m_ruleTable(ruleTable) { UTIL_THROW_IF2(m_dottedRuleColls.size() != 0, - "Dotted rule collection not correctly initialized"); + "Dotted rule collection not correctly initialized"); size_t sourceSize = parser.GetSize(); m_dottedRuleColls.resize(sourceSize); @@ -179,8 +179,8 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection( dottedRuleCol.Add(relEndPos+1, dottedRule); } } - // we only need to check once if a terminal matches the input at a given position. - expandableDottedRuleListTerminalsOnly.erase(it); + // we only need to check once if a terminal matches the input at a given position. + expandableDottedRuleListTerminalsOnly.erase(it); } // list of rules that that cover the entire span diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp index ced36c186..8820890be 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp @@ -49,7 +49,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk( , m_filePath(filePath) { UTIL_THROW_IF2(m_expandableDottedRuleListVec.size() != 0, - "Dotted rule collection not correctly initialized"); + "Dotted rule collection not correctly initialized"); size_t sourceSize = parser.GetSize(); m_expandableDottedRuleListVec.resize(sourceSize); @@ -237,12 +237,12 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( std::vector<float> weightT = staticData.GetWeights(&m_dictionary); targetPhraseCollection - = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec - ,m_outputFactorsVec - ,m_dictionary - ,weightT - ,m_dbWrapper.GetVocab() - ,true); + = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec + ,m_outputFactorsVec + ,m_dictionary + ,weightT + ,m_dbWrapper.GetVocab() + ,true); delete tpcollBerkeleyDb; m_cache[tpCollFilePos] = targetPhraseCollection; diff --git a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h index 390d19b5c..a95b5599e 100644 --- a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h +++ b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h @@ -103,8 +103,7 @@ public: if (dottedRule->GetLastNode().GetNonTerminalMap().empty() && !dottedRule->IsRoot()) { size_t startPos = dottedRule->GetWordsRange().GetEndPos() + 1; m_expandableDottedRuleListTerminalsOnly[startPos].push_back(dottedRule); - } - else { + } else { m_expandableDottedRuleList.push_back(dottedRule); } } diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp index cd277ad0c..c90dcd6d9 100644 --- a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp +++ b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp @@ -366,10 +366,10 @@ void BlockHashIndex::CalcHash(size_t current, void* source_void) if(lastKey > temp) { if(source->nkeys != 2 || temp != "###DUMMY_KEY###") { - std::stringstream strme; - strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl; - strme << "1: " << lastKey << std::endl; - strme << "2: " << temp << std::endl; + std::stringstream strme; + strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl; + strme << "1: " << lastKey << std::endl; + strme << "2: " << temp << std::endl; UTIL_THROW2(strme.str()); } } diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp index d771d3212..6a585d2a8 100644 --- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp @@ -206,7 +206,7 @@ std::string LexicalReorderingTableCreator::EncodeLine(std::vector<std::string>& if(m_numScoreComponent != scores.size()) { std::stringstream strme; strme << "Error: Wrong number of scores detected (" - << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl; + << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl; strme << "Line: " << tokens[0] << " ||| ... ||| " << scoresString << std::endl; UTIL_THROW2(strme.str()); } diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 360b7a9fd..d6860a43b 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -99,7 +99,7 @@ void PhraseDictionaryCompact::Load() phraseSize = m_targetPhrasesMapped.load(pFile, true); UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0, - "Not successfully loaded"); + "Not successfully loaded"); } // now properly declared in TargetPhraseCollection.h diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp index 00983fd53..a2bd879b4 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp @@ -714,10 +714,10 @@ std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, siz std::vector<float> scores = Tokenize<float>(scoresStr); if(scores.size() != m_numScoreComponent) { - std::stringstream strme; - strme << "Error: Wrong number of scores detected (" - << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl; - strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[3] << " ..." << std::endl; + std::stringstream strme; + strme << "Error: Wrong number of scores detected (" + << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl; + strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[3] << " ..." << std::endl; UTIL_THROW2(strme.str()); } @@ -1040,30 +1040,30 @@ void RankingTask::operator()() *it = Moses::Trim(*it); if(tokens.size() < 4) { - std::stringstream strme; - strme << "Error: It seems the following line has a wrong format:" << std::endl; - strme << "Line " << i << ": " << lines[i] << std::endl; + std::stringstream strme; + strme << "Error: It seems the following line has a wrong format:" << std::endl; + strme << "Line " << i << ": " << lines[i] << std::endl; UTIL_THROW2(strme.str()); } if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) { - std::stringstream strme; - strme << "Error: It seems the following line contains no alignment information, " << std::endl; - strme << "but you are using "; - strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); - strme << " encoding which makes use of alignment data. " << std::endl; - strme << "Use -encoding None" << std::endl; - strme << "Line " << i << ": " << lines[i] << std::endl; + std::stringstream strme; + strme << "Error: It seems the following line contains no alignment information, " << std::endl; + strme << "but you are using "; + strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); + strme << " encoding which makes use of alignment data. " << std::endl; + strme << "Use -encoding None" << std::endl; + strme << "Line " << i << ": " << lines[i] << std::endl; UTIL_THROW2(strme.str()); } std::vector<float> scores = Tokenize<float>(tokens[2]); if(scores.size() != m_creator.m_numScoreComponent) { - std::stringstream strme; - strme << "Error: It seems the following line has a wrong number of scores (" - << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl; - strme << "Line " << i << ": " << lines[i] << std::endl; - UTIL_THROW2(strme.str()); + std::stringstream strme; + strme << "Error: It seems the following line has a wrong number of scores (" + << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl; + strme << "Line " << i << ": " << lines[i] << std::endl; + UTIL_THROW2(strme.str()); } float sortScore = scores[m_creator.m_sortScoreIndex]; @@ -1140,20 +1140,20 @@ void EncodingTask::operator()() *it = Moses::Trim(*it); if(tokens.size() < 3) { - std::stringstream strme; - strme << "Error: It seems the following line has a wrong format:" << std::endl; - strme << "Line " << i << ": " << lines[i] << std::endl; + std::stringstream strme; + strme << "Error: It seems the following line has a wrong format:" << std::endl; + strme << "Line " << i << ": " << lines[i] << std::endl; UTIL_THROW2(strme.str()); } if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) { - std::stringstream strme; - strme << "Error: It seems the following line contains no alignment information, " << std::endl; - strme << "but you are using "; - strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); - strme << " encoding which makes use of alignment data. " << std::endl; - strme << "Use -encoding None" << std::endl; - strme << "Line " << i << ": " << lines[i] << std::endl; + std::stringstream strme; + strme << "Error: It seems the following line contains no alignment information, " << std::endl; + strme << "but you are using "; + strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); + strme << " encoding which makes use of alignment data. " << std::endl; + strme << "Use -encoding None" << std::endl; + strme << "Line " << i << ": " << lines[i] << std::endl; UTIL_THROW2(strme.str()); } diff --git a/moses/TranslationModel/DynSAInclude/FileHandler.cpp b/moses/TranslationModel/DynSAInclude/FileHandler.cpp index 4e92ad907..5f9cd7c45 100644 --- a/moses/TranslationModel/DynSAInclude/FileHandler.cpp +++ b/moses/TranslationModel/DynSAInclude/FileHandler.cpp @@ -71,13 +71,13 @@ bool FileHandler::setStreamBuffer(bool checkExists) { // redirect stdin or stdout if necesary if (path_ == FileHandler::kStdInDescriptor) { - UTIL_THROW_IF2(flags_ & std::ios::in == 0, - "Incorrect flags: " << flags_); + UTIL_THROW_IF2(flags_ & std::ios::in == 0, + "Incorrect flags: " << flags_); std::streambuf* sb = std::cin.rdbuf(); buffer_ = sb; } else if (path_ == FileHandler::kStdOutDescriptor) { - UTIL_THROW_IF2(flags_ & std::ios::out == 0, - "Incorrect flags: " << flags_); + UTIL_THROW_IF2(flags_ & std::ios::out == 0, + "Incorrect flags: " << flags_); std::streambuf* sb = std::cout.rdbuf(); buffer_ = sb; } else { diff --git a/moses/TranslationModel/DynSAInclude/RandLMFilter.h b/moses/TranslationModel/DynSAInclude/RandLMFilter.h index e8defb110..19566ff40 100644 --- a/moses/TranslationModel/DynSAInclude/RandLMFilter.h +++ b/moses/TranslationModel/DynSAInclude/RandLMFilter.h @@ -62,9 +62,9 @@ public: address_mask_ = full_mask_ >> first_bit_; } Filter(FileHandler* fin, bool loaddata = true) : data_(NULL) { - assert(loadHeader(fin)); + assert(loadHeader(fin)); if (loaddata) - assert(loadData(fin)); + assert(loadData(fin)); } virtual ~Filter() { delete[] data_; @@ -80,7 +80,7 @@ public: } // read / write functions inline bool read(uint64_t address, T* value) { - assert(address <= addresses_); + assert(address <= addresses_); // copy address to 'value' uint64_t data_bit = address * width_; uint32_t data_cell = (data_bit >> log_cell_width_); // % cells_; @@ -102,7 +102,7 @@ public: return true; } inline T read(uint64_t address) { - assert(address <= addresses_); + assert(address <= addresses_); // return value at address T value = 0; uint64_t data_bit = address * width_; @@ -124,7 +124,7 @@ public: return value; } inline bool write(uint64_t address, T value) { - assert(address <= addresses_); + assert(address <= addresses_); assert(log2(value) <= width_); // write 'value' to address uint64_t data_bit = address * width_; diff --git a/moses/TranslationModel/DynSAInclude/onlineRLM.h b/moses/TranslationModel/DynSAInclude/onlineRLM.h index 929602399..cd9ed8a87 100644 --- a/moses/TranslationModel/DynSAInclude/onlineRLM.h +++ b/moses/TranslationModel/DynSAInclude/onlineRLM.h @@ -148,8 +148,8 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len) //markQueried(hpdItr); // mark this event as "hit" value -= ((value & this->hitMask_) != 0) ? this->hitMask_ : 0; // check for previous hit marks } else { - UTIL_THROW_IF2(filterIdx >= this->cells_, - "Out of bound: " << filterIdx); + UTIL_THROW_IF2(filterIdx >= this->cells_, + "Out of bound: " << filterIdx); //markQueried(filterIdx); } } @@ -341,7 +341,7 @@ const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len) int dummy(0); float**addresses = new float*[len]; // only interested in addresses of cache UTIL_THROW_IF2(cache_->getCache2(ngram, len, &addresses[0], &dummy) != len, - "Error"); + "Error"); // return address of cache node float *addr0 = addresses[0]; diff --git a/moses/TranslationModel/DynSAInclude/params.cpp b/moses/TranslationModel/DynSAInclude/params.cpp index 27cf06a51..4696258f8 100644 --- a/moses/TranslationModel/DynSAInclude/params.cpp +++ b/moses/TranslationModel/DynSAInclude/params.cpp @@ -65,10 +65,10 @@ bool Parameters::loadParams(int argc, char ** argv) if( getValueType(param) == kBoolValue ) { jumpBy = 1; UTIL_THROW_IF2(!setParamValue(param, kTrueValue), - "Couldn't set parameter " << param); + "Couldn't set parameter " << param); } else { //not of type bool so must have corresponding value UTIL_THROW_IF2(i+1 >= argc, - "Out of bound error: " << i+1); + "Out of bound error: " << i+1); jumpBy = 2; std::string val = argv[i+1]; diff --git a/moses/TranslationModel/DynSAInclude/quantizer.h b/moses/TranslationModel/DynSAInclude/quantizer.h index 002535dd0..6dbcc3cc4 100644 --- a/moses/TranslationModel/DynSAInclude/quantizer.h +++ b/moses/TranslationModel/DynSAInclude/quantizer.h @@ -17,7 +17,7 @@ class LogQtizer { public: LogQtizer(float i): base_(pow(2, 1 / i)) { - UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1"); + UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1"); max_code_ = 0; float value = 1; // code = 1 -> value = 1 for any base std::vector<float> code_to_value_vec; @@ -40,13 +40,13 @@ public: std::cerr << "Initialized quantization (size = " << max_code_ + 1 << ")" << std::endl; } LogQtizer(FileHandler* fin) { - UTIL_THROW_IF2(fin == NULL, "Null file handle"); + UTIL_THROW_IF2(fin == NULL, "Null file handle"); load(fin); } int code(float value) { // should just be: return log_b(value) UTIL_THROW_IF2(value < min_value_ || value > max_value_, - "Value " << value << " out of bound"); + "Value " << value << " out of bound"); // but binary search removes errors due to floor operator above int code = static_cast<int>(std::lower_bound(code_to_value_, code_to_value_+ max_code_, diff --git a/moses/TranslationModel/DynSAInclude/vocab.cpp b/moses/TranslationModel/DynSAInclude/vocab.cpp index 1e6c92203..b717f533c 100644 --- a/moses/TranslationModel/DynSAInclude/vocab.cpp +++ b/moses/TranslationModel/DynSAInclude/vocab.cpp @@ -134,7 +134,7 @@ bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction, if (id == 0 && word != GetkOOVWord()) id = m_ids2words.size() + 1; // assign ids sequentially starting from 1 UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0, - "Error"); + "Error"); m_ids2words[id] = word; m_words2ids[word] = id; diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp index 9d2e3fa20..b6e779078 100644 --- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp +++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp @@ -73,7 +73,7 @@ void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t // no-term vector<string> split = Tokenize(tok, ","); UTIL_THROW_IF2(split.size() != 2, - "Incorrectly formmatted non-terminal: " << tok); + "Incorrectly formmatted non-terminal: " << tok); tok = "[X]" + split[0] + "]"; size_t coIndex = Scan<size_t>(split[1]); @@ -100,7 +100,7 @@ void ReformateHieroScore(string &scoreString) string &tok = toks[i]; vector<string> nameValue = Tokenize(tok, "="); UTIL_THROW_IF2(nameValue.size() != 2, - "Incorrectly formatted score: " << tok); + "Incorrectly formatted score: " << tok); float score = Scan<float>(nameValue[1]); score = exp(-score); @@ -212,7 +212,7 @@ bool RuleTableLoaderStandard::Load(FormatType format const size_t numScoreComponents = ruleTable.GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!=" - << numScoreComponents << ") of score components on line " << count); + << numScoreComponents << ") of score components on line " << count); } // parse source & find pt node diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp index 81fd43fcb..4c2f4d186 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp @@ -50,7 +50,7 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source) *this); UTIL_THROW_IF2(ret == NULL, - "Rules not successfully loaded for sentence id " << translationId); + "Rules not successfully loaded for sentence id " << translationId); } void PhraseDictionaryALSuffixArray::CleanUpAfterSentenceProcessing(const InputType &source) diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp index 7cb26ad88..5d8c95428 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp @@ -84,13 +84,13 @@ PhraseDictionaryFuzzyMatch:: SetParameter(const std::string& key, const std::string& value) { if (key == "source") { - m_config[0] = value; + m_config[0] = value; } else if (key == "target") { - m_config[1] = value; + m_config[1] = value; } else if (key == "alignment") { - m_config[2] = value; + m_config[2] = value; } else { - PhraseDictionary::SetParameter(key, value); + PhraseDictionary::SetParameter(key, value); } } @@ -150,7 +150,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten char dirName[] = "/tmp/moses.XXXXXX"; char *temp = mkdtemp(dirName); UTIL_THROW_IF2(temp == NULL, - "Couldn't create temporary directory " << dirName); + "Couldn't create temporary directory " << dirName); string dirNameStr(dirName); @@ -218,11 +218,11 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten const size_t numScoreComponents = GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!=" - << numScoreComponents << ") of score components on line " << count); + << numScoreComponents << ") of score components on line " << count); } UTIL_THROW_IF2(scoreVector.size() != numScoreComponents, - "Number of scores incorrectly specified"); + "Number of scores incorrectly specified"); // parse source & find pt node @@ -298,9 +298,9 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi const Word &sourceNonTerm = word; UTIL_THROW_IF2(iterAlign == alignmentInfo.end(), - "No alignment for non-term at position " << pos); + "No alignment for non-term at position " << pos); UTIL_THROW_IF2(iterAlign->first != pos, - "Alignment info incorrect at position " << pos); + "Alignment info incorrect at position " << pos); size_t targetNonTermInd = iterAlign->second; ++iterAlign; @@ -312,7 +312,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi } UTIL_THROW_IF2(currNode == NULL, - "Node not found at position " << pos); + "Node not found at position " << pos); } @@ -338,7 +338,7 @@ const PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(long t { std::map<long, PhraseDictionaryNodeMemory>::const_iterator iter = m_collection.find(translationId); UTIL_THROW_IF2(iter == m_collection.end(), - "Couldn't find root node for input: " << translationId); + "Couldn't find root node for input: " << translationId); return iter->second; } PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source) @@ -346,7 +346,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputT long transId = source.GetTranslationId(); std::map<long, PhraseDictionaryNodeMemory>::iterator iter = m_collection.find(transId); UTIL_THROW_IF2(iter == m_collection.end(), - "Couldn't find root node for input: " << transId); + "Couldn't find root node for input: " << transId); return iter->second; } diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index ea60d4d23..778e349d2 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -85,20 +85,20 @@ void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source) obj->BeginLoad(m_filePath); UTIL_THROW_IF2(obj->GetMisc("Version") != OnDiskPt::OnDiskWrapper::VERSION_NUM, - "On-disk phrase table is version " << obj->GetMisc("Version") - << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM); + "On-disk phrase table is version " << obj->GetMisc("Version") + << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM); UTIL_THROW_IF2(obj->GetMisc("NumSourceFactors") != m_input.size(), - "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors." - << ". The ini file specified " << m_input.size() << " source factors"); + "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors." + << ". The ini file specified " << m_input.size() << " source factors"); UTIL_THROW_IF2(obj->GetMisc("NumTargetFactors") != m_output.size(), - "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors." - << ". The ini file specified " << m_output.size() << " target factors"); + "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors." + << ". The ini file specified " << m_output.size() << " target factors"); UTIL_THROW_IF2(obj->GetMisc("NumScores") != m_numScoreComponents, - "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores." - << ". The ini file specified " << m_numScoreComponents << " scores"); + "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores." + << ". The ini file specified " << m_numScoreComponents << " scores"); m_implementation.reset(obj); } @@ -194,7 +194,7 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionN const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper); TargetPhraseCollection *targetPhrases - = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); + = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false); delete targetPhrasesOnDisk; diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp index e0e9fd3e2..e5071cbba 100644 --- a/phrase-extract/InternalStructFeature.cpp +++ b/phrase-extract/InternalStructFeature.cpp @@ -6,67 +6,72 @@ namespace MosesTraining { InternalStructFeature::InternalStructFeature() - :m_type(0){ - //cout<<"InternalStructFeature: Construct "<<m_type<<"\n"; + :m_type(0) +{ + //cout<<"InternalStructFeature: Construct "<<m_type<<"\n"; } -bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{ - //cout<<"InternalStructFeature: Equals\n"; - //don't know what it's used for and what we should compare - //-> if the dense score is the same - //-> if the sparse feature is set - // compare phrases? with the internalStrucutre string? - /** Return true if the two phrase pairs are equal from the point of this feature. Assume - that they already compare true according to PhraseAlignment.equals() - **/ +bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const +{ + //cout<<"InternalStructFeature: Equals\n"; + //don't know what it's used for and what we should compare + //-> if the dense score is the same + //-> if the sparse feature is set + // compare phrases? with the internalStrucutre string? + /** Return true if the two phrase pairs are equal from the point of this feature. Assume + that they already compare true according to PhraseAlignment.equals() + **/ -/* if(lhs.ghkmParse==rhs.ghkmParse) - return true; - else - return false; -*/ - //return true; + /* if(lhs.ghkmParse==rhs.ghkmParse) + return true; + else + return false; + */ + //return true; } void InternalStructFeature::add(const ScoreFeatureContext& context, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ - for(size_t i=0; i<context.phrasePair.size(); i++) { - add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues); - } + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const +{ + for(size_t i=0; i<context.phrasePair.size(); i++) { + add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues); + } } void InternalStructFeatureDense::add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ - //cout<<"Dense: "<<*internalStruct<<endl; - size_t start=0; - int countNP=0; - while((start = internalStruct->find("NP", start)) != string::npos) { - countNP++; - start+=2; //length of "NP" - } - //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? - //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 ) - denseValues.push_back(exp(countNP)); + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const +{ + //cout<<"Dense: "<<*internalStruct<<endl; + size_t start=0; + int countNP=0; + while((start = internalStruct->find("NP", start)) != string::npos) { + countNP++; + start+=2; //length of "NP" + } + //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln? + //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 ) + denseValues.push_back(exp(countNP)); } void InternalStructFeatureSparse::add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const{ - //cout<<"Sparse: "<<*internalStruct<<endl; - if(internalStruct->find("VBZ")!=std::string::npos) - sparseValues["NTVBZ"] = 1; - if(internalStruct->find("VBD")!=std::string::npos) - sparseValues["NTVBD"] = 1; - if(internalStruct->find("VBP")!=std::string::npos) - sparseValues["NTVBP"] = 1; - if(internalStruct->find("PP")!=std::string::npos) - sparseValues["NTPP"] = 1; - if(internalStruct->find("SBAR")!=std::string::npos) - sparseValues["NTSBAR"] = 1; + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const +{ + //cout<<"Sparse: "<<*internalStruct<<endl; + if(internalStruct->find("VBZ")!=std::string::npos) + sparseValues["NTVBZ"] = 1; + if(internalStruct->find("VBD")!=std::string::npos) + sparseValues["NTVBD"] = 1; + if(internalStruct->find("VBP")!=std::string::npos) + sparseValues["NTVBP"] = 1; + if(internalStruct->find("PP")!=std::string::npos) + sparseValues["NTPP"] = 1; + if(internalStruct->find("SBAR")!=std::string::npos) + sparseValues["NTSBAR"] = 1; } diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h index bd513a715..fe6dedc15 100644 --- a/phrase-extract/InternalStructFeature.h +++ b/phrase-extract/InternalStructFeature.h @@ -21,46 +21,50 @@ namespace MosesTraining class InternalStructFeature : public ScoreFeature { public: - InternalStructFeature(); - /** Return true if the two phrase pairs are equal from the point of this feature. Assume - that they already compare true according to PhraseAlignment.equals() - **/ - bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const; - /** Add the values for this feature function. */ - void add(const ScoreFeatureContext& context, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const; + InternalStructFeature(); + /** Return true if the two phrase pairs are equal from the point of this feature. Assume + that they already compare true according to PhraseAlignment.equals() + **/ + bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const; + /** Add the values for this feature function. */ + void add(const ScoreFeatureContext& context, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const; protected: - /** Overriden in subclass */ - virtual void add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const = 0; - int m_type; + /** Overriden in subclass */ + virtual void add(std::string *internalStruct, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const = 0; + int m_type; }; class InternalStructFeatureDense : public InternalStructFeature { public: - InternalStructFeatureDense() - :InternalStructFeature(){m_type=1;} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";} + InternalStructFeatureDense() + :InternalStructFeature() { + m_type=1; + } //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";} protected: - virtual void add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const; + virtual void add(std::string *internalStruct, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const; }; class InternalStructFeatureSparse : public InternalStructFeature { public: - InternalStructFeatureSparse() - :InternalStructFeature(){m_type=2;}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";} + InternalStructFeatureSparse() + :InternalStructFeature() { + m_type=2; + }// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";} protected: - virtual void add(std::string *internalStruct, - std::vector<float>& denseValues, - std::map<std::string,float>& sparseValues) const; + virtual void add(std::string *internalStruct, + std::vector<float>& denseValues, + std::map<std::string,float>& sparseValues) const; }; } diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h index 574b9afc1..8aae418ab 100644 --- a/phrase-extract/PhraseExtractionOptions.h +++ b/phrase-extract/PhraseExtractionOptions.h @@ -68,9 +68,9 @@ public: includeSentenceIdFlag(false), onlyOutputSpanInfo(false), gzOutput(false), - flexScoreFlag(false), - debug(false) -{} + flexScoreFlag(false), + debug(false) + {} //functions for initialization of options void initAllModelsOutputFlag(const bool initallModelsOutputFlag) { diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp index f98759755..233be0168 100644 --- a/phrase-extract/ScoreFeature.cpp +++ b/phrase-extract/ScoreFeature.cpp @@ -39,7 +39,7 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args) bool sparseDomainAdded = false; for (size_t i = 0; i < args.size(); ++i) { - if (args[i] == "--IgnoreSentenceId") { + if (args[i] == "--IgnoreSentenceId") { m_includeSentenceId = true; } else if (args[i].substr(0,8) == "--Domain") { string type = args[i].substr(8); @@ -77,15 +77,15 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args) } sparseDomainAdded = true; m_includeSentenceId = true; - } else if(args[i] == "--GHKMFeatureSparse"){ - //MARIA - m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); - } else if(args[i] == "--GHKMFeatureDense"){ - //MARIA - m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); + } else if(args[i] == "--GHKMFeatureSparse") { + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); + } else if(args[i] == "--GHKMFeatureDense") { + //MARIA + m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); } else { UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]); - } + } } diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp index 5cf7185f5..399714856 100644 --- a/phrase-extract/ScoreFeatureTest.cpp +++ b/phrase-extract/ScoreFeatureTest.cpp @@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except) args.clear(); args = Tokenize("--DomainSubset"," "); BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException); - + } template <class Expected> diff --git a/phrase-extract/Util.cpp___ b/phrase-extract/Util.cpp___ new file mode 100644 index 000000000..812705660 --- /dev/null +++ b/phrase-extract/Util.cpp___ @@ -0,0 +1,28 @@ +/* + * Util.cpp + * mert - Minimum Error Rate Training + * + * Created by Nicola Bertoldi on 13/05/08. + * + */ + +#include "Util.h" + +using namespace std; + +namespace MosesTuning +{ + +void Tokenize(const char *str, const char delim, + std::vector<std::string> *res) +{ + while (1) { + const char *begin = str; + while (*str != delim && *str) str++; + if (begin != str) // Don't create empty string objects. + res->push_back(std::string(begin, str)); + if (*str++ == 0) break; + } +} + +} diff --git a/phrase-extract/Util.h___ b/phrase-extract/Util.h___ new file mode 100644 index 000000000..f63124f1d --- /dev/null +++ b/phrase-extract/Util.h___ @@ -0,0 +1,140 @@ +/* + * Util.h + * mert - Minimum Error Rate Training + * + * Created by Nicola Bertoldi on 13/05/08. + * + */ + +#ifndef MERT_UTIL_H_ +#define MERT_UTIL_H_ + +#include <cmath> +#include <cstdlib> +#include <stdexcept> +#include <limits> +#include <vector> +#include <map> +#include <iostream> +#include <sstream> +#include <string> +#include <cstring> + +#include "Types.h" + +namespace MosesTuning +{ + +#ifdef TRACE_ENABLE +#define TRACE_ERR(str) { std::cerr << str; } +#else +#define TRACE_ERR(str) { } +#endif + +#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2) +// gcc nth_element() bug +#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer) +#else +#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end) +#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer) +#endif + +const char kDefaultDelimiterSymbol[] = " "; + +int verboselevel(); +int setverboselevel(int v); + + +const float kEPS = 0.0001f; + +template <typename T> +bool IsAlmostEqual(T expected, T actual, float round=kEPS) +{ + if (std::abs(expected - actual) < round) { + return true; + } else { + std::cerr << "Fail: expected = " << expected + << " (actual = " << actual << ")" << std::endl; + return false; + } +} + +/** + * Find the specified delimiter for the string 'str', and 'str' is assigned + * to a substring object that starts at the position of first occurrence of + * the delimiter in 'str'. 'substr' is copied from 'str' ranging from + * the start position of 'str' to the position of first occurrence of + * the delimiter. + * + * It returns the position of first occurrence in the queried string. + * If the content is not found, std::string::npos is returned. + */ +size_t getNextPound(std::string &str, std::string &substr, + const std::string &delimiter = kDefaultDelimiterSymbol); + +void split(const std::string &s, char delim, std::vector<std::string> &elems); + +/** + * Split the string 'str' with specified delimitter 'delim' into tokens. + * The resulting tokens are set to 'res'. + * + * ex. "a,b,c" => {"a", "b", "c"}. + */ +void Tokenize(const char *str, const char delim, std::vector<std::string> *res); + +/** + * Returns true iff "str" ends with "suffix". + * e.g., Given str = "abc:" and suffix = ":", this function returns true. + */ +inline bool EndsWith(const std::string& str, const char* suffix) +{ + return str.find_last_of(suffix) == str.size() - 1; +} + +template<typename T> +inline std::string stringify(T x) +{ + std::ostringstream o; + if (!(o << x)) + throw std::runtime_error("stringify(template<typename T>)"); + return o.str(); +} + +inline ScoreStatsType ConvertCharToScoreStatsType(const char *str) +{ + return std::atoi(str); +} + +inline ScoreStatsType ConvertStringToScoreStatsType(const std::string& str) +{ + return ConvertCharToScoreStatsType(str.c_str()); +} + +inline FeatureStatsType ConvertCharToFeatureStatsType(const char *str) +{ + return static_cast<FeatureStatsType>(std::atof(str)); +} + +inline FeatureStatsType ConvertStringToFeatureStatsType(const std::string &str) +{ + return ConvertCharToFeatureStatsType(str.c_str()); +} + +inline std::string trimStr(const std::string& Src, const std::string& c = " \r\n") +{ + size_t p2 = Src.find_last_not_of(c); + if (p2 == std::string::npos) return std::string(); + size_t p1 = Src.find_first_not_of(c); + if (p1 == std::string::npos) p1 = 0; + return Src.substr(p1, (p2-p1)+1); +} + +// Utilities to measure decoding time +void ResetUserTime(); +void PrintUserTime(const std::string &message); +double GetUserTime(); + +} + +#endif // MERT_UTIL_H_ diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp index b2cde6d64..bf306f30d 100644 --- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp +++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp @@ -163,13 +163,13 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out) } } -void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g) +void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g) { - Write(rule,false); - m_fwd << " Tree "; - g.PrintTree(m_fwd); - m_fwd << std::endl; - m_inv << std::endl; + Write(rule,false); + m_fwd << " Tree "; + g.PrintTree(m_fwd); + m_fwd << std::endl; + m_inv << std::endl; } } // namespace GHKM diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h index 18f423149..01883cdff 100644 --- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h +++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h @@ -44,7 +44,7 @@ public: void Write(const ScfgRule &rule, bool printEndl=true); - void Write(const ScfgRule &rule, const Subgraph &g); + void Write(const ScfgRule &rule, const Subgraph &g); private: // Disallow copying diff --git a/phrase-extract/extract-ghkm/Subgraph.cpp b/phrase-extract/extract-ghkm/Subgraph.cpp index 0c941218d..6796cec95 100644 --- a/phrase-extract/extract-ghkm/Subgraph.cpp +++ b/phrase-extract/extract-ghkm/Subgraph.cpp @@ -119,12 +119,12 @@ float Subgraph::CalcPcfgScore() const return score; } -void Subgraph::PrintTree(std::ostream &out) const +void Subgraph::PrintTree(std::ostream &out) const { RecursivelyPrintTree(m_root,out); } -void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const +void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const { NodeType nodeType = n->GetType(); if (nodeType == TREE) { diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp index 5d58028d6..76f695d2a 100644 --- a/phrase-extract/extract-main.cpp +++ b/phrase-extract/extract-main.cpp @@ -171,7 +171,7 @@ int main(int argc, char* argv[]) } options.initInstanceWeightsFile(argv[++i]); } else if (strcmp(argv[i], "--Debug") == 0) { - options.debug = true; + options.debug = true; } else if(strcmp(argv[i],"--model") == 0) { if (i+1 >= argc) { cerr << "extract: syntax error, no model's information provided to the option --model " << endl; @@ -722,9 +722,9 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, } if (m_options.debug) { - outextractstr << "sentenceID=" << sentence.sentenceID << " "; - outextractstrInv << "sentenceID=" << sentence.sentenceID << " "; - outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " "; + outextractstr << "sentenceID=" << sentence.sentenceID << " "; + outextractstrInv << "sentenceID=" << sentence.sentenceID << " "; + outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " "; } for(int fi=startF; fi<=endF; fi++) { |