Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicola Bertoldi <bertoldi@fbk.eu>2014-01-15 19:49:57 +0400
committerNicola Bertoldi <bertoldi@fbk.eu>2014-01-15 19:49:57 +0400
commite452a13062400b09ff95af971c1424ac34cf1930 (patch)
tree11b52a2f1228f4e64fb31d061d7e564970140fd7
parentbd83999264407dd7970736ee3e70a6b39fb19014 (diff)
beautify
-rw-r--r--OnDiskPt/OnDiskWrapper.cpp54
-rw-r--r--OnDiskPt/OnDiskWrapper.h10
-rw-r--r--OnDiskPt/Phrase.cpp4
-rw-r--r--mert/Data.cpp8
-rw-r--r--mert/Optimizer.cpp20
-rw-r--r--mert/Point.cpp4
-rw-r--r--mert/Scorer.cpp4
-rw-r--r--mira/Main.cpp10
-rw-r--r--misc/1-1-Extraction.cpp52
-rw-r--r--misc/TransliterationMining.cpp927
-rw-r--r--moses-chart-cmd/IOWrapper.cpp26
-rw-r--r--moses-cmd/IOWrapper.cpp18
-rw-r--r--moses-cmd/LatticeMBRGrid.cpp2
-rw-r--r--moses-cmd/Main.cpp6
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp2
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingTable.cpp6
-rw-r--r--moses/FF/OSM-Feature/OpSequenceModel.cpp4
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp6
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp14
-rw-r--r--moses/TranslationModel/CYKPlusParser/DotChartInMemory.h3
-rw-r--r--moses/TranslationModel/CompactPT/BlockHashIndex.cpp8
-rw-r--r--moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp2
-rw-r--r--moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp2
-rw-r--r--moses/TranslationModel/CompactPT/PhraseTableCreator.cpp58
-rw-r--r--moses/TranslationModel/DynSAInclude/FileHandler.cpp8
-rw-r--r--moses/TranslationModel/DynSAInclude/RandLMFilter.h10
-rw-r--r--moses/TranslationModel/DynSAInclude/onlineRLM.h6
-rw-r--r--moses/TranslationModel/DynSAInclude/params.cpp4
-rw-r--r--moses/TranslationModel/DynSAInclude/quantizer.h6
-rw-r--r--moses/TranslationModel/DynSAInclude/vocab.cpp2
-rw-r--r--moses/TranslationModel/RuleTable/LoaderStandard.cpp6
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp2
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp24
-rw-r--r--moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp18
-rw-r--r--phrase-extract/InternalStructFeature.cpp99
-rw-r--r--phrase-extract/InternalStructFeature.h52
-rw-r--r--phrase-extract/PhraseExtractionOptions.h6
-rw-r--r--phrase-extract/ScoreFeature.cpp16
-rw-r--r--phrase-extract/ScoreFeatureTest.cpp2
-rw-r--r--phrase-extract/Util.cpp___28
-rw-r--r--phrase-extract/Util.h___140
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.cpp12
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.h2
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.cpp4
-rw-r--r--phrase-extract/extract-main.cpp8
45 files changed, 918 insertions, 787 deletions
diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp
index 0120802ac..12adffd03 100644
--- a/OnDiskPt/OnDiskWrapper.cpp
+++ b/OnDiskPt/OnDiskWrapper.cpp
@@ -59,28 +59,28 @@ bool OnDiskWrapper::OpenForLoad(const std::string &filePath)
{
m_fileSource.open((filePath + "/Source.dat").c_str(), ios::in | ios::binary);
UTIL_THROW_IF(!m_fileSource.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Source.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Source.dat");
m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::in | ios::binary);
UTIL_THROW_IF(!m_fileTargetInd.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/TargetInd.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/TargetInd.dat");
m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::in | ios::binary);
UTIL_THROW_IF(!m_fileTargetColl.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/TargetColl.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/TargetColl.dat");
m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::in);
UTIL_THROW_IF(!m_fileVocab.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Vocab.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Vocab.dat");
m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::in);
UTIL_THROW_IF(!m_fileMisc.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Misc.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Misc.dat");
// set up root node
LoadMisc();
@@ -124,46 +124,46 @@ void OnDiskWrapper::BeginSave(const std::string &filePath
m_fileSource.open((filePath + "/Source.dat").c_str(), ios::out | ios::in | ios::binary | ios::ate | ios::trunc);
UTIL_THROW_IF(!m_fileSource.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Source.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Source.dat");
m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc);
UTIL_THROW_IF(!m_fileTargetInd.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/TargetInd.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/TargetInd.dat");
m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc);
UTIL_THROW_IF(!m_fileTargetColl.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/TargetColl.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/TargetColl.dat");
m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::out | ios::ate | ios::trunc);
UTIL_THROW_IF(!m_fileVocab.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Vocab.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Vocab.dat");
m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::out | ios::ate | ios::trunc);
UTIL_THROW_IF(!m_fileMisc.is_open(),
- util::FileOpenException,
- "Couldn't open file " << filePath << "/Misc.dat");
+ util::FileOpenException,
+ "Couldn't open file " << filePath << "/Misc.dat");
// offset by 1. 0 offset is reserved
char c = 0xff;
m_fileSource.write(&c, 1);
UTIL_THROW_IF2(1 != m_fileSource.tellp(),
- "Couldn't write to stream m_fileSource");
+ "Couldn't write to stream m_fileSource");
m_fileTargetInd.write(&c, 1);
UTIL_THROW_IF2(1 != m_fileTargetInd.tellp(),
- "Couldn't write to stream m_fileTargetInd");
+ "Couldn't write to stream m_fileTargetInd");
m_fileTargetColl.write(&c, 1);
UTIL_THROW_IF2(1 != m_fileTargetColl.tellp(),
- "Couldn't write to stream m_fileTargetColl");
+ "Couldn't write to stream m_fileTargetColl");
// set up root node
UTIL_THROW_IF2(GetNumCounts() != 1,
- "Not sure what this is...");
+ "Not sure what this is...");
vector<float> counts(GetNumCounts());
counts[0] = DEFAULT_COUNT;
@@ -212,8 +212,8 @@ UINT64 OnDiskWrapper::GetMisc(const std::string &key) const
std::map<std::string, UINT64>::const_iterator iter;
iter = m_miscInfo.find(key);
UTIL_THROW_IF2(iter == m_miscInfo.end()
- , "Couldn't find value for key " << key
- );
+ , "Couldn't find value for key " << key
+ );
return iter->second;
}
@@ -238,7 +238,7 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &fact
break;
}
UTIL_THROW_IF2(factor == NULL,
- "Expecting factor " << factorType << " at position " << ind);
+ "Expecting factor " << factorType << " at position " << ind);
strme << "|" << factor->GetString();
} // for (size_t factorType
diff --git a/OnDiskPt/OnDiskWrapper.h b/OnDiskPt/OnDiskWrapper.h
index a31c473fa..5b7cdfe01 100644
--- a/OnDiskPt/OnDiskWrapper.h
+++ b/OnDiskPt/OnDiskWrapper.h
@@ -61,10 +61,12 @@ public:
, int numSourceFactors, int numTargetFactors, int numScores);
void EndSave();
- Vocab &GetVocab()
- { return m_vocab; }
- const Vocab &GetVocab() const
- { return m_vocab; }
+ Vocab &GetVocab() {
+ return m_vocab;
+ }
+ const Vocab &GetVocab() const {
+ return m_vocab;
+ }
size_t GetSourceWordSize() const;
size_t GetTargetWordSize() const;
diff --git a/OnDiskPt/Phrase.cpp b/OnDiskPt/Phrase.cpp
index a09c99fe5..c1933338b 100644
--- a/OnDiskPt/Phrase.cpp
+++ b/OnDiskPt/Phrase.cpp
@@ -35,8 +35,8 @@ void Phrase::AddWord(WordPtr word)
void Phrase::AddWord(WordPtr word, size_t pos)
{
- UTIL_THROW_IF2(!(pos < m_words.size()),
- "Trying to get word " << pos << " when phrase size is " << m_words.size());
+ UTIL_THROW_IF2(!(pos < m_words.size()),
+ "Trying to get word " << pos << " when phrase size is " << m_words.size());
m_words.insert(m_words.begin() + pos + 1, word);
}
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 095b26041..b93c3b6c0 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -263,13 +263,13 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
{
UTIL_THROW_IF(shard_count == 0, util::Exception, "Must have at least 1 shard");
UTIL_THROW_IF(shard_size < 0 || shard_size > 1,
- util::Exception,
- "Shard size must be between 0 and 1, inclusive. Currently " << shard_size);
+ util::Exception,
+ "Shard size must be between 0 and 1, inclusive. Currently " << shard_size);
size_t data_size = m_score_data->size();
UTIL_THROW_IF(data_size != m_feature_data->size(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
shard_size *= data_size;
const float coeff = static_cast<float>(data_size) / shard_count;
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 3f5aa48a6..5da32363f 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -168,8 +168,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// The rightmost bestindex is the one with the highest slope.
// They should be equal but there might be.
- UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001,
- util::Exception, "Error");
+ UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001,
+ util::Exception, "Error");
// A small difference due to rounding error
break;
}
@@ -191,8 +191,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
if (tit == previnserted) {
// The threshold is the same as before can happen if 2 candidates are the same for example.
UTIL_THROW_IF(previnserted->second.back().first != newd.first,
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
previnserted->second.back()=newd; // just replace the 1 best for sentence S
// previnsert doesn't change
} else {
@@ -207,8 +207,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// We append the diffs in previnsert to tit before destroying previnsert.
tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
UTIL_THROW_IF(tit->second.back().first != newd.first,
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
tit->second.back()=newd; // change diff for sentence S
thresholdmap.erase(previnserted); // erase old previnsert
previnserted = tit; // point previnsert to the new threshold
@@ -216,8 +216,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
}
UTIL_THROW_IF(previnserted == thresholdmap.end(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
} else { //normal insertion process
previnserted = AddThreshold(thresholdmap, leftmostx, newd);
}
@@ -254,8 +254,8 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
// We skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best.
UTIL_THROW_IF(scores.size() != thresholdmap.size(),
- util::Exception,
- "Error");
+ util::Exception,
+ "Error");
for (unsigned int sc = 0; sc != scores.size(); sc++) {
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 1db59ce66..55dc6a6b2 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -40,8 +40,8 @@ Point::Point(const vector<parameter_t>& init,
m_max[i] = max[i];
}
} else {
- UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error");
- UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error");
+ UTIL_THROW_IF(init.size() != m_pdim, util::Exception, "Error");
+ UTIL_THROW_IF(m_opt_indices.size() != Point::m_dim, util::Exception, "Error");
for (unsigned int i = 0; i < Point::m_dim; i++) {
operator[](i) = init[m_opt_indices[i]];
m_min[i] = min[m_opt_indices[i]];
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index ed3ff2458..ffaf03be4 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -25,9 +25,9 @@ const int kUnknownToken = -1;
Scorer::Scorer(const string& name, const string& config)
: m_name(name),
m_vocab(mert::VocabularyFactory::GetVocabulary()),
- #if defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
m_filter(NULL),
- #endif
+#endif
m_score_data(NULL),
m_enable_preserve_case(true)
{
diff --git a/mira/Main.cpp b/mira/Main.cpp
index 73daf5a99..9de8f9cdb 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -1238,15 +1238,15 @@ int main(int argc, char** argv)
cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl;
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(
- weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0],
- bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0],
- modelScoresFear[0][0], learning_rate, rank, epoch);
+ weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0],
+ bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0],
+ modelScoresFear[0][0], learning_rate, rank, epoch);
} else {
cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl;
cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl;
update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope,
- featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope,
- modelScoresFear, learning_rate, rank, epoch);
+ featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope,
+ modelScoresFear, learning_rate, rank, epoch);
}
} else {
// model_hope_fear
diff --git a/misc/1-1-Extraction.cpp b/misc/1-1-Extraction.cpp
index 459fa4900..cf3817abf 100644
--- a/misc/1-1-Extraction.cpp
+++ b/misc/1-1-Extraction.cpp
@@ -105,13 +105,12 @@ void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, s
int tgt;
ceptsInPhrase.clear();
int res;
-
- for (int j=0; j<alignment.size(); j+=1)
- {
- res = alignment[j].find("-");
- mAlign.push_back(alignment[j].substr(0,res));
- mAlign.push_back(alignment[j].substr(res+1));
- }
+
+ for (int j=0; j<alignment.size(); j+=1) {
+ res = alignment[j].find("-");
+ mAlign.push_back(alignment[j].substr(0,res));
+ mAlign.push_back(alignment[j].substr(res+1));
+ }
for (int j=0; j<mAlign.size(); j+=2) {
align.push_back(stringToInteger(mAlign[j+1]));
@@ -166,29 +165,26 @@ void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, s
void getOneToOne(vector < pair < set <int> , set <int> > > & ceptsInPhrase , vector <string> & currF , vector <string> & currE, set <string> & one)
{
- string temp;
-
- for (int i = 0; i< ceptsInPhrase.size(); i++)
- {
- if (ceptsInPhrase[i].first.size() == 1 && ceptsInPhrase[i].second.size() == 1)
- {
- temp = currF[(*ceptsInPhrase[i].second.begin())] + "\t" + currE[(*ceptsInPhrase[i].first.begin())];
-
- if (one.find(temp) == one.end())
- one.insert(temp);
- }
- }
-
+ string temp;
+
+ for (int i = 0; i< ceptsInPhrase.size(); i++) {
+ if (ceptsInPhrase[i].first.size() == 1 && ceptsInPhrase[i].second.size() == 1) {
+ temp = currF[(*ceptsInPhrase[i].second.begin())] + "\t" + currE[(*ceptsInPhrase[i].first.begin())];
+
+ if (one.find(temp) == one.end())
+ one.insert(temp);
+ }
+ }
+
}
void printOneToOne ( set <string> & one)
{
- set <string> :: iterator iter;
+ set <string> :: iterator iter;
- for (iter = one.begin(); iter != one.end(); iter++)
- {
- cout<<*iter<<endl;
- }
+ for (iter = one.begin(); iter != one.end(); iter++) {
+ cout<<*iter<<endl;
+ }
}
int main(int argc, char * argv[])
@@ -219,11 +215,11 @@ int main(int argc, char * argv[])
getWords(e[i],currE);
getWords(f[i],currF);
getWords(a[i],currA);
-
+
cerr<<"Processing "<<i<<endl;
constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size());
getOneToOne(ceptsInPhrase , currF , currE, one);
-
+
/*
cout<<"________________________________________"<<endl;
@@ -234,7 +230,7 @@ int main(int argc, char * argv[])
}
- printOneToOne(one);
+ printOneToOne(one);
return 0;
diff --git a/misc/TransliterationMining.cpp b/misc/TransliterationMining.cpp
index ec272b93a..8c2291864 100644
--- a/misc/TransliterationMining.cpp
+++ b/misc/TransliterationMining.cpp
@@ -1,12 +1,12 @@
- /*
+/*
########################################################################################
- Transliteration Mining - A Program to Extract Transliteration Pairs from
- a bilingual word list
- Source Contributor: Nadir Durrani
+Transliteration Mining - A Program to Extract Transliteration Pairs from
+a bilingual word list
+Source Contributor: Nadir Durrani
########################################################################################
-
+
*/
#include <cstdlib>
@@ -25,419 +25,392 @@ using namespace std;
double initTransitionProb;
double LAMBDA;
-double addLogProbs(double A , double B) // this function adds probabilities ...
+double addLogProbs(double A , double B) // this function adds probabilities ...
{
-
- if (A == B)
- return (A + log10(2.0));
-
- if (A > B)
- {
- if (A - B > 6) // A is a lot bigger ...
- return A;
- else
- return (A + log10(1+pow(10,(B-A))));
- }
-
- else // B > A
- {
- if (B - A > 6)
- return B;
- else
- return (B + log10(1+pow(10,(A-B))));
- }
-
+
+ if (A == B)
+ return (A + log10(2.0));
+
+ if (A > B) {
+ if (A - B > 6) // A is a lot bigger ...
+ return A;
+ else
+ return (A + log10(1+pow(10,(B-A))));
+ }
+
+ else { // B > A
+ if (B - A > 6)
+ return B;
+ else
+ return (B + log10(1+pow(10,(A-B))));
+ }
+
}
class NodeStructure
{
- public:
-
- NodeStructure(){};
- NodeStructure(vector <string> & s , vector <string> & t);
- double getPosterior(){return PPR;}
- void computeFwdBckProbs(map <string , double> & gammas, map <string, double> & alignmentCounts);
- void computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams);
- void print();
-
- vector <string> source;
- vector <string> target;
- ~NodeStructure(){};
-
- private:
-
- double NTR; // Non-transliteration probability of a sentence pair ...
- double PPR; // Posterior Probability ...
- double ALPHA;
- double BETA;
-
- void computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts);
- double computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents);
- double FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents);
- double BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & chidren);
- double computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children);
- void getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges);
- void getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges);
- double getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge);
- void updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta);
- void computePosteriorProb();
- double scaleGamma(double g);
- void getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3);
-
+public:
+
+ NodeStructure() {};
+ NodeStructure(vector <string> & s , vector <string> & t);
+ double getPosterior() {
+ return PPR;
+ }
+ void computeFwdBckProbs(map <string , double> & gammas, map <string, double> & alignmentCounts);
+ void computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams);
+ void print();
+
+ vector <string> source;
+ vector <string> target;
+ ~NodeStructure() {};
+
+private:
+
+ double NTR; // Non-transliteration probability of a sentence pair ...
+ double PPR; // Posterior Probability ...
+ double ALPHA;
+ double BETA;
+
+ void computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts);
+ double computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents);
+ double FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents);
+ double BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & chidren);
+ double computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children);
+ void getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges);
+ void getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges);
+ double getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge);
+ void updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta);
+ void computePosteriorProb();
+ double scaleGamma(double g);
+ void getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3);
+
};
void NodeStructure :: print()
{
-
- for (int i = 0; i < source.size(); i++)
- cout<<source[i];
-
- cout<<"\t";
- for (int i = 0; i < target.size(); i++)
- cout<<target[i];
+ for (int i = 0; i < source.size(); i++)
+ cout<<source[i];
+
+ cout<<"\t";
+
+ for (int i = 0; i < target.size(); i++)
+ cout<<target[i];
- cout<<"\t"<<pow(10,PPR)<<endl;
+ cout<<"\t"<<pow(10,PPR)<<endl;
}
NodeStructure :: NodeStructure(vector <string> & s , vector <string> & t)
{
- source = s;
- target = t;
+ source = s;
+ target = t;
}
void NodeStructure :: getEdge (pair <int , int> & v1 , pair <int , int> & v2 , pair <int , int> & v3)
{
- if (v2.first - v1.first == 0)
- v3.first = -1;
- else
- v3.first = v2.first;
-
- if (v2.second - v1.second == 0)
- v3.second = -1;
- else
- v3.second = v2.second;
+ if (v2.first - v1.first == 0)
+ v3.first = -1;
+ else
+ v3.first = v2.first;
+
+ if (v2.second - v1.second == 0)
+ v3.second = -1;
+ else
+ v3.second = v2.second;
}
void NodeStructure :: computeGammaForEdges(map < pair <int , int> , double > & parents, map < pair <int , int> , double > & children , map <string, double> & transitionProbs , map <string, double> & alignmentCounts)
{
- vector < pair < int , int> > incomingEdges;
- map < pair <int , int> , double > :: iterator cIter;
- map < pair <int , int> , double > :: iterator pIter;
- pair <int , int> ST = make_pair (-1,-1);
- pair <int , int> edge;
-
- children.erase(ST);
- double tProb;
- double alpha;
- double beta;
-
- for (cIter = children.begin(); cIter != children.end(); cIter++)
- {
- ST = cIter->first;
-
- getIncomingEdges (ST , incomingEdges);
- beta = cIter->second;
-
- for (int i = 0; i< incomingEdges.size(); i++)
- {
- pIter = parents.find(incomingEdges[i]);
-
- alpha = pIter->second;
- getEdge (incomingEdges[i] , ST , edge);
-
- updateAlignmentCount(transitionProbs, alignmentCounts , edge , alpha , beta);
- }
- }
+ vector < pair < int , int> > incomingEdges;
+ map < pair <int , int> , double > :: iterator cIter;
+ map < pair <int , int> , double > :: iterator pIter;
+ pair <int , int> ST = make_pair (-1,-1);
+ pair <int , int> edge;
+
+ children.erase(ST);
+ double tProb;
+ double alpha;
+ double beta;
+
+ for (cIter = children.begin(); cIter != children.end(); cIter++) {
+ ST = cIter->first;
+
+ getIncomingEdges (ST , incomingEdges);
+ beta = cIter->second;
+
+ for (int i = 0; i< incomingEdges.size(); i++) {
+ pIter = parents.find(incomingEdges[i]);
+
+ alpha = pIter->second;
+ getEdge (incomingEdges[i] , ST , edge);
+
+ updateAlignmentCount(transitionProbs, alignmentCounts , edge , alpha , beta);
+ }
+ }
}
-void NodeStructure :: computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams)
+void NodeStructure :: computeNonTransliterationProb (map <string , double> & sourceUnigrams , map <string , double> & targetUnigrams)
{
-
- NTR = 0.0;
-
- for (int i = 0; i < source.size(); i++)
- {
- NTR += sourceUnigrams[source[i]];
- }
-
- for (int i = 0; i < target.size(); i++)
- {
-
- NTR += targetUnigrams[target[i]];
- }
+
+ NTR = 0.0;
+
+ for (int i = 0; i < source.size(); i++) {
+ NTR += sourceUnigrams[source[i]];
+ }
+
+ for (int i = 0; i < target.size(); i++) {
+
+ NTR += targetUnigrams[target[i]];
+ }
}
double NodeStructure :: scaleGamma(double g)
{
- double translit = log10 (1 - pow (10, PPR));
- return g + translit;
+ double translit = log10 (1 - pow (10, PPR));
+ return g + translit;
}
void NodeStructure :: computePosteriorProb()
{
- double LAMBDA2 = log10(1 - pow(10, LAMBDA));
- double transliterate = LAMBDA2 + ALPHA; // Transliteration Prob ...
- double translate = LAMBDA + NTR; // Translation Prob ...
- double trans = transliterate - translate;
- //cout<<LAMBDA<<" "<<LAMBDA2<<endl;
- //cout<<transliterate<<" "<<translate<<" "<<trans<<endl;
- //cout<<pow(10 , trans)<<endl;
- double prob = 1/(1+ pow(10 , trans));
- PPR = log10(prob);
-
- //cout<<"Posterior Prob "<<PPR<<endl;
+ double LAMBDA2 = log10(1 - pow(10, LAMBDA));
+ double transliterate = LAMBDA2 + ALPHA; // Transliteration Prob ...
+ double translate = LAMBDA + NTR; // Translation Prob ...
+ double trans = transliterate - translate;
+ //cout<<LAMBDA<<" "<<LAMBDA2<<endl;
+ //cout<<transliterate<<" "<<translate<<" "<<trans<<endl;
+ //cout<<pow(10 , trans)<<endl;
+ double prob = 1/(1+ pow(10 , trans));
+ PPR = log10(prob);
+
+ //cout<<"Posterior Prob "<<PPR<<endl;
}
-
+
void NodeStructure :: computeFwdBckProbs(map <string , double> & gammas , map <string, double> & alignmentCounts)
{
- pair <int , int> START = make_pair (source.size()-1 , target.size()-1);
- pair <int , int> END = make_pair (-1 , -1);
+ pair <int , int> START = make_pair (source.size()-1 , target.size()-1);
+ pair <int , int> END = make_pair (-1 , -1);
- map < pair <int , int> , double > parents;
- parents[make_pair(-1,-1)] = 0.0;
- map < pair <int , int> , double > children;
- children[make_pair(source.size()-1,target.size()-1)] = 0.0;
+ map < pair <int , int> , double > parents;
+ parents[make_pair(-1,-1)] = 0.0;
+ map < pair <int , int> , double > children;
+ children[make_pair(source.size()-1,target.size()-1)] = 0.0;
- ALPHA = computeFwdProbs(START , gammas, parents);
- BETA = computeBckProbs(END , gammas, children);
-
- computePosteriorProb();
- //cout<<"Alpha "<<ALPHA<<" Beta "<<BETA<<endl;
- computeGammaForEdges(parents , children , gammas , alignmentCounts);
+ ALPHA = computeFwdProbs(START , gammas, parents);
+ BETA = computeBckProbs(END , gammas, children);
+
+ computePosteriorProb();
+ //cout<<"Alpha "<<ALPHA<<" Beta "<<BETA<<endl;
+ computeGammaForEdges(parents , children , gammas , alignmentCounts);
}
void NodeStructure :: getIncomingEdges (pair <int , int> & ST , vector < pair < int , int> > & incomingEdges)
{
- incomingEdges.clear();
-
- if (ST.first == -1) // Source is NULL ..
- {
- incomingEdges.push_back(make_pair(ST.first , ST.second-1));
- }
- else if (ST.second == -1) // Target is NULL ...
- {
- incomingEdges.push_back(make_pair(ST.first-1 , ST.second));
- }
- else
- {
- incomingEdges.push_back(make_pair(ST.first , ST.second-1));
- incomingEdges.push_back(make_pair(ST.first-1 , ST.second));
- incomingEdges.push_back(make_pair(ST.first-1 , ST.second-1));
- }
+ incomingEdges.clear();
+
+ if (ST.first == -1) { // Source is NULL ..
+ incomingEdges.push_back(make_pair(ST.first , ST.second-1));
+ } else if (ST.second == -1) { // Target is NULL ...
+ incomingEdges.push_back(make_pair(ST.first-1 , ST.second));
+ } else {
+ incomingEdges.push_back(make_pair(ST.first , ST.second-1));
+ incomingEdges.push_back(make_pair(ST.first-1 , ST.second));
+ incomingEdges.push_back(make_pair(ST.first-1 , ST.second-1));
+ }
}
void NodeStructure :: getOutgoingEdges (pair <int , int> & ST , vector < pair < int , int> > & outgoingEdges)
{
- if (ST.first == source.size()-1) // Source is END ..
- {
- outgoingEdges.push_back(make_pair(ST.first , ST.second+1));
- }
- else if (ST.second == target.size()-1) // Target is END ...
- {
- outgoingEdges.push_back(make_pair(ST.first+1 , ST.second));
- }
- else
- {
- outgoingEdges.push_back(make_pair(ST.first , ST.second+1));
- outgoingEdges.push_back(make_pair(ST.first+1 , ST.second));
- outgoingEdges.push_back(make_pair(ST.first+1 , ST.second+1));
- }
+ if (ST.first == source.size()-1) { // Source is END ..
+ outgoingEdges.push_back(make_pair(ST.first , ST.second+1));
+ } else if (ST.second == target.size()-1) { // Target is END ...
+ outgoingEdges.push_back(make_pair(ST.first+1 , ST.second));
+ } else {
+ outgoingEdges.push_back(make_pair(ST.first , ST.second+1));
+ outgoingEdges.push_back(make_pair(ST.first+1 , ST.second));
+ outgoingEdges.push_back(make_pair(ST.first+1 , ST.second+1));
+ }
}
void NodeStructure :: updateAlignmentCount(map <string, double> & transitionProbs, map <string, double> & alignmentCounts , pair <int,int> & edge , double alpha , double beta)
{
- double tProb;
- double tgamma;
- double gamma;
- map <string , double> :: iterator aCounts;
- string query;
-
- if (edge.first == -1)
- query = "NULL";
- else
- query = source[edge.first];
-
- query += "-";
-
- if (edge.second == -1)
- query += "NULL";
- else
- query += target[edge.second];
-
- //cout<<" Query "<<query<<endl;
- if (transitionProbs.size() == 0)
- tProb = initTransitionProb;
- else
- tProb = transitionProbs[query];
-
-
- tgamma = alpha + tProb + beta - ALPHA;
- gamma = scaleGamma(tgamma);
- //cout<<alpha<<" "<<beta<<" "<<gamma<<endl;
- //cout<<tProb<<" "<<ALPHA<<endl;
-
- aCounts = alignmentCounts.find(query);
-
- if (aCounts == alignmentCounts.end())
- {
- alignmentCounts[query] = gamma;
- }
- else
- {
- double temp = aCounts->second;
- aCounts->second = addLogProbs(temp , gamma);
- }
-
+ double tProb;
+ double tgamma;
+ double gamma;
+ map <string , double> :: iterator aCounts;
+ string query;
+
+ if (edge.first == -1)
+ query = "NULL";
+ else
+ query = source[edge.first];
+
+ query += "-";
+
+ if (edge.second == -1)
+ query += "NULL";
+ else
+ query += target[edge.second];
+
+ //cout<<" Query "<<query<<endl;
+ if (transitionProbs.size() == 0)
+ tProb = initTransitionProb;
+ else
+ tProb = transitionProbs[query];
+
+
+ tgamma = alpha + tProb + beta - ALPHA;
+ gamma = scaleGamma(tgamma);
+ //cout<<alpha<<" "<<beta<<" "<<gamma<<endl;
+ //cout<<tProb<<" "<<ALPHA<<endl;
+
+ aCounts = alignmentCounts.find(query);
+
+ if (aCounts == alignmentCounts.end()) {
+ alignmentCounts[query] = gamma;
+ } else {
+ double temp = aCounts->second;
+ aCounts->second = addLogProbs(temp , gamma);
+ }
+
}
double NodeStructure :: getTransitionProb(map <string, double> & transitionProbs , pair <int,int> & edge)
{
- if (transitionProbs.size() == 0)
- return initTransitionProb;
-
- string query;
-
- if (edge.first == -1)
- query = "NULL";
- else
- query = source[edge.first];
-
- query += "-";
-
- if (edge.second == -1)
- query += "NULL";
- else
- query += target[edge.second];
-
- //cout<<" Query "<<query<<endl;
- return transitionProbs[query];
+ if (transitionProbs.size() == 0)
+ return initTransitionProb;
+
+ string query;
+
+ if (edge.first == -1)
+ query = "NULL";
+ else
+ query = source[edge.first];
+
+ query += "-";
+
+ if (edge.second == -1)
+ query += "NULL";
+ else
+ query += target[edge.second];
+
+ //cout<<" Query "<<query<<endl;
+ return transitionProbs[query];
}
double NodeStructure :: FwdProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & parents)
{
- double thisAlpha;
- double alpha = -2000;
- vector < pair < int , int> > incomingEdges;
- pair <int , int> edge;
-
-
- getIncomingEdges (TS , incomingEdges);
-
- for (int k = 0; k < incomingEdges.size(); k++)
- {
- thisAlpha = parents[incomingEdges[k]];
- getEdge (incomingEdges[k], TS , edge);
- thisAlpha += getTransitionProb(gammas , edge); // Get Transition Prob ...
- double temp = alpha;
- alpha = addLogProbs(temp , thisAlpha); // Sum of all parents * transition prob ..
- // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
- }
-
- return alpha;
+ double thisAlpha;
+ double alpha = -2000;
+ vector < pair < int , int> > incomingEdges;
+ pair <int , int> edge;
+
+
+ getIncomingEdges (TS , incomingEdges);
+
+ for (int k = 0; k < incomingEdges.size(); k++) {
+ thisAlpha = parents[incomingEdges[k]];
+ getEdge (incomingEdges[k], TS , edge);
+ thisAlpha += getTransitionProb(gammas , edge); // Get Transition Prob ...
+ double temp = alpha;
+ alpha = addLogProbs(temp , thisAlpha); // Sum of all parents * transition prob ..
+ // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
+ }
+
+ return alpha;
}
double NodeStructure :: computeFwdProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & parents)
{
-
- pair <int , int> TS;
- double alpha;
-
- for (int i = 0; i < source.size(); i++)
- {
- TS = make_pair (i , -1);
- alpha = FwdProb (TS, gammas, parents);
- parents[TS] = alpha;
- }
-
- for (int i = 0; i < target.size(); i++)
- {
- TS = make_pair (-1 , i);
- alpha = FwdProb (TS, gammas, parents);
- parents[TS] = alpha;
- }
-
- for (int i = 0; i < source.size(); i++)
- {
- for (int j = 0; j < target.size(); j++)
- {
- TS = make_pair (i , j);
- alpha = FwdProb (TS, gammas, parents);
- parents[TS] = alpha;
- }
- }
-
- return parents[ST];
+
+ pair <int , int> TS;
+ double alpha;
+
+ for (int i = 0; i < source.size(); i++) {
+ TS = make_pair (i , -1);
+ alpha = FwdProb (TS, gammas, parents);
+ parents[TS] = alpha;
+ }
+
+ for (int i = 0; i < target.size(); i++) {
+ TS = make_pair (-1 , i);
+ alpha = FwdProb (TS, gammas, parents);
+ parents[TS] = alpha;
+ }
+
+ for (int i = 0; i < source.size(); i++) {
+ for (int j = 0; j < target.size(); j++) {
+ TS = make_pair (i , j);
+ alpha = FwdProb (TS, gammas, parents);
+ parents[TS] = alpha;
+ }
+ }
+
+ return parents[ST];
}
double NodeStructure :: BckProb (pair <int , int> & TS, map <string , double> & gammas, map < pair <int , int> , double > & children)
{
- double thisBeta;
- double beta = -2000;
- vector < pair < int , int> > outgoingEdges;
- pair <int , int> edge;
-
- getOutgoingEdges (TS , outgoingEdges);
-
- for (int k = 0; k < outgoingEdges.size(); k++)
- {
- thisBeta = children[outgoingEdges[k]];
- getEdge (TS , outgoingEdges[k], edge);
- thisBeta += getTransitionProb(gammas , edge); // Get Transition Prob ...
- double temp = beta;
- beta = addLogProbs(temp , thisBeta); // Sum of all parents * transition prob ..
- // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
- }
-
- return beta;
+ double thisBeta;
+ double beta = -2000;
+ vector < pair < int , int> > outgoingEdges;
+ pair <int , int> edge;
+
+ getOutgoingEdges (TS , outgoingEdges);
+
+ for (int k = 0; k < outgoingEdges.size(); k++) {
+ thisBeta = children[outgoingEdges[k]];
+ getEdge (TS , outgoingEdges[k], edge);
+ thisBeta += getTransitionProb(gammas , edge); // Get Transition Prob ...
+ double temp = beta;
+ beta = addLogProbs(temp , thisBeta); // Sum of all parents * transition prob ..
+ // cout<<temp<<"+"<<thisAlpha<<"="<<alpha<<endl;
+ }
+
+ return beta;
}
double NodeStructure :: computeBckProbs(pair <int , int> & ST, map <string , double> & gammas, map < pair <int , int> , double > & children)
{
-
- pair <int , int> TS;
- double beta;
-
- for (int i = source.size()-2; i >= -1; i--)
- {
- TS = make_pair (i , target.size()-1);
- beta = BckProb (TS, gammas, children);
- children[TS] = beta;
- }
-
- for (int i = target.size()-2; i >=-1; i--)
- {
- TS = make_pair (source.size()-1 , i);
- beta = BckProb (TS, gammas, children);
- children[TS] = beta;
- }
-
- for (int i = source.size()-2 ; i >= -1 ; i--)
- {
- for (int j = target.size()-2 ; j >= -1; j--)
- {
- TS = make_pair (i , j);
- beta = BckProb (TS, gammas, children);
- children[TS] = beta;
- }
- }
-
- return children[ST];
+
+ pair <int , int> TS;
+ double beta;
+
+ for (int i = source.size()-2; i >= -1; i--) {
+ TS = make_pair (i , target.size()-1);
+ beta = BckProb (TS, gammas, children);
+ children[TS] = beta;
+ }
+
+ for (int i = target.size()-2; i >=-1; i--) {
+ TS = make_pair (source.size()-1 , i);
+ beta = BckProb (TS, gammas, children);
+ children[TS] = beta;
+ }
+
+ for (int i = source.size()-2 ; i >= -1 ; i--) {
+ for (int j = target.size()-2 ; j >= -1; j--) {
+ TS = make_pair (i , j);
+ beta = BckProb (TS, gammas, children);
+ children[TS] = beta;
+ }
+ }
+
+ return children[ST];
}
@@ -445,204 +418,188 @@ double NodeStructure :: computeBckProbs(pair <int , int> & ST, map <string , dou
void loadInput(const char * fileName, vector <string> & input)
{
- /* This function loads a file into a vector of strings */
-
- ifstream sr (fileName);
- string line;
-
- if(sr.is_open())
- {
- while(getline(sr , line ))
- {
- input.push_back(line);
- }
-
- sr.close();
- }
- else
- {
- cout<<"Unable to read "<<fileName<<endl;
- exit(1);
- }
+ /* This function loads a file into a vector of strings */
+
+ ifstream sr (fileName);
+ string line;
+
+ if(sr.is_open()) {
+ while(getline(sr , line )) {
+ input.push_back(line);
+ }
+
+ sr.close();
+ } else {
+ cout<<"Unable to read "<<fileName<<endl;
+ exit(1);
+ }
}
void printGammas(map <string, double> & alignmentCounts)
{
- map <string , double> :: iterator aCounts;
+ map <string , double> :: iterator aCounts;
- for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++)
- {
- cout<<aCounts->first<<" "<<aCounts->second<<endl;
- }
+ for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) {
+ cout<<aCounts->first<<" "<<aCounts->second<<endl;
+ }
}
void getWords(string s, vector <string> & currInput)
{
- /* This function splits a string into vector of strings using space character as a delimiter */
+ /* This function splits a string into vector of strings using space character as a delimiter */
- istringstream iss(s);
- currInput.clear();
- do
- {
- string sub;
- iss >> sub;
- currInput.push_back(sub);
+ istringstream iss(s);
+ currInput.clear();
+ do {
+ string sub;
+ iss >> sub;
+ currInput.push_back(sub);
- } while (iss);
+ } while (iss);
- currInput.pop_back();
+ currInput.pop_back();
}
double getInitTransitionProb(int sourceToken, int targetToken)
{
- double prod = sourceToken * targetToken;
- return log10(1/prod);
+ double prod = sourceToken * targetToken;
+ return log10(1/prod);
}
void runIteration(map <int , NodeStructure> & graph , map <string , double> & gammas , int size)
{
- map <string, double> alignmentCounts;
- map <int , NodeStructure> :: iterator i;
- map <string , double> :: iterator aCounts;
- double sum = -2000.0;
- double tPPR = -2000.0;
-
- for (i = graph.begin(); i != graph.end(); i++)
- {
-
- i->second.computeFwdBckProbs(gammas , alignmentCounts);
- double temp = tPPR;
-
- tPPR = addLogProbs(graph[i->first].getPosterior() , temp);
-
- }
-
- for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++)
- {
- double temp = sum;
- sum = addLogProbs(aCounts->second, temp);
- }
-
-
- for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) // Normalizing ...
- {
- aCounts->second = aCounts->second - sum;
- }
-
- gammas.clear();
- gammas = alignmentCounts;
-
- LAMBDA = tPPR - log10(size);
+ map <string, double> alignmentCounts;
+ map <int , NodeStructure> :: iterator i;
+ map <string , double> :: iterator aCounts;
+ double sum = -2000.0;
+ double tPPR = -2000.0;
+
+ for (i = graph.begin(); i != graph.end(); i++) {
+
+ i->second.computeFwdBckProbs(gammas , alignmentCounts);
+ double temp = tPPR;
+
+ tPPR = addLogProbs(graph[i->first].getPosterior() , temp);
+
+ }
+
+ for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) {
+ double temp = sum;
+ sum = addLogProbs(aCounts->second, temp);
+ }
+
+
+ for (aCounts = alignmentCounts.begin(); aCounts != alignmentCounts.end(); aCounts++) { // Normalizing ...
+ aCounts->second = aCounts->second - sum;
+ }
+
+ gammas.clear();
+ gammas = alignmentCounts;
+
+ LAMBDA = tPPR - log10(size);
}
void setNTRProbabilities(map <int , NodeStructure> & graph , map <string , double> & sourceTypes , map <string , double > & targetTypes, double sourceTokens, double targetTokens)
{
-
- map <string , double> :: iterator i;
- map <int , NodeStructure> :: iterator j;
-
- for (i = sourceTypes.begin(); i!= sourceTypes.end(); i++)
- {
- i->second = log10(i->second/sourceTokens);
- }
+ map <string , double> :: iterator i;
+ map <int , NodeStructure> :: iterator j;
+
+
+ for (i = sourceTypes.begin(); i!= sourceTypes.end(); i++) {
+ i->second = log10(i->second/sourceTokens);
+ }
- for (i = targetTypes.begin(); i!= targetTypes.end(); i++)
- {
- i->second = log10(i->second/targetTokens);
- }
+ for (i = targetTypes.begin(); i!= targetTypes.end(); i++) {
+ i->second = log10(i->second/targetTokens);
+ }
- for (j = graph.begin(); j != graph.end(); j++)
- {
- j->second.computeNonTransliterationProb(sourceTypes , targetTypes);
- }
+ for (j = graph.begin(); j != graph.end(); j++) {
+ j->second.computeNonTransliterationProb(sourceTypes , targetTypes);
+ }
}
void printPosterior(map <int , NodeStructure> & graph)
{
- map <int , NodeStructure> :: iterator i;
+ map <int , NodeStructure> :: iterator i;
- for (i = graph.begin(); i != graph.end(); i++)
- graph[i->first].print();
+ for (i = graph.begin(); i != graph.end(); i++)
+ graph[i->first].print();
}
int main(int argc, char * argv[])
{
- vector <string> input;
- vector <string> source;
- vector <string> target;
- map <string , double> sourceTypes;
- map <string , double> targetTypes;
- set < vector <string> > tgt;
- set < vector <string> > src;
- double sourceTokens = 0;
- double targetTokens = 0;
- map <int , NodeStructure> graph;
- map <string , double> gammas;
-
- loadInput(argv[1],input);
-
- cerr<<"Constructing Graph "<<endl;
-
- for(int i=0; i<input.size(); i+=2)
- {
-
- //cerr<<input[i]<<endl;
- //cerr<<input[i+1]<<endl;
-
-
- getWords(input[i],source);
- getWords(input[i+1],target);
-
- if (src.find(source) == src.end())
- {
- for (int j = 0; j< source.size(); j++)
- sourceTypes[source[j]]++;
- src.insert(source);
- sourceTokens += source.size();
- }
-
- if (tgt.find(target) == tgt.end())
- {
- for (int j = 0; j< target.size(); j++)
- targetTypes[target[j]]++;
-
- tgt.insert(target);
- targetTokens += target.size();
- }
-
- NodeStructure obj (source,target);
- graph[i] = obj;
-
- }
-
- setNTRProbabilities(graph, sourceTypes, targetTypes, sourceTokens, targetTokens);
- initTransitionProb = getInitTransitionProb(sourceTypes.size()+1, targetTypes.size()+1);
-
- LAMBDA = log10(0.5);
-
-
- for (int i = 0; i< 10; i++)
- {
-
- cerr<<"Computing Probs : iteration "<<i+1<<endl;
- runIteration(graph , gammas , input.size()/2);
-
- }
-
- printPosterior(graph);
- cerr<<"Finished..."<<endl;
-
- return 0;
+ vector <string> input;
+ vector <string> source;
+ vector <string> target;
+ map <string , double> sourceTypes;
+ map <string , double> targetTypes;
+ set < vector <string> > tgt;
+ set < vector <string> > src;
+ double sourceTokens = 0;
+ double targetTokens = 0;
+ map <int , NodeStructure> graph;
+ map <string , double> gammas;
+
+ loadInput(argv[1],input);
+
+ cerr<<"Constructing Graph "<<endl;
+
+ for(int i=0; i<input.size(); i+=2) {
+
+ //cerr<<input[i]<<endl;
+ //cerr<<input[i+1]<<endl;
+
+
+ getWords(input[i],source);
+ getWords(input[i+1],target);
+
+ if (src.find(source) == src.end()) {
+ for (int j = 0; j< source.size(); j++)
+ sourceTypes[source[j]]++;
+ src.insert(source);
+ sourceTokens += source.size();
+ }
+
+ if (tgt.find(target) == tgt.end()) {
+ for (int j = 0; j< target.size(); j++)
+ targetTypes[target[j]]++;
+
+ tgt.insert(target);
+ targetTokens += target.size();
+ }
+
+ NodeStructure obj (source,target);
+ graph[i] = obj;
+
+ }
+
+ setNTRProbabilities(graph, sourceTypes, targetTypes, sourceTokens, targetTokens);
+ initTransitionProb = getInitTransitionProb(sourceTypes.size()+1, targetTypes.size()+1);
+
+ LAMBDA = log10(0.5);
+
+
+ for (int i = 0; i< 10; i++) {
+
+ cerr<<"Computing Probs : iteration "<<i+1<<endl;
+ runIteration(graph , gammas , input.size()/2);
+
+ }
+
+ printPosterior(graph);
+ cerr<<"Finished..."<<endl;
+
+ return 0;
}
diff --git a/moses-chart-cmd/IOWrapper.cpp b/moses-chart-cmd/IOWrapper.cpp
index 2716e1d53..c19363442 100644
--- a/moses-chart-cmd/IOWrapper.cpp
+++ b/moses-chart-cmd/IOWrapper.cpp
@@ -129,7 +129,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
- "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
+ "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
}
@@ -175,7 +175,7 @@ InputType*IOWrapper::GetInput(InputType* inputType)
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Cannot be empty phrase");
+ "Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
@@ -184,12 +184,12 @@ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<Fa
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
- "Empty factor 0 at position " << pos);
+ "Empty factor 0 at position " << pos);
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
- "Empty factor " << i << " at position " << pos);
+ "Empty factor " << i << " at position " << pos);
out << "|" << *factor;
}
@@ -233,7 +233,7 @@ void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, l
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i];
UTIL_THROW_IF(factor == NULL, util::Exception,
- "No factor at position " << i );
+ "No factor at position " << i );
cout << *factor << " ";
}
@@ -375,7 +375,7 @@ void IOWrapper::OutputDetailedTranslationReport(
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailOutputCollector == NULL,
- "No ouput file for detailed reports specified");
+ "No ouput file for detailed reports specified");
m_detailOutputCollector->Write(translationId, out.str());
}
@@ -392,7 +392,7 @@ void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
- "No output file for tree fragments specified");
+ "No output file for tree fragments specified");
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
}
@@ -426,7 +426,7 @@ void IOWrapper::OutputDetailedAllTranslationReport(
}
}
UTIL_THROW_IF2(m_detailAllOutputCollector == NULL,
- "No output file for details specified");
+ "No output file for details specified");
m_detailAllOutputCollector->Write(translationId, out.str());
}
@@ -454,7 +454,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
@@ -486,7 +486,7 @@ void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
Incremental::ToPhrase(applied, outPhrase);
// delete 1st & last
UTIL_THROW_IF2(outPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
@@ -575,7 +575,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long tran
// delete 1st & last
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@@ -651,7 +651,7 @@ void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long
Incremental::PhraseAndFeatures(*i, outputPhrase, features);
// <s> and </s>
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+ "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
@@ -817,7 +817,7 @@ size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypoth
size_t targetInd = 0;
for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
size_t sourceInd = targetPos2SourceInd[targetPos];
size_t sourcePos = sourceInd2pos[sourceInd];
diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp
index da2589acd..3e55f96e9 100644
--- a/moses-cmd/IOWrapper.cpp
+++ b/moses-cmd/IOWrapper.cpp
@@ -168,16 +168,16 @@ void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder
const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
UTIL_THROW_IF(!m_detailedTranslationReportingStream->good(),
- util::FileOpenException,
- "File for output of detailed translation report could not be open");
+ util::FileOpenException,
+ "File for output of detailed translation report could not be open");
}
// sentence alignment output
if (! staticData.GetAlignmentOutputFile().empty()) {
m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str());
UTIL_THROW_IF(!m_alignmentOutputStream->good(),
- util::FileOpenException,
- "File for output of word alignment could not be open");
+ util::FileOpenException,
+ "File for output of word alignment could not be open");
}
}
@@ -208,7 +208,7 @@ std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorTy
if (factor) {
std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
- "Placeholder should be aligned to 1, and only 1, word");
+ "Placeholder should be aligned to 1, and only 1, word");
ret[*targetPos.begin()] = factor;
}
}
@@ -223,7 +223,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
char reportSegmentation, bool reportAllFactors)
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
- "Must specific at least 1 output factor");
+ "Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = StaticData::Instance().GetMarkUnknown();
if (reportAllFactors == true) {
@@ -250,7 +250,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
}
UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << pos);
+ "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
@@ -263,7 +263,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
- "No factor " << i << " at position " << pos);
+ "No factor " << i << " at position " << pos);
out << "|" << *factor;
}
@@ -399,7 +399,7 @@ void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
UTIL_THROW_IF2(factor == NULL,
- "No factor 0 at position " << i);
+ "No factor 0 at position " << i);
if (i>0) out << " " << *factor;
else out << *factor;
}
diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp
index 4c6b421fc..39d88f34d 100644
--- a/moses-cmd/LatticeMBRGrid.cpp
+++ b/moses-cmd/LatticeMBRGrid.cpp
@@ -70,7 +70,7 @@ public:
void addParam(gridkey key, const string& arg, float defaultValue) {
m_args[arg] = key;
UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(),
- "Couldn't find value for key " << (int) key);
+ "Couldn't find value for key " << (int) key);
m_grid[key].push_back(defaultValue);
}
diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index eb4f20e0d..220b72a94 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -307,9 +307,9 @@ public:
out << m_source->GetTranslationId() << " ";
}
- if (staticData.GetReportSegmentation() == 2) {
- manager.GetOutputLanguageModelOrder(out, bestHypo);
- }
+ if (staticData.GetReportSegmentation() == 2) {
+ manager.GetOutputLanguageModelOrder(out, bestHypo);
+ }
OutputBestSurface(
out,
bestHypo,
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index aa29a4a12..e3d3da453 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -126,7 +126,7 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
- "Unknown direction: " << m_direction);
+ "Unknown direction: " << m_direction);
const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : m_prevScore;
diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
index d934e67d7..c1a295b94 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
@@ -268,7 +268,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co
}
if(m_FactorsC.empty()) {
- UTIL_THROW_IF2(1 != cands.size(), "Error");
+ UTIL_THROW_IF2(1 != cands.size(), "Error");
return cands[0].GetScore(0);
} else {
score = auxFindScoreForContext(cands, c);
@@ -283,7 +283,7 @@ Scores LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, co
Scores LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands, const Phrase& context)
{
if(m_FactorsC.empty()) {
- UTIL_THROW_IF2(cands.size() > 1, "Error");
+ UTIL_THROW_IF2(cands.size() > 1, "Error");
return (1 == cands.size())?(cands[0].GetScore(0)):(Scores());
} else {
@@ -384,7 +384,7 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
} else {
//sanity check ALL lines must have same number of tokens
UTIL_THROW_IF2(numTokens != tokens.size(),
- "Lines do not have the same number of tokens");
+ "Lines do not have the same number of tokens");
}
size_t phrase = 0;
for(; phrase < numKeyTokens; ++phrase) {
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index dfa380a77..0bb7aed95 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -21,7 +21,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
OpSequenceModel::~OpSequenceModel()
{
- delete OSM;
+ delete OSM;
}
void OpSequenceModel :: readLanguageModel(const char *lmFile)
@@ -199,7 +199,7 @@ FFState* OpSequenceModel::EvaluateChart(
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
- UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
+ UTIL_THROW2("Chart decoding not support by UTIL_THROW2");
}
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
index 64d3d87b4..ad6773fc8 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@@ -42,7 +42,7 @@ ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory(
, m_ruleTable(ruleTable)
{
UTIL_THROW_IF2(m_dottedRuleColls.size() != 0,
- "Dotted rule collection not correctly initialized");
+ "Dotted rule collection not correctly initialized");
size_t sourceSize = parser.GetSize();
m_dottedRuleColls.resize(sourceSize);
@@ -179,8 +179,8 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
dottedRuleCol.Add(relEndPos+1, dottedRule);
}
}
- // we only need to check once if a terminal matches the input at a given position.
- expandableDottedRuleListTerminalsOnly.erase(it);
+ // we only need to check once if a terminal matches the input at a given position.
+ expandableDottedRuleListTerminalsOnly.erase(it);
}
// list of rules that that cover the entire span
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
index ced36c186..8820890be 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
@@ -49,7 +49,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
, m_filePath(filePath)
{
UTIL_THROW_IF2(m_expandableDottedRuleListVec.size() != 0,
- "Dotted rule collection not correctly initialized");
+ "Dotted rule collection not correctly initialized");
size_t sourceSize = parser.GetSize();
m_expandableDottedRuleListVec.resize(sourceSize);
@@ -237,12 +237,12 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
targetPhraseCollection
- = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
- ,m_outputFactorsVec
- ,m_dictionary
- ,weightT
- ,m_dbWrapper.GetVocab()
- ,true);
+ = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
+ ,m_outputFactorsVec
+ ,m_dictionary
+ ,weightT
+ ,m_dbWrapper.GetVocab()
+ ,true);
delete tpcollBerkeleyDb;
m_cache[tpCollFilePos] = targetPhraseCollection;
diff --git a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
index 390d19b5c..a95b5599e 100644
--- a/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
+++ b/moses/TranslationModel/CYKPlusParser/DotChartInMemory.h
@@ -103,8 +103,7 @@ public:
if (dottedRule->GetLastNode().GetNonTerminalMap().empty() && !dottedRule->IsRoot()) {
size_t startPos = dottedRule->GetWordsRange().GetEndPos() + 1;
m_expandableDottedRuleListTerminalsOnly[startPos].push_back(dottedRule);
- }
- else {
+ } else {
m_expandableDottedRuleList.push_back(dottedRule);
}
}
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
index cd277ad0c..c90dcd6d9 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.cpp
@@ -366,10 +366,10 @@ void BlockHashIndex::CalcHash(size_t current, void* source_void)
if(lastKey > temp) {
if(source->nkeys != 2 || temp != "###DUMMY_KEY###") {
- std::stringstream strme;
- strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
- strme << "1: " << lastKey << std::endl;
- strme << "2: " << temp << std::endl;
+ std::stringstream strme;
+ strme << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
+ strme << "1: " << lastKey << std::endl;
+ strme << "2: " << temp << std::endl;
UTIL_THROW2(strme.str());
}
}
diff --git a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
index d771d3212..6a585d2a8 100644
--- a/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.cpp
@@ -206,7 +206,7 @@ std::string LexicalReorderingTableCreator::EncodeLine(std::vector<std::string>&
if(m_numScoreComponent != scores.size()) {
std::stringstream strme;
strme << "Error: Wrong number of scores detected ("
- << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
strme << "Line: " << tokens[0] << " ||| ... ||| " << scoresString << std::endl;
UTIL_THROW2(strme.str());
}
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 360b7a9fd..d6860a43b 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -99,7 +99,7 @@ void PhraseDictionaryCompact::Load()
phraseSize = m_targetPhrasesMapped.load(pFile, true);
UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
- "Not successfully loaded");
+ "Not successfully loaded");
}
// now properly declared in TargetPhraseCollection.h
diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
index 00983fd53..a2bd879b4 100644
--- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp
@@ -714,10 +714,10 @@ std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, siz
std::vector<float> scores = Tokenize<float>(scoresStr);
if(scores.size() != m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: Wrong number of scores detected ("
- << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
- strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[3] << " ..." << std::endl;
+ std::stringstream strme;
+ strme << "Error: Wrong number of scores detected ("
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ strme << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[3] << " ..." << std::endl;
UTIL_THROW2(strme.str());
}
@@ -1040,30 +1040,30 @@ void RankingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 4) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
std::vector<float> scores = Tokenize<float>(tokens[2]);
if(scores.size() != m_creator.m_numScoreComponent) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong number of scores ("
- << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
- UTIL_THROW2(strme.str());
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong number of scores ("
+ << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
+ UTIL_THROW2(strme.str());
}
float sortScore = scores[m_creator.m_sortScoreIndex];
@@ -1140,20 +1140,20 @@ void EncodingTask::operator()()
*it = Moses::Trim(*it);
if(tokens.size() < 3) {
- std::stringstream strme;
- strme << "Error: It seems the following line has a wrong format:" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line has a wrong format:" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
- std::stringstream strme;
- strme << "Error: It seems the following line contains no alignment information, " << std::endl;
- strme << "but you are using ";
- strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
- strme << " encoding which makes use of alignment data. " << std::endl;
- strme << "Use -encoding None" << std::endl;
- strme << "Line " << i << ": " << lines[i] << std::endl;
+ std::stringstream strme;
+ strme << "Error: It seems the following line contains no alignment information, " << std::endl;
+ strme << "but you are using ";
+ strme << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+ strme << " encoding which makes use of alignment data. " << std::endl;
+ strme << "Use -encoding None" << std::endl;
+ strme << "Line " << i << ": " << lines[i] << std::endl;
UTIL_THROW2(strme.str());
}
diff --git a/moses/TranslationModel/DynSAInclude/FileHandler.cpp b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
index 4e92ad907..5f9cd7c45 100644
--- a/moses/TranslationModel/DynSAInclude/FileHandler.cpp
+++ b/moses/TranslationModel/DynSAInclude/FileHandler.cpp
@@ -71,13 +71,13 @@ bool FileHandler::setStreamBuffer(bool checkExists)
{
// redirect stdin or stdout if necesary
if (path_ == FileHandler::kStdInDescriptor) {
- UTIL_THROW_IF2(flags_ & std::ios::in == 0,
- "Incorrect flags: " << flags_);
+ UTIL_THROW_IF2(flags_ & std::ios::in == 0,
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
- UTIL_THROW_IF2(flags_ & std::ios::out == 0,
- "Incorrect flags: " << flags_);
+ UTIL_THROW_IF2(flags_ & std::ios::out == 0,
+ "Incorrect flags: " << flags_);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
diff --git a/moses/TranslationModel/DynSAInclude/RandLMFilter.h b/moses/TranslationModel/DynSAInclude/RandLMFilter.h
index e8defb110..19566ff40 100644
--- a/moses/TranslationModel/DynSAInclude/RandLMFilter.h
+++ b/moses/TranslationModel/DynSAInclude/RandLMFilter.h
@@ -62,9 +62,9 @@ public:
address_mask_ = full_mask_ >> first_bit_;
}
Filter(FileHandler* fin, bool loaddata = true) : data_(NULL) {
- assert(loadHeader(fin));
+ assert(loadHeader(fin));
if (loaddata)
- assert(loadData(fin));
+ assert(loadData(fin));
}
virtual ~Filter() {
delete[] data_;
@@ -80,7 +80,7 @@ public:
}
// read / write functions
inline bool read(uint64_t address, T* value) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
// copy address to 'value'
uint64_t data_bit = address * width_;
uint32_t data_cell = (data_bit >> log_cell_width_); // % cells_;
@@ -102,7 +102,7 @@ public:
return true;
}
inline T read(uint64_t address) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
// return value at address
T value = 0;
uint64_t data_bit = address * width_;
@@ -124,7 +124,7 @@ public:
return value;
}
inline bool write(uint64_t address, T value) {
- assert(address <= addresses_);
+ assert(address <= addresses_);
assert(log2(value) <= width_);
// write 'value' to address
uint64_t data_bit = address * width_;
diff --git a/moses/TranslationModel/DynSAInclude/onlineRLM.h b/moses/TranslationModel/DynSAInclude/onlineRLM.h
index 929602399..cd9ed8a87 100644
--- a/moses/TranslationModel/DynSAInclude/onlineRLM.h
+++ b/moses/TranslationModel/DynSAInclude/onlineRLM.h
@@ -148,8 +148,8 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len)
//markQueried(hpdItr); // mark this event as "hit"
value -= ((value & this->hitMask_) != 0) ? this->hitMask_ : 0; // check for previous hit marks
} else {
- UTIL_THROW_IF2(filterIdx >= this->cells_,
- "Out of bound: " << filterIdx);
+ UTIL_THROW_IF2(filterIdx >= this->cells_,
+ "Out of bound: " << filterIdx);
//markQueried(filterIdx);
}
}
@@ -341,7 +341,7 @@ const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len)
int dummy(0);
float**addresses = new float*[len]; // only interested in addresses of cache
UTIL_THROW_IF2(cache_->getCache2(ngram, len, &addresses[0], &dummy) != len,
- "Error");
+ "Error");
// return address of cache node
float *addr0 = addresses[0];
diff --git a/moses/TranslationModel/DynSAInclude/params.cpp b/moses/TranslationModel/DynSAInclude/params.cpp
index 27cf06a51..4696258f8 100644
--- a/moses/TranslationModel/DynSAInclude/params.cpp
+++ b/moses/TranslationModel/DynSAInclude/params.cpp
@@ -65,10 +65,10 @@ bool Parameters::loadParams(int argc, char ** argv)
if( getValueType(param) == kBoolValue ) {
jumpBy = 1;
UTIL_THROW_IF2(!setParamValue(param, kTrueValue),
- "Couldn't set parameter " << param);
+ "Couldn't set parameter " << param);
} else { //not of type bool so must have corresponding value
UTIL_THROW_IF2(i+1 >= argc,
- "Out of bound error: " << i+1);
+ "Out of bound error: " << i+1);
jumpBy = 2;
std::string val = argv[i+1];
diff --git a/moses/TranslationModel/DynSAInclude/quantizer.h b/moses/TranslationModel/DynSAInclude/quantizer.h
index 002535dd0..6dbcc3cc4 100644
--- a/moses/TranslationModel/DynSAInclude/quantizer.h
+++ b/moses/TranslationModel/DynSAInclude/quantizer.h
@@ -17,7 +17,7 @@ class LogQtizer
{
public:
LogQtizer(float i): base_(pow(2, 1 / i)) {
- UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1");
+ UTIL_THROW_IF2(base_ <= 1, "Can't calculate log base less than 1");
max_code_ = 0;
float value = 1; // code = 1 -> value = 1 for any base
std::vector<float> code_to_value_vec;
@@ -40,13 +40,13 @@ public:
std::cerr << "Initialized quantization (size = " << max_code_ + 1 << ")" << std::endl;
}
LogQtizer(FileHandler* fin) {
- UTIL_THROW_IF2(fin == NULL, "Null file handle");
+ UTIL_THROW_IF2(fin == NULL, "Null file handle");
load(fin);
}
int code(float value) {
// should just be: return log_b(value)
UTIL_THROW_IF2(value < min_value_ || value > max_value_,
- "Value " << value << " out of bound");
+ "Value " << value << " out of bound");
// but binary search removes errors due to floor operator above
int code = static_cast<int>(std::lower_bound(code_to_value_, code_to_value_+ max_code_,
diff --git a/moses/TranslationModel/DynSAInclude/vocab.cpp b/moses/TranslationModel/DynSAInclude/vocab.cpp
index 1e6c92203..b717f533c 100644
--- a/moses/TranslationModel/DynSAInclude/vocab.cpp
+++ b/moses/TranslationModel/DynSAInclude/vocab.cpp
@@ -134,7 +134,7 @@ bool Vocab::Load(FileHandler* vcbin, const FactorDirection& direction,
if (id == 0 && word != GetkOOVWord())
id = m_ids2words.size() + 1; // assign ids sequentially starting from 1
UTIL_THROW_IF2(m_ids2words.count(id) != 0 || m_words2ids.count(word) != 0,
- "Error");
+ "Error");
m_ids2words[id] = word;
m_words2ids[word] = id;
diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
index 9d2e3fa20..b6e779078 100644
--- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp
+++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@@ -73,7 +73,7 @@ void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t
// no-term
vector<string> split = Tokenize(tok, ",");
UTIL_THROW_IF2(split.size() != 2,
- "Incorrectly formmatted non-terminal: " << tok);
+ "Incorrectly formmatted non-terminal: " << tok);
tok = "[X]" + split[0] + "]";
size_t coIndex = Scan<size_t>(split[1]);
@@ -100,7 +100,7 @@ void ReformateHieroScore(string &scoreString)
string &tok = toks[i];
vector<string> nameValue = Tokenize(tok, "=");
UTIL_THROW_IF2(nameValue.size() != 2,
- "Incorrectly formatted score: " << tok);
+ "Incorrectly formatted score: " << tok);
float score = Scan<float>(nameValue[1]);
score = exp(-score);
@@ -212,7 +212,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
// parse source & find pt node
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
index 81fd43fcb..4c2f4d186 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp
@@ -50,7 +50,7 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
*this);
UTIL_THROW_IF2(ret == NULL,
- "Rules not successfully loaded for sentence id " << translationId);
+ "Rules not successfully loaded for sentence id " << translationId);
}
void PhraseDictionaryALSuffixArray::CleanUpAfterSentenceProcessing(const InputType &source)
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
index 7cb26ad88..5d8c95428 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -84,13 +84,13 @@ PhraseDictionaryFuzzyMatch::
SetParameter(const std::string& key, const std::string& value)
{
if (key == "source") {
- m_config[0] = value;
+ m_config[0] = value;
} else if (key == "target") {
- m_config[1] = value;
+ m_config[1] = value;
} else if (key == "alignment") {
- m_config[2] = value;
+ m_config[2] = value;
} else {
- PhraseDictionary::SetParameter(key, value);
+ PhraseDictionary::SetParameter(key, value);
}
}
@@ -150,7 +150,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
char dirName[] = "/tmp/moses.XXXXXX";
char *temp = mkdtemp(dirName);
UTIL_THROW_IF2(temp == NULL,
- "Couldn't create temporary directory " << dirName);
+ "Couldn't create temporary directory " << dirName);
string dirNameStr(dirName);
@@ -218,11 +218,11 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
const size_t numScoreComponents = GetNumScoreComponents();
if (scoreVector.size() != numScoreComponents) {
UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
- << numScoreComponents << ") of score components on line " << count);
+ << numScoreComponents << ") of score components on line " << count);
}
UTIL_THROW_IF2(scoreVector.size() != numScoreComponents,
- "Number of scores incorrectly specified");
+ "Number of scores incorrectly specified");
// parse source & find pt node
@@ -298,9 +298,9 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi
const Word &sourceNonTerm = word;
UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
- "No alignment for non-term at position " << pos);
+ "No alignment for non-term at position " << pos);
UTIL_THROW_IF2(iterAlign->first != pos,
- "Alignment info incorrect at position " << pos);
+ "Alignment info incorrect at position " << pos);
size_t targetNonTermInd = iterAlign->second;
++iterAlign;
@@ -312,7 +312,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDi
}
UTIL_THROW_IF2(currNode == NULL,
- "Node not found at position " << pos);
+ "Node not found at position " << pos);
}
@@ -338,7 +338,7 @@ const PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(long t
{
std::map<long, PhraseDictionaryNodeMemory>::const_iterator iter = m_collection.find(translationId);
UTIL_THROW_IF2(iter == m_collection.end(),
- "Couldn't find root node for input: " << translationId);
+ "Couldn't find root node for input: " << translationId);
return iter->second;
}
PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source)
@@ -346,7 +346,7 @@ PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputT
long transId = source.GetTranslationId();
std::map<long, PhraseDictionaryNodeMemory>::iterator iter = m_collection.find(transId);
UTIL_THROW_IF2(iter == m_collection.end(),
- "Couldn't find root node for input: " << transId);
+ "Couldn't find root node for input: " << transId);
return iter->second;
}
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
index ea60d4d23..778e349d2 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
@@ -85,20 +85,20 @@ void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
obj->BeginLoad(m_filePath);
UTIL_THROW_IF2(obj->GetMisc("Version") != OnDiskPt::OnDiskWrapper::VERSION_NUM,
- "On-disk phrase table is version " << obj->GetMisc("Version")
- << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);
+ "On-disk phrase table is version " << obj->GetMisc("Version")
+ << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);
UTIL_THROW_IF2(obj->GetMisc("NumSourceFactors") != m_input.size(),
- "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors."
- << ". The ini file specified " << m_input.size() << " source factors");
+ "On-disk phrase table has " << obj->GetMisc("NumSourceFactors") << " source factors."
+ << ". The ini file specified " << m_input.size() << " source factors");
UTIL_THROW_IF2(obj->GetMisc("NumTargetFactors") != m_output.size(),
- "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors."
- << ". The ini file specified " << m_output.size() << " target factors");
+ "On-disk phrase table has " << obj->GetMisc("NumTargetFactors") << " target factors."
+ << ". The ini file specified " << m_output.size() << " target factors");
UTIL_THROW_IF2(obj->GetMisc("NumScores") != m_numScoreComponents,
- "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores."
- << ". The ini file specified " << m_numScoreComponents << " scores");
+ "On-disk phrase table has " << obj->GetMisc("NumScores") << " scores."
+ << ". The ini file specified " << m_numScoreComponents << " scores");
m_implementation.reset(obj);
}
@@ -194,7 +194,7 @@ const TargetPhraseCollection *PhraseDictionaryOnDisk::GetTargetPhraseCollectionN
const OnDiskPt::TargetPhraseCollection *targetPhrasesOnDisk = ptNode->GetTargetPhraseCollection(m_tableLimit, wrapper);
TargetPhraseCollection *targetPhrases
- = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
+ = targetPhrasesOnDisk->ConvertToMoses(m_input, m_output, *this, weightT, vocab, false);
delete targetPhrasesOnDisk;
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp
index e0e9fd3e2..e5071cbba 100644
--- a/phrase-extract/InternalStructFeature.cpp
+++ b/phrase-extract/InternalStructFeature.cpp
@@ -6,67 +6,72 @@ namespace MosesTraining
{
InternalStructFeature::InternalStructFeature()
- :m_type(0){
- //cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
+ :m_type(0)
+{
+ //cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
}
-bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{
- //cout<<"InternalStructFeature: Equals\n";
- //don't know what it's used for and what we should compare
- //-> if the dense score is the same
- //-> if the sparse feature is set
- // compare phrases? with the internalStrucutre string?
- /** Return true if the two phrase pairs are equal from the point of this feature. Assume
- that they already compare true according to PhraseAlignment.equals()
- **/
+bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const
+{
+ //cout<<"InternalStructFeature: Equals\n";
+ //don't know what it's used for and what we should compare
+ //-> if the dense score is the same
+ //-> if the sparse feature is set
+ // compare phrases? with the internalStrucutre string?
+ /** Return true if the two phrase pairs are equal from the point of this feature. Assume
+ that they already compare true according to PhraseAlignment.equals()
+ **/
-/* if(lhs.ghkmParse==rhs.ghkmParse)
- return true;
- else
- return false;
-*/
- //return true;
+ /* if(lhs.ghkmParse==rhs.ghkmParse)
+ return true;
+ else
+ return false;
+ */
+ //return true;
}
void InternalStructFeature::add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const{
- for(size_t i=0; i<context.phrasePair.size(); i++) {
- add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues);
- }
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
+{
+ for(size_t i=0; i<context.phrasePair.size(); i++) {
+ add(&context.phrasePair[i]->treeFragment, denseValues, sparseValues);
+ }
}
void InternalStructFeatureDense::add(std::string *internalStruct,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const{
- //cout<<"Dense: "<<*internalStruct<<endl;
- size_t start=0;
- int countNP=0;
- while((start = internalStruct->find("NP", start)) != string::npos) {
- countNP++;
- start+=2; //length of "NP"
- }
- //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
- //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
- denseValues.push_back(exp(countNP));
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
+{
+ //cout<<"Dense: "<<*internalStruct<<endl;
+ size_t start=0;
+ int countNP=0;
+ while((start = internalStruct->find("NP", start)) != string::npos) {
+ countNP++;
+ start+=2; //length of "NP"
+ }
+ //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
+ //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
+ denseValues.push_back(exp(countNP));
}
void InternalStructFeatureSparse::add(std::string *internalStruct,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const{
- //cout<<"Sparse: "<<*internalStruct<<endl;
- if(internalStruct->find("VBZ")!=std::string::npos)
- sparseValues["NTVBZ"] = 1;
- if(internalStruct->find("VBD")!=std::string::npos)
- sparseValues["NTVBD"] = 1;
- if(internalStruct->find("VBP")!=std::string::npos)
- sparseValues["NTVBP"] = 1;
- if(internalStruct->find("PP")!=std::string::npos)
- sparseValues["NTPP"] = 1;
- if(internalStruct->find("SBAR")!=std::string::npos)
- sparseValues["NTSBAR"] = 1;
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const
+{
+ //cout<<"Sparse: "<<*internalStruct<<endl;
+ if(internalStruct->find("VBZ")!=std::string::npos)
+ sparseValues["NTVBZ"] = 1;
+ if(internalStruct->find("VBD")!=std::string::npos)
+ sparseValues["NTVBD"] = 1;
+ if(internalStruct->find("VBP")!=std::string::npos)
+ sparseValues["NTVBP"] = 1;
+ if(internalStruct->find("PP")!=std::string::npos)
+ sparseValues["NTPP"] = 1;
+ if(internalStruct->find("SBAR")!=std::string::npos)
+ sparseValues["NTSBAR"] = 1;
}
diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h
index bd513a715..fe6dedc15 100644
--- a/phrase-extract/InternalStructFeature.h
+++ b/phrase-extract/InternalStructFeature.h
@@ -21,46 +21,50 @@ namespace MosesTraining
class InternalStructFeature : public ScoreFeature
{
public:
- InternalStructFeature();
- /** Return true if the two phrase pairs are equal from the point of this feature. Assume
- that they already compare true according to PhraseAlignment.equals()
- **/
- bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
- /** Add the values for this feature function. */
- void add(const ScoreFeatureContext& context,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ InternalStructFeature();
+ /** Return true if the two phrase pairs are equal from the point of this feature. Assume
+ that they already compare true according to PhraseAlignment.equals()
+ **/
+ bool equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const;
+ /** Add the values for this feature function. */
+ void add(const ScoreFeatureContext& context,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
protected:
- /** Overriden in subclass */
- virtual void add(std::string *internalStruct,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const = 0;
- int m_type;
+ /** Overriden in subclass */
+ virtual void add(std::string *internalStruct,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const = 0;
+ int m_type;
};
class InternalStructFeatureDense : public InternalStructFeature
{
public:
- InternalStructFeatureDense()
- :InternalStructFeature(){m_type=1;} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
+ InternalStructFeatureDense()
+ :InternalStructFeature() {
+ m_type=1;
+ } //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
- virtual void add(std::string *internalStruct,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ virtual void add(std::string *internalStruct,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
class InternalStructFeatureSparse : public InternalStructFeature
{
public:
- InternalStructFeatureSparse()
- :InternalStructFeature(){m_type=2;}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
+ InternalStructFeatureSparse()
+ :InternalStructFeature() {
+ m_type=2;
+ }// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
- virtual void add(std::string *internalStruct,
- std::vector<float>& denseValues,
- std::map<std::string,float>& sparseValues) const;
+ virtual void add(std::string *internalStruct,
+ std::vector<float>& denseValues,
+ std::map<std::string,float>& sparseValues) const;
};
}
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 574b9afc1..8aae418ab 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -68,9 +68,9 @@ public:
includeSentenceIdFlag(false),
onlyOutputSpanInfo(false),
gzOutput(false),
- flexScoreFlag(false),
- debug(false)
-{}
+ flexScoreFlag(false),
+ debug(false)
+ {}
//functions for initialization of options
void initAllModelsOutputFlag(const bool initallModelsOutputFlag) {
diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp
index f98759755..233be0168 100644
--- a/phrase-extract/ScoreFeature.cpp
+++ b/phrase-extract/ScoreFeature.cpp
@@ -39,7 +39,7 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
bool sparseDomainAdded = false;
for (size_t i = 0; i < args.size(); ++i) {
- if (args[i] == "--IgnoreSentenceId") {
+ if (args[i] == "--IgnoreSentenceId") {
m_includeSentenceId = true;
} else if (args[i].substr(0,8) == "--Domain") {
string type = args[i].substr(8);
@@ -77,15 +77,15 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
- } else if(args[i] == "--GHKMFeatureSparse"){
- //MARIA
- m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
- } else if(args[i] == "--GHKMFeatureDense"){
- //MARIA
- m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
+ } else if(args[i] == "--GHKMFeatureSparse") {
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
+ } else if(args[i] == "--GHKMFeatureDense") {
+ //MARIA
+ m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
- }
+ }
}
diff --git a/phrase-extract/ScoreFeatureTest.cpp b/phrase-extract/ScoreFeatureTest.cpp
index 5cf7185f5..399714856 100644
--- a/phrase-extract/ScoreFeatureTest.cpp
+++ b/phrase-extract/ScoreFeatureTest.cpp
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
args.clear();
args = Tokenize("--DomainSubset"," ");
BOOST_CHECK_THROW(manager.configure(args), ScoreFeatureArgumentException);
-
+
}
template <class Expected>
diff --git a/phrase-extract/Util.cpp___ b/phrase-extract/Util.cpp___
new file mode 100644
index 000000000..812705660
--- /dev/null
+++ b/phrase-extract/Util.cpp___
@@ -0,0 +1,28 @@
+/*
+ * Util.cpp
+ * mert - Minimum Error Rate Training
+ *
+ * Created by Nicola Bertoldi on 13/05/08.
+ *
+ */
+
+#include "Util.h"
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+void Tokenize(const char *str, const char delim,
+ std::vector<std::string> *res)
+{
+ while (1) {
+ const char *begin = str;
+ while (*str != delim && *str) str++;
+ if (begin != str) // Don't create empty string objects.
+ res->push_back(std::string(begin, str));
+ if (*str++ == 0) break;
+ }
+}
+
+}
diff --git a/phrase-extract/Util.h___ b/phrase-extract/Util.h___
new file mode 100644
index 000000000..f63124f1d
--- /dev/null
+++ b/phrase-extract/Util.h___
@@ -0,0 +1,140 @@
+/*
+ * Util.h
+ * mert - Minimum Error Rate Training
+ *
+ * Created by Nicola Bertoldi on 13/05/08.
+ *
+ */
+
+#ifndef MERT_UTIL_H_
+#define MERT_UTIL_H_
+
+#include <cmath>
+#include <cstdlib>
+#include <stdexcept>
+#include <limits>
+#include <vector>
+#include <map>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <cstring>
+
+#include "Types.h"
+
+namespace MosesTuning
+{
+
+#ifdef TRACE_ENABLE
+#define TRACE_ERR(str) { std::cerr << str; }
+#else
+#define TRACE_ERR(str) { }
+#endif
+
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
+// gcc nth_element() bug
+#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end)
+#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer)
+#else
+#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end)
+#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer)
+#endif
+
+const char kDefaultDelimiterSymbol[] = " ";
+
+int verboselevel();
+int setverboselevel(int v);
+
+
+const float kEPS = 0.0001f;
+
+template <typename T>
+bool IsAlmostEqual(T expected, T actual, float round=kEPS)
+{
+ if (std::abs(expected - actual) < round) {
+ return true;
+ } else {
+ std::cerr << "Fail: expected = " << expected
+ << " (actual = " << actual << ")" << std::endl;
+ return false;
+ }
+}
+
+/**
+ * Find the specified delimiter for the string 'str', and 'str' is assigned
+ * to a substring object that starts at the position of first occurrence of
+ * the delimiter in 'str'. 'substr' is copied from 'str' ranging from
+ * the start position of 'str' to the position of first occurrence of
+ * the delimiter.
+ *
+ * It returns the position of first occurrence in the queried string.
+ * If the content is not found, std::string::npos is returned.
+ */
+size_t getNextPound(std::string &str, std::string &substr,
+ const std::string &delimiter = kDefaultDelimiterSymbol);
+
+void split(const std::string &s, char delim, std::vector<std::string> &elems);
+
+/**
+ * Split the string 'str' with specified delimitter 'delim' into tokens.
+ * The resulting tokens are set to 'res'.
+ *
+ * ex. "a,b,c" => {"a", "b", "c"}.
+ */
+void Tokenize(const char *str, const char delim, std::vector<std::string> *res);
+
+/**
+ * Returns true iff "str" ends with "suffix".
+ * e.g., Given str = "abc:" and suffix = ":", this function returns true.
+ */
+inline bool EndsWith(const std::string& str, const char* suffix)
+{
+ return str.find_last_of(suffix) == str.size() - 1;
+}
+
+template<typename T>
+inline std::string stringify(T x)
+{
+ std::ostringstream o;
+ if (!(o << x))
+ throw std::runtime_error("stringify(template<typename T>)");
+ return o.str();
+}
+
+inline ScoreStatsType ConvertCharToScoreStatsType(const char *str)
+{
+ return std::atoi(str);
+}
+
+inline ScoreStatsType ConvertStringToScoreStatsType(const std::string& str)
+{
+ return ConvertCharToScoreStatsType(str.c_str());
+}
+
+inline FeatureStatsType ConvertCharToFeatureStatsType(const char *str)
+{
+ return static_cast<FeatureStatsType>(std::atof(str));
+}
+
+inline FeatureStatsType ConvertStringToFeatureStatsType(const std::string &str)
+{
+ return ConvertCharToFeatureStatsType(str.c_str());
+}
+
+inline std::string trimStr(const std::string& Src, const std::string& c = " \r\n")
+{
+ size_t p2 = Src.find_last_not_of(c);
+ if (p2 == std::string::npos) return std::string();
+ size_t p1 = Src.find_first_not_of(c);
+ if (p1 == std::string::npos) p1 = 0;
+ return Src.substr(p1, (p2-p1)+1);
+}
+
+// Utilities to measure decoding time
+void ResetUserTime();
+void PrintUserTime(const std::string &message);
+double GetUserTime();
+
+}
+
+#endif // MERT_UTIL_H_
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index b2cde6d64..bf306f30d 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -163,13 +163,13 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
}
}
-void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
+void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
{
- Write(rule,false);
- m_fwd << " Tree ";
- g.PrintTree(m_fwd);
- m_fwd << std::endl;
- m_inv << std::endl;
+ Write(rule,false);
+ m_fwd << " Tree ";
+ g.PrintTree(m_fwd);
+ m_fwd << std::endl;
+ m_inv << std::endl;
}
} // namespace GHKM
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index 18f423149..01883cdff 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -44,7 +44,7 @@ public:
void Write(const ScfgRule &rule, bool printEndl=true);
- void Write(const ScfgRule &rule, const Subgraph &g);
+ void Write(const ScfgRule &rule, const Subgraph &g);
private:
// Disallow copying
diff --git a/phrase-extract/extract-ghkm/Subgraph.cpp b/phrase-extract/extract-ghkm/Subgraph.cpp
index 0c941218d..6796cec95 100644
--- a/phrase-extract/extract-ghkm/Subgraph.cpp
+++ b/phrase-extract/extract-ghkm/Subgraph.cpp
@@ -119,12 +119,12 @@ float Subgraph::CalcPcfgScore() const
return score;
}
-void Subgraph::PrintTree(std::ostream &out) const
+void Subgraph::PrintTree(std::ostream &out) const
{
RecursivelyPrintTree(m_root,out);
}
-void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const
+void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const
{
NodeType nodeType = n->GetType();
if (nodeType == TREE) {
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index 5d58028d6..76f695d2a 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -171,7 +171,7 @@ int main(int argc, char* argv[])
}
options.initInstanceWeightsFile(argv[++i]);
} else if (strcmp(argv[i], "--Debug") == 0) {
- options.debug = true;
+ options.debug = true;
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@@ -722,9 +722,9 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
}
if (m_options.debug) {
- outextractstr << "sentenceID=" << sentence.sentenceID << " ";
- outextractstrInv << "sentenceID=" << sentence.sentenceID << " ";
- outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " ";
+ outextractstr << "sentenceID=" << sentence.sentenceID << " ";
+ outextractstrInv << "sentenceID=" << sentence.sentenceID << " ";
+ outextractstrOrientation << "sentenceID=" << sentence.sentenceID << " ";
}
for(int fi=startF; fi<=endF; fi++) {