beautify

author: Nicola Bertoldi <bertoldi@fbk.eu> 2014-05-19 17:35:08 +0400
committer: Nicola Bertoldi <bertoldi@fbk.eu> 2014-05-19 17:35:08 +0400
commit: 20b3e8929e93c40e0f1ec61b9268330fe59607f9 (patch)
tree: ae6102023df2adfd6bfb287cbc6d89caad5afeac /phrase-extract
parent: 2f3cd5e2fe54f0352eee4657ea91e0039073a95a (diff)
10 files changed, 186 insertions, 179 deletions
diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp
index 2f99a8709..0526d058b 100644
--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@@ -55,9 +55,9 @@ DomainFeature::DomainFeature(const string& domainFile) : m_propertyKey("domain")
   m_domain.load(domainFile);
 }
 
-void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, 
-                                              float count, 
-                                              int sentenceId) const
+void DomainFeature::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+    float count,
+    int sentenceId) const
 {
   std::string value = m_domain.getDomainOfSentence(sentenceId);
   phrasePair.AddProperty(m_propertyKey, value, count);
@@ -69,13 +69,13 @@ void DomainFeature::add(const ScoreFeatureContext& context,
 {
   const map<string,float> *domainCount = context.phrasePair.GetProperty(m_propertyKey);
   assert( domainCount != NULL );
-  add(*domainCount, 
-      context.phrasePair.GetCount(), 
-      context.maybeLog, 
+  add(*domainCount,
+      context.phrasePair.GetCount(),
+      context.maybeLog,
       denseValues, sparseValues);
 }
 
-void SubsetDomainFeature::add(const map<string,float>& domainCount, 
+void SubsetDomainFeature::add(const map<string,float>& domainCount,
                               float count,
                               const MaybeLog& maybeLog,
                               std::vector<float>& denseValues,
diff --git a/phrase-extract/DomainFeature.h b/phrase-extract/DomainFeature.h
index 8ebc599e2..bcb2e63a2 100644
--- a/phrase-extract/DomainFeature.h
+++ b/phrase-extract/DomainFeature.h
@@ -35,8 +35,8 @@ public:
 
   DomainFeature(const std::string& domainFile);
 
-  void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, 
-                                 float count, 
+  void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+                                 float count,
                                  int sentenceId) const;
 
   void add(const ScoreFeatureContext& context,
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index a975b4126..102537ca1 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -29,7 +29,8 @@
 using namespace std;
 
 
-namespace MosesTraining {
+namespace MosesTraining
+{
 
 
 extern Vocabulary vcbT;
@@ -38,23 +39,23 @@ extern Vocabulary vcbS;
 extern bool hierarchicalFlag;
 
 
-ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource, 
-                                            const PHRASE *phraseTarget, 
-                                            ALIGNMENT *targetToSourceAlignment, 
-                                            float count, float pcfgSum ) :
-    m_phraseSource(phraseSource),
-    m_phraseTarget(phraseTarget),
-    m_count(count),
-    m_pcfgSum(pcfgSum)
+ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
+    const PHRASE *phraseTarget,
+    ALIGNMENT *targetToSourceAlignment,
+    float count, float pcfgSum ) :
+  m_phraseSource(phraseSource),
+  m_phraseTarget(phraseTarget),
+  m_count(count),
+  m_pcfgSum(pcfgSum)
 {
   assert(phraseSource->empty());
   assert(phraseTarget->empty());
 
   m_count = count;
   m_pcfgSum = pcfgSum;
-  
+
   std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
-      m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
+    m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
 
   m_lastTargetToSourceAlignment = insertedAlignment.first;
   m_lastCount = m_count;
@@ -64,29 +65,30 @@ ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
 }
 
 
-ExtractionPhrasePair::~ExtractionPhrasePair( ) {
+ExtractionPhrasePair::~ExtractionPhrasePair( )
+{
   Clear();
 }
 
 
 // return value: true if the given alignment was seen for the first time and thus will be stored,
 //               false if it was present already (the pointer may thus be deleted)
-bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment, 
-                                float count, float pcfgSum ) 
+bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
+                                float count, float pcfgSum )
 {
   m_count += count;
   m_pcfgSum += pcfgSum;
 
   m_lastCount = count;
   m_lastPcfgSum = pcfgSum;
-  
+
   std::map<ALIGNMENT*,float>::iterator iter = m_lastTargetToSourceAlignment;
   if ( *(iter->first) == *targetToSourceAlignment ) {
     iter->second += count;
     return false;
   } else {
     std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
-        m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
+      m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
     if ( !insertedAlignment.second ) {
       // the alignment already exists: increment count
       insertedAlignment.first->second += count;
@@ -105,7 +107,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
   m_pcfgSum += pcfgSum;
   m_lastTargetToSourceAlignment->second += count;
   // properties
-  for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin(); 
+  for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
         iter !=m_properties.end(); ++iter ) {
     LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second;
     (*lastPropertyValue)->second += count;
@@ -116,7 +118,7 @@ void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
 }
 
 
-// Check for lexical match 
+// Check for lexical match
 // and in case of SCFG rules for equal non-terminal alignment.
 bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
                                     const PHRASE *otherPhraseTarget,
@@ -132,9 +134,9 @@ bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
   return MatchesAlignment( otherTargetToSourceAlignment );
 }
 
-// Check for lexical match 
+// Check for lexical match
 // and in case of SCFG rules for equal non-terminal alignment.
-// Set boolean indicators. 
+// Set boolean indicators.
 // (Note that we check in the order: target - source - alignment
 //  and do not touch the subsequent boolean indicators once a previous one has been set to false.)
 bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
@@ -194,7 +196,7 @@ bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlign
   return true;
 }
 
-void ExtractionPhrasePair::Clear() 
+void ExtractionPhrasePair::Clear()
 {
   delete m_phraseSource;
   delete m_phraseTarget;
@@ -218,7 +220,7 @@ void ExtractionPhrasePair::Clear()
   m_lastCount = 0.0f;
   m_lastPcfgSum = 0.0f;
   m_lastTargetToSourceAlignment = m_targetToSourceAlignments.begin();
-  
+
   m_isValid = false;
 }
 
@@ -252,7 +254,7 @@ const ALIGNMENT *ExtractionPhrasePair::FindBestAlignmentTargetToSource() const
 
   std::map<ALIGNMENT*,float>::const_iterator bestAlignment = m_targetToSourceAlignments.end();
 
-  for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin(); 
+  for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
        iter!=m_targetToSourceAlignments.end(); ++iter) {
     if ( (iter->second > bestAlignmentCount) ||
          ( (iter->second == bestAlignmentCount) &&
@@ -281,7 +283,7 @@ const std::string *ExtractionPhrasePair::FindBestPropertyValue(const std::string
 
   PROPERTY_VALUES::const_iterator bestPropertyValue = allPropertyValues->end();
 
-  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); 
+  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
        iter!=allPropertyValues->end(); ++iter) {
     if ( (iter->second > bestPropertyCount) ||
          ( (iter->second == bestPropertyCount) &&
@@ -308,7 +310,7 @@ std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &ke
   }
 
   std::ostringstream oss;
-  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin(); 
+  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
        iter!=allPropertyValues->end(); ++iter) {
     if (iter!=allPropertyValues->begin()) {
       oss << " ";
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index f04984391..e9f643d2c 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -24,20 +24,22 @@
 #include <set>
 #include <map>
 
-namespace MosesTraining {
+namespace MosesTraining
+{
 
 
 typedef std::vector< std::set<size_t> > ALIGNMENT;
 
 
-class ExtractionPhrasePair {
+class ExtractionPhrasePair
+{
 
 protected:
 
   typedef std::map<std::string,float> PROPERTY_VALUES;
   typedef std::map<std::string,float>::iterator LAST_PROPERTY_VALUE;
 
-  
+
   bool m_isValid;
 
   const PHRASE *m_phraseSource;
@@ -47,8 +49,8 @@ protected:
   float m_pcfgSum;
 
   std::map<ALIGNMENT*,float> m_targetToSourceAlignments;
-  std::map<std::string, 
-           std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
+  std::map<std::string,
+      std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > > m_properties;
 
   float m_lastCount;
   float m_lastPcfgSum;
@@ -56,14 +58,14 @@ protected:
 
 public:
 
-  ExtractionPhrasePair( const PHRASE *phraseSource, 
-                        const PHRASE *phraseTarget, 
-                        ALIGNMENT *targetToSourceAlignment, 
+  ExtractionPhrasePair( const PHRASE *phraseSource,
+                        const PHRASE *phraseTarget,
+                        ALIGNMENT *targetToSourceAlignment,
                         float count, float pcfgSum );
 
   ~ExtractionPhrasePair();
 
-  bool Add( ALIGNMENT *targetToSourceAlignment, 
+  bool Add( ALIGNMENT *targetToSourceAlignment,
             float count, float pcfgSum );
 
   void IncrementPrevious( float count, float pcfgSum );
@@ -91,7 +93,7 @@ public:
   const PHRASE *GetSource() const {
     return m_phraseSource;
   }
-  
+
   const PHRASE *GetTarget() const {
     return m_phraseTarget;
   }
@@ -126,10 +128,9 @@ public:
 
   void AddProperties( const std::string &str, float count );
 
-  void AddProperty( const std::string &key, const std::string &value, float count ) 
-  {
+  void AddProperty( const std::string &key, const std::string &value, float count ) {
     std::map<std::string,
-             std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
+        std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
     if ( iter == m_properties.end() ) {
       // key not found: insert property key and value
       PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
diff --git a/phrase-extract/InternalStructFeature.cpp b/phrase-extract/InternalStructFeature.cpp
index 3757b0e43..a2369a80c 100644
--- a/phrase-extract/InternalStructFeature.cpp
+++ b/phrase-extract/InternalStructFeature.cpp
@@ -8,7 +8,8 @@ namespace MosesTraining
 
 void InternalStructFeature::add(const ScoreFeatureContext& context,
                                 std::vector<float>& denseValues,
-                                std::map<std::string,float>& sparseValues) const {
+                                std::map<std::string,float>& sparseValues) const
+{
   const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
   for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
         iter!=allTrees->end(); ++iter ) {
@@ -19,24 +20,26 @@ void InternalStructFeature::add(const ScoreFeatureContext& context,
 void InternalStructFeatureDense::add(const std::string *treeFragment,
                                      float count,
                                      std::vector<float>& denseValues,
-                                     std::map<std::string,float>& sparseValues) const {
-	//cout<<"Dense: "<<*internalStruct<<endl;
-	size_t start=0;
-	int countNP=0;
-	while((start = treeFragment->find("NP", start)) != string::npos) {
-		countNP += count;
-		start+=2; //length of "NP"
-	}
-	//should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
-	//should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
-	denseValues.push_back(exp(countNP));
+                                     std::map<std::string,float>& sparseValues) const
+{
+  //cout<<"Dense: "<<*internalStruct<<endl;
+  size_t start=0;
+  int countNP=0;
+  while((start = treeFragment->find("NP", start)) != string::npos) {
+    countNP += count;
+    start+=2; //length of "NP"
+  }
+  //should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
+  //should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
+  denseValues.push_back(exp(countNP));
 
 }
 
 void InternalStructFeatureSparse::add(const std::string *treeFragment,
                                       float count,
                                       std::vector<float>& denseValues,
-                                      std::map<std::string,float>& sparseValues) const {
+                                      std::map<std::string,float>& sparseValues) const
+{
   //cout<<"Sparse: "<<*internalStruct<<endl;
   if(treeFragment->find("VBZ")!=std::string::npos)
     sparseValues["NTVBZ"] += count;
diff --git a/phrase-extract/InternalStructFeature.h b/phrase-extract/InternalStructFeature.h
index 7969dc8a8..bd44f61fb 100644
--- a/phrase-extract/InternalStructFeature.h
+++ b/phrase-extract/InternalStructFeature.h
@@ -21,20 +21,20 @@ namespace MosesTraining
 class InternalStructFeature : public ScoreFeature
 {
 public:
-        InternalStructFeature() : m_type(0) {};
-	/** Add the values for this feature function. */
-	void add(const ScoreFeatureContext& context,
-                 std::vector<float>& denseValues,
-                 std::map<std::string,float>& sparseValues) const;
+  InternalStructFeature() : m_type(0) {};
+  /** Add the values for this feature function. */
+  void add(const ScoreFeatureContext& context,
+           std::vector<float>& denseValues,
+           std::map<std::string,float>& sparseValues) const;
 
 
 protected:
-	/** Overridden in subclass */
-	 virtual void add(const std::string *treeFragment,
-                          float count,
-                          std::vector<float>& denseValues,
-                          std::map<std::string,float>& sparseValues) const = 0;
-	int m_type;
+  /** Overridden in subclass */
+  virtual void add(const std::string *treeFragment,
+                   float count,
+                   std::vector<float>& denseValues,
+                   std::map<std::string,float>& sparseValues) const = 0;
+  int m_type;
 };
 
 class InternalStructFeatureDense : public InternalStructFeature
@@ -45,10 +45,10 @@ public:
     m_type=1;
   } //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
 protected:
-	virtual void add(const std::string *treeFragment,
-                         float count,
-                         std::vector<float>& denseValues,
-                         std::map<std::string,float>& sparseValues) const;
+  virtual void add(const std::string *treeFragment,
+                   float count,
+                   std::vector<float>& denseValues,
+                   std::map<std::string,float>& sparseValues) const;
 };
 
 class InternalStructFeatureSparse : public InternalStructFeature
@@ -59,10 +59,10 @@ public:
     m_type=2;
   }// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
 protected:
-	virtual void add(const std::string *treeFragment,
-                         float count,
-                         std::vector<float>& denseValues,
-                         std::map<std::string,float>& sparseValues) const;
+  virtual void add(const std::string *treeFragment,
+                   float count,
+                   std::vector<float>& denseValues,
+                   std::map<std::string,float>& sparseValues) const;
 };
 
 }
diff --git a/phrase-extract/ScoreFeature.cpp b/phrase-extract/ScoreFeature.cpp
index 7db57b38e..c037ab584 100644
--- a/phrase-extract/ScoreFeature.cpp
+++ b/phrase-extract/ScoreFeature.cpp
@@ -77,12 +77,12 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
       }
       sparseDomainAdded = true;
       m_includeSentenceId = true;
-    } else if(args[i] == "--TreeFeatureSparse"){
-    	//MARIA
-    	m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
-    } else if(args[i] == "--TreeFeatureDense"){
-    	//MARIA
-    	m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
+    } else if(args[i] == "--TreeFeatureSparse") {
+      //MARIA
+      m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
+    } else if(args[i] == "--TreeFeatureDense") {
+      //MARIA
+      m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
     } else {
       UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
     }
@@ -91,9 +91,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
 
 }
 
-void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, 
-                                                    float count, 
-                                                    int sentenceId) const
+void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+    float count,
+    int sentenceId) const
 {
   for (size_t i = 0; i < m_features.size(); ++i) {
     m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);
diff --git a/phrase-extract/ScoreFeature.h b/phrase-extract/ScoreFeature.h
index 926397e71..1f697c989 100644
--- a/phrase-extract/ScoreFeature.h
+++ b/phrase-extract/ScoreFeature.h
@@ -84,10 +84,10 @@ class ScoreFeature
 public:
 
   /** Some features might need to store properties in ExtractionPhrasePair,
-   *  e.g. to pass along external information loaded by a feature 
+   *  e.g. to pass along external information loaded by a feature
    *  which may distinguish several phrase occurrences based on sentence ID */
-  virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, 
-                                         float count, 
+  virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+                                         float count,
                                          int sentenceId) const {};
 
   /** Add the values for this feature function. */
@@ -113,10 +113,10 @@ public:
   void configure(const std::vector<std::string> args);
 
   /** Some features might need to store properties in ExtractionPhrasePair,
-   *  e.g. to pass along external information loaded by a feature 
+   *  e.g. to pass along external information loaded by a feature
    *  which may distinguish several phrase occurrences based on sentence ID */
-  void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, 
-                                 float count, 
+  void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
+                                 float count,
                                  int sentenceId) const;
 
   /** Add all the features */
diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp
index 104457b01..b37309d47 100644
--- a/phrase-extract/extract-ordering-main.cpp
+++ b/phrase-extract/extract-ordering-main.cpp
@@ -92,9 +92,9 @@ class ExtractTask
 public:
   ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFileOrientation)
     :m_sentence(sentence),
-    m_options(initoptions),
-    m_extractFileOrientation(extractFileOrientation)
-	{}
+     m_options(initoptions),
+     m_extractFileOrientation(extractFileOrientation)
+  {}
   void Run();
 private:
   void extract(SentenceAlignment &);
@@ -151,11 +151,11 @@ int main(int argc, char* argv[])
       }
       options.initInstanceWeightsFile(argv[++i]);
     } else if (strcmp(argv[i], "--Debug") == 0) {
-    	options.debug = true;
+      options.debug = true;
     } else if (strcmp(argv[i], "--MinPhraseLength") == 0) {
-    	options.minPhraseLength = atoi(argv[++i]);
+      options.minPhraseLength = atoi(argv[++i]);
     } else if (strcmp(argv[i], "--Separator") == 0) {
-    	options.separator = argv[++i];
+      options.separator = argv[++i];
     } else if(strcmp(argv[i],"--model") == 0) {
       if (i+1 >= argc) {
         cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@@ -605,16 +605,14 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
 
 int getClass(const std::string &str)
 {
-	size_t pos = str.find("swap");
-	if (pos == str.npos) {
-		return 0;
-	}
-	else if (pos == 0) {
-		return 1;
-	}
-	else {
-		return 2;
-	}
+  size_t pos = str.find("swap");
+  if (pos == str.npos) {
+    return 0;
+  } else if (pos == 0) {
+    return 1;
+  } else {
+    return 2;
+  }
 }
 
 void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
@@ -635,19 +633,19 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
   // start
   m_extractFileOrientation << "<s> ";
   for(int fi=0; fi<startF; fi++) {
-	  m_extractFileOrientation << sentence.source[fi] << " ";
+    m_extractFileOrientation << sentence.source[fi] << " ";
   }
   m_extractFileOrientation << sep << " ";
 
   // middle
   for(int fi=startF; fi<=endF; fi++) {
-	  m_extractFileOrientation << sentence.source[fi] << " ";
+    m_extractFileOrientation << sentence.source[fi] << " ";
   }
   m_extractFileOrientation << sep << " ";
 
   // end
   for(int fi=endF+1; fi<sentence.source.size(); fi++) {
-	  m_extractFileOrientation << sentence.source[fi] << " ";
+    m_extractFileOrientation << sentence.source[fi] << " ";
   }
   m_extractFileOrientation << "</s> ";
 
@@ -655,7 +653,7 @@ void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE,
   // target
   /*
   for(int ei=startE; ei<=endE; ei++) {
-	  m_extractFileOrientation << sentence.target[ei] << " ";
+    m_extractFileOrientation << sentence.target[ei] << " ";
   }
   */
   m_extractFileOrientation << endl;
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index cd8f9ddaa..72c4c1476 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -68,7 +68,7 @@ std::map<std::string,float> sourceLHSCounts;
 std::map<std::string, std::map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
 
 std::set<std::string> sourceLabelSet;
-std::map<std::string,size_t> sourceLabels; 
+std::map<std::string,size_t> sourceLabels;
 std::vector<std::string> sourceLabelsByIndex;
 
 Vocabulary vcbT;
@@ -79,12 +79,12 @@ Vocabulary vcbS;
 std::vector<std::string> tokenize( const char [] );
 
 void processLine( std::string line,
-                  int lineID, bool includeSentenceIdFlag, int &sentenceId,  
+                  int lineID, bool includeSentenceIdFlag, int &sentenceId,
                   PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
                   std::string &additionalPropertiesString,
                   float &count, float &pcfgSum );
 void writeCountOfCounts( const std::string &fileNameCountOfCounts );
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, 
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
                          const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb );
 void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog );
 double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@@ -100,7 +100,7 @@ void invertAlignment( const PHRASE *phraseSource, const PHRASE *phraseTarget, co
 
 int main(int argc, char* argv[])
 {
-  std::cerr << "Score v2.1 -- " 
+  std::cerr << "Score v2.1 -- "
             << "scoring methods for extracted rules" << std::endl;
 
   ScoreFeatureManager featureManager;
@@ -155,7 +155,7 @@ int main(int argc, char* argv[])
     } else if (strcmp(argv[i],"--UnalignedFunctionWordPenalty") == 0) {
       unalignedFWFlag = true;
       if (i+1==argc) {
-          std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
+        std::cerr << "ERROR: specify function words file for unaligned function word penalty!" << std::endl;
         exit(1);
       }
       fileNameFunctionWords = argv[++i];
@@ -224,8 +224,8 @@ int main(int argc, char* argv[])
     Moses::OutputFileStream *outputFile = new Moses::OutputFileStream();
     bool success = outputFile->Open(fileNamePhraseTable);
     if (!success) {
-        std::cerr << "ERROR: could not open file phrase table file "
-                  << fileNamePhraseTable << std::endl;
+      std::cerr << "ERROR: could not open file phrase table file "
+                << fileNamePhraseTable << std::endl;
       exit(1);
     }
     phraseTableFile = outputFile;
@@ -251,12 +251,12 @@ int main(int argc, char* argv[])
     tmpPhraseSource = new PHRASE();
     tmpPhraseTarget = new PHRASE();
     tmpTargetToSourceAlignment = new ALIGNMENT();
-    processLine( std::string(line), 
+    processLine( std::string(line),
                  i, featureManager.includeSentenceId(), tmpSentenceId,
-                 tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, 
+                 tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
                  tmpAdditionalPropertiesString,
                  tmpCount, tmpPcfgSum);
-    phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, 
+    phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
                                            tmpTargetToSourceAlignment,
                                            tmpCount, tmpPcfgSum );
     phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
@@ -288,14 +288,16 @@ int main(int argc, char* argv[])
     tmpPhraseTarget = new PHRASE();
     tmpTargetToSourceAlignment = new ALIGNMENT();
     tmpAdditionalPropertiesString.clear();
-    processLine( std::string(line), 
+    processLine( std::string(line),
                  i, featureManager.includeSentenceId(), tmpSentenceId,
-                 tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment, 
+                 tmpPhraseSource, tmpPhraseTarget, tmpTargetToSourceAlignment,
                  tmpAdditionalPropertiesString,
-                 tmpCount, tmpPcfgSum); 
+                 tmpCount, tmpPcfgSum);
 
     bool matchesPrevious = false;
-    bool sourceMatch = true; bool targetMatch = true; bool alignmentMatch = true; // be careful with these,
+    bool sourceMatch = true;
+    bool targetMatch = true;
+    bool alignmentMatch = true; // be careful with these,
     // ExtractionPhrasePair::Matches() checks them in order and does not continue with the others
     // once the first of them has been found to have to be set to false
 
@@ -330,7 +332,7 @@ int main(int argc, char* argv[])
       if ( !phrasePairsWithSameSource.empty() &&
            !sourceMatch ) {
         processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
-        for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); 
+        for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
               iter!=phrasePairsWithSameSource.end(); ++iter) {
           delete *iter;
         }
@@ -347,8 +349,8 @@ int main(int argc, char* argv[])
         }
       }
 
-      phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget, 
-                                             tmpTargetToSourceAlignment, 
+      phrasePair = new ExtractionPhrasePair( tmpPhraseSource, tmpPhraseTarget,
+                                             tmpTargetToSourceAlignment,
                                              tmpCount, tmpPcfgSum );
       phrasePair->AddProperties( tmpAdditionalPropertiesString, tmpCount );
       featureManager.addPropertiesToPhrasePair( *phrasePair, tmpCount, tmpSentenceId );
@@ -364,7 +366,7 @@ int main(int argc, char* argv[])
   }
 
   processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
-  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); 
+  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
         iter!=phrasePairsWithSameSource.end(); ++iter) {
     delete *iter;
   }
@@ -384,7 +386,7 @@ int main(int argc, char* argv[])
 
 
 void processLine( std::string line,
-                  int lineID, bool includeSentenceIdFlag, int &sentenceId,  
+                  int lineID, bool includeSentenceIdFlag, int &sentenceId,
                   PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment,
                   std::string &additionalPropertiesString,
                   float &count, float &pcfgSum )
@@ -474,7 +476,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
 }
 
 
-void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile, 
+void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSameSource, ostream &phraseTableFile,
                          const ScoreFeatureManager& featureManager, const MaybeLog& maybeLogProb )
 {
   if (phrasePairsWithSameSource.size() == 0) {
@@ -486,23 +488,23 @@ void processPhrasePairs( std::vector< ExtractionPhrasePair* > &phrasePairsWithSa
   //std::cerr << "phrasePairs.size() = " << phrasePairs.size() << std::endl;
 
   // loop through phrase pairs
-  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); 
+  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
         iter!=phrasePairsWithSameSource.end(); ++iter) {
     // add to total count
     totalSource += (*iter)->GetCount();
   }
 
   // output the distinct phrase pairs, one at a time
-  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin(); 
+  for ( std::vector< ExtractionPhrasePair* >::const_iterator iter=phrasePairsWithSameSource.begin();
         iter!=phrasePairsWithSameSource.end(); ++iter) {
     // add to total count
     outputPhrasePair( **iter, totalSource, phrasePairsWithSameSource.size(), phraseTableFile, featureManager, maybeLogProb );
   }
 }
 
-void outputPhrasePair(const ExtractionPhrasePair &phrasePair, 
-                      float totalCount, int distinctCount, 
-                      ostream &phraseTableFile, 
+void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
+                      float totalCount, int distinctCount,
+                      ostream &phraseTableFile,
                       const ScoreFeatureManager& featureManager,
                       const MaybeLog& maybeLogProb )
 {
@@ -557,45 +559,45 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
 
   // alignment
   if ( hierarchicalFlag ) {
-      // always output alignment if hiero style
-      assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
-      std::vector<std::string> alignment;
-      for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
-        if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
-          if ( bestAlignmentT2S->at(j).size() != 1 ) {
-            std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
-            phraseTableFile.flush();
-            assert(bestAlignmentT2S->at(j).size() == 1);
-          }
-          size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
-          //phraseTableFile << sourcePos << "-" << j << " ";
+    // always output alignment if hiero style
+    assert(phraseTarget->size() == bestAlignmentT2S->size()+1);
+    std::vector<std::string> alignment;
+    for ( size_t j = 0; j < phraseTarget->size() - 1; ++j ) {
+      if ( isNonTerminal(vcbT.getWord( phraseTarget->at(j) ))) {
+        if ( bestAlignmentT2S->at(j).size() != 1 ) {
+          std::cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << std::endl;
+          phraseTableFile.flush();
+          assert(bestAlignmentT2S->at(j).size() == 1);
+        }
+        size_t sourcePos = *(bestAlignmentT2S->at(j).begin());
+        //phraseTableFile << sourcePos << "-" << j << " ";
+        std::stringstream point;
+        point << sourcePos << "-" << j;
+        alignment.push_back(point.str());
+      } else {
+        for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+              setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+          size_t sourcePos = *setIter;
           std::stringstream point;
           point << sourcePos << "-" << j;
           alignment.push_back(point.str());
-        } else {
-          for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
-                setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
-            size_t sourcePos = *setIter;
-            std::stringstream point;
-            point << sourcePos << "-" << j;
-            alignment.push_back(point.str());
-          }
         }
       }
-      // now print all alignments, sorted by source index
-      sort(alignment.begin(), alignment.end());
-      for (size_t i = 0; i < alignment.size(); ++i) {
-        phraseTableFile << alignment[i] << " ";
-      }
+    }
+    // now print all alignments, sorted by source index
+    sort(alignment.begin(), alignment.end());
+    for (size_t i = 0; i < alignment.size(); ++i) {
+      phraseTableFile << alignment[i] << " ";
+    }
   } else if ( !inverseFlag && wordAlignmentFlag) {
-      // alignment info in pb model
-      for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
-        for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
-              setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
-          size_t sourcePos = *setIter;
-          phraseTableFile << sourcePos << "-" << j << " ";
-        }
+    // alignment info in pb model
+    for (size_t j = 0; j < bestAlignmentT2S->size(); ++j) {
+      for ( std::set<size_t>::iterator setIter = (bestAlignmentT2S->at(j)).begin();
+            setIter != (bestAlignmentT2S->at(j)).end(); ++setIter ) {
+        size_t sourcePos = *setIter;
+        phraseTableFile << sourcePos << "-" << j << " ";
       }
+    }
   }
 
   phraseTableFile << " ||| ";
@@ -646,7 +648,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
   if (kneserNeyFlag)
     phraseTableFile << " " << distinctCount;
 
-  if ((treeFragmentsFlag) && 
+  if ((treeFragmentsFlag) &&
       !inverseFlag) {
     phraseTableFile << " |||";
   }
@@ -671,7 +673,7 @@ bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *al
       // skip
     } else {
       const std::set<size_t> &sourceSet = alignmentTargetToSource->at(currTarget);
-      for (std::set<size_t>::const_iterator iter = sourceSet.begin(); 
+      for (std::set<size_t>::const_iterator iter = sourceSet.begin();
            iter != sourceSet.end(); ++iter) {
         size_t currSource = *iter;
 
@@ -808,9 +810,9 @@ void LexicalTable::load( const string &fileName )
 
     std::vector<string> token = tokenize( line );
     if (token.size() != 3) {
-        std::cerr << "line " << i << " in " << fileName
-           << " has wrong number of tokens, skipping:" << std::endl
-           << token.size() << " " << token[0] << " " << line << std::endl;
+      std::cerr << "line " << i << " in " << fileName
+                << " has wrong number of tokens, skipping:" << std::endl
+                << token.size() << " " << token[0] << " " << line << std::endl;
       continue;
     }
 
@@ -889,15 +891,16 @@ void printTargetPhrase(const PHRASE *phraseSource, const PHRASE *phraseTarget,
 
 
 void invertAlignment(const PHRASE *phraseSource, const PHRASE *phraseTarget,
-                     const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) {
-// typedef std::vector< std::set<size_t> > ALIGNMENT; 
+                     const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
+{
+// typedef std::vector< std::set<size_t> > ALIGNMENT;
 
   outSourceToTargetAlignment->clear();
   size_t numberOfSourceSymbols = (hierarchicalFlag ? phraseSource->size()-1 : phraseSource->size());
   outSourceToTargetAlignment->resize(numberOfSourceSymbols);
   // add alignment point
   for (size_t targetPosition = 0; targetPosition < inTargetToSourceAlignment->size(); ++targetPosition) {
-    for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin(); 
+    for ( std::set<size_t>::iterator setIter = (inTargetToSourceAlignment->at(targetPosition)).begin();
           setIter != (inTargetToSourceAlignment->at(targetPosition)).end(); ++setIter ) {
       size_t sourcePosition = *setIter;
       outSourceToTargetAlignment->at(sourcePosition).insert(targetPosition);
author	Nicola Bertoldi <bertoldi@fbk.eu>	2014-05-19 17:35:08 +0400
committer	Nicola Bertoldi <bertoldi@fbk.eu>	2014-05-19 17:35:08 +0400
commit	20b3e8929e93c40e0f1ec61b9268330fe59607f9 (patch)
tree	ae6102023df2adfd6bfb287cbc6d89caad5afeac /phrase-extract
parent	2f3cd5e2fe54f0352eee4657ea91e0039073a95a (diff)