beautify

author: Hieu Hoang <hieu@hoang.co.uk> 2013-05-29 21:16:15 +0400
committer: Hieu Hoang <hieu@hoang.co.uk> 2013-05-29 21:16:15 +0400
commit: 6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree: 3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /phrase-extract/consolidate-main.cpp
parent: 59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)
1 files changed, 37 insertions, 34 deletions
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index fd33907de..67a097910 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -42,7 +42,10 @@ bool goodTuringFlag = false;
 bool kneserNeyFlag = false;
 bool logProbFlag = false;
 bool outputNTLengths = false;
-inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; }
+inline float maybeLogProb( float a )
+{
+  return logProbFlag ? log(a) : a;
+}
 
 char line[LINE_MAX_LENGTH];
 void processFiles( char*, char*, char*, char* );
@@ -79,7 +82,7 @@ int main(int argc, char* argv[])
       cerr << "not including the phrase count feature\n";
     } else if (strcmp(argv[i],"--GoodTuring") == 0) {
       goodTuringFlag = true;
-      if (i+1==argc) { 
+      if (i+1==argc) {
         cerr << "ERROR: specify count of count files for Good Turing discounting!\n";
         exit(1);
       }
@@ -87,7 +90,7 @@ int main(int argc, char* argv[])
       cerr << "adjusting phrase translation probabilities with Good Turing discounting\n";
     } else if (strcmp(argv[i],"--KneserNey") == 0) {
       kneserNeyFlag = true;
-      if (i+1==argc) { 
+      if (i+1==argc) {
         cerr << "ERROR: specify count of count files for Kneser Ney discounting!\n";
         exit(1);
       }
@@ -105,8 +108,11 @@ int main(int argc, char* argv[])
       while(i+1<argc && argv[i+1][0]>='0' && argv[i+1][0]<='9') {
         int binCount = atoi(argv[++i]);
         countBin.push_back( binCount );
-        if (prev+1 == binCount) { cerr << " " << binCount; }
-        else { cerr << " " << (prev+1) << "-" << binCount; }
+        if (prev+1 == binCount) {
+          cerr << " " << binCount;
+        } else {
+          cerr << " " << (prev+1) << "-" << binCount;
+        }
         prev = binCount;
       }
       cerr << " " << (prev+1) << "+\n";
@@ -152,7 +158,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
   if (goodTuringFlag) {
     goodTuringDiscount.push_back(0.01); // floor value
     for( size_t i=1; i<countOfCounts.size()-1; i++ ) {
-      goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1))); 
+      goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1)));
       if (goodTuringDiscount[i]>1)
         goodTuringDiscount[i] = 1;
       if (goodTuringDiscount[i]<goodTuringDiscount[i-1])
@@ -253,21 +259,21 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
     float adjustedCountEF_indirect = adjustedCountEF;
 
     // Kneser Ney discounting [Foster et al, 2006]
-   if (kneserNeyFlag) {
-     float D = kneserNey_D3;
-     if (countEF < 2) D = kneserNey_D1;
-     else if (countEF < 3) D = kneserNey_D2;
-     if (D > countEF) D = countEF - 0.01; // sanity constraint
-
-     float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
-     float alpha_F = D * n1_F / countF; // available mass
-     adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
-
-     // for indirect
-     float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
-     float alpha_E = D * n1_E / countE; // available mass
-     adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
-   }
+    if (kneserNeyFlag) {
+      float D = kneserNey_D3;
+      if (countEF < 2) D = kneserNey_D1;
+      else if (countEF < 3) D = kneserNey_D2;
+      if (D > countEF) D = countEF - 0.01; // sanity constraint
+
+      float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
+      float alpha_F = D * n1_F / countF; // available mass
+      adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
+
+      // for indirect
+      float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
+      float alpha_E = D * n1_E / countE; // available mass
+      adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
+    }
 
     // prob indirect
     if (!onlyDirectFlag) {
@@ -296,30 +302,27 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
         if (!foundBin && countEF <= countBin[i]) {
           fileConsolidated << " " << maybeLogProb(2.718);
           foundBin = true;
-        }
-        else {
+        } else {
           fileConsolidated << " " << maybeLogProb(1);
         }
       }
-      fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );   
+      fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
     }
 
     // alignment
     fileConsolidated << " ||| " << itemDirect[3];
 
     // counts, for debugging
-    fileConsolidated << "||| " << countE << " " << countF << " " << countEF; 
+    fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
 
-    if (outputNTLengths)
-    {
+    if (outputNTLengths) {
       fileConsolidated << " ||| " << itemDirect[5];
     }
-    
+
     // count bin feature (as a sparse feature)
-    if (sparseCountBinFeatureFlag || 
-        directSparseScores.compare("") != 0 || 
-        indirectSparseScores.compare("") != 0)
-    {
+    if (sparseCountBinFeatureFlag ||
+        directSparseScores.compare("") != 0 ||
+        indirectSparseScores.compare("") != 0) {
       fileConsolidated << " |||";
       if (directSparseScores.compare("") != 0)
         fileConsolidated << " " << directSparseScores;
@@ -351,13 +354,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
   fileConsolidated.Close();
 }
 
-void breakdownCoreAndSparse( string combined, string &core, string &sparse ) 
+void breakdownCoreAndSparse( string combined, string &core, string &sparse )
 {
   core = "";
   sparse = "";
   vector<string> score = tokenize( combined.c_str() );
   for(size_t i=0; i<score.size(); i++) {
-    if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size()) 
+    if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
       core += " " + score[i];
     else {
       sparse += " " + score[i];
author	Hieu Hoang <hieu@hoang.co.uk>	2013-05-29 21:16:15 +0400
committer	Hieu Hoang <hieu@hoang.co.uk>	2013-05-29 21:16:15 +0400
commit	6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree	3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /phrase-extract/consolidate-main.cpp
parent	59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)