Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieu@hoang.co.uk>, 2013-05-29 21:16:15 +0400
committer: Hieu Hoang <hieu@hoang.co.uk>, 2013-05-29 21:16:15 +0400
commit: 6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree: 3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /phrase-extract/consolidate-main.cpp
parent: 59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)
beautify
Diffstat (limited to 'phrase-extract/consolidate-main.cpp')
-rw-r--r-- phrase-extract/consolidate-main.cpp 71
1 files changed, 37 insertions, 34 deletions
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index fd33907de..67a097910 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -42,7 +42,10 @@ bool goodTuringFlag = false;
bool kneserNeyFlag = false;
bool logProbFlag = false;
bool outputNTLengths = false;
-inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; }
+inline float maybeLogProb( float a )
+{
+ return logProbFlag ? log(a) : a;
+}
char line[LINE_MAX_LENGTH];
void processFiles( char*, char*, char*, char* );
@@ -79,7 +82,7 @@ int main(int argc, char* argv[])
cerr << "not including the phrase count feature\n";
} else if (strcmp(argv[i],"--GoodTuring") == 0) {
goodTuringFlag = true;
- if (i+1==argc) {
+ if (i+1==argc) {
cerr << "ERROR: specify count of count files for Good Turing discounting!\n";
exit(1);
}
@@ -87,7 +90,7 @@ int main(int argc, char* argv[])
cerr << "adjusting phrase translation probabilities with Good Turing discounting\n";
} else if (strcmp(argv[i],"--KneserNey") == 0) {
kneserNeyFlag = true;
- if (i+1==argc) {
+ if (i+1==argc) {
cerr << "ERROR: specify count of count files for Kneser Ney discounting!\n";
exit(1);
}
@@ -105,8 +108,11 @@ int main(int argc, char* argv[])
while(i+1<argc && argv[i+1][0]>='0' && argv[i+1][0]<='9') {
int binCount = atoi(argv[++i]);
countBin.push_back( binCount );
- if (prev+1 == binCount) { cerr << " " << binCount; }
- else { cerr << " " << (prev+1) << "-" << binCount; }
+ if (prev+1 == binCount) {
+ cerr << " " << binCount;
+ } else {
+ cerr << " " << (prev+1) << "-" << binCount;
+ }
prev = binCount;
}
cerr << " " << (prev+1) << "+\n";
@@ -152,7 +158,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
if (goodTuringFlag) {
goodTuringDiscount.push_back(0.01); // floor value
for( size_t i=1; i<countOfCounts.size()-1; i++ ) {
- goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1)));
+ goodTuringDiscount.push_back(((float)i+1)/(float)i*((countOfCounts[i+1]+0.1) / ((float)countOfCounts[i]+0.1)));
if (goodTuringDiscount[i]>1)
goodTuringDiscount[i] = 1;
if (goodTuringDiscount[i]<goodTuringDiscount[i-1])
@@ -253,21 +259,21 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
float adjustedCountEF_indirect = adjustedCountEF;
// Kneser Ney discounting [Foster et al, 2006]
- if (kneserNeyFlag) {
- float D = kneserNey_D3;
- if (countEF < 2) D = kneserNey_D1;
- else if (countEF < 3) D = kneserNey_D2;
- if (D > countEF) D = countEF - 0.01; // sanity constraint
-
- float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
- float alpha_F = D * n1_F / countF; // available mass
- adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
-
- // for indirect
- float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
- float alpha_E = D * n1_E / countE; // available mass
- adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
- }
+ if (kneserNeyFlag) {
+ float D = kneserNey_D3;
+ if (countEF < 2) D = kneserNey_D1;
+ else if (countEF < 3) D = kneserNey_D2;
+ if (D > countEF) D = countEF - 0.01; // sanity constraint
+
+ float p_b_E = n1_E / totalCount; // target phrase prob based on distinct
+ float alpha_F = D * n1_F / countF; // available mass
+ adjustedCountEF = countEF - D + countF * alpha_F * p_b_E;
+
+ // for indirect
+ float p_b_F = n1_F / totalCount; // target phrase prob based on distinct
+ float alpha_E = D * n1_E / countE; // available mass
+ adjustedCountEF_indirect = countEF - D + countE * alpha_E * p_b_F;
+ }
// prob indirect
if (!onlyDirectFlag) {
@@ -296,30 +302,27 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
if (!foundBin && countEF <= countBin[i]) {
fileConsolidated << " " << maybeLogProb(2.718);
foundBin = true;
- }
- else {
+ } else {
fileConsolidated << " " << maybeLogProb(1);
}
}
- fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
+ fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
}
// alignment
fileConsolidated << " ||| " << itemDirect[3];
// counts, for debugging
- fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
+ fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
- if (outputNTLengths)
- {
+ if (outputNTLengths) {
fileConsolidated << " ||| " << itemDirect[5];
}
-
+
// count bin feature (as a sparse feature)
- if (sparseCountBinFeatureFlag ||
- directSparseScores.compare("") != 0 ||
- indirectSparseScores.compare("") != 0)
- {
+ if (sparseCountBinFeatureFlag ||
+ directSparseScores.compare("") != 0 ||
+ indirectSparseScores.compare("") != 0) {
fileConsolidated << " |||";
if (directSparseScores.compare("") != 0)
fileConsolidated << " " << directSparseScores;
@@ -351,13 +354,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated.Close();
}
-void breakdownCoreAndSparse( string combined, string &core, string &sparse )
+void breakdownCoreAndSparse( string combined, string &core, string &sparse )
{
core = "";
sparse = "";
vector<string> score = tokenize( combined.c_str() );
for(size_t i=0; i<score.size(); i++) {
- if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
+ if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
core += " " + score[i];
else {
sparse += " " + score[i];