Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Koehn <phi@jhu.edu> 2015-02-04 12:18:09 +0300
committer: Philipp Koehn <phi@jhu.edu> 2015-02-04 12:18:09 +0300
commit: f69c1dab02e72f5e73c2099d126cd4be86c9224d (patch)
tree: c69c66cb83a427c82e5ee802ec0a9fb8cd0da90c /phrase-extract/consolidate-direct-main.cpp
parent: 6b9da6c585c8f2a8d34b7fa3775a12832470d705 (diff)
more efficient default recaser training
Diffstat (limited to 'phrase-extract/consolidate-direct-main.cpp')
-rw-r--r-- phrase-extract/consolidate-direct-main.cpp | 26
1 files changed, 14 insertions, 12 deletions
diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp
index 576cdd568..423a3909b 100644
--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@@ -28,6 +28,8 @@
using namespace std;
+std::vector<std::string> tokenize( const char [] );
+
vector< string > splitLine(const char *line)
{
vector< string > item;
@@ -58,7 +60,7 @@ bool getLine( istream &fileP, vector< string > &item )
string line;
if (getline(fileP, line)) {
item = splitLine(line.c_str());
- return false;
+ return true;
} else {
return false;
}
@@ -107,17 +109,17 @@ int main(int argc, char* argv[])
if (! getLine(fileDirectP, itemDirect ))
break;
- (*fileConsolidated) << itemDirect[0] << " ||| " << itemDirect[1] << " ||| ";
-
- // output alignment and probabilities
- (*fileConsolidated) << itemDirect[2] // prob direct
- << " 2.718" // phrase count feature
- << " ||| " << itemDirect[3]; // alignment
-
- // counts
- (*fileConsolidated) << "||| 0 " << itemDirect[4]; // indirect
- (*fileConsolidated) << endl;
-
+ vector< string > count = tokenize( itemDirect[4].c_str() );
+ float countEF = atof(count[0].c_str());
+ float countF = atof(count[1].c_str());
+ float prob = countF/countEF;
+
+ (*fileConsolidated) << itemDirect[0] << " ||| " // source
+ << itemDirect[1] << " ||| " // target
+ << prob << " ||| " // prob
+ << itemDirect[2] << "||| " // alignment
+ << itemDirect[4] << " " << countEF // counts
+ << " ||| " << endl;
}
fileConsolidated->flush();