Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2014-08-08 00:02:51 +0400
committer Matthias Huck <huck@i6.informatik.rwth-aachen.de> 2014-08-08 00:02:51 +0400
commit c27cbf55eacd4c72685507b9bab624437d9adb4b (patch)
tree d493c7f3607b9fc78d22b8fd04bdb0f016a9fa2a /phrase-extract/consolidate-main.cpp
parent cda9d1d5aee25b3ba6598742bea44f1da624252b (diff)
source labels: integration into EMS
Diffstat (limited to 'phrase-extract/consolidate-main.cpp')
-rw-r--r-- phrase-extract/consolidate-main.cpp 37
1 files changed, 30 insertions, 7 deletions
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index a2174805c..10697a956 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -28,6 +28,7 @@
#include "tables-core.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
+#include "PropertiesConsolidator.h"
using namespace std;
@@ -37,13 +38,14 @@ bool phraseCountFlag = false;
bool lowCountFlag = false;
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
+bool sourceLabelsFlag = false;
bool logProbFlag = false;
// Returns the score unchanged, or its natural logarithm when the global
// logProbFlag is set.
inline float maybeLogProb( float a )
{
  if (!logProbFlag) {
    return a;
  }
  return log(a);
}
-void processFiles( char*, char*, char*, char* );
+void processFiles( char*, char*, char*, char*, char* );
void loadCountOfCounts( char* );
void breakdownCoreAndSparse( string combined, string &core, string &sparse );
bool getLine( istream &fileP, vector< string > &item );
@@ -57,13 +59,14 @@ int main(int argc, char* argv[])
<< "consolidating direct and indirect rule tables\n";
if (argc < 4) {
- cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] \n";
+ cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--PhraseCount] [--GoodTuring counts-of-counts-file] [--KneserNey counts-of-counts-file] [--LowCountFeature] [--SourceLabels source-labels-file] \n";
exit(1);
}
char* &fileNameDirect = argv[1];
char* &fileNameIndirect = argv[2];
char* &fileNameConsolidated = argv[3];
char* fileNameCountOfCounts;
+ char* fileNameSourceLabelSet;
for(int i=4; i<argc; i++) {
if (strcmp(argv[i],"--Hierarchical") == 0) {
@@ -114,13 +117,21 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
+ } else if (strcmp(argv[i],"--SourceLabels") == 0) {
+ sourceLabelsFlag = true;
+ if (i+1==argc) {
+ cerr << "ERROR: specify source label set file!\n";
+ exit(1);
+ }
+ fileNameSourceLabelSet = argv[++i];
+ cerr << "processing source labels property\n";
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
}
}
- processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts );
+ processFiles( fileNameDirect, fileNameIndirect, fileNameConsolidated, fileNameCountOfCounts, fileNameSourceLabelSet );
}
vector< float > countOfCounts;
@@ -169,7 +180,7 @@ void loadCountOfCounts( char* fileNameCountOfCounts )
if (kneserNey_D3 > 2.9) kneserNey_D3 = 2.9;
}
-void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts )
+void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameConsolidated, char* fileNameCountOfCounts, char* fileNameSourceLabelSet )
{
if (goodTuringFlag || kneserNeyFlag)
loadCountOfCounts( fileNameCountOfCounts );
@@ -198,6 +209,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
exit(1);
}
+ // create properties consolidator
+ // (in case any additional phrase property requires further processing)
+ MosesTraining::PropertiesConsolidator propertiesConsolidator = MosesTraining::PropertiesConsolidator();
+ if (sourceLabelsFlag) {
+ propertiesConsolidator.ActivateSourceLabelsProcessing(fileNameSourceLabelSet);
+ }
+
// loop through all extracted phrase translations
int i=0;
while(true) {
@@ -307,12 +325,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF << " " << countEF;
- // count bin feature (as a sparse feature)
+ // sparse features
fileConsolidated << " |||";
if (directSparseScores.compare("") != 0)
fileConsolidated << " " << directSparseScores;
if (indirectSparseScores.compare("") != 0)
fileConsolidated << " " << indirectSparseScores;
+ // count bin feature (as a sparse feature)
if (sparseCountBinFeatureFlag) {
bool foundBin = false;
for(size_t i=0; i < countBin.size(); i++) {
@@ -332,9 +351,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
}
// arbitrary key-value pairs
- fileConsolidated << " ||| ";
+ fileConsolidated << " |||";
if (itemDirect.size() >= 6) {
- fileConsolidated << itemDirect[5];
+ //if (sourceLabelsFlag) {
+ fileConsolidated << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
+ //} else {
+ // fileConsolidated << itemDirect[5];
+ //}
}
fileConsolidated << endl;