Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2011-09-16 21:13:34 +0400
committer hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2011-09-16 21:13:34 +0400
commit 4313e335b534102f18c3d6308b60caa1ffdcfc98 (patch)
tree 0198d7a3c857ff778d0dca2e87729fee1c3befa8 /scripts
parent 4d5b17f44432b0ffba35c630bc3ed58f3bb6bf2f (diff)
print out span widths of non-terms. Extra argument --OutputNTLengths
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4230 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/training/phrase-extract/consolidate.cpp 10
-rw-r--r-- scripts/training/phrase-extract/score.cpp 19
2 files changed, 20 insertions, 9 deletions
diff --git a/scripts/training/phrase-extract/consolidate.cpp b/scripts/training/phrase-extract/consolidate.cpp
index 8d31a1d27..cb4b96659 100644
--- a/scripts/training/phrase-extract/consolidate.cpp
+++ b/scripts/training/phrase-extract/consolidate.cpp
@@ -40,6 +40,7 @@ bool lowCountFlag = false;
bool goodTuringFlag = false;
bool kneserNeyFlag = false;
bool logProbFlag = false;
+bool outputNTLengths = false;
inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; }
char line[LINE_MAX_LENGTH];
@@ -54,7 +55,7 @@ int main(int argc, char* argv[])
<< "consolidating direct and indirect rule tables\n";
if (argc < 4) {
- cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect]\n";
+ cerr << "syntax: consolidate phrase-table.direct phrase-table.indirect phrase-table.consolidated [--Hierarchical] [--OnlyDirect] [--OutputNTLengths] \n";
exit(1);
}
char* &fileNameDirect = argv[1];
@@ -94,6 +95,8 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
+ } else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
+ outputNTLengths = true;
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
@@ -271,6 +274,11 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// counts, for debugging
fileConsolidated << "||| " << countE << " " << countF; // << " " << countEF;
+ if (outputNTLengths)
+ {
+ fileConsolidated << " ||| " << itemDirect[5];
+ }
+
fileConsolidated << endl;
}
fileDirect.Close();
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index 00f6dfcf6..5cf71bf91 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -529,15 +529,18 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
// nt lengths
if (outputNTLengths)
{
- map<size_t, map<size_t, float> > sourceProb, targetProb;
- // 1st sourcePos, 2nd = length, 3rd = prob
-
- calcNTLengthProb(phrasePair, sourceProb, targetProb);
-
phraseTableFile << " ||| ";
- outputNTLengthProbs(phraseTableFile, sourceProb, "S");
- outputNTLengthProbs(phraseTableFile, targetProb, "T");
-
+
+ if (!inverseFlag)
+ {
+ map<size_t, map<size_t, float> > sourceProb, targetProb;
+ // 1st sourcePos, 2nd = length, 3rd = prob
+
+ calcNTLengthProb(phrasePair, sourceProb, targetProb);
+
+ outputNTLengthProbs(phraseTableFile, sourceProb, "S");
+ outputNTLengthProbs(phraseTableFile, targetProb, "T");
+ }
}
phraseTableFile << endl;