Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> 2012-04-30 08:30:29 +0400
committer: Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> 2012-04-30 08:30:29 +0400
commit: 1bf5f37197965244a6dad9324ed296e409a8c36e (patch)
tree: efc0875d4215c3266a2b8f586b97b24ce9573f3f /scripts
parent: ef552fe91a99fd4838d083f6d0fcecbe429db2be (diff)
sort hierarchical alignments
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/training/phrase-extract/score.cpp | 19
1 files changed, 16 insertions, 3 deletions
diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp
index 8f1e351a1..e04de4f93 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/scripts/training/phrase-extract/score.cpp
@@ -26,6 +26,7 @@
#include <assert.h>
#include <cstring>
#include <set>
+#include <algorithm>
#include "SafeGetline.h"
#include "tables-core.h"
@@ -500,7 +501,8 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
// always output alignment if hiero style, but only for non-terms
// (eh: output all alignments, needed for some feature functions)
assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
- for(int j = 0; j < phraseT.size() - 1; j++) {
+ std::vector<std::string> alignment;
+ for(int j = 0; j < phraseT.size() - 1; j++) {
if (isNonTerminal(vcbT.getWord( phraseT[j] ))) {
if (bestAlignment->alignedToT[ j ].size() != 1) {
cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << endl;
@@ -508,16 +510,27 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount,
assert(bestAlignment->alignedToT[ j ].size() == 1);
}
int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
- phraseTableFile << sourcePos << "-" << j << " ";
+ //phraseTableFile << sourcePos << "-" << j << " ";
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
}
else {
set<size_t>::iterator setIter;
for(setIter = (bestAlignment->alignedToT[j]).begin(); setIter != (bestAlignment->alignedToT[j]).end(); setIter++) {
int sourcePos = *setIter;
- phraseTableFile << sourcePos << "-" << j << " ";
+ //phraseTableFile << sourcePos << "-" << j << " ";
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
}
}
}
+ // now print all alignments, sorted by source index
+ sort(alignment.begin(), alignment.end());
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ phraseTableFile << alignment[i] << " ";
+ }
} else if (wordAlignmentFlag) {
// alignment info in pb model
for(int j=0; j<bestAlignment->alignedToT.size(); j++) {