Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> 2008-01-16 23:25:01 +0300
committer: phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> 2008-01-16 23:25:01 +0300
commit: 1044b16522c856c497b5dc4fd4b6064cd00ecec5 (patch)
tree: c98ed638e0cafd3314eb01a45a5ae79a62acb12d /moses-cmd
parent: aa6f605a2df6342bca10bc24e2462ed608ec0e7c (diff)
bug fixes to distinct n-best list, mbr decoding; also changed switches for mbr
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1540 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses-cmd')
-rwxr-xr-x moses-cmd/src/IOStream.cpp 4
-rw-r--r-- moses-cmd/src/Main.cpp 58
-rw-r--r-- moses-cmd/src/mbr.cpp 42
3 files changed, 57 insertions, 47 deletions
diff --git a/moses-cmd/src/IOStream.cpp b/moses-cmd/src/IOStream.cpp
index 158b4fe53..98a0b780b 100755
--- a/moses-cmd/src/IOStream.cpp
+++ b/moses-cmd/src/IOStream.cpp
@@ -190,8 +190,10 @@ void IOStream::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, lo
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++)
{
const Factor *factor = mbrBestHypo[i];
- cout << *factor << " ";
+ if (i>0) cout << " ";
+ cout << factor->GetString();
}
+ cout << endl;
}
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp
index 8dcd2b79b..4d29780b5 100644
--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@@ -136,8 +136,10 @@ int main(int argc, char* argv[])
Manager manager(*source);
manager.ProcessSentence();
- cerr << "DECODER TYPE : " << staticData.GetDecoderType() << endl;
- if (staticData.GetDecoderType() == MAP){
+
+ // pick best translation (maximum a posteriori decoding)
+ cerr << "using MBR ? " << ( staticData.UseMBR() ? "yes" : "no" ) << endl;
+ if (! staticData.UseMBR()) {
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors()
@@ -157,29 +159,31 @@ int main(int argc, char* argv[])
IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
}
}
- else if (staticData.GetDecoderType() == MBR){
- size_t nBestSize = staticData.GetNBestSize();
- cerr << "NBEST SIZE : " << nBestSize << endl;
- assert(nBestSize > 0);
-
- if (nBestSize > 0)
- {
- VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
- TrellisPathList nBestList;
- manager.CalcNBest(nBestSize, nBestList,true);
- std::vector<const Factor*> mbrBestHypo = doMBR(nBestList);
- ioStream->OutputBestHypo(mbrBestHypo, source->GetTranslationId(),
- staticData.GetReportSegmentation(),
- staticData.GetReportAllFactors()
- );
- IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
- }
-
-
+ // consider top candidate translations to find minimum Bayes risk translation
+ else {
+ size_t nBestSize = staticData.GetMBRSize();
+
+ if (nBestSize <= 0)
+ {
+ cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
+ return EXIT_FAILURE;
+ }
+ else
+ {
+ TrellisPathList nBestList;
+ manager.CalcNBest(nBestSize, nBestList,true);
+ cerr << "size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl;
+ IFVERBOSE(2) { PrintUserTime("calculated n-best list for MBR decoding"); }
+ std::vector<const Factor*> mbrBestHypo = doMBR(nBestList);
+ ioStream->OutputBestHypo(mbrBestHypo, source->GetTranslationId(),
+ staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors());
+ IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); }
+ }
}
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
+ if (staticData.IsDetailedTranslationReportingEnabled()) {
+ TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
}
IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); }
@@ -192,12 +196,12 @@ int main(int argc, char* argv[])
IFVERBOSE(1)
PrintUserTime("End.");
- #ifdef HACK_EXIT
+ //#ifdef HACK_EXIT
//This avoids that detructors are called (it can take a long time)
exit(EXIT_SUCCESS);
- #else
- return EXIT_SUCCESS;
- #endif
+ //#else
+ // return EXIT_SUCCESS;
+ //#endif
}
IOStream *GetIODevice(const StaticData &staticData)
diff --git a/moses-cmd/src/mbr.cpp b/moses-cmd/src/mbr.cpp
index 448a08c43..ac7b7697a 100644
--- a/moses-cmd/src/mbr.cpp
+++ b/moses-cmd/src/mbr.cpp
@@ -32,7 +32,6 @@ using namespace std ;
int BLEU_ORDER = 4;
int SMOOTH = 1;
-int DEBUG = 0;
float min_interval = 1e-4;
void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const Factor* >, int > & allngrams)
{
@@ -78,12 +77,6 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in
}
comps[comps_n-1] = sents[ref].size();
- if (DEBUG)
- {
- for ( int i = 0; i < comps_n; i++)
- cerr << "Comp " << i << " : " << comps[i];
- }
-
for (int i=0; i<BLEU_ORDER; i++)
{
if (comps[0] == 0)
@@ -101,7 +94,6 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in
}
vector<const Factor*> doMBR(const TrellisPathList& nBestList){
-// cerr << "Sentence " << sent << " has " << sents.size() << " candidate translations" << endl;
float marginal = 0;
vector<float> joint_prob_vec;
@@ -110,24 +102,36 @@ vector<const Factor*> doMBR(const TrellisPathList& nBestList){
vector< map < vector <const Factor *>, int > > ngram_stats;
TrellisPathList::const_iterator iter;
- TrellisPath* hyp = NULL;
- for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
- {
- const TrellisPath &path = **iter;
- joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()));
+
+ // get max score to prevent underflow
+ float maxScore = -1e20;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
+ {
+ const TrellisPath &path = **iter;
+ float score = StaticData::Instance().GetMBRScale()
+ * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights());
+ if (maxScore < score) maxScore = score;
+ }
+
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
+ {
+ const TrellisPath &path = **iter;
+ joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()) - maxScore);
marginal += joint_prob;
joint_prob_vec.push_back(joint_prob);
- //Cache ngram counts
- map < vector < const Factor *>, int > counts;
+
+ // get words in translation
vector<const Factor*> translation;
GetOutputFactors(path, translation);
- //TO DO
+ // collect n-gram counts
+ map < vector < const Factor *>, int > counts;
extract_ngrams(translation,counts);
+
ngram_stats.push_back(counts);
translations.push_back(translation);
}
-
+
vector<float> mbr_loss;
float bleu, weightedLoss;
float weightedLossCumul = 0;
@@ -135,9 +139,9 @@ vector<const Factor*> doMBR(const TrellisPathList& nBestList){
int minMBRLossIdx = -1;
/* Main MBR computation done here */
- for (int i = 0; i < nBestList.GetSize(); i++){
+ for (unsigned int i = 0; i < nBestList.GetSize(); i++){
weightedLossCumul = 0;
- for (int j = 0; j < nBestList.GetSize(); j++){
+ for (unsigned int j = 0; j < nBestList.GetSize(); j++){
if ( i != j) {
bleu = calculate_score(translations, j, i,ngram_stats );
weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal);