From e2f4d64608fad4e59d7e9bba423d4dca1dcaa670 Mon Sep 17 00:00:00 2001 From: hieuhoang1972 Date: Wed, 27 Feb 2008 13:31:37 +0000 Subject: output wordgraph for german git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1574 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/Manager.cpp | 98 +++++++++++++++++++++++++++++++++++++++++--------- moses/src/Manager.h | 2 +- moses/src/StaticData.h | 4 +-- 3 files changed, 85 insertions(+), 19 deletions(-) (limited to 'moses') diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 692b6ba3f..4c85ce33b 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "TranslationOption.h" #include "LMList.h" #include "TranslationOptionCollection.h" +#include "DummyScoreProducers.h" using namespace std; @@ -480,7 +481,81 @@ void Manager::CalcDecoderStatistics() const } } -void Manager::GetWordGraph() const +void OutputWordGraph(std::ofstream &wordGraphFile, const Hypothesis *hypo, size_t &linkId) +{ + const StaticData &staticData = StaticData::Instance(); + + const Hypothesis *prevHypo = hypo->GetPrevHypo(); + const Phrase *sourcePhrase = hypo->GetSourcePhrase(); + const Phrase &targetPhrase = hypo->GetCurrTargetPhrase(); + + + wordGraphFile << "J=" << linkId++ + << "\tS=" << prevHypo->GetId() + << "\tE=" << hypo->GetId() + << "\ta="; + + // phrase table scores + const std::vector &phraseTables = staticData.GetPhraseDictionaries(); + std::vector::const_iterator iterPhraseTable; + for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable) + { + const PhraseDictionary *phraseTable = *iterPhraseTable; + vector scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable); + + wordGraphFile << scores[0]; + vector::const_iterator iterScore; + for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) + { + wordGraphFile << ", " << *iterScore; + } + } + + // language model scores + wordGraphFile << "\tl="; + const LMList &lmList = staticData.GetAllLM(); + LMList::const_iterator iterLM; + for (iterLM = lmList.begin() ; iterLM != lmList.end() ; ++iterLM) + { + LanguageModel *lm = *iterLM; + vector scores = hypo->GetScoreBreakdown().GetScoresForProducer(lm); + + wordGraphFile << scores[0]; + vector::const_iterator iterScore; + for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) + { + wordGraphFile << ", " << *iterScore; + } + } + + // re-ordering + wordGraphFile << "\tr="; + + wordGraphFile << hypo->GetScoreBreakdown().GetScoreForProducer(staticData.GetDistortionScoreProducer()); + + // lexicalised re-ordering + std::vector &lexOrderings = staticData.GetReorderModels(); + std::vector::const_iterator iterLexOrdering; + for (iterLexOrdering = lexOrderings.begin() ; iterLexOrdering != lexOrderings.end() ; ++iterLexOrdering) + { + LexicalReordering *lexicalReordering = *iterLexOrdering; + vector scores = hypo->GetScoreBreakdown().GetScoresForProducer(lexicalReordering); + + wordGraphFile << scores[0]; + vector::const_iterator iterScore; + for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) + { + wordGraphFile << ", " << *iterScore; + } + } + + // words !! + wordGraphFile << "\tw=" << hypo->GetCurrTargetPhrase(); + + wordGraphFile << endl; +} + +void Manager::GetWordGraph(long translationId) const { const StaticData &staticData = StaticData::Instance(); string fileName = staticData.GetParam("output-word-graph")[0]; @@ -489,6 +564,10 @@ void Manager::GetWordGraph() const std::ofstream wordGraphFile; wordGraphFile.open(fileName.c_str()); + wordGraphFile << "VERSION=1.0" << endl + << "UTTERANCE=" << translationId << endl; + + size_t linkId = 0; size_t stackNo = 1; std::vector < HypothesisStack >::const_iterator iterStack; for (iterStack = ++m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) @@ -499,16 +578,8 @@ void Manager::GetWordGraph() const for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) { const Hypothesis *hypo = *iterHypo; - const Hypothesis *prevHypo = hypo->GetPrevHypo(); - const Phrase *sourcePhrase = hypo->GetSourcePhrase(); - const Phrase &targetPhrase = hypo->GetCurrTargetPhrase(); - + OutputWordGraph(wordGraphFile, hypo, linkId); - wordGraphFile << prevHypo->GetId() << " -> " << hypo->GetId() << ": " - << *sourcePhrase << " " - << targetPhrase << " " - << hypo->GetTranslationOption().GetScoreBreakdown() << endl; - if (outputNBest) { const ArcList *arcList = hypo->GetArcList(); @@ -518,12 +589,7 @@ void Manager::GetWordGraph() const for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) { const Hypothesis *loserHypo = *iterArcList; - const Hypothesis *prevHypo = loserHypo->GetPrevHypo(); - - wordGraphFile << prevHypo->GetId() << " -> " << loserHypo->GetId() << ": " - << *sourcePhrase << " " - << targetPhrase << " " - << loserHypo->GetTranslationOption().GetScoreBreakdown() << endl; + OutputWordGraph(wordGraphFile, loserHypo, linkId); } } } //if (outputNBest) diff --git a/moses/src/Manager.h b/moses/src/Manager.h index 7ae9d9b74..34fadd466 100755 --- a/moses/src/Manager.h +++ b/moses/src/Manager.h @@ -97,7 +97,7 @@ public: const Hypothesis *GetBestHypothesis() const; void CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct=0) const; - void GetWordGraph() const; + void GetWordGraph(long translationId) const; /*** * to be called after processing a sentence (which may consist of more than just calling ProcessSentence() ) diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index 8cb1dc5e7..814d31ef1 100755 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -273,11 +273,11 @@ public: { return m_phraseDictionary.size(); } - std::vector GetPhraseDictionaries() const + const std::vector &GetPhraseDictionaries() const { return m_phraseDictionary; } - std::vector GetGenerationDictionaries() const + const std::vector &GetGenerationDictionaries() const { return m_generationDictionary; } -- cgit v1.2.3