diff options
author | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2011-02-24 15:40:21 +0300 |
---|---|---|
committer | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2011-02-24 15:40:21 +0300 |
commit | a59ad11b58acf5dd64ec6f5990bb4ef5b74ebf28 (patch) | |
tree | c341127a5d384ac0812c2d00842b7b44cba4a17a /moses-chart-cmd/src | |
parent | 67dd80fb7bf71f36f2b29b4ba7a39e3ad35e5f81 (diff) |
run beautify.perl. Consistent formatting for .h & .cpp files
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3898 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses-chart-cmd/src')
-rw-r--r-- | moses-chart-cmd/src/IOWrapper.cpp | 562 | ||||
-rw-r--r-- | moses-chart-cmd/src/IOWrapper.h | 85 | ||||
-rw-r--r-- | moses-chart-cmd/src/Main.cpp | 284 | ||||
-rw-r--r-- | moses-chart-cmd/src/Main.h | 32 | ||||
-rw-r--r-- | moses-chart-cmd/src/TranslationAnalysis.cpp | 45 | ||||
-rw-r--r-- | moses-chart-cmd/src/TranslationAnalysis.h | 2 | ||||
-rw-r--r-- | moses-chart-cmd/src/mbr.cpp | 134 |
7 files changed, 555 insertions, 589 deletions
diff --git a/moses-chart-cmd/src/IOWrapper.cpp b/moses-chart-cmd/src/IOWrapper.cpp index 438b4fe8c..4a0418a1d 100644 --- a/moses-chart-cmd/src/IOWrapper.cpp +++ b/moses-chart-cmd/src/IOWrapper.cpp @@ -49,70 +49,60 @@ using namespace Moses; using namespace MosesChart; IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder - , const std::vector<FactorType> &outputFactorOrder - , const FactorMask &inputFactorUsed - , size_t nBestSize - , const std::string &nBestFilePath - , const std::string &inputFilePath) -:m_inputFactorOrder(inputFactorOrder) -,m_outputFactorOrder(outputFactorOrder) -,m_inputFactorUsed(inputFactorUsed) -,m_nBestStream(NULL) -,m_outputSearchGraphStream(NULL) -,m_detailedTranslationReportingStream(NULL) -,m_inputFilePath(inputFilePath) -,m_detailOutputCollector(NULL) -,m_nBestOutputCollector(NULL) -,m_searchGraphOutputCollector(NULL) -,m_singleBestOutputCollector(NULL) + , const std::vector<FactorType> &outputFactorOrder + , const FactorMask &inputFactorUsed + , size_t nBestSize + , const std::string &nBestFilePath + , const std::string &inputFilePath) + :m_inputFactorOrder(inputFactorOrder) + ,m_outputFactorOrder(outputFactorOrder) + ,m_inputFactorUsed(inputFactorUsed) + ,m_nBestStream(NULL) + ,m_outputSearchGraphStream(NULL) + ,m_detailedTranslationReportingStream(NULL) + ,m_inputFilePath(inputFilePath) + ,m_detailOutputCollector(NULL) + ,m_nBestOutputCollector(NULL) + ,m_searchGraphOutputCollector(NULL) + ,m_singleBestOutputCollector(NULL) { - const StaticData &staticData = StaticData::Instance(); + const StaticData &staticData = StaticData::Instance(); - if (m_inputFilePath.empty()) - { + if (m_inputFilePath.empty()) { m_inputStream = &std::cin; - } - else - { + } else { m_inputStream = new InputFileStream(inputFilePath); } - m_surpressSingleBestOutput = false; + m_surpressSingleBestOutput = false; - if (nBestSize > 0) - { - if (nBestFilePath == "-") - { - m_nBestStream = &std::cout; - m_surpressSingleBestOutput = true; - } - else - { - std::ofstream *nBestFile = new std::ofstream; - m_nBestStream = nBestFile; - nBestFile->open(nBestFilePath.c_str()); - } + if (nBestSize > 0) { + if (nBestFilePath == "-") { + m_nBestStream = &std::cout; + m_surpressSingleBestOutput = true; + } else { + std::ofstream *nBestFile = new std::ofstream; + m_nBestStream = nBestFile; + nBestFile->open(nBestFilePath.c_str()); + } m_nBestOutputCollector = new Moses::OutputCollector(m_nBestStream); - } + } - if (!m_surpressSingleBestOutput) - { + if (!m_surpressSingleBestOutput) { m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout); } - // search graph output - if (staticData.GetOutputSearchGraph()) - { - string fileName = staticData.GetParam("output-search-graph")[0]; - std::ofstream *file = new std::ofstream; - m_outputSearchGraphStream = file; - file->open(fileName.c_str()); + // search graph output + if (staticData.GetOutputSearchGraph()) { + string fileName = staticData.GetParam("output-search-graph")[0]; + std::ofstream *file = new std::ofstream; + m_outputSearchGraphStream = file; + file->open(fileName.c_str()); m_searchGraphOutputCollector = new Moses::OutputCollector(m_outputSearchGraphStream); - } + } // detailed translation reporting - if (staticData.IsDetailedTranslationReportingEnabled()) - { + if (staticData.IsDetailedTranslationReportingEnabled()) { const std::string &path = staticData.GetDetailedTranslationReportingFilePath(); m_detailedTranslationReportingStream = new std::ofstream(path.c_str()); m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream); @@ -121,15 +111,14 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder IOWrapper::~IOWrapper() { - if (!m_inputFilePath.empty()) - { + if (!m_inputFilePath.empty()) { delete m_inputStream; } - if (!m_surpressSingleBestOutput) - { // outputting n-best to file, rather than stdout. need to close file and delete obj - delete m_nBestStream; - } - delete m_outputSearchGraphStream; + if (!m_surpressSingleBestOutput) { + // outputting n-best to file, rather than stdout. need to close file and delete obj + delete m_nBestStream; + } + delete m_outputSearchGraphStream; delete m_detailedTranslationReportingStream; delete m_detailOutputCollector; delete m_nBestOutputCollector; @@ -139,18 +128,16 @@ IOWrapper::~IOWrapper() InputType*IOWrapper::GetInput(InputType* inputType) { - if(inputType->Read(*m_inputStream, m_inputFactorOrder)) - { - if (long x = inputType->GetTranslationId()) { if (x>=m_translationId) m_translationId = x+1; } - else inputType->SetTranslationId(m_translationId++); - - return inputType; - } - else - { - delete inputType; - return NULL; - } + if(inputType->Read(*m_inputStream, m_inputFactorOrder)) { + if (long x = inputType->GetTranslationId()) { + if (x>=m_translationId) m_translationId = x+1; + } else inputType->SetTranslationId(m_translationId++); + + return inputType; + } else { + delete inputType; + return NULL; + } } /*** @@ -158,69 +145,60 @@ InputType*IOWrapper::GetInput(InputType* inputType) */ void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors) { - assert(outputFactorOrder.size() > 0); - if (reportAllFactors == true) - { - out << phrase; - } - else - { - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) - { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - out << *factor; - - for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) - { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); - out << "|" << *factor; - } - out << " "; - } - } + assert(outputFactorOrder.size() > 0); + if (reportAllFactors == true) { + out << phrase; + } else { + size_t size = phrase.GetSize(); + for (size_t pos = 0 ; pos < size ; pos++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); + out << *factor; + + for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); + out << "|" << *factor; + } + out << " "; + } + } } void OutputSurface(std::ostream &out, const MosesChart::Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder - ,bool reportSegmentation, bool reportAllFactors) + ,bool reportSegmentation, bool reportAllFactors) { - if ( hypo != NULL) - { - //OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors); + if ( hypo != NULL) { + //OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors); - const vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); + const vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); - vector<const MosesChart::Hypothesis*>::const_iterator iter; - for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) - { - const MosesChart::Hypothesis *prevHypo = *iter; + vector<const MosesChart::Hypothesis*>::const_iterator iter; + for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) { + const MosesChart::Hypothesis *prevHypo = *iter; - OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors); - } - } + OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors); + } + } } void IOWrapper::Backtrack(const MosesChart::Hypothesis *hypo) { - const vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); + const vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); - vector<const MosesChart::Hypothesis*>::const_iterator iter; - for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) - { - const MosesChart::Hypothesis *prevHypo = *iter; + vector<const MosesChart::Hypothesis*>::const_iterator iter; + for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) { + const MosesChart::Hypothesis *prevHypo = *iter; - VERBOSE(3,prevHypo->GetId() << " <= "); - Backtrack(prevHypo); - } + VERBOSE(3,prevHypo->GetId() << " <= "); + Backtrack(prevHypo); + } } void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors) { - for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) - { - const Factor *factor = mbrBestHypo[i]; - cout << *factor << " "; - } + for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) { + const Factor *factor = mbrBestHypo[i]; + cout << *factor << " "; + } } /* void OutputInput(std::vector<const Phrase*>& map, const MosesChart::Hypothesis* hypo) @@ -243,29 +221,27 @@ void OutputInput(std::ostream& os, const MosesChart::Hypothesis* hypo) */ void OutputTranslationOptions(std::ostream &out, const MosesChart::Hypothesis *hypo, long translationId) -{ // recursive - if (hypo != NULL) - { - out << "Trans Opt " << translationId << " " << hypo->GetCurrSourceRange() << ": " << hypo->GetTranslationOption() - << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown() - << endl; - } - - const std::vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); - std::vector<const MosesChart::Hypothesis*>::const_iterator iter; - for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) - { - const MosesChart::Hypothesis *prevHypo = *iter; - OutputTranslationOptions(out, prevHypo, translationId); - } +{ + // recursive + if (hypo != NULL) { + out << "Trans Opt " << translationId << " " << hypo->GetCurrSourceRange() << ": " << hypo->GetTranslationOption() + << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown() + << endl; + } + + const std::vector<const MosesChart::Hypothesis*> &prevHypos = hypo->GetPrevHypos(); + std::vector<const MosesChart::Hypothesis*>::const_iterator iter; + for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) { + const MosesChart::Hypothesis *prevHypo = *iter; + OutputTranslationOptions(out, prevHypo, translationId); + } } void IOWrapper::OutputDetailedTranslationReport( - const MosesChart::Hypothesis *hypo, - long translationId) + const MosesChart::Hypothesis *hypo, + long translationId) { - if (hypo == NULL) - { + if (hypo == NULL) { return; } std::ostringstream out; @@ -278,51 +254,44 @@ void IOWrapper::OutputBestHypo(const MosesChart::Hypothesis *hypo, long translat { std::ostringstream out; IOWrapper::FixPrecision(out); - if (hypo != NULL) - { - VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl); - VERBOSE(3,"Best path: "); - Backtrack(hypo); - VERBOSE(3,"0" << std::endl); - - if (StaticData::Instance().GetOutputHypoScore()) - { - out << hypo->GetTotalScore() << " " - << MosesChart::Hypothesis::GetHypoCount() << " "; - } - - if (!m_surpressSingleBestOutput) - { - if (StaticData::Instance().IsPathRecoveryEnabled()) { - out << "||| "; - } - Phrase outPhrase(Output); - hypo->CreateOutputPhrase(outPhrase); - - // delete 1st & last - assert(outPhrase.GetSize() >= 2); - outPhrase.RemoveWord(0); - outPhrase.RemoveWord(outPhrase.GetSize() - 1); - - const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - string output = outPhrase.GetStringRep(outputFactorOrder); - out << output << endl; - } - } - else - { - VERBOSE(1, "NO BEST TRANSLATION" << endl); - - if (StaticData::Instance().GetOutputHypoScore()) - { - out << "0 "; - } - - out << endl; - } + if (hypo != NULL) { + VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl); + VERBOSE(3,"Best path: "); + Backtrack(hypo); + VERBOSE(3,"0" << std::endl); + + if (StaticData::Instance().GetOutputHypoScore()) { + out << hypo->GetTotalScore() << " " + << MosesChart::Hypothesis::GetHypoCount() << " "; + } + + if (!m_surpressSingleBestOutput) { + if (StaticData::Instance().IsPathRecoveryEnabled()) { + out << "||| "; + } + Phrase outPhrase(Output); + hypo->CreateOutputPhrase(outPhrase); - if (m_singleBestOutputCollector) - { + // delete 1st & last + assert(outPhrase.GetSize() >= 2); + outPhrase.RemoveWord(0); + outPhrase.RemoveWord(outPhrase.GetSize() - 1); + + const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); + string output = outPhrase.GetStringRep(outputFactorOrder); + out << output << endl; + } + } else { + VERBOSE(1, "NO BEST TRANSLATION" << endl); + + if (StaticData::Instance().GetOutputHypoScore()) { + out << "0 "; + } + + out << endl; + } + + if (m_singleBestOutputCollector) { m_singleBestOutputCollector->Write(translationId, out.str()); } } @@ -332,162 +301,155 @@ void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, co std::ostringstream out; // Check if we're writing to std::cout. - if (m_surpressSingleBestOutput) - { + if (m_surpressSingleBestOutput) { // Set precision only if we're writing the n-best list to cout. This is to // preserve existing behaviour, but should probably be done either way. IOWrapper::FixPrecision(out); // The output from -output-hypo-score is always written to std::cout. - if (StaticData::Instance().GetOutputHypoScore()) - { - if (bestHypo != NULL) - { + if (StaticData::Instance().GetOutputHypoScore()) { + if (bestHypo != NULL) { out << bestHypo->GetTotalScore() << " " << MosesChart::Hypothesis::GetHypoCount() << " "; - } - else - { + } else { out << "0 "; } } } - bool labeledOutput = StaticData::Instance().IsLabeledNBestList(); - //bool includeAlignment = StaticData::Instance().NBestIncludesAlignment(); + bool labeledOutput = StaticData::Instance().IsLabeledNBestList(); + //bool includeAlignment = StaticData::Instance().NBestIncludesAlignment(); - MosesChart::TrellisPathList::const_iterator iter; - for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) - { - const MosesChart::TrellisPath &path = **iter; - //cerr << path << endl << endl; + MosesChart::TrellisPathList::const_iterator iter; + for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { + const MosesChart::TrellisPath &path = **iter; + //cerr << path << endl << endl; - Moses::Phrase outputPhrase = path.GetOutputPhrase(); + Moses::Phrase outputPhrase = path.GetOutputPhrase(); - // delete 1st & last - assert(outputPhrase.GetSize() >= 2); - outputPhrase.RemoveWord(0); - outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); + // delete 1st & last + assert(outputPhrase.GetSize() >= 2); + outputPhrase.RemoveWord(0); + outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); - // print the surface factor of the translation - out << translationId << " ||| "; - OutputSurface(out, outputPhrase, m_outputFactorOrder, false); - out << " ||| "; + // print the surface factor of the translation + out << translationId << " ||| "; + OutputSurface(out, outputPhrase, m_outputFactorOrder, false); + out << " ||| "; - // print the scores in a hardwired order + // print the scores in a hardwired order // before each model type, the corresponding command-line-like name must be emitted // MERT script relies on this - // lm - const LMList& lml = system->GetLanguageModels(); + // lm + const LMList& lml = system->GetLanguageModels(); if (lml.size() > 0) { - if (labeledOutput) - out << "lm: "; - LMList::const_iterator lmi = lml.begin(); - for (; lmi != lml.end(); ++lmi) { - out << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " "; - } + if (labeledOutput) + out << "lm: "; + LMList::const_iterator lmi = lml.begin(); + for (; lmi != lml.end(); ++lmi) { + out << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " "; + } } - // translation components - if (StaticData::Instance().GetInputType()==SentenceInput){ - // translation components for text input - vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries(); - if (pds.size() > 0) { - if (labeledOutput) - out << "tm: "; - vector<PhraseDictionaryFeature*>::iterator iter; - for (iter = pds.begin(); iter != pds.end(); ++iter) { - vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); - for (size_t j = 0; j<scores.size(); ++j) - out << scores[j] << " "; - } - } - } - else{ - // translation components for Confusion Network input - // first translation component has GetNumInputScores() scores from the input Confusion Network - // at the beginning of the vector - vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries(); - if (pds.size() > 0) { - vector<PhraseDictionaryFeature*>::iterator iter; - - iter = pds.begin(); - vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); - - size_t pd_numinputscore = (*iter)->GetNumInputScores(); - - if (pd_numinputscore){ - - if (labeledOutput) - out << "I: "; - - for (size_t j = 0; j < pd_numinputscore; ++j) - out << scores[j] << " "; - } - - - for (iter = pds.begin() ; iter != pds.end(); ++iter) { - vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); - - size_t pd_numinputscore = (*iter)->GetNumInputScores(); - - if (iter == pds.begin() && labeledOutput) - out << "tm: "; - for (size_t j = pd_numinputscore; j < scores.size() ; ++j) - out << scores[j] << " "; - } - } - } - - - - // word penalty - if (labeledOutput) - out << "w: "; - out << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer()) << " "; - - // generation - const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries(); + // translation components + if (StaticData::Instance().GetInputType()==SentenceInput) { + // translation components for text input + vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries(); + if (pds.size() > 0) { + if (labeledOutput) + out << "tm: "; + vector<PhraseDictionaryFeature*>::iterator iter; + for (iter = pds.begin(); iter != pds.end(); ++iter) { + vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); + for (size_t j = 0; j<scores.size(); ++j) + out << scores[j] << " "; + } + } + } else { + // translation components for Confusion Network input + // first translation component has GetNumInputScores() scores from the input Confusion Network + // at the beginning of the vector + vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries(); + if (pds.size() > 0) { + vector<PhraseDictionaryFeature*>::iterator iter; + + iter = pds.begin(); + vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); + + size_t pd_numinputscore = (*iter)->GetNumInputScores(); + + if (pd_numinputscore) { + + if (labeledOutput) + out << "I: "; + + for (size_t j = 0; j < pd_numinputscore; ++j) + out << scores[j] << " "; + } + + + for (iter = pds.begin() ; iter != pds.end(); ++iter) { + vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); + + size_t pd_numinputscore = (*iter)->GetNumInputScores(); + + if (iter == pds.begin() && labeledOutput) + out << "tm: "; + for (size_t j = pd_numinputscore; j < scores.size() ; ++j) + out << scores[j] << " "; + } + } + } + + + + // word penalty + if (labeledOutput) + out << "w: "; + out << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer()) << " "; + + // generation + const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries(); if (gds.size() > 0) { - if (labeledOutput) - out << "g: "; - vector<GenerationDictionary*>::const_iterator iter; - for (iter = gds.begin(); iter != gds.end(); ++iter) { - vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); - for (size_t j = 0; j<scores.size(); j++) { - out << scores[j] << " "; - } - } + if (labeledOutput) + out << "g: "; + vector<GenerationDictionary*>::const_iterator iter; + for (iter = gds.begin(); iter != gds.end(); ++iter) { + vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter); + for (size_t j = 0; j<scores.size(); j++) { + out << scores[j] << " "; + } + } } - // total + // total out << "||| " << path.GetTotalScore(); - /* + /* if (includeAlignment) { - *m_nBestStream << " |||"; - for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) - { - const MosesChart::Hypothesis &edge = *edges[currEdge]; - WordsRange sourceRange = edge.GetCurrSourceWordsRange(); - WordsRange targetRange = edge.GetCurrTargetWordsRange(); - *m_nBestStream << " " << sourceRange.GetStartPos(); - if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) { - *m_nBestStream << "-" << sourceRange.GetEndPos(); - } - *m_nBestStream << "=" << targetRange.GetStartPos(); - if (targetRange.GetStartPos() < targetRange.GetEndPos()) { - *m_nBestStream << "-" << targetRange.GetEndPos(); - } - } + *m_nBestStream << " |||"; + for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) + { + const MosesChart::Hypothesis &edge = *edges[currEdge]; + WordsRange sourceRange = edge.GetCurrSourceWordsRange(); + WordsRange targetRange = edge.GetCurrTargetWordsRange(); + *m_nBestStream << " " << sourceRange.GetStartPos(); + if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) { + *m_nBestStream << "-" << sourceRange.GetEndPos(); + } + *m_nBestStream << "=" << targetRange.GetStartPos(); + if (targetRange.GetStartPos() < targetRange.GetEndPos()) { + *m_nBestStream << "-" << targetRange.GetEndPos(); + } + } } - */ + */ out << endl; - } + } - out <<std::flush; + out <<std::flush; assert(m_nBestOutputCollector); m_nBestOutputCollector->Write(translationId, out.str()); diff --git a/moses-chart-cmd/src/IOWrapper.h b/moses-chart-cmd/src/IOWrapper.h index 8dedfc4a6..fca183436 100644 --- a/moses-chart-cmd/src/IOWrapper.h +++ b/moses-chart-cmd/src/IOWrapper.h @@ -5,28 +5,28 @@ Moses - factored phrase-based language decoder Copyright (c) 2006 University of Edinburgh All rights reserved. -Redistribution and use in source and binary forms, with or without modification, +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the University of Edinburgh nor the names of its contributors - may be used to endorse or promote products derived from this software + * Neither the name of the University of Edinburgh nor the names of its contributors + may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ @@ -58,41 +58,44 @@ class IOWrapper { protected: - long m_translationId; + long m_translationId; - const std::vector<Moses::FactorType> &m_inputFactorOrder; - const std::vector<Moses::FactorType> &m_outputFactorOrder; - const Moses::FactorMask &m_inputFactorUsed; - std::ostream *m_nBestStream, *m_outputSearchGraphStream; + const std::vector<Moses::FactorType> &m_inputFactorOrder; + const std::vector<Moses::FactorType> &m_outputFactorOrder; + const Moses::FactorMask &m_inputFactorUsed; + std::ostream *m_nBestStream, *m_outputSearchGraphStream; std::ostream *m_detailedTranslationReportingStream; - std::string m_inputFilePath; - std::istream *m_inputStream; - bool m_surpressSingleBestOutput; + std::string m_inputFilePath; + std::istream *m_inputStream; + bool m_surpressSingleBestOutput; Moses::OutputCollector *m_detailOutputCollector; Moses::OutputCollector *m_nBestOutputCollector; Moses::OutputCollector *m_searchGraphOutputCollector; Moses::OutputCollector *m_singleBestOutputCollector; public: - IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder - , const std::vector<Moses::FactorType> &outputFactorOrder - , const Moses::FactorMask &inputFactorUsed - , size_t nBestSize - , const std::string &nBestFilePath - , const std::string &inputFilePath=""); - ~IOWrapper(); - - Moses::InputType* GetInput(Moses::InputType *inputType); - void OutputBestHypo(const MosesChart::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors); - void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors); - void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const MosesChart::Hypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId); + IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder + , const std::vector<Moses::FactorType> &outputFactorOrder + , const Moses::FactorMask &inputFactorUsed + , size_t nBestSize + , const std::string &nBestFilePath + , const std::string &inputFilePath=""); + ~IOWrapper(); + + Moses::InputType* GetInput(Moses::InputType *inputType); + void OutputBestHypo(const MosesChart::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors); + void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors); + void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const MosesChart::Hypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId); void OutputDetailedTranslationReport(const MosesChart::Hypothesis *hypo, long translationId); - void Backtrack(const MosesChart::Hypothesis *hypo); + void Backtrack(const MosesChart::Hypothesis *hypo); - void ResetTranslationId() { m_translationId = 0; } + void ResetTranslationId() { + m_translationId = 0; + } + + Moses::OutputCollector *GetSearchGraphOutputCollector() { + return m_searchGraphOutputCollector; + } - Moses::OutputCollector *GetSearchGraphOutputCollector() - { return m_searchGraphOutputCollector; } - static void FixPrecision(std::ostream &, size_t size=3); }; diff --git a/moses-chart-cmd/src/Main.cpp b/moses-chart-cmd/src/Main.cpp index 27289fe0c..25da8b5ba 100644 --- a/moses-chart-cmd/src/Main.cpp +++ b/moses-chart-cmd/src/Main.cpp @@ -5,28 +5,28 @@ Moses - factored phrase-based language decoder Copyright (c) 2006 University of Edinburgh All rights reserved. -Redistribution and use in source and binary forms, with or without modification, +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the University of Edinburgh nor the names of its contributors - may be used to endorse or promote products derived from this software + * Neither the name of the University of Edinburgh nor the names of its contributors + may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ @@ -74,16 +74,17 @@ using namespace MosesChart; **/ class TranslationTask : public Task { - public: +public: TranslationTask(InputType *source, IOWrapper &ioWrapper) : m_source(source) , m_ioWrapper(ioWrapper) {} - ~TranslationTask() { delete m_source; } + ~TranslationTask() { + delete m_source; + } - void Run() - { + void Run() { const StaticData &staticData = StaticData::Instance(); const TranslationSystem &system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT); const size_t lineNumber = m_source->GetTranslationId(); @@ -100,26 +101,27 @@ class TranslationTask : public Task m_ioWrapper.OutputBestHypo(bestHypo, lineNumber, staticData.GetReportSegmentation(), staticData.GetReportAllFactors()); - IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); } + IFVERBOSE(2) { + PrintUserTime("Best Hypothesis Generation Time:"); + } - if (staticData.IsDetailedTranslationReportingEnabled()) - { + if (staticData.IsDetailedTranslationReportingEnabled()) { m_ioWrapper.OutputDetailedTranslationReport(bestHypo, lineNumber); } - + // n-best size_t nBestSize = staticData.GetNBestSize(); - if (nBestSize > 0) - { + if (nBestSize > 0) { VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl); MosesChart::TrellisPathList nBestList; manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest()); m_ioWrapper.OutputNBestList(nBestList, bestHypo, &system, lineNumber); - IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); } + IFVERBOSE(2) { + PrintUserTime("N-Best Hypotheses Generation Time:"); + } } - - if (staticData.GetOutputSearchGraph()) - { + + if (staticData.GetOutputSearchGraph()) { std::ostringstream out; manager.GetSearchGraph(lineNumber, out); OutputCollector *oc = m_ioWrapper.GetSearchGraphOutputCollector(); @@ -127,11 +129,13 @@ class TranslationTask : public Task oc->Write(lineNumber, out.str()); } - IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); } + IFVERBOSE(2) { + PrintUserTime("Sentence Decoding Time:"); + } manager.CalcDecoderStatistics(); } - private: +private: // Non-copyable: copy constructor and assignment operator not implemented. TranslationTask(const TranslationTask &); TranslationTask &operator=(const TranslationTask &); @@ -140,32 +144,42 @@ class TranslationTask : public Task IOWrapper &m_ioWrapper; }; -bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source) +bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source) { - delete source; - switch(inputType) - { - case SentenceInput: source = ioWrapper.GetInput(new Sentence(Input)); break; - case ConfusionNetworkInput: source = ioWrapper.GetInput(new ConfusionNet); break; - case WordLatticeInput: source = ioWrapper.GetInput(new WordLattice); break; - case TreeInputType: source = ioWrapper.GetInput(new TreeInput(Input));break; - default: TRACE_ERR("Unknown input type: " << inputType << "\n"); - } - return (source ? true : false); + delete source; + switch(inputType) { + case SentenceInput: + source = ioWrapper.GetInput(new Sentence(Input)); + break; + case ConfusionNetworkInput: + source = ioWrapper.GetInput(new ConfusionNet); + break; + case WordLatticeInput: + source = ioWrapper.GetInput(new WordLattice); + break; + case TreeInputType: + source = ioWrapper.GetInput(new TreeInput(Input)); + break; + default: + TRACE_ERR("Unknown input type: " << inputType << "\n"); + } + return (source ? true : false); } -static void PrintFeatureWeight(const FeatureFunction* ff) { - +static void PrintFeatureWeight(const FeatureFunction* ff) +{ + size_t weightStart = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(ff->GetScoreBookkeepingID()); size_t weightEnd = StaticData::Instance().GetScoreIndexManager().GetEndIndex(ff->GetScoreBookkeepingID()); for (size_t i = weightStart; i < weightEnd; ++i) { - cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " - << StaticData::Instance().GetAllWeights()[i] << endl; + cout << ff->GetScoreProducerDescription() << " " << ff->GetScoreProducerWeightShortName() << " " + << StaticData::Instance().GetAllWeights()[i] << endl; } } -static void ShowWeights() { +static void ShowWeights() +{ cout.precision(6); const StaticData& staticData = StaticData::Instance(); const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT); @@ -190,80 +204,77 @@ static void ShowWeights() { int main(int argc, char* argv[]) { - IFVERBOSE(1) - { - TRACE_ERR("command: "); - for(int i=0;i<argc;++i) TRACE_ERR(argv[i]<<" "); - TRACE_ERR(endl); - } + IFVERBOSE(1) { + TRACE_ERR("command: "); + for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" "); + TRACE_ERR(endl); + } IOWrapper::FixPrecision(cout); IOWrapper::FixPrecision(cerr); - // load data structures - Parameter parameter; - if (!parameter.LoadParam(argc, argv)) - { - return EXIT_FAILURE; - } + // load data structures + Parameter parameter; + if (!parameter.LoadParam(argc, argv)) { + return EXIT_FAILURE; + } // create threadpool, if necessary int threadcount = (parameter.GetParam("threads").size() > 0) ? Scan<size_t>(parameter.GetParam("threads")[0]) : 1; #ifdef WITH_THREADS - if (threadcount < 1) - { + if (threadcount < 1) { cerr << "Error: Need to specify a positive number of threads" << endl; exit(1); } ThreadPool pool(threadcount); #else - if (threadcount > 1) - { + if (threadcount > 1) { cerr << "Error: Thread count of " << threadcount << " but moses not built with thread support" << endl; exit(1); } #endif - const StaticData &staticData = StaticData::Instance(); - if (!StaticData::LoadDataStatic(¶meter)) - return EXIT_FAILURE; - - if (parameter.isParamSpecified("show-weights")) { - ShowWeights(); - exit(0); + const StaticData &staticData = StaticData::Instance(); + if (!StaticData::LoadDataStatic(¶meter)) + return EXIT_FAILURE; + + if (parameter.isParamSpecified("show-weights")) { + ShowWeights(); + exit(0); + } + + assert(staticData.GetSearchAlgorithm() == ChartDecoding); + + // set up read/writing class + IOWrapper *ioWrapper = GetIODevice(staticData); + + // check on weights + vector<float> weights = staticData.GetAllWeights(); + IFVERBOSE(2) { + TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager()); + TRACE_ERR("The global weight vector looks like this:"); + for (size_t j=0; j<weights.size(); j++) { + TRACE_ERR(" " << weights[j]); } + TRACE_ERR("\n"); + } + // every score must have a weight! check that here: + if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) { + TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl); + return EXIT_FAILURE; + } - assert(staticData.GetSearchAlgorithm() == ChartDecoding); - - // set up read/writing class - IOWrapper *ioWrapper = GetIODevice(staticData); - - // check on weights - vector<float> weights = staticData.GetAllWeights(); - IFVERBOSE(2) { - TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager()); - TRACE_ERR("The global weight vector looks like this:"); - for (size_t j=0; j<weights.size(); j++) { TRACE_ERR(" " << weights[j]); } - TRACE_ERR("\n"); - } - // every score must have a weight! check that here: - if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) { - TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl); - return EXIT_FAILURE; - } - - if (ioWrapper == NULL) - return EXIT_FAILURE; - - // read each sentence & decode - InputType *source=0; - while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) - { - IFVERBOSE(1) - ResetUserTime(); + if (ioWrapper == NULL) + return EXIT_FAILURE; + + // read each sentence & decode + InputType *source=0; + while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) { + IFVERBOSE(1) + ResetUserTime(); TranslationTask *task = new TranslationTask(source, *ioWrapper); source = NULL; // task will delete source #ifdef WITH_THREADS @@ -278,48 +289,45 @@ int main(int argc, char* argv[]) pool.Stop(true); // flush remaining jobs #endif - delete ioWrapper; + delete ioWrapper; - IFVERBOSE(1) - PrintUserTime("End."); + IFVERBOSE(1) + PrintUserTime("End."); - #ifdef HACK_EXIT - //This avoids that detructors are called (it can take a long time) - exit(EXIT_SUCCESS); - #else - return EXIT_SUCCESS; - #endif +#ifdef HACK_EXIT + //This avoids that detructors are called (it can take a long time) + exit(EXIT_SUCCESS); +#else + return EXIT_SUCCESS; +#endif } -IOWrapper *GetIODevice(const StaticData &staticData) +IOWrapper *GetIODevice(const StaticData &staticData) { - IOWrapper *ioWrapper; - const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder() - ,&outputFactorOrder = staticData.GetOutputFactorOrder(); - FactorMask inputFactorUsed(inputFactorOrder); - - // io - if (staticData.GetParam("input-file").size() == 1) - { - VERBOSE(2,"IO from File" << endl); - string filePath = staticData.GetParam("input-file")[0]; - - ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed - , staticData.GetNBestSize() - , staticData.GetNBestFilePath() - , filePath); - } - else - { - VERBOSE(1,"IO from STDOUT/STDIN" << endl); - ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed - , staticData.GetNBestSize() - , staticData.GetNBestFilePath()); - } - ioWrapper->ResetTranslationId(); - - IFVERBOSE(1) - PrintUserTime("Created input-output object"); - - return ioWrapper; + IOWrapper *ioWrapper; + const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder() + ,&outputFactorOrder = staticData.GetOutputFactorOrder(); + FactorMask inputFactorUsed(inputFactorOrder); + + // io + if (staticData.GetParam("input-file").size() == 1) { + VERBOSE(2,"IO from File" << endl); + string filePath = staticData.GetParam("input-file")[0]; + + ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed + , staticData.GetNBestSize() + , staticData.GetNBestFilePath() + , filePath); + } else { + VERBOSE(1,"IO from STDOUT/STDIN" << endl); + ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed + , staticData.GetNBestSize() + , staticData.GetNBestFilePath()); + } + ioWrapper->ResetTranslationId(); + + IFVERBOSE(1) + PrintUserTime("Created input-output object"); + + return ioWrapper; } diff --git a/moses-chart-cmd/src/Main.h b/moses-chart-cmd/src/Main.h index e470de06c..b9d9a32d6 100644 --- a/moses-chart-cmd/src/Main.h +++ b/moses-chart-cmd/src/Main.h @@ -5,28 +5,28 @@ Moses - factored phrase-based language decoder Copyright (c) 2006 University of Edinburgh All rights reserved. -Redistribution and use in source and binary forms, with or without modification, +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the University of Edinburgh nor the names of its contributors - may be used to endorse or promote products derived from this software + * Neither the name of the University of Edinburgh nor the names of its contributors + may be used to endorse or promote products derived from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ diff --git a/moses-chart-cmd/src/TranslationAnalysis.cpp b/moses-chart-cmd/src/TranslationAnalysis.cpp index e60ea0754..939954fc8 100644 --- a/moses-chart-cmd/src/TranslationAnalysis.cpp +++ b/moses-chart-cmd/src/TranslationAnalysis.cpp @@ -11,35 +11,36 @@ using namespace std; using namespace Moses; -namespace TranslationAnalysis { +namespace TranslationAnalysis +{ void PrintTranslationAnalysis(ostream &os, const Hypothesis* hypo) { - /* - os << endl << "TRANSLATION HYPOTHESIS DETAILS:" << endl; + /* + os << endl << "TRANSLATION HYPOTHESIS DETAILS:" << endl; queue<const Hypothesis*> translationPath; - while (hypo) - { - translationPath.push(hypo); + while (hypo) + { + translationPath.push(hypo); hypo = hypo->GetPrevHypo(); } - while (!translationPath.empty()) - { - hypo = translationPath.front(); - translationPath.pop(); - const TranslationOption *transOpt = hypo->GetTranslationOption(); - if (transOpt != NULL) - { - os << hypo->GetCurrSourceWordsRange() << " "; - for (size_t decodeStepId = 0; decodeStepId < DecodeStepTranslation::GetNumTransStep(); ++decodeStepId) - os << decodeStepId << "=" << transOpt->GetSubRangeCount(decodeStepId) << ","; - os << *transOpt << endl; - } - } - - os << "END TRANSLATION" << endl; - */ + while (!translationPath.empty()) + { + hypo = translationPath.front(); + translationPath.pop(); + const TranslationOption *transOpt = hypo->GetTranslationOption(); + if (transOpt != NULL) + { + os << hypo->GetCurrSourceWordsRange() << " "; + for (size_t decodeStepId = 0; decodeStepId < DecodeStepTranslation::GetNumTransStep(); ++decodeStepId) + os << decodeStepId << "=" << transOpt->GetSubRangeCount(decodeStepId) << ","; + os << *transOpt << endl; + } + } + + os << "END TRANSLATION" << endl; + */ } } diff --git a/moses-chart-cmd/src/TranslationAnalysis.h b/moses-chart-cmd/src/TranslationAnalysis.h index 31efab0bd..426158a42 100644 --- a/moses-chart-cmd/src/TranslationAnalysis.h +++ b/moses-chart-cmd/src/TranslationAnalysis.h @@ -17,7 +17,7 @@ namespace TranslationAnalysis * print details about the translation represented in hypothesis to * os. Included information: phrase alignment, words dropped, scores */ - void PrintTranslationAnalysis(std::ostream &os, const Moses::Hypothesis* hypo); +void PrintTranslationAnalysis(std::ostream &os, const Moses::Hypothesis* hypo); } diff --git a/moses-chart-cmd/src/mbr.cpp b/moses-chart-cmd/src/mbr.cpp index 5b8902aae..633a518b8 100644 --- a/moses-chart-cmd/src/mbr.cpp +++ b/moses-chart-cmd/src/mbr.cpp @@ -18,11 +18,11 @@ using namespace std ; using namespace Moses; -/* Input : - 1. a sorted n-best list, with duplicates filtered out in the following format +/* Input : + 1. a sorted n-best list, with duplicates filtered out in the following format 0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432 - 2. a weight vector + 2. a weight vector 3. bleu order ( default = 4) 4. scaling factor to weigh the weight vector (default = 1.0) @@ -39,12 +39,9 @@ float min_interval = 1e-4; void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const Factor* >, int > & allngrams) { vector< const Factor* > ngram; - for (int k = 0; k < BLEU_ORDER; k++) - { - for(int i =0; i < max((int)sentence.size()-k,0); i++) - { - for ( int j = i; j<= i+k; j++) - { + for (int k = 0; k < BLEU_ORDER; k++) { + for(int i =0; i < max((int)sentence.size()-k,0); i++) { + for ( int j = i; j<= i+k; j++) { ngram.push_back(sentence[j]); } ++allngrams[ngram]; @@ -53,15 +50,15 @@ void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const } } -float calculate_score(const vector< vector<const Factor*> > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats ) { +float calculate_score(const vector< vector<const Factor*> > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats ) +{ int comps_n = 2*BLEU_ORDER+1; vector<int> comps(comps_n); float logbleu = 0.0, brevity; - + int hyp_length = sents[hyp].size(); - for (int i =0; i<BLEU_ORDER;i++) - { + for (int i =0; i<BLEU_ORDER; i++) { comps[2*i] = 0; comps[2*i+1] = max(hyp_length-i,0); } @@ -70,24 +67,20 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ; for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin(); - it != hyp_ngrams.end(); it++) - { + it != hyp_ngrams.end(); it++) { map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first); - if(ref_it != ref_ngrams.end()) - { + if(ref_it != ref_ngrams.end()) { comps[2* (it->first.size()-1)] += min(ref_it->second,it->second); } } comps[comps_n-1] = sents[ref].size(); - if (DEBUG) - { + if (DEBUG) { for ( int i = 0; i < comps_n; i++) cerr << "Comp " << i << " : " << comps[i]; } - for (int i=0; i<BLEU_ORDER; i++) - { + for (int i=0; i<BLEU_ORDER; i++) { if (comps[0] == 0) return 0.0; if ( i > 0 ) @@ -102,7 +95,8 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in return exp(logbleu); } -vector<const Factor*> doMBR(const TrellisPathList& nBestList){ +vector<const Factor*> doMBR(const TrellisPathList& nBestList) +{ // cerr << "Sentence " << sent << " has " << sents.size() << " candidate translations" << endl; float marginal = 0; @@ -113,9 +107,8 @@ vector<const Factor*> doMBR(const TrellisPathList& nBestList){ TrellisPathList::const_iterator iter; //TrellisPath* hyp = NULL; - for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) - { - const TrellisPath &path = **iter; + for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { + const TrellisPath &path = **iter; joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights())); marginal += joint_prob; joint_prob_vec.push_back(joint_prob); @@ -123,57 +116,56 @@ vector<const Factor*> doMBR(const TrellisPathList& nBestList){ map < vector < const Factor *>, int > counts; vector<const Factor*> translation; GetOutputFactors(path, translation); - + //TO DO extract_ngrams(translation,counts); ngram_stats.push_back(counts); translations.push_back(translation); - } - - vector<float> mbr_loss; - float bleu, weightedLoss; - float weightedLossCumul = 0; - float minMBRLoss = 1000000; - int minMBRLossIdx = -1; - - /* Main MBR computation done here */ - for (int i = 0; i < nBestList.GetSize(); i++){ - weightedLossCumul = 0; - for (int j = 0; j < nBestList.GetSize(); j++){ - if ( i != j) { - bleu = calculate_score(translations, j, i,ngram_stats ); - weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal); - weightedLossCumul += weightedLoss; - if (weightedLossCumul > minMBRLoss) - break; - } - } - if (weightedLossCumul < minMBRLoss){ - minMBRLoss = weightedLossCumul; - minMBRLossIdx = i; - } - } - /* Find sentence that minimises Bayes Risk under 1- BLEU loss */ - return translations[minMBRLossIdx]; + } + + vector<float> mbr_loss; + float bleu, weightedLoss; + float weightedLossCumul = 0; + float minMBRLoss = 1000000; + int minMBRLossIdx = -1; + + /* Main MBR computation done here */ + for (int i = 0; i < nBestList.GetSize(); i++) { + weightedLossCumul = 0; + for (int j = 0; j < nBestList.GetSize(); j++) { + if ( i != j) { + bleu = calculate_score(translations, j, i,ngram_stats ); + weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal); + weightedLossCumul += weightedLoss; + if (weightedLossCumul > minMBRLoss) + break; + } + } + if (weightedLossCumul < minMBRLoss) { + minMBRLoss = weightedLossCumul; + minMBRLossIdx = i; + } + } + /* Find sentence that minimises Bayes Risk under 1- BLEU loss */ + return translations[minMBRLossIdx]; } -void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation){ - const std::vector<const Hypothesis *> &edges = path.GetEdges(); - const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - assert (outputFactorOrder.size() == 1); - - // print the surface factor of the translation - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) - { - const Hypothesis &edge = *edges[currEdge]; - const Phrase &phrase = edge.GetCurrTargetPhrase(); - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) - { - - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - translation.push_back(factor); - } - } +void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation) +{ + const std::vector<const Hypothesis *> &edges = path.GetEdges(); + const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); + assert (outputFactorOrder.size() == 1); + + // print the surface factor of the translation + for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { + const Hypothesis &edge = *edges[currEdge]; + const Phrase &phrase = edge.GetCurrTargetPhrase(); + size_t size = phrase.GetSize(); + for (size_t pos = 0 ; pos < size ; pos++) { + + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); + translation.push_back(factor); + } + } } |