diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2014-12-28 22:18:40 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2014-12-28 22:18:40 +0300 |
commit | cd6ec019999565380c7067317ea8af9232aace9c (patch) | |
tree | e24a4d4db4d3537400f80a3b4a391f2aaff26bb0 /moses/Hypothesis.cpp | |
parent | f865e7e43c8f593cce75ee7157d77c4b94e95814 (diff) |
move OutputBestSurface() to Hypothesis class
Diffstat (limited to 'moses/Hypothesis.cpp')
-rw-r--r-- | moses/Hypothesis.cpp | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index b0a24d736..f5dd9709a 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "StaticData.h" #include "InputType.h" #include "Manager.h" +#include "IOWrapper.h" #include "moses/FF/FFState.h" #include "moses/FF/StatefulFeatureFunction.h" #include "moses/FF/StatelessFeatureFunction.h" @@ -489,5 +490,112 @@ void Hypothesis::OutputInput(std::ostream& os) const if (inp_phrases[i]) os << *inp_phrases[i]; } +void Hypothesis::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const +{ + if (hypo != NULL) { + // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence + OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors); + OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors); + } +} + +////////////////////////////////////////////////////////////////////////// +/*** + * print surface factor only for the given phrase + */ +void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const +{ + UTIL_THROW_IF2(outputFactorOrder.size() == 0, + "Must specific at least 1 output factor"); + const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); + bool markUnknown = StaticData::Instance().GetMarkUnknown(); + if (reportAllFactors == true) { + out << phrase; + } else { + FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); + + std::map<size_t, const Factor*> placeholders; + if (placeholderFactor != NOT_FOUND) { + // creates map of target position -> factor for placeholders + placeholders = GetPlaceholders(edge, placeholderFactor); + } + + size_t size = phrase.GetSize(); + for (size_t pos = 0 ; pos < size ; pos++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); + + if (placeholders.size()) { + // do placeholders + std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); + if (iter != placeholders.end()) { + factor = iter->second; + } + } + + UTIL_THROW_IF2(factor == NULL, + "No factor 0 at position " << pos); + + //preface surface form with UNK if marking unknowns + const Word &word = phrase.GetWord(pos); + if(markUnknown && word.IsOOV()) { + out << "UNK" << *factor; + } else { + out << *factor; + } + + for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { + const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); + UTIL_THROW_IF2(factor == NULL, + "No factor " << i << " at position " << pos); + + out << "|" << *factor; + } + out << " "; + } + } + + // trace ("report segmentation") option "-t" / "-tt" + if (reportSegmentation > 0 && phrase.GetSize() > 0) { + const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); + const int sourceStart = sourceRange.GetStartPos(); + const int sourceEnd = sourceRange.GetEndPos(); + out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" + if (reportSegmentation == 2) { + out << ",wa="; + const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); + Hypothesis::OutputAlignment(out, ai, 0, 0); + out << ",total="; + out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); + out << ","; + ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); + scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); + IOWrapper::OutputAllFeatureScores(scoreBreakdown, out); + } + out << "| "; + } +} + +std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const +{ + const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath(); + const Phrase &inputPhrase = inputPath.GetPhrase(); + + std::map<size_t, const Factor*> ret; + + for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { + const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor); + if (factor) { + std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); + UTIL_THROW_IF2(targetPos.size() != 1, + "Placeholder should be aligned to 1, and only 1, word"); + ret[*targetPos.begin()] = factor; + } + } + + return ret; +} + } |