Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2014-12-28 22:18:40 +0300
committerHieu Hoang <hieuhoang@gmail.com>2014-12-28 22:18:40 +0300
commitcd6ec019999565380c7067317ea8af9232aace9c (patch)
treee24a4d4db4d3537400f80a3b4a391f2aaff26bb0 /moses/Hypothesis.cpp
parentf865e7e43c8f593cce75ee7157d77c4b94e95814 (diff)
move OutputBestSurface() to Hypothesis class
Diffstat (limited to 'moses/Hypothesis.cpp')
-rw-r--r--moses/Hypothesis.cpp108
1 files changed, 108 insertions, 0 deletions
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index b0a24d736..f5dd9709a 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
+#include "IOWrapper.h"
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
@@ -489,5 +490,112 @@ void Hypothesis::OutputInput(std::ostream& os) const
if (inp_phrases[i]) os << *inp_phrases[i];
}
+void Hypothesis::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ if (hypo != NULL) {
+ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
+ OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
+ OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * print surface factor only for the given phrase
+ */
+void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Must specific at least 1 output factor");
+ const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // creates map of target position -> factor for placeholders
+ placeholders = GetPlaceholders(edge, placeholderFactor);
+ }
+
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+ if (placeholders.size()) {
+ // do placeholders
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+ if (iter != placeholders.end()) {
+ factor = iter->second;
+ }
+ }
+
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << pos);
+
+ //preface surface form with UNK if marking unknowns
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ } else {
+ out << *factor;
+ }
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+
+ // trace ("report segmentation") option "-t" / "-tt"
+ if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ const int sourceStart = sourceRange.GetStartPos();
+ const int sourceEnd = sourceRange.GetEndPos();
+ out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
+ if (reportSegmentation == 2) {
+ out << ",wa=";
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+ Hypothesis::OutputAlignment(out, ai, 0, 0);
+ out << ",total=";
+ out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+ out << ",";
+ ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+ scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+ IOWrapper::OutputAllFeatureScores(scoreBreakdown, out);
+ }
+ out << "| ";
+ }
+}
+
+std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{
+ const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+ const Phrase &inputPhrase = inputPath.GetPhrase();
+
+ std::map<size_t, const Factor*> ret;
+
+ for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
+ const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
+ if (factor) {
+ std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
+ UTIL_THROW_IF2(targetPos.size() != 1,
+ "Placeholder should be aligned to 1, and only 1, word");
+ ret[*targetPos.begin()] = factor;
+ }
+ }
+
+ return ret;
+}
+
}