Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com>, 2014-12-28 22:18:40 +0300
committer: Hieu Hoang <hieuhoang@gmail.com>, 2014-12-28 22:18:40 +0300
commit: cd6ec019999565380c7067317ea8af9232aace9c (patch)
tree: e24a4d4db4d3537400f80a3b4a391f2aaff26bb0
parent: f865e7e43c8f593cce75ee7157d77c4b94e95814 (diff)
move OutputBestSurface() to Hypothesis class
-rw-r--r-- moses/Hypothesis.cpp 108
-rw-r--r-- moses/Hypothesis.h 7
-rw-r--r-- moses/IOWrapper.cpp 36
-rw-r--r-- moses/IOWrapper.h 2
-rw-r--r-- moses/TranslationTask.cpp 2
5 files changed, 116 insertions, 39 deletions
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index b0a24d736..f5dd9709a 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "Manager.h"
+#include "IOWrapper.h"
#include "moses/FF/FFState.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
@@ -489,5 +490,112 @@ void Hypothesis::OutputInput(std::ostream& os) const
if (inp_phrases[i]) os << *inp_phrases[i];
}
+void Hypothesis::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{ // writes the surface of every hypothesis on the GetPrevHypo() chain ending at the hypo argument; a NULL hypo writes nothing
+ if (hypo != NULL) {
+ // recurse into the predecessor first so that earlier phrases are written before this one (the chain is walked back from the final hypothesis)
+ OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
+ OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors); // then this hypothesis' own target phrase
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * print the target phrase of one hypothesis (edge): the whole phrase if reportAllFactors, otherwise the factors listed in outputFactorOrder joined by "|" with a space after each word; optionally followed by a segmentation trace
+ */
+void Hypothesis::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const
+{
+ UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+ "Must specific at least 1 output factor");
+ const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+ bool markUnknown = StaticData::Instance().GetMarkUnknown();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+ std::map<size_t, const Factor*> placeholders;
+ if (placeholderFactor != NOT_FOUND) {
+ // map of target position -> placeholder factor, built by GetPlaceholders() for this edge
+ placeholders = GetPlaceholders(edge, placeholderFactor);
+ }
+
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+ if (placeholders.size()) {
+ // a placeholder registered for this target position overrides the first output factor
+ std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+ if (iter != placeholders.end()) {
+ factor = iter->second;
+ }
+ }
+
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor 0 at position " << pos);
+
+ // prefix the surface form with "UNK" (no separator) when unknown-word marking is on and the word is OOV
+ const Word &word = phrase.GetWord(pos);
+ if(markUnknown && word.IsOOV()) {
+ out << "UNK" << *factor;
+ } else {
+ out << *factor;
+ }
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); // NOTE(review): shadows the outer 'factor'; placeholders are not applied to these factors
+ UTIL_THROW_IF2(factor == NULL,
+ "No factor " << i << " at position " << pos);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+
+ // trace ("report segmentation") option "-t" / "-tt": written only for non-empty phrases, in the form |start-end[,details]| followed by a space
+ if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ const int sourceStart = sourceRange.GetStartPos();
+ const int sourceEnd = sourceRange.GetEndPos();
+ out << "|" << sourceStart << "-" << sourceEnd; // source span, written whenever reportSegmentation > 0
+ if (reportSegmentation == 2) { // enriched "-tt": adds word alignment, score delta and per-feature scores
+ out << ",wa=";
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+ Hypothesis::OutputAlignment(out, ai, 0, 0);
+ out << ",total=";
+ out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); // score added by this edge; NOTE(review): GetPrevHypo() is dereferenced unchecked - presumably an edge with a non-empty phrase always has a predecessor, confirm
+ out << ",";
+ ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+ scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); // per-feature delta relative to the predecessor
+ IOWrapper::OutputAllFeatureScores(scoreBreakdown, out);
+ }
+ out << "| ";
+ }
+}
+
+std::map<size_t, const Factor*> Hypothesis::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{ // returns target position -> placeholder factor for every source word of hypo's input phrase that carries placeholderFactor
+ const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+ const Phrase &inputPhrase = inputPath.GetPhrase();
+
+ std::map<size_t, const Factor*> ret;
+
+ for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
+ const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
+ if (factor) { // only source words that actually have a placeholder factor contribute
+ std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos);
+ UTIL_THROW_IF2(targetPos.size() != 1,
+ "Placeholder should be aligned to 1, and only 1, word"); // anything but exactly one aligned target position is rejected
+ ret[*targetPos.begin()] = factor; // key is the single aligned target position
+ }
+ }
+
+ return ret;
+}
+
}
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index 8e160b11e..70dd463d8 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -277,6 +277,13 @@ public:
void OutputInput(std::ostream& os) const;
static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
+ void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) const;
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const;
+
+ // creates a map of TARGET positions which should be replaced by word using placeholder
+ std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const;
+
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index e7f1ba565..351b194ae 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -520,15 +520,6 @@ void IOWrapper::OutputSurface(std::ostream &out, const Hypothesis &edge, const s
}
}
-void IOWrapper::OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
- char reportSegmentation, bool reportAllFactors)
-{
- if (hypo != NULL) {
- // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
- OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
- OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
- }
-}
void IOWrapper::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
@@ -593,33 +584,6 @@ void IOWrapper::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*tra
out << endl;
}
-void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, char reportSegmentation, bool reportAllFactors)
-{
- if (hypo != NULL) {
- VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
- VERBOSE(3,"Best path: ");
- Backtrack(hypo);
- VERBOSE(3,"0" << std::endl);
- if (!m_surpressSingleBestOutput) {
- if (StaticData::Instance().GetOutputHypoScore()) {
- cout << hypo->GetTotalScore() << " ";
- }
-
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- hypo->OutputInput(cout);
- cout << "||| ";
- }
- OutputBestSurface(cout, hypo, *m_outputFactorOrder, reportSegmentation, reportAllFactors);
- cout << endl;
- }
- } else {
- VERBOSE(1, "NO BEST TRANSLATION" << endl);
- if (!m_surpressSingleBestOutput) {
- cout << endl;
- }
- }
-}
-
bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
{
delete source;
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index 440c9d3c7..0f59a06d6 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -128,7 +128,6 @@ public:
Moses::InputType* GetInput(Moses::InputType *inputType);
bool ReadInput(Moses::InputTypeEnum inputType, Moses::InputType*& source);
- void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, char reportSegmentation, bool reportAllFactors);
void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
void Backtrack(const Moses::Hypothesis *hypo);
@@ -172,7 +171,6 @@ public:
// CHART
// phrase-based
- void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
char reportSegmentation, bool reportAllFactors, std::ostream& out);
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index c72196173..cfa79acc9 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -129,7 +129,7 @@ void TranslationTask::RunPb()
if (staticData.GetReportSegmentation() == 2) {
manager.GetOutputLanguageModelOrder(out, bestHypo);
}
- m_ioWrapper.OutputBestSurface(
+ bestHypo->OutputBestSurface(
out,
bestHypo,
staticData.GetOutputFactorOrder(),