Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2014-12-04 21:35:55 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2014-12-04 21:35:55 +0300
commit bb6d04a9b780d761d2e8e4b6c335fc44dbe33483 (patch)
tree 20f4eb03f338a1049ec23a33ade16360df23b8df
parent ba33cf93bac7dcce3735b02cda67e5abcc2725fd (diff)
parent b23c00989b30a916d3a3fba98bf033dec5081699 (diff)
Merge ../mosesdecoder.merge-cmd
-rw-r--r-- moses/BaseManager.cpp 18
-rw-r--r-- moses/BaseManager.h 24
-rw-r--r-- moses/ChartManager.cpp 172
-rw-r--r-- moses/ChartManager.h 44
-rw-r--r-- moses/IOWrapper.cpp 568
-rw-r--r-- moses/IOWrapper.h 57
-rw-r--r-- moses/Incremental.cpp 141
-rw-r--r-- moses/Incremental.h 34
-rw-r--r-- moses/Manager.cpp 61
-rw-r--r-- moses/Manager.h 7
-rw-r--r-- moses/Syntax/S2T/Manager-inl.h 212
-rw-r--r-- moses/Syntax/S2T/Manager.h 23
-rw-r--r-- moses/TranslationTask.cpp 46
-rw-r--r-- moses/TranslationTask.h 19
14 files changed, 733 insertions, 693 deletions
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
index 485cdd182..e41685344 100644
--- a/moses/BaseManager.cpp
+++ b/moses/BaseManager.cpp
@@ -88,6 +88,24 @@ void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
}
}
+// Emulates the old operator<<(ostream &, const DottedRule &) function. The
+// output format is a bit odd (reverse order and double spacing between symbols)
+// but there are scripts and tools that expect the output of -T to look like
+// that.
+//
+// Every entry of 'context' is written as "<second>=<first> " (the pair's
+// second member, then its first), walking from the last entry to the first,
+// with one additional space between consecutive entries.
+//
+// An empty context is still reported by the assert in debug builds; in builds
+// where assert is compiled out the loop simply writes nothing instead of
+// dereferencing an invalid iterator.
+void BaseManager::WriteApplicationContext(std::ostream &out,
+    const ApplicationContext &context) const
+{
+  assert(!context.empty());
+  for (ApplicationContext::const_reverse_iterator p = context.rbegin();
+       p != context.rend(); ++p) {
+    if (p != context.rbegin()) {
+      // Separator that produces the double spacing between symbols.
+      out << " ";
+    }
+    out << p->second << "=" << p->first << " ";
+  }
+}
+
} // namespace
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
index 3f0dbcb92..49e28b005 100644
--- a/moses/BaseManager.h
+++ b/moses/BaseManager.h
@@ -13,6 +13,11 @@ class OutputCollector;
class BaseManager
{
protected:
+
+ // output
+ typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
+ typedef std::set< std::pair<size_t, size_t> > Alignments;
+
void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
std::ostream &out) const;
void OutputFeatureScores( std::ostream& out,
@@ -23,11 +28,30 @@ protected:
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
+ void WriteApplicationContext(std::ostream &out,
+ const ApplicationContext &context) const;
+
+ // Rewrites 'offsets' in place while tracking a running position that starts
+ // at 'shift'. An entry equal to 0 is replaced by the current running position,
+ // which then advances by one. A non-zero entry is left unchanged and advances
+ // the running position by its own value.
+ template <class T>
+ void ShiftOffsets(std::vector<T> &offsets, T shift) const
+ {
+   T nextPos = shift;
+   for (typename std::vector<T>::iterator it = offsets.begin();
+        it != offsets.end(); ++it) {
+     if (*it != 0) {
+       nextPos += *it;
+       continue;
+     }
+     *it = nextPos;
+     ++nextPos;
+   }
+ }
public:
// outputs
virtual void OutputNBest(OutputCollector *collector) const = 0;
virtual void OutputLatticeSamples(OutputCollector *collector) const = 0;
+ virtual void OutputAlignment(OutputCollector *collector) const = 0;
+ virtual void OutputDetailedTranslationReport(OutputCollector *collector) const = 0;
+ virtual void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const = 0;
};
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index e9ba7af28..824b440c8 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -29,6 +29,7 @@
#include "StaticData.h"
#include "DecodeStep.h"
#include "TreeInput.h"
+#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/OutputCollector.h"
#include "moses/ChartKBestExtractor.h"
@@ -572,4 +573,175 @@ size_t ChartManager::OutputAlignment(Alignments &retAlign,
return totalTargetSize;
}
+// Writes the detailed translation report for the best hypothesis of the
+// current input to 'collector'. Does nothing when 'collector' is NULL.
+void ChartManager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+  if (!collector) {
+    return;
+  }
+  // m_source is downcast to Sentence without a runtime check.
+  OutputDetailedTranslationReport(collector,
+                                  GetBestHypothesis(),
+                                  static_cast<const Sentence&>(m_source),
+                                  m_source.GetTranslationId());
+}
+
+// Builds the "Trans Opt" report for the derivation rooted at 'hypo' and passes
+// the resulting text to 'collector' under 'translationId'. A NULL 'hypo'
+// produces no output at all. 'collector' is used without a NULL check.
+void ChartManager::OutputDetailedTranslationReport(
+  OutputCollector *collector,
+  const ChartHypothesis *hypo,
+  const Sentence &sentence,
+  long translationId) const
+{
+  if (hypo != NULL) {
+    ApplicationContext context;
+    std::ostringstream report;
+
+    OutputTranslationOptions(report, context, hypo, sentence, translationId);
+    collector->Write(translationId, report.str());
+  }
+}
+
+// Writes one "Trans Opt" line for 'hypo' followed by a newline, then does the
+// same, depth-first, for each of its previous hypotheses in order.
+//
+// 'applicationContext' is scratch storage that is overwritten for every
+// hypothesis visited. A NULL 'hypo' writes nothing and is not recursed into.
+void ChartManager::OutputTranslationOptions(std::ostream &out,
+    ApplicationContext &applicationContext,
+    const ChartHypothesis *hypo,
+    const Sentence &sentence,
+    long translationId) const
+{
+  // Return before anything touches 'hypo': the recursion below dereferences it,
+  // so guarding only the output step is not enough.
+  if (hypo == NULL) {
+    return;
+  }
+
+  OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
+  out << std::endl;
+
+  // recursive
+  const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+  std::vector<const ChartHypothesis*>::const_iterator iter;
+  for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
+    const ChartHypothesis *prevHypo = *iter;
+    OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
+  }
+}
+
+// Writes a single report entry for 'hypo' (no trailing newline):
+//   "Trans Opt <id> <source range>: <application context>: <LHS>-><target phrase> <total score><score breakdown>"
+// 'applicationContext' is rebuilt from 'hypo' and 'sentence' before it is
+// written. 'hypo' must not be NULL.
+void ChartManager::OutputTranslationOption(std::ostream &out,
+    ApplicationContext &applicationContext,
+    const ChartHypothesis *hypo,
+    const Sentence &sentence,
+    long translationId) const
+{
+  ReconstructApplicationContext(*hypo, sentence, applicationContext);
+
+  // Header: sentence id and the source span covered by this hypothesis.
+  out << "Trans Opt " << translationId;
+  out << " " << hypo->GetCurrSourceRange();
+  out << ": ";
+
+  // Source-side symbols of the applied rule.
+  WriteApplicationContext(out, applicationContext);
+
+  // Target side of the rule, followed by the scores.
+  out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS();
+  out << "->" << hypo->GetCurrTargetPhrase();
+  out << " " << hypo->GetTotalScore();
+  out << hypo->GetScoreBreakdown();
+}
+
+// Given a hypothesis and sentence, reconstructs the 'application context' --
+// the source RHS symbols of the SCFG rule that was applied, plus their spans.
+//
+// 'context' is cleared first. The source span of 'hypo' is then scanned left to
+// right: a position at or past the start of the next unconsumed previous
+// hypothesis contributes that hypothesis' target LHS with its whole source
+// range, and the scan jumps past that range; any other position contributes
+// the sentence word at that position with a one-word range.
+void ChartManager::ReconstructApplicationContext(const ChartHypothesis &hypo,
+    const Sentence &sentence,
+    ApplicationContext &context) const
+{
+  context.clear();
+
+  const std::vector<const ChartHypothesis*> &children = hypo.GetPrevHypos();
+  std::vector<const ChartHypothesis*>::const_iterator child = children.begin();
+  const std::vector<const ChartHypothesis*>::const_iterator lastChild = children.end();
+
+  const WordsRange &span = hypo.GetCurrSourceRange();
+  for (size_t pos = span.GetStartPos(); pos <= span.GetEndPos(); ) {
+    const bool atNonTerminal =
+      child != lastChild && pos >= (*child)->GetCurrSourceRange().GetStartPos();
+    if (atNonTerminal) {
+      // Symbol is a non-terminal.
+      const Word &lhs = (*child)->GetTargetLHS();
+      const WordsRange &childSpan = (*child)->GetCurrSourceRange();
+      context.push_back(std::make_pair(lhs, childSpan));
+      pos = childSpan.GetEndPos()+1;
+      ++child;
+    } else {
+      // Symbol is a terminal.
+      const Word &word = sentence.GetWord(pos);
+      context.push_back(std::make_pair(word, WordsRange(pos, pos)));
+      ++pos;
+    }
+  }
+}
+
+// Writes the unknown source phrases reported by the parser for the current
+// input to 'collector' as a single line: the phrases are streamed back to back
+// and followed by a newline. Does nothing when 'collector' is NULL.
+void ChartManager::OutputUnknowns(OutputCollector *collector) const
+{
+  if (collector) {
+    long translationId = m_source.GetTranslationId();
+    const std::vector<Phrase*> &oovs = GetParser().GetUnknownSources();
+
+    std::ostringstream out;
+    for (std::vector<Phrase*>::const_iterator p = oovs.begin();
+         p != oovs.end(); ++p) {
+      // 'p' iterates over Phrase pointers: dereference twice so the phrase
+      // itself is streamed rather than the pointer's address.
+      out << **p;
+    }
+    out << std::endl;
+    collector->Write(translationId, out.str());
+  }
+
+}
+
+// Writes the tree-fragment translation report for the best hypothesis to
+// 'collector': one line per applied rule, followed by a "Full Tree" line when
+// a tree-structure feature is configured and its state for the best hypothesis
+// is a TreeState. Does nothing when 'collector' is NULL or there is no best
+// hypothesis.
+void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+{
+  const ChartHypothesis *hypo = GetBestHypothesis();
+  if (collector == NULL || hypo == NULL) {
+    return;
+  }
+
+  std::ostringstream out;
+  ApplicationContext applicationContext;
+
+  const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
+  // Kept as long, matching the other Output* members and the callees below.
+  const long translationId = m_source.GetTranslationId();
+
+  OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
+
+  //Tree of full sentence
+  const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
+  if (treeStructure != NULL) {
+    const std::vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+    for( size_t i=0; i<sff.size(); i++ ) {
+      if (sff[i] == treeStructure) {
+        const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
+        // dynamic_cast yields NULL when the state is missing or of another
+        // type; skip the line instead of dereferencing NULL.
+        if (tree != NULL) {
+          out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
+        }
+        break;
+      }
+    }
+  }
+
+  collector->Write(translationId, out.str());
+
+}
+
+// Writes, for 'hypo' and then depth-first for each of its previous hypotheses,
+// one line made of the "Trans Opt" entry, the separator " ||| ", and either
+// the value of the target phrase's "Tree" property or "noTreeInfo" when the
+// phrase has no such property.
+//
+// 'applicationContext' is scratch storage that is overwritten for every
+// hypothesis visited. A NULL 'hypo' writes nothing and is not recursed into.
+void ChartManager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
+    ApplicationContext &applicationContext,
+    const ChartHypothesis *hypo,
+    const Sentence &sentence,
+    long translationId) const
+{
+  // Return before anything touches 'hypo': the recursion below dereferences it,
+  // so guarding only the output step is not enough.
+  if (hypo == NULL) {
+    return;
+  }
+
+  OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
+
+  const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
+
+  out << " ||| ";
+  if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
+    out << " " << *property->GetValueString();
+  } else {
+    out << " " << "noTreeInfo";
+  }
+  out << std::endl;
+
+  // recursive
+  const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
+  std::vector<const ChartHypothesis*>::const_iterator iter;
+  for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
+    const ChartHypothesis *prevHypo = *iter;
+    OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
+  }
+}
+
+
} // namespace Moses
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 80f466f75..ba3617123 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -63,8 +63,6 @@ private:
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
// output
- typedef std::set< std::pair<size_t, size_t> > Alignments;
-
void OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const;
@@ -75,20 +73,29 @@ private:
size_t OutputAlignment(Alignments &retAlign,
const Moses::ChartHypothesis *hypo,
size_t startTarget) const;
-
- template <class T>
- void ShiftOffsets(std::vector<T> &offsets, T shift) const
- {
- T currPos = shift;
- for (size_t i = 0; i < offsets.size(); ++i) {
- if (offsets[i] == 0) {
- offsets[i] = currPos;
- ++currPos;
- } else {
- currPos += offsets[i];
- }
- }
- }
+ void OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
+ void ReconstructApplicationContext(const ChartHypothesis &hypo,
+ const Sentence &sentence,
+ ApplicationContext &context) const;
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const ChartHypothesis *hypo,
+ const Sentence &sentence,
+ long translationId) const;
public:
ChartManager(InputType const& source);
@@ -142,8 +149,11 @@ public:
void OutputNBest(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const
{}
-
void OutputAlignment(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
+
};
}
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index becde9b65..9e3e0f57f 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -368,22 +368,6 @@ void IOWrapper::Backtrack(const ChartHypothesis *hypo)
}
}
-void IOWrapper::OutputDetailedTranslationReport(
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId)
-{
- if (applied == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
{
@@ -518,110 +502,6 @@ void IOWrapper::WriteApplicationContext(std::ostream &out,
}
}
-void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId)
-{
- if (hypo == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
- UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
- "No output file for tree fragments specified");
-
- //Tree of full sentence
- const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
- if (treeStructure != NULL) {
- const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
- for( size_t i=0; i<sff.size(); i++ ) {
- if (sff[i] == treeStructure) {
- const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
- out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
- break;
- }
- }
- }
-
- m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
-
-}
-
-void IOWrapper::OutputDetailedTreeFragmentsTranslationReport(
- const search::Applied *applied,
- const Sentence &sentence,
- long translationId)
-{
- if (applied == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
- UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
- "No output file for tree fragments specified");
-
- //Tree of full sentence
- //TODO: incremental search doesn't support stateful features
-
- m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
-
-}
-
-void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
-{
-
- if (hypo != NULL) {
- OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
-
- const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
-
- out << " ||| ";
- if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
- out << " " << *property->GetValueString();
- } else {
- out << " " << "noTreeInfo";
- }
- out << std::endl;
- }
-
- // recursive
- const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
- std::vector<const ChartHypothesis*>::const_iterator iter;
- for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
- const ChartHypothesis *prevHypo = *iter;
- OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
- }
-}
-
-void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
-{
-
- if (applied != NULL) {
- OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
-
- const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
-
- out << " ||| ";
- if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
- out << " " << *property->GetValueString();
- } else {
- out << " " << "noTreeInfo";
- }
- out << std::endl;
- }
-
- // recursive
- const search::Applied *child = applied->Children();
- for (size_t i = 0; i < applied->GetArity(); i++) {
- OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
- }
-}
-
/***
* print surface factor only for the given phrase
*/
@@ -651,127 +531,7 @@ void IOWrapper::OutputSurface(std::ostream &out, const Phrase &phrase, const std
}
}
-void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
-{
- ostringstream out;
-
- if (hypo) {
- Alignments retAlign;
- OutputAlignment(retAlign, hypo, 0);
- // output alignments
- Alignments::const_iterator iter;
- for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
- const pair<size_t, size_t> &alignPoint = *iter;
- out << alignPoint.first << "-" << alignPoint.second << " ";
- }
- }
- out << endl;
-
- m_alignmentInfoCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget)
-{
- size_t totalTargetSize = 0;
- size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
-
- const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
-
- size_t thisSourceSize = CalcSourceSize(hypo);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
-
- const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != prevHypos.size(), "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const ChartHypothesis *prevHypo = prevHypos[sourceInd];
-
- // calc source size
- size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
-
- }
-
- return totalTargetSize;
-}
-
-size_t IOWrapper::CalcSourceSize(const Moses::ChartHypothesis *hypo)
-{
- size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
- const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
- for (size_t i = 0; i < prevHypos.size(); ++i) {
- size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
- ret -= (childSize - 1);
- }
- return ret;
-}
-
-void IOWrapper::OutputDetailedTranslationReport(
- const ChartHypothesis *hypo,
- const Sentence &sentence,
- long translationId)
-{
- if (hypo == NULL) {
- return;
- }
- std::ostringstream out;
- ApplicationContext applicationContext;
-
- OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
//DIMw
void IOWrapper::OutputDetailedAllTranslationReport(
@@ -807,156 +567,6 @@ void IOWrapper::OutputDetailedAllTranslationReport(
m_detailedTranslationCollector->Write(translationId, out.str());
}
-void IOWrapper::OutputUnknowns(const std::vector<Moses::Phrase*> &unknowns,
- long translationId)
-{
- std::ostringstream out;
- for (std::size_t i = 0; i < unknowns.size(); ++i) {
- out << *(unknowns[i]);
- }
- out << std::endl;
- m_unknownsCollector->Write(translationId, out.str());
-}
-
-void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
- long translationId)
-{
- std::ostringstream out;
-
- if (m_nBestOutputCollector->OutputIsCout()) {
- // Set precision only if we're writing the n-best list to cout. This is to
- // preserve existing behaviour, but should probably be done either way.
- FixPrecision(out);
- }
-
- bool includeWordAlignment =
- StaticData::Instance().PrintAlignmentInfoInNbest();
-
- bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
-
- for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
- p != nBestList.end(); ++p) {
- const ChartKBestExtractor::Derivation &derivation = **p;
-
- // get the derivation's target-side yield
- Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
-
- // delete <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
-
- // print the translation ID, surface factors, and scores
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, *m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(derivation.scoreBreakdown, out);
- out << " ||| " << derivation.score;
-
- // optionally, print word alignments
- if (includeWordAlignment) {
- out << " ||| ";
- Alignments align;
- OutputAlignmentNBest(align, derivation, 0);
- for (Alignments::const_iterator q = align.begin(); q != align.end();
- ++q) {
- out << q->first << "-" << q->second << " ";
- }
- }
-
- // optionally, print tree
- if (PrintNBestTrees) {
- TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
- out << " ||| " << tree->GetString();
- }
-
- out << std::endl;
- }
-
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::OutputAlignmentNBest(
- Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget)
-{
- const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
-
- size_t totalTargetSize = 0;
- size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
-
- const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
-
- size_t thisSourceSize = CalcSourceSize(&hypo);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
- "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const Moses::ChartKBestExtractor::Derivation &subderivation =
- *derivation.subderivations[sourceInd];
-
- // calc source size
- size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
- }
-
- return totalTargetSize;
-}
-
//////////////////////////////////////////////////////////////////////////
/***
* print surface factor only for the given phrase
@@ -1299,19 +909,6 @@ void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solu
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
-void IOWrapper::OutputDetailedTranslationReport(const Syntax::SHyperedge *best,
- long translationId)
-{
- if (best == NULL) {
- return;
- }
- std::ostringstream out;
- Syntax::S2T::DerivationWriter::Write(*best, translationId, out);
- UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
- "No ouput file for detailed reports specified");
- m_detailedTranslationCollector->Write(translationId, out.str());
-}
-
void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
long translationId)
{
@@ -1341,170 +938,5 @@ void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
m_singleBestOutputCollector->Write(translationId, out.str());
}
-void IOWrapper::OutputNBestList(
- const Syntax::KBestExtractor::KBestVec &nBestList, long translationId)
-{
- std::ostringstream out;
-
- if (m_nBestOutputCollector->OutputIsCout()) {
- // Set precision only if we're writing the n-best list to cout. This is to
- // preserve existing behaviour, but should probably be done either way.
- FixPrecision(out);
- }
-
- bool includeWordAlignment =
- StaticData::Instance().PrintAlignmentInfoInNbest();
-
- bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
-
- for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
- p != nBestList.end(); ++p) {
- const Syntax::KBestExtractor::Derivation &derivation = **p;
-
- // get the derivation's target-side yield
- Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);
-
- // delete <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
-
- // print the translation ID, surface factors, and scores
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, *m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(derivation.scoreBreakdown, out);
- out << " ||| " << derivation.score;
-
- // optionally, print word alignments
- if (includeWordAlignment) {
- out << " ||| ";
- Alignments align;
- OutputAlignmentNBest(align, derivation, 0);
- for (Alignments::const_iterator q = align.begin(); q != align.end();
- ++q) {
- out << q->first << "-" << q->second << " ";
- }
- }
-
- // optionally, print tree
- if (PrintNBestTrees) {
- TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
- out << " ||| " << tree->GetString();
- }
-
- out << std::endl;
- }
-
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
-size_t IOWrapper::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
-{
- using namespace Moses::Syntax;
-
- const Syntax::SHyperedge &shyperedge = d.edge->shyperedge;
- size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
- for (size_t i = 0; i < shyperedge.tail.size(); ++i) {
- size_t childSize = shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
- ret -= (childSize - 1);
- }
- return ret;
-}
-
-size_t IOWrapper::OutputAlignmentNBest(
- Alignments &retAlign,
- const Syntax::KBestExtractor::Derivation &derivation,
- size_t startTarget)
-{
- const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;
-
- size_t totalTargetSize = 0;
- size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
-
- const TargetPhrase &tp = *(shyperedge.translation);
-
- size_t thisSourceSize = CalcSourceSize(derivation);
-
- // position of each terminal word in translation rule, irrespective of alignment
- // if non-term, number is undefined
- vector<size_t> sourceOffsets(thisSourceSize, 0);
- vector<size_t> targetOffsets(tp.GetSize(), 0);
-
- const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
- vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
- const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
-
- UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
- "Error");
-
- size_t targetInd = 0;
- for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
- if (tp.GetWord(targetPos).IsNonTerminal()) {
- UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
- size_t sourceInd = targetPos2SourceInd[targetPos];
- size_t sourcePos = sourceInd2pos[sourceInd];
-
- const Moses::Syntax::KBestExtractor::Derivation &subderivation =
- *derivation.subderivations[sourceInd];
-
- // calc source size
- size_t sourceSize =
- subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
- sourceOffsets[sourcePos] = sourceSize;
-
- // calc target size.
- // Recursively look thru child hypos
- size_t currStartTarget = startTarget + totalTargetSize;
- size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
- currStartTarget);
- targetOffsets[targetPos] = targetSize;
-
- totalTargetSize += targetSize;
- ++targetInd;
- } else {
- ++totalTargetSize;
- }
- }
-
- // convert position within translation rule to absolute position within
- // source sentence / output sentence
- ShiftOffsets(sourceOffsets, startSource);
- ShiftOffsets(targetOffsets, startTarget);
-
- // get alignments from this hypo
- const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();
-
- // add to output arg, offsetting by source & target
- AlignmentInfo::const_iterator iter;
- for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
- const std::pair<size_t,size_t> &align = *iter;
- size_t relSource = align.first;
- size_t relTarget = align.second;
- size_t absSource = sourceOffsets[relSource];
- size_t absTarget = targetOffsets[relTarget];
-
- pair<size_t, size_t> alignPoint(absSource, absTarget);
- pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
- UTIL_THROW_IF2(!ret.second, "Error");
- }
-
- return totalTargetSize;
-}
-
-void IOWrapper::OutputUnknowns(const std::set<Moses::Word> &unknowns,
- long translationId)
-{
- std::ostringstream out;
- for (std::set<Moses::Word>::const_iterator p = unknowns.begin();
- p != unknowns.end(); ++p) {
- out << *p;
- }
- out << std::endl;
- m_unknownsCollector->Write(translationId, out.str());
-}
-
} // namespace
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index 17781b2ac..3244be44c 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -102,7 +102,6 @@ protected:
// CHART
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
- typedef std::set< std::pair<size_t, size_t> > Alignments;
void Backtrack(const ChartHypothesis *hypo);
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@@ -118,44 +117,11 @@ protected:
ApplicationContext &context);
void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context);
- void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const Moses::ChartHypothesis *hypo,
- const Moses::Sentence &sentence,
- long translationId);
- void OutputTreeFragmentsTranslationOptions(std::ostream &out,
- ApplicationContext &applicationContext,
- const search::Applied *applied,
- const Moses::Sentence &sentence,
- long translationId);
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors);
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors);
- size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
- size_t OutputAlignmentNBest(Alignments &retAlign,
- const Moses::ChartKBestExtractor::Derivation &derivation,
- size_t startTarget);
- std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::Syntax::KBestExtractor::Derivation &derivation, std::size_t startTarget);
-
- size_t CalcSourceSize(const Moses::ChartHypothesis *hypo);
- size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
-
- template <class T>
- void ShiftOffsets(std::vector<T> &offsets, T shift)
- {
- T currPos = shift;
- for (size_t i = 0; i < offsets.size(); ++i) {
- if (offsets[i] == 0) {
- offsets[i] = currPos;
- ++currPos;
- } else {
- currPos += offsets[i];
- }
- }
- }
-
public:
IOWrapper();
~IOWrapper();
@@ -199,6 +165,11 @@ public:
return m_latticeSamplesCollector;
}
+ // Accessor: returns the m_detailTreeFragmentsOutputCollector member as-is
+ // (no NULL check is performed here).
+ Moses::OutputCollector *GetDetailTreeFragmentsOutputCollector() {
+ return m_detailTreeFragmentsOutputCollector;
+ }
+
+
// CHART
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
void OutputBestHypo(search::Applied applied, long translationId);
@@ -206,26 +177,8 @@ public:
void OutputBestNone(long translationId);
- void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
- void OutputNBestList(const Moses::Syntax::KBestExtractor::KBestVec &nBestList, long translationId);
-
- void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
- void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
- void OutputDetailedTranslationReport(const Moses::Syntax::SHyperedge *, long translationId);
-
void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
- void OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo);
- void OutputUnknowns(const std::vector<Moses::Phrase*> &, long);
- void OutputUnknowns(const std::set<Moses::Word> &, long);
-
- void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo,
- const Moses::Sentence &sentence,
- long translationId);
- void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied,
- const Moses::Sentence &sentence,
- long translationId);
-
// phrase-based
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index d366065a5..bb20d0640 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -319,6 +319,147 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector<sear
collector->Write(translationId, out.str());
}
+// Writes the detailed translation report for the first entry of
+// completed_nbest_ to `collector`. Does nothing when `collector` is NULL or
+// when completed_nbest_ is empty.
+void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+  if (!collector || completed_nbest_->empty()) {
+    return;
+  }
+
+  const search::Applied &best = completed_nbest_->at(0);
+  const Sentence &sentence = static_cast<const Sentence&>(source_);
+  OutputDetailedTranslationReport(collector, &best, sentence,
+                                  source_.GetTranslationId());
+}
+
+// Renders the derivation rooted at `applied` via OutputTranslationOptions
+// into a string buffer and passes the text to `collector` under
+// `translationId`. Does nothing when `applied` is NULL. `collector` is
+// dereferenced without a check.
+void Manager::OutputDetailedTranslationReport(
+  OutputCollector *collector,
+  const search::Applied *applied,
+  const Sentence &sentence,
+  long translationId) const
+{
+  if (applied != NULL) {
+    std::ostringstream report;
+    ApplicationContext context;
+
+    OutputTranslationOptions(report, context, applied, sentence, translationId);
+    collector->Write(translationId, report.str());
+  }
+}
+
+// Writes one "Trans Opt" line for `applied` followed by, recursively, the
+// lines for each of its children in order (pre-order walk of the derivation).
+//
+// out                - stream receiving the report text
+// applicationContext - scratch buffer, rebuilt for every node visited
+// applied            - root of the (sub)derivation; NULL means nothing to write
+// sentence           - source sentence the derivation covers
+// translationId      - id printed on every line
+void Manager::OutputTranslationOptions(std::ostream &out,
+                                       ApplicationContext &applicationContext,
+                                       const search::Applied *applied,
+                                       const Sentence &sentence, long translationId) const
+{
+  // Bail out before touching the node: the child walk below dereferences
+  // `applied`, so the NULL case must not fall through to it.
+  if (applied == NULL) {
+    return;
+  }
+
+  OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
+  out << std::endl;
+
+  // recursive
+  const search::Applied *child = applied->Children();
+  for (size_t i = 0; i < applied->GetArity(); i++) {
+    OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
+  }
+}
+
+// Writes a single "Trans Opt" entry for `applied` (no trailing newline):
+// translation id, covered range, the reconstructed application context, the
+// rule's target LHS and target phrase, and the node's score.
+// `applicationContext` is overwritten with the context of this node.
+void Manager::OutputTranslationOption(std::ostream &out,
+                                      ApplicationContext &applicationContext,
+                                      const search::Applied *applied,
+                                      const Sentence &sentence,
+                                      long translationId) const
+{
+  ReconstructApplicationContext(applied, sentence, applicationContext);
+
+  // The node's note carries the target phrase as an untyped pointer.
+  const TargetPhrase *rule = static_cast<const TargetPhrase*>(applied->GetNote().vp);
+
+  out << "Trans Opt " << translationId;
+  out << " " << applied->GetRange();
+  out << ": ";
+  WriteApplicationContext(out, applicationContext);
+  out << ": " << rule->GetTargetLHS();
+  out << "->" << *rule;
+  out << " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis
+}
+
+// Given a hypothesis and sentence, reconstructs the 'application context' --
+// the source RHS symbols of the SCFG rule that was applied, plus their spans.
+//
+// applied  - the rule application to inspect; dereferenced without a NULL check
+// sentence - source sentence; terminals are read from it by position
+// context  - output; cleared first, then filled left to right with
+//            (symbol, range) pairs covering applied->GetRange()
+void Manager::ReconstructApplicationContext(const search::Applied *applied,
+ const Sentence &sentence,
+ ApplicationContext &context) const
+{
+ context.clear();
+ const WordsRange &span = applied->GetRange();
+ const search::Applied *child = applied->Children();
+ // i: current source position inside the span; j: number of children consumed.
+ size_t i = span.GetStartPos();
+ size_t j = 0;
+
+ while (i <= span.GetEndPos()) {
+ // j is tested first so that `child` is not read once all children are used up.
+ if (j == applied->GetArity() || i < child->GetRange().GetStartPos()) {
+ // Symbol is a terminal.
+ const Word &symbol = sentence.GetWord(i);
+ context.push_back(std::make_pair(symbol, WordsRange(i, i)));
+ ++i;
+ } else {
+ // Symbol is a non-terminal.
+ const Word &symbol = static_cast<const TargetPhrase*>(child->GetNote().vp)->GetTargetLHS();
+ const WordsRange &range = child->GetRange();
+ context.push_back(std::make_pair(symbol, range));
+ // Skip everything the child covers and move on to the next child.
+ i = range.GetEndPos()+1;
+ ++child;
+ ++j;
+ }
+ }
+}
+
+// Writes the tree-fragment variant of the detailed report for the first
+// entry of Completed() to `collector`. Does nothing when `collector` is NULL
+// or Completed() is empty. The source is converted with dynamic_cast, so a
+// source that is not a Sentence makes the cast throw.
+void Manager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+{
+  if (collector != NULL && !Completed().empty()) {
+    const search::Applied *best = &Completed()[0];
+    const Sentence &sentence = dynamic_cast<const Sentence &>(source_);
+    const size_t translationId = source_.GetTranslationId();
+
+    std::ostringstream report;
+    ApplicationContext context;
+    OutputTreeFragmentsTranslationOptions(report, context, best, sentence, translationId);
+
+    //Tree of full sentence
+    //TODO: incremental search doesn't support stateful features
+
+    collector->Write(translationId, report.str());
+  }
+}
+
+// Like OutputTranslationOptions, but each line is extended with " ||| " and
+// the value of the rule's "Tree" phrase property, or "noTreeInfo" when the
+// target phrase has no such property. Children are visited recursively in
+// order after their parent.
+//
+// out                - stream receiving the report text
+// applicationContext - scratch buffer, rebuilt for every node visited
+// applied            - root of the (sub)derivation; NULL means nothing to write
+// sentence           - source sentence the derivation covers
+// translationId      - id printed on every line
+void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
+                                                    ApplicationContext &applicationContext,
+                                                    const search::Applied *applied,
+                                                    const Sentence &sentence,
+                                                    long translationId) const
+{
+  // Bail out before touching the node: the child walk below dereferences
+  // `applied`, so the NULL case must not fall through to it.
+  if (applied == NULL) {
+    return;
+  }
+
+  OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
+
+  const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
+
+  out << " ||| ";
+  if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
+    out << " " << *property->GetValueString();
+  } else {
+    out << " " << "noTreeInfo";
+  }
+  out << std::endl;
+
+  // recursive
+  const search::Applied *child = applied->Children();
+  for (size_t i = 0; i < applied->GetArity(); i++) {
+    OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
+  }
+}
namespace
{
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 759cf96c4..fb304075b 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -39,6 +39,14 @@ public:
// output
void OutputNBest(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
+ // Empty body: this manager writes nothing for lattice samples; `collector`
+ // is ignored.
+ void OutputLatticeSamples(OutputCollector *collector) const
+ {}
+ // Empty body: this manager writes no alignment output; `collector` is
+ // ignored.
+ void OutputAlignment(OutputCollector *collector) const
+ {}
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const;
+
private:
@@ -59,9 +67,29 @@ private:
const std::vector<search::Applied> *completed_nbest_;
// outputs
- void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
- void OutputLatticeSamples(OutputCollector *collector) const
- {}
+ void OutputDetailedTranslationReport(
+ OutputCollector *collector,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void OutputTranslationOption(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
+ void ReconstructApplicationContext(const search::Applied *applied,
+ const Sentence &sentence,
+ ApplicationContext &context) const;
+ void OutputTreeFragmentsTranslationOptions(std::ostream &out,
+ ApplicationContext &applicationContext,
+ const search::Applied *applied,
+ const Sentence &sentence,
+ long translationId) const;
};
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index dcaa3c961..d5f17c51c 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -43,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/LM/Base.h"
#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TranslationAnalysis.h"
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
@@ -1680,10 +1681,68 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const
+// Writes the word alignment of the best hypothesis to `collector`, keyed by
+// the source's translation id. Does nothing when `collector` is NULL.
void Manager::OutputAlignment(OutputCollector *collector) const
{
+ if (collector) {
+ // Follow the GetPrevHypo() chain starting at the best hypothesis; the
+ // resulting vector therefore lists the hypotheses last-to-first.
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = GetBestHypothesis();
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
+ }
+
+ OutputAlignment(collector,m_source.GetTranslationId(), edges);
+ }
+}
+
+// Formats the alignment of `edges` into a temporary string and writes it to
+// `collector` under `lineNo`. `collector` is dereferenced without a check.
+void Manager::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges) const
+{
+  ostringstream buffer;
+  OutputAlignment(buffer, edges);
+  collector->Write(lineNo, buffer.str());
+}
+
+// Streams the terminal alignments of every hypothesis in `edges` to `out`,
+// visiting the vector from its last element to its first, then ends the line.
+// Each phrase's alignment is offset by the start of its source range and by
+// the summed sizes of the target phrases already written.
+void Manager::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges) const
+{
+  size_t targetOffset = 0;
+
+  typedef vector<const Hypothesis *>::const_reverse_iterator EdgeIter;
+  for (EdgeIter it = edges.rbegin(); it != edges.rend(); ++it) {
+    const Hypothesis &edge = **it;
+    const TargetPhrase &tp = edge.GetCurrTargetPhrase();
+    const size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+
+    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
+
+    targetOffset += tp.GetSize();
+  }
+  // Removing std::endl here breaks -alignment-output-file, so stop doing that, please :)
+  // Or fix it somewhere else.
+  out << std::endl;
+}
+
+// Writes the translation analysis of the best hypothesis to `collector`,
+// keyed by the source's translation id. Does nothing when `collector` is NULL.
+void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+ if (collector) {
+ ostringstream out;
+ FixPrecision(out,PRECISION);
+ // NOTE(review): GetBestHypothesis() is passed on unchecked -- confirm that
+ // PrintTranslationAnalysis copes with a NULL hypothesis.
+ TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
+ collector->Write(m_source.GetTranslationId(),out.str());
+ }
+
}
-void Manager::OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo) const
+// Writes the unknown source phrases reported by m_transOptColl to
+// `collector` as a single line, keyed by the source's translation id.
+// Does nothing when `collector` is NULL.
+void Manager::OutputUnknowns(OutputCollector *collector) const
{
+ if (collector) {
+ long translationId = m_source.GetTranslationId();
+ const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
+ ostringstream out;
+ // Phrases are streamed back to back; no separator is inserted here.
+ for (size_t i = 0; i < unknowns.size(); ++i) {
+ out << *(unknowns[i]);
+ }
+ out << endl;
+ collector->Write(translationId, out.str());
+ }
+
}
}
diff --git a/moses/Manager.h b/moses/Manager.h
index 6973d4b55..3b09b6f0d 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -140,7 +140,8 @@ protected:
void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;
std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const;
- void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo) const;
+ void OutputAlignment(OutputCollector* collector, size_t lineNo , const std::vector<const Hypothesis *> &edges) const;
+ void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges) const;
public:
InputType const& m_source; /**< source sentence to be translated */
@@ -189,6 +190,10 @@ public:
void OutputNBest(OutputCollector *collector) const;
void OutputAlignment(OutputCollector *collector) const;
void OutputLatticeSamples(OutputCollector *collector) const;
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+ // Empty body: this manager writes no tree-fragment report; `collector` is
+ // ignored.
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+ {}
};
diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h
index a08c320f0..9df717edc 100644
--- a/moses/Syntax/S2T/Manager-inl.h
+++ b/moses/Syntax/S2T/Manager-inl.h
@@ -1,5 +1,7 @@
#pragma once
+#include <iostream>
+#include <sstream>
#include "moses/DecodeGraph.h"
#include "moses/StaticData.h"
#include "moses/Syntax/BoundedPriorityContainer.h"
@@ -12,6 +14,7 @@
#include "moses/Syntax/SVertexRecombinationOrderer.h"
#include "moses/Syntax/SymbolEqualityPred.h"
#include "moses/Syntax/SymbolHasher.h"
+#include "DerivationWriter.h"
#include "OovHandler.h"
#include "PChart.h"
@@ -382,6 +385,215 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer());
}
+// Extracts the k-best derivations (size and distinctness taken from
+// StaticData) and writes them to `collector` through OutputNBestList.
+// Does nothing when `collector` is NULL.
+template<typename Parser>
+void Manager<Parser>::OutputNBest(OutputCollector *collector) const
+{
+  if (!collector) {
+    return;
+  }
+
+  const StaticData &staticData = StaticData::Instance();
+  long translationId = m_source.GetTranslationId();
+
+  Syntax::KBestExtractor::KBestVec kBest;
+  ExtractKBest(staticData.GetNBestSize(), kBest, staticData.GetDistinctNBest());
+  OutputNBestList(collector, kBest, translationId);
+}
+
+
+// Writes the 1-best derivation, as formatted by DerivationWriter, to
+// `collector`. Does nothing when there is no best hyperedge or when
+// `collector` is NULL.
+template<typename Parser>
+void Manager<Parser>::OutputDetailedTranslationReport(OutputCollector *collector) const
+{
+  const SHyperedge *best = GetBestSHyperedge();
+  if (best != NULL && collector != NULL) {
+    long translationId = m_source.GetTranslationId();
+    std::ostringstream derivation;
+    Syntax::S2T::DerivationWriter::Write(*best, translationId, derivation);
+    collector->Write(translationId, derivation.str());
+  }
+}
+
+// Writes every word in m_oovs, in set order and without separators, as one
+// line to `collector`. Does nothing when `collector` is NULL.
+template<typename Parser>
+void Manager<Parser>::OutputUnknowns(OutputCollector *collector) const
+{
+  if (!collector) {
+    return;
+  }
+
+  long translationId = m_source.GetTranslationId();
+
+  std::ostringstream line;
+  std::set<Moses::Word>::const_iterator oov = m_oovs.begin();
+  while (oov != m_oovs.end()) {
+    line << *oov;
+    ++oov;
+  }
+  line << std::endl;
+  collector->Write(translationId, line.str());
+}
+
+// Formats an n-best list and writes it to `collector` as one block of text.
+//
+// Each derivation becomes one line:
+//   <id> ||| <surface> ||| <feature scores> ||| <total score>
+// optionally followed by " ||| <word alignments>" and " ||| <tree>",
+// depending on the StaticData settings read below.
+//
+// collector     - destination; must not be NULL (asserted on entry)
+// nBestList     - derivations to print, in the order given
+// translationId - id printed on every line and used as the collector key
+//
+// Throws (UTIL_THROW_IF2) if a derivation's output phrase has fewer than two
+// words, since <s> and </s> are stripped from it.
+template<typename Parser>
+void Manager<Parser>::OutputNBestList(OutputCollector *collector,
+                                      const Syntax::KBestExtractor::KBestVec &nBestList,
+                                      long translationId) const
+{
+  // Check the precondition before the first dereference; placed after the
+  // uses of `collector` the assertion could never trigger.
+  assert(collector);
+
+  const StaticData &staticData = StaticData::Instance();
+
+  const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+
+  std::ostringstream out;
+
+  if (collector->OutputIsCout()) {
+    // Set precision only if we're writing the n-best list to cout. This is to
+    // preserve existing behaviour, but should probably be done either way.
+    FixPrecision(out);
+  }
+
+  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+  const bool includeTrees = staticData.PrintNBestTrees();
+
+  for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+       p != nBestList.end(); ++p) {
+    const Syntax::KBestExtractor::Derivation &derivation = **p;
+
+    // get the derivation's target-side yield
+    Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);
+
+    // delete <s> and </s>
+    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+    outputPhrase.RemoveWord(0);
+    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+    // print the translation ID, surface factors, and scores
+    out << translationId << " ||| ";
+    OutputSurface(out, outputPhrase, outputFactorOrder, false);
+    out << " ||| ";
+    OutputAllFeatureScores(derivation.scoreBreakdown, out);
+    out << " ||| " << derivation.score;
+
+    // optionally, print word alignments
+    if (includeWordAlignment) {
+      out << " ||| ";
+      Alignments align;
+      OutputAlignmentNBest(align, derivation, 0);
+      for (Alignments::const_iterator q = align.begin(); q != align.end();
+           ++q) {
+        out << q->first << "-" << q->second << " ";
+      }
+    }
+
+    // optionally, print tree
+    if (includeTrees) {
+      TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
+      out << " ||| " << tree->GetString();
+    }
+
+    out << std::endl;
+  }
+
+  collector->Write(translationId, out.str());
+}
+
+// Recursively collects the terminal word alignments of `derivation` into
+// `retAlign` as (source position, target position) pairs.
+//
+// retAlign    - output set; points are inserted, never removed
+// derivation  - (sub)derivation whose rule and sub-derivations are visited
+// startTarget - target-side position at which this derivation's yield starts
+//
+// Returns the target-side size of this derivation: one per terminal in the
+// rule's target phrase plus the sizes returned for its sub-derivations.
+// Throws (UTIL_THROW_IF2) when the non-terminal bookkeeping is inconsistent
+// or when an alignment point is already present in `retAlign`.
+template<typename Parser>
+size_t Manager<Parser>::OutputAlignmentNBest(
+ Alignments &retAlign,
+ const Syntax::KBestExtractor::Derivation &derivation,
+ size_t startTarget) const
+{
+ const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;
+
+ size_t totalTargetSize = 0;
+ size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
+
+ const TargetPhrase &tp = *(shyperedge.translation);
+
+ size_t thisSourceSize = CalcSourceSize(derivation);
+
+ // position of each terminal word in translation rule, irrespective of alignment
+ // if non-term, number is undefined
+ // Both vectors start out all-zero; non-terminal slots are overwritten in the
+ // loop below with the size of the corresponding sub-derivation.
+ std::vector<size_t> sourceOffsets(thisSourceSize, 0);
+ std::vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+ const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
+ std::vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
+ "Error");
+
+ // NOTE(review): targetInd is incremented below but never read in this function.
+ size_t targetInd = 0;
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ size_t sourceInd = targetPos2SourceInd[targetPos];
+ size_t sourcePos = sourceInd2pos[sourceInd];
+
+ const Moses::Syntax::KBestExtractor::Derivation &subderivation =
+ *derivation.subderivations[sourceInd];
+
+ // calc source size
+ size_t sourceSize =
+ subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
+ sourceOffsets[sourcePos] = sourceSize;
+
+ // calc target size.
+ // Recursively look thru child hypos
+ size_t currStartTarget = startTarget + totalTargetSize;
+ size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+ currStartTarget);
+ targetOffsets[targetPos] = targetSize;
+
+ totalTargetSize += targetSize;
+ ++targetInd;
+ } else {
+ // Terminal: occupies exactly one target position.
+ ++totalTargetSize;
+ }
+ }
+
+ // convert position within translation rule to absolute position within
+ // source sentence / output sentence
+ // NOTE(review): presumably ShiftOffsets replaces the zero (terminal) entries
+ // with running absolute positions and skips ahead by the non-zero sizes --
+ // confirm against its definition, which is not part of this function.
+ ShiftOffsets(sourceOffsets, startSource);
+ ShiftOffsets(targetOffsets, startTarget);
+
+ // get alignments from this hypo
+ const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();
+
+ // add to output arg, offsetting by source & target
+ AlignmentInfo::const_iterator iter;
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+ const std::pair<size_t,size_t> &align = *iter;
+ size_t relSource = align.first;
+ size_t relTarget = align.second;
+ size_t absSource = sourceOffsets[relSource];
+ size_t absTarget = targetOffsets[relTarget];
+
+ std::pair<size_t, size_t> alignPoint(absSource, absTarget);
+ std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+ // insert() reports false when the point was already in the set.
+ UTIL_THROW_IF2(!ret.second, "Error");
+ }
+
+ return totalTargetSize;
+}
+
+// Returns the number of words covered by the head span of `d`'s hyperedge,
+// reduced by (words covered - 1) for every tail vertex, i.e. each tail vertex
+// is counted as a single position regardless of how many words it covers.
+template<typename Parser>
+size_t Manager<Parser>::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
+{
+  using namespace Moses::Syntax;
+
+  const Syntax::SHyperedge &rule = d.edge->shyperedge;
+  size_t symbolCount = rule.head->pvertex->span.GetNumWordsCovered();
+  for (size_t k = 0; k < rule.tail.size(); ++k) {
+    const size_t covered = rule.tail[k]->pvertex->span.GetNumWordsCovered();
+    symbolCount -= (covered - 1);
+  }
+  return symbolCount;
+}
+
} // S2T
} // Syntax
} // Moses
diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h
index f6bea903d..72e5f1aee 100644
--- a/moses/Syntax/S2T/Manager.h
+++ b/moses/Syntax/S2T/Manager.h
@@ -5,6 +5,7 @@
#include <boost/shared_ptr.hpp>
#include "moses/InputType.h"
+#include "moses/BaseManager.h"
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/SVertexStack.h"
@@ -25,7 +26,7 @@ namespace S2T
{
template<typename Parser>
-class Manager
+class Manager : public BaseManager
{
public:
Manager(const InputType &);
@@ -42,6 +43,16 @@ class Manager
const std::set<Word> &GetUnknownWords() const { return m_oovs; }
+ void OutputNBest(OutputCollector *collector) const;
+ // Empty body: this manager writes nothing for lattice samples; `collector`
+ // is ignored.
+ void OutputLatticeSamples(OutputCollector *collector) const
+ {}
+ // Empty body: this manager writes no alignment output; `collector` is
+ // ignored.
+ void OutputAlignment(OutputCollector *collector) const
+ {}
+ void OutputDetailedTranslationReport(OutputCollector *collector) const;
+ void OutputUnknowns(OutputCollector *collector) const;
+ // Empty body: this manager writes no tree-fragment report; `collector` is
+ // ignored.
+ void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
+ {}
+
private:
void FindOovs(const PChart &, std::set<Word> &, std::size_t);
@@ -59,6 +70,16 @@ class Manager
std::set<Word> m_oovs;
boost::shared_ptr<typename Parser::RuleTrie> m_oovRuleTrie;
std::vector<boost::shared_ptr<Parser> > m_parsers;
+
+ // output
+ void OutputNBestList(OutputCollector *collector,
+ const Moses::Syntax::KBestExtractor::KBestVec &nBestList,
+ long translationId) const;
+ std::size_t OutputAlignmentNBest(Alignments &retAlign,
+ const Moses::Syntax::KBestExtractor::Derivation &derivation,
+ std::size_t startTarget) const;
+ size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
+
};
} // S2T
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index e5511b894..26aded951 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -193,7 +193,6 @@ void TranslationTask::RunPb()
m_ioWrapper.OutputAlignment(out, bestHypo);
}
- m_ioWrapper.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector(), m_source->GetTranslationId(), bestHypo);
manager.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector());
IFVERBOSE(1) {
@@ -283,23 +282,10 @@ void TranslationTask::RunPb()
manager.OutputLatticeSamples(m_ioWrapper.GetLatticeSamplesCollector());
// detailed translation reporting
- if (m_ioWrapper.GetDetailedTranslationCollector()) {
- ostringstream out;
- FixPrecision(out,PRECISION);
- TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
- m_ioWrapper.GetDetailedTranslationCollector()->Write(m_source->GetTranslationId(),out.str());
- }
+ manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
//list of unknown words
- if (m_ioWrapper.GetUnknownsCollector()) {
- const vector<const Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
- ostringstream out;
- for (size_t i = 0; i < unknowns.size(); ++i) {
- out << *(unknowns[i]);
- }
- out << endl;
- m_ioWrapper.GetUnknownsCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager.OutputUnknowns(m_ioWrapper.GetUnknownsCollector());
// report additional statistics
manager.CalcDecoderStatistics();
@@ -339,14 +325,10 @@ void TranslationTask::RunChart()
const std::vector<search::Applied> &nbest = manager.ProcessSentence();
if (!nbest.empty()) {
m_ioWrapper.OutputBestHypo(nbest[0], translationId);
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTranslationReport(&nbest[0], sentence, translationId);
- }
- if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(&nbest[0], sentence, translationId);
- }
+
+ manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
+ manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
+
} else {
m_ioWrapper.OutputBestNone(translationId);
}
@@ -375,19 +357,9 @@ void TranslationTask::RunChart()
}
manager.OutputAlignment(m_ioWrapper.GetAlignmentInfoCollector());
-
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, translationId);
- }
- if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
- const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
- m_ioWrapper.OutputDetailedTreeFragmentsTranslationReport(bestHypo, sentence, translationId);
- }
- if (!staticData.GetOutputUnknownsFile().empty()) {
- m_ioWrapper.OutputUnknowns(manager.GetParser().GetUnknownSources(),
- translationId);
- }
+ manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
+ manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector());
+ manager.OutputUnknowns(m_ioWrapper.GetUnknownsCollector());
//DIMw
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index 3b5fe26d1..fa280a804 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -63,20 +63,13 @@ private:
const Syntax::SHyperedge *best = manager.GetBestSHyperedge();
m_ioWrapper.OutputBestHypo(best, translationId);
// n-best
- if (staticData.GetNBestSize() > 0) {
- Syntax::KBestExtractor::KBestVec nBestList;
- manager.ExtractKBest(staticData.GetNBestSize(), nBestList,
- staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBestList(nBestList, translationId);
- }
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
+
// Write 1-best derivation (-translation-details / -T option).
- if (staticData.IsDetailedTranslationReportingEnabled()) {
- m_ioWrapper.OutputDetailedTranslationReport(best, translationId);
- }
- // Write unknown words file (-output-unknowns option)
- if (!staticData.GetOutputUnknownsFile().empty()) {
- m_ioWrapper.OutputUnknowns(manager.GetUnknownWords(), translationId);
- }
+
+ manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector());
+
+ manager.OutputUnknowns(m_ioWrapper.GetUnknownsCollector());
}
};