Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2014-12-04 17:11:49 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2014-12-04 17:11:49 +0300
commit 5e4b27412a0db2ded959156bba42e826dac91b50 (patch)
tree e704181805dc2d10e2b9a84e28fdec2442f2bb21
parent 2b3202dbc86031ef3460d7afdc85b8ad6fda4d1a (diff)
move n-best output into Syntax::Manager
-rw-r--r-- moses/BaseManager.h 19
-rw-r--r-- moses/ChartManager.h 16
-rw-r--r-- moses/Syntax/S2T/Manager-inl.h 176
-rw-r--r-- moses/Syntax/S2T/Manager.h 10
-rw-r--r-- moses/TranslationTask.h 8
5 files changed, 205 insertions, 24 deletions
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
index 035532682..41bac2b51 100644
--- a/moses/BaseManager.h
+++ b/moses/BaseManager.h
@@ -13,7 +13,10 @@ class OutputCollector;
class BaseManager
{
protected:
+
+ // output
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
+ typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
std::ostream &out) const;
@@ -26,7 +29,21 @@ protected:
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
void WriteApplicationContext(std::ostream &out,
- const ApplicationContext &context) const;
+ const ApplicationContext &context) const;
+
+ // Rewrites the zero entries of `offsets` with absolute positions, in place.
+ // Walks `offsets` left to right with a running position that starts at
+ // `shift`. An entry equal to 0 is overwritten with the running position,
+ // which then advances by one. A non-zero entry is left untouched and
+ // advances the running position by its own value.
+ // NOTE(review): callers appear to use 0 for terminals and a span width for
+ // non-terminals; a non-terminal of width 0 would be indistinguishable from
+ // a terminal here - confirm that cannot occur.
+ template <class T>
+ void ShiftOffsets(std::vector<T> &offsets, T shift) const
+ {
+ T currPos = shift; // next absolute position to hand out
+ for (size_t i = 0; i < offsets.size(); ++i) {
+ if (offsets[i] == 0) {
+ offsets[i] = currPos;
+ ++currPos;
+ } else {
+ // non-zero entry: skip over that many positions, keep the entry as is
+ currPos += offsets[i];
+ }
+ }
+ }
public:
// outputs
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 86638a561..cfe32c42f 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -63,8 +63,6 @@ private:
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
// output
- typedef std::set< std::pair<size_t, size_t> > Alignments;
-
void OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const;
@@ -94,20 +92,6 @@ private:
const Sentence &sentence,
ApplicationContext &context) const;
- template <class T>
- void ShiftOffsets(std::vector<T> &offsets, T shift) const
- {
- T currPos = shift;
- for (size_t i = 0; i < offsets.size(); ++i) {
- if (offsets[i] == 0) {
- offsets[i] = currPos;
- ++currPos;
- } else {
- currPos += offsets[i];
- }
- }
- }
-
public:
ChartManager(InputType const& source);
~ChartManager();
diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h
index 6ac0612b7..3e0a58840 100644
--- a/moses/Syntax/S2T/Manager-inl.h
+++ b/moses/Syntax/S2T/Manager-inl.h
@@ -1,5 +1,7 @@
#pragma once
+#include <iostream>
+#include <sstream>
#include "moses/DecodeGraph.h"
#include "moses/StaticData.h"
#include "moses/Syntax/BoundedPriorityContainer.h"
@@ -384,7 +386,18 @@ void Manager<Parser>::RecombineAndSort(const std::vector<SHyperedge*> &buffer,
+// Extracts the k-best derivations for m_source and writes them as an n-best
+// list through `collector`. Does nothing when `collector` is null.
+// The list size and the distinct flag are read from StaticData.
+// NOTE(review): the TranslationTask code this call replaces guarded on
+// GetNBestSize() > 0; here only the collector pointer is tested - presumably
+// the collector is null whenever n-best output is disabled; confirm.
template<typename Parser>
void Manager<Parser>::OutputNBest(OutputCollector *collector) const
-{}
+{
+ if (collector) {
+ const StaticData &staticData = StaticData::Instance();
+ long translationId = m_source.GetTranslationId();
+
+ Syntax::KBestExtractor::KBestVec nBestList;
+ ExtractKBest(staticData.GetNBestSize(), nBestList,
+ staticData.GetDistinctNBest());
+ OutputNBestList(collector, nBestList, translationId);
+ }
+
+}
template<typename Parser>
void Manager<Parser>::OutputLatticeSamples(OutputCollector *collector) const
@@ -398,6 +411,167 @@ template<typename Parser>
void Manager<Parser>::OutputDetailedTranslationReport(OutputCollector *collector) const
{}
+// Formats every derivation in nBestList as one n-best line and passes the
+// combined text to the collector in a single Write() call.
+//
+// Each line has the form
+//   <translationId> ||| <surface> ||| <feature scores> ||| <score>
+// optionally followed by " ||| <src>-<tgt> ..." word alignment points (when
+// PrintAlignmentInfoInNbest() is set) and " ||| <tree>" (when
+// PrintNBestTrees() is set).
+//
+//   collector     - receives the finished text; must not be null
+//   nBestList     - derivations to print, written in iteration order
+//   translationId - id written at the start of each line and passed to
+//                   collector->Write()
+//
+// UTIL_THROW_IF2 is triggered if a derivation's output phrase has fewer than
+// two words, because the first and last word are removed as <s> and </s>.
+template<typename Parser>
+void Manager<Parser>::OutputNBestList(OutputCollector *collector,
+    const Syntax::KBestExtractor::KBestVec &nBestList,
+    long translationId) const
+{
+  // Validate the pointer before its first use; the check used to sit at the
+  // end of the function, after the collector had already been dereferenced.
+  assert(collector);
+
+  const StaticData &staticData = StaticData::Instance();
+
+  const std::vector<Moses::FactorType> &outputFactorOrder =
+    staticData.GetOutputFactorOrder();
+
+  std::ostringstream out;
+
+  if (collector->OutputIsCout()) {
+    // Set precision only if we're writing the n-best list to cout. This is to
+    // preserve existing behaviour, but should probably be done either way.
+    FixPrecision(out);
+  }
+
+  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+  const bool printNBestTrees = staticData.PrintNBestTrees();
+
+  for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+       p != nBestList.end(); ++p) {
+    const Syntax::KBestExtractor::Derivation &derivation = **p;
+
+    // get the derivation's target-side yield
+    Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);
+
+    // delete <s> and </s>
+    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+    outputPhrase.RemoveWord(0);
+    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+    // print the translation ID, surface factors, and scores
+    out << translationId << " ||| ";
+    OutputSurface(out, outputPhrase, outputFactorOrder, false);
+    out << " ||| ";
+    OutputAllFeatureScores(derivation.scoreBreakdown, out);
+    out << " ||| " << derivation.score;
+
+    // optionally, print word alignments as "source-target " pairs
+    if (includeWordAlignment) {
+      out << " ||| ";
+      Alignments align;
+      OutputAlignmentNBest(align, derivation, 0);
+      for (Alignments::const_iterator q = align.begin(); q != align.end();
+           ++q) {
+        out << q->first << "-" << q->second << " ";
+      }
+    }
+
+    // optionally, print tree
+    if (printNBestTrees) {
+      TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
+      out << " ||| " << tree->GetString();
+    }
+
+    // `out` is an in-memory stream, so a plain newline is enough; there is
+    // nothing to flush.
+    out << '\n';
+  }
+
+  collector->Write(translationId, out.str());
+}
+
+// Collects the terminal word alignments of `derivation`, including those of
+// all its sub-derivations, into `retAlign` as (source position, target
+// position) pairs.
+//   retAlign    - set that receives the alignment points
+//   derivation  - derivation whose rule (and, recursively, whose
+//                 sub-derivations' rules) are read
+//   startTarget - offset added to this derivation's target-side positions
+// Returns the number of target words counted for `derivation`: one per
+// terminal in its target phrase plus the value returned for each
+// sub-derivation.
+// UTIL_THROW_IF2 is triggered if the non-terminal alignment map and the
+// sub-derivation list differ in size, if a non-terminal target position lies
+// outside the non-terminal index map, or if an alignment point is already
+// present in `retAlign`.
+template<typename Parser>
+size_t Manager<Parser>::OutputAlignmentNBest(
+ Alignments &retAlign,
+ const Syntax::KBestExtractor::Derivation &derivation,
+ size_t startTarget) const
+{
+ const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;
+
+ size_t totalTargetSize = 0;
+ // source-side shift: start position of the span covered by the head vertex
+ size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
+
+ const TargetPhrase &tp = *(shyperedge.translation);
+
+ size_t thisSourceSize = CalcSourceSize(derivation);
+
+ // position of each terminal word in translation rule, irrespective of alignment
+ // if non-term, number is undefined
+ std::vector<size_t> sourceOffsets(thisSourceSize, 0);
+ std::vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+ const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
+ std::vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+ const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+ UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
+ "Error");
+
+ // NOTE(review): targetInd is incremented below but never read.
+ size_t targetInd = 0;
+ for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+ if (tp.GetWord(targetPos).IsNonTerminal()) {
+ UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+ size_t sourceInd = targetPos2SourceInd[targetPos];
+ size_t sourcePos = sourceInd2pos[sourceInd];
+
+ const Moses::Syntax::KBestExtractor::Derivation &subderivation =
+ *derivation.subderivations[sourceInd];
+
+ // calc source size
+ size_t sourceSize =
+ subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
+ // store the width; ShiftOffsets leaves non-zero entries untouched
+ sourceOffsets[sourcePos] = sourceSize;
+
+ // calc target size.
+ // Recursively look thru child hypos
+ size_t currStartTarget = startTarget + totalTargetSize;
+ size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+ currStartTarget);
+ targetOffsets[targetPos] = targetSize;
+
+ totalTargetSize += targetSize;
+ ++targetInd;
+ } else {
+ // terminal: contributes exactly one target word
+ ++totalTargetSize;
+ }
+ }
+
+ // convert position within translation rule to absolute position within
+ // source sentence / output sentence
+ ShiftOffsets(sourceOffsets, startSource);
+ ShiftOffsets(targetOffsets, startTarget);
+
+ // get alignments from this hypo
+ const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();
+
+ // add to output arg, offsetting by source & target
+ // NOTE(review): relSource / relTarget index the offset vectors without a
+ // bounds check - presumably the rule's alignment info is always within the
+ // rule's own source/target length; confirm.
+ AlignmentInfo::const_iterator iter;
+ for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+ const std::pair<size_t,size_t> &align = *iter;
+ size_t relSource = align.first;
+ size_t relTarget = align.second;
+ size_t absSource = sourceOffsets[relSource];
+ size_t absTarget = targetOffsets[relTarget];
+
+ std::pair<size_t, size_t> alignPoint(absSource, absTarget);
+ std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+ // a duplicate alignment point is treated as an error
+ UTIL_THROW_IF2(!ret.second, "Error");
+ }
+
+ return totalTargetSize;
+}
+
+// Returns the number of words covered by the head span of `d`'s hyperedge,
+// with each tail (child) span counted as a single position instead of by
+// the number of words it covers. OutputAlignmentNBest uses the result as the
+// length of its per-rule source offset vector.
+template<typename Parser>
+size_t Manager<Parser>::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
+{
+ using namespace Moses::Syntax;
+
+ const Syntax::SHyperedge &shyperedge = d.edge->shyperedge;
+ size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
+ for (size_t i = 0; i < shyperedge.tail.size(); ++i) {
+ size_t childSize = shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
+ // collapse the child's span to one slot
+ ret -= (childSize - 1);
+ }
+ return ret;
+}
+
} // S2T
} // Syntax
} // Moses
diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h
index 7aa57a8a0..087a1c472 100644
--- a/moses/Syntax/S2T/Manager.h
+++ b/moses/Syntax/S2T/Manager.h
@@ -65,6 +65,16 @@ class Manager : public BaseManager
std::set<Word> m_oovs;
boost::shared_ptr<typename Parser::RuleTrie> m_oovRuleTrie;
std::vector<boost::shared_ptr<Parser> > m_parsers;
+
+ // output
+ // Writes every derivation in nBestList as one n-best line to collector.
+ void OutputNBestList(OutputCollector *collector,
+ const Moses::Syntax::KBestExtractor::KBestVec &nBestList,
+ long translationId) const;
+ // Adds the word alignment points of derivation (and its sub-derivations)
+ // to retAlign; returns the number of target words counted.
+ std::size_t OutputAlignmentNBest(Alignments &retAlign,
+ const Moses::Syntax::KBestExtractor::Derivation &derivation,
+ std::size_t startTarget) const;
+ // Words covered by the head span of d, with each tail span counted as one.
+ size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
+
};
} // S2T
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index fa96c2480..674616891 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -63,12 +63,8 @@ private:
const Syntax::SHyperedge *best = manager.GetBestSHyperedge();
m_ioWrapper.OutputBestHypo(best, translationId);
// n-best
- if (staticData.GetNBestSize() > 0) {
- Syntax::KBestExtractor::KBestVec nBestList;
- manager.ExtractKBest(staticData.GetNBestSize(), nBestList,
- staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBestList(nBestList, translationId);
- }
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
+
// Write 1-best derivation (-translation-details / -T option).
/*