Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hieu Hoang <hieuhoang@gmail.com> 2014-12-02 17:38:03 +0300
committer Hieu Hoang <hieuhoang@gmail.com> 2014-12-02 17:38:03 +0300
commit 29ead5be7ba6e47e82c42e8dd613a1dd5181a123 (patch)
tree f7928ab2d64f8a3cc1b0bb3ad03a60cb8cec62db
parent 2fdd49d97310b93ed4f59d331f8e75755c71617e (diff)
move n-best code for phrase-based from IOWrapper to Manager
-rw-r--r-- contrib/other-builds/moses/.project 10
-rw-r--r-- moses/BaseManager.cpp 8
-rw-r--r-- moses/BaseManager.h 10
-rw-r--r-- moses/ChartManager.h 3
-rw-r--r-- moses/Incremental.h 4
-rw-r--r-- moses/Manager.cpp 265
-rw-r--r-- moses/Manager.h 25
-rw-r--r-- moses/TranslationTask.cpp 9
8 files changed, 323 insertions, 11 deletions
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index f35afc2ec..d26a65d86 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -61,6 +61,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
</link>
<link>
+ <name>BaseManager.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.cpp</locationURI>
+ </link>
+ <link>
+ <name>BaseManager.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
+ </link>
+ <link>
<name>BitmapContainer.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.cpp</locationURI>
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
new file mode 100644
index 000000000..f4d8123a0
--- /dev/null
+++ b/moses/BaseManager.cpp
@@ -0,0 +1,8 @@
+#include "BaseManager.h"
+
+namespace Moses
+{
+
+}
+
+
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
new file mode 100644
index 000000000..c0f8c1d50
--- /dev/null
+++ b/moses/BaseManager.h
@@ -0,0 +1,10 @@
+#pragma once
+
+namespace Moses
+{
+class BaseManager // common base class of the decoder managers (Manager, ChartManager, Incremental::Manager); currently has no members
+{
+ // NOTE(review): no virtual destructor is declared here, so derived managers should not be deleted through a BaseManager pointer.
+};
+
+}
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 9ad4f4b85..b9a6f45ba 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -31,6 +31,7 @@
#include "ChartTranslationOptionList.h"
#include "ChartParser.h"
#include "ChartKBestExtractor.h"
+#include "BaseManager.h"
#include <boost/shared_ptr.hpp>
@@ -42,7 +43,7 @@ class ChartSearchGraphWriter;
/** Holds everything you need to decode 1 sentence with the hierarchical/syntax decoder
*/
-class ChartManager
+class ChartManager : public BaseManager
{
private:
InputType const& m_source; /**< source sentence to be translated */
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 20040bf45..d3d317244 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -7,6 +7,8 @@
#include "moses/ChartCellCollection.h"
#include "moses/ChartParser.h"
+#include "BaseManager.h"
+
#include <vector>
#include <string>
@@ -19,7 +21,7 @@ class LanguageModel;
namespace Incremental
{
-class Manager
+class Manager : public BaseManager
{
public:
Manager(const InputType &source);
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 00b9ede8d..61a09c2fc 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Timer.h"
+#include "moses/OutputCollector.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/LM/Base.h"
#include "moses/TranslationModel/PhraseDictionary.h"
@@ -1447,4 +1448,268 @@ SentenceStats& Manager::GetSentenceStats() const
}
+/**
+ * Generate the n-best list for the current input sentence and pass it to the collector.
+ * Nothing is written when no collector is given or when lattice MBR is enabled.
+ *
+ * \param collector destination for the formatted n-best list; may be NULL
+ */
+void Manager::OutputNBest(OutputCollector *collector)
+{
+  const StaticData &config = StaticData::Instance();
+
+  // guard: no destination, or lattice MBR is in use
+  if (!collector || config.UseLatticeMBR()) {
+    return;
+  }
+
+  TrellisPathList paths;
+  CalcNBest(config.GetNBestSize(), paths, config.GetDistinctNBest());
+
+  // format into a string buffer first, then hand the finished text to the collector
+  ostringstream buffer;
+  OutputNBest(buffer,
+              paths,
+              config.GetOutputFactorOrder(),
+              m_source.GetTranslationId(),
+              config.GetReportSegmentation());
+  collector->Write(m_source.GetTranslationId(), buffer.str());
+}
+
+/**
+ * Write every path of an n-best list to a stream, one line per path.
+ *
+ * Each line starts with
+ *   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
+ * and is optionally followed by further "|||"-separated fields holding the phrase
+ * segmentation, the word alignment and the recovered input path, depending on the
+ * corresponding StaticData settings. The stream is flushed once after the last path.
+ *
+ * \param out stream to write to
+ * \param nBestList paths to output
+ * \param outputFactorOrder factors to print for each target word (passed to OutputSurface())
+ * \param translationId id printed at the start of every line
+ * \param reportSegmentation segmentation reporting level, passed through to OutputSurface()
+ */
+void Manager::OutputNBest(std::ostream& out
+                          , const Moses::TrellisPathList &nBestList
+                          , const std::vector<Moses::FactorType>& outputFactorOrder
+                          , long translationId
+                          , char reportSegmentation)
+{
+  const StaticData &staticData = StaticData::Instance();
+  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
+  const bool includeSegmentation = staticData.NBestIncludesSegmentation();
+  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+  // loop-invariant setting: looked up once instead of once per path
+  const bool includeInputPath = staticData.IsPathRecoveryEnabled();
+
+  TrellisPathList::const_iterator iter;
+  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+    const TrellisPath &path = **iter;
+    const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+    // print the surface factor of the translation;
+    // edges are walked from the last element down to the first
+    out << translationId << " ||| ";
+    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+      const Hypothesis &edge = *edges[currEdge];
+      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
+    }
+    out << " |||";
+
+    // print scores with feature names
+    OutputAllFeatureScores(path.GetScoreBreakdown(), out );
+
+    // total
+    out << " ||| " << path.GetTotalScore();
+
+    // phrase-to-phrase segmentation, written as "src[-srcEnd]=tgt[-tgtEnd]".
+    // Starts at size() - 2: the last edge is skipped
+    // (presumably the initial, empty hypothesis -- TODO confirm)
+    if (includeSegmentation) {
+      out << " |||";
+      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+        const Hypothesis &edge = *edges[currEdge];
+        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+        WordsRange targetRange = path.GetTargetWordsRange(edge);
+        out << " " << sourceRange.GetStartPos();
+        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+          out << "-" << sourceRange.GetEndPos();
+        }
+        out << "=" << targetRange.GetStartPos();
+        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+          out << "-" << targetRange.GetEndPos();
+        }
+      }
+    }
+
+    // word alignment; phrase-internal alignment points are shifted by the
+    // start positions of the phrase in source and target
+    if (includeWordAlignment) {
+      out << " ||| ";
+      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+        const Hypothesis &edge = *edges[currEdge];
+        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+        WordsRange targetRange = path.GetTargetWordsRange(edge);
+        const int sourceOffset = sourceRange.GetStartPos();
+        const int targetOffset = targetRange.GetStartPos();
+        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+
+        OutputAlignment(out, ai, sourceOffset, targetOffset);
+      }
+    }
+
+    // input phrases that were actually translated along this path
+    if (includeInputPath) {
+      out << " ||| ";
+      OutputInput(out, edges[0]);
+    }
+
+    // '\n' instead of endl: avoids flushing after every entry, the stream is
+    // flushed once below
+    out << '\n';
+  }
+
+  out << std::flush;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * Print the target phrase of a single hypothesis (edge).
+ *
+ * With reportAllFactors the whole phrase is streamed as-is. Otherwise, for each word, the
+ * factors listed in outputFactorOrder are printed separated by "|" and followed by a space.
+ * If a placeholder factor is configured, the placeholder from the input replaces the first
+ * output factor at the target position it is aligned to. Out-of-vocabulary words are
+ * prefixed with "UNK" when unknown-word marking is switched on.
+ *
+ * If reportSegmentation > 0 and the phrase is not empty, the source span is appended as
+ * "|start-end| "; with reportSegmentation == 2 the word alignment, the score difference to
+ * the previous hypothesis and the per-feature score differences are added inside the bars.
+ *
+ * \param out stream to write to
+ * \param edge hypothesis whose current target phrase is printed
+ * \param outputFactorOrder factors to print; must not be empty
+ * \param reportSegmentation 0 = off, >0 = source span, 2 = span plus alignment and scores
+ * \param reportAllFactors print the complete phrase instead of selected factors
+ * \throw via UTIL_THROW_IF2 if outputFactorOrder is empty or a requested factor is missing
+ */
+void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+                            char reportSegmentation, bool reportAllFactors)
+{
+  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+                 "Must specify at least 1 output factor");
+  const StaticData &staticData = StaticData::Instance();
+  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+  const bool markUnknown = staticData.GetMarkUnknown();
+  if (reportAllFactors) {
+    out << phrase;
+  } else {
+    const FactorType placeholderFactor = staticData.GetPlaceholderFactor();
+
+    std::map<size_t, const Factor*> placeholders;
+    if (placeholderFactor != NOT_FOUND) {
+      // creates map of target position -> factor for placeholders
+      placeholders = GetPlaceholders(edge, placeholderFactor);
+    }
+
+    const size_t size = phrase.GetSize();
+    for (size_t pos = 0 ; pos < size ; pos++) {
+      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+      if (placeholders.size()) {
+        // do placeholders: the input's placeholder overrides the first output factor
+        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+        if (iter != placeholders.end()) {
+          factor = iter->second;
+        }
+      }
+
+      UTIL_THROW_IF2(factor == NULL,
+                     "No factor 0 at position " << pos);
+
+      //preface surface form with UNK if marking unknowns
+      const Word &word = phrase.GetWord(pos);
+      if(markUnknown && word.IsOOV()) {
+        out << "UNK" << *factor;
+      } else {
+        out << *factor;
+      }
+
+      // remaining output factors; named differently so the first factor is not shadowed
+      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+        const Factor *extraFactor = phrase.GetFactor(pos, outputFactorOrder[i]);
+        UTIL_THROW_IF2(extraFactor == NULL,
+                       "No factor " << i << " at position " << pos);
+
+        out << "|" << *extraFactor;
+      }
+      out << " ";
+    }
+  }
+
+  // trace ("report segmentation") option "-t" / "-tt"
+  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+    const int sourceStart = sourceRange.GetStartPos();
+    const int sourceEnd = sourceRange.GetEndPos();
+    out << "|" << sourceStart << "-" << sourceEnd;
+    if (reportSegmentation == 2) { // enriched "-tt"
+      out << ",wa=";
+      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+      OutputAlignment(out, ai, 0, 0);
+      // scores are reported as the difference to the previous hypothesis
+      out << ",total=";
+      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+      out << ",";
+      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+      OutputAllFeatureScores(scoreBreakdown, out);
+    }
+    out << "| ";
+  }
+}
+
+/**
+ * Print the scores of all tuneable feature functions: stateful ones first
+ * (except the one described as "BleuScoreFeature"), then stateless ones.
+ * Formatting of each feature's scores is delegated to OutputFeatureScores().
+ *
+ * \param features score collection to read the values from
+ * \param out stream to write to
+ */
+void Manager::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
+                                     , std::ostream &out)
+{
+  // carried from call to call so OutputFeatureScores() can detect a change of producer name
+  std::string lastName;
+
+  typedef std::vector<const StatefulFeatureFunction*> StatefulList;
+  const StatefulList &stateful = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+  for (StatefulList::const_iterator it = stateful.begin(); it != stateful.end(); ++it) {
+    const StatefulFeatureFunction *ff = *it;
+    if (ff->GetScoreProducerDescription() == "BleuScoreFeature" || !ff->IsTuneable()) {
+      continue;
+    }
+    OutputFeatureScores( out, features, ff, lastName );
+  }
+
+  typedef std::vector<const StatelessFeatureFunction*> StatelessList;
+  const StatelessList &stateless = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+  for (StatelessList::const_iterator it = stateless.begin(); it != stateless.end(); ++it) {
+    const StatelessFeatureFunction *ff = *it;
+    if (!ff->IsTuneable()) {
+      continue;
+    }
+    OutputFeatureScores( out, features, ff, lastName );
+  }
+}
+
+/**
+ * Print the scores of one feature function.
+ *
+ * Dense scores are written as " <score>" each; when labeled n-best output is enabled they
+ * are preceded by " <name>=" whenever the producer name differs from lastName. Sparse
+ * scores follow as " <feature name>= <value>".
+ *
+ * \param out stream to write to
+ * \param features score collection to read the values from
+ * \param ff feature function whose scores are printed
+ * \param lastName in/out: name of the most recently labeled producer
+ */
+void Manager::OutputFeatureScores( std::ostream& out
+                                   , const ScoreComponentCollection &features
+                                   , const FeatureFunction *ff
+                                   , std::string &lastName )
+{
+  const bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
+
+  // regular features (not sparse)
+  if (ff->GetNumScoreComponents() != 0) {
+    if (labeledOutput) {
+      const std::string &producerName = ff->GetScoreProducerDescription();
+      if (lastName != producerName) {
+        lastName = producerName;
+        out << " " << lastName << "=";
+      }
+    }
+    const vector<float> denseScores = features.GetScoresForProducer( ff );
+    for (vector<float>::const_iterator score = denseScores.begin(); score != denseScores.end(); ++score) {
+      out << " " << *score;
+    }
+  }
+
+  // sparse features
+  const FVector sparseScores = features.GetVectorForProducer( ff );
+  FVector::FNVmap::const_iterator entry = sparseScores.cbegin();
+  while (entry != sparseScores.cend()) {
+    out << " " << entry->first << "= " << entry->second;
+    ++entry;
+  }
+}
+
+/**
+ * Print the sorted alignment points of ai as "source-target " pairs,
+ * adding sourceOffset / targetOffset to the respective positions.
+ *
+ * \param out stream to write to
+ * \param ai alignment to print
+ * \param sourceOffset value added to every source position
+ * \param targetOffset value added to every target position
+ */
+void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
+{
+  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+  const AlignVec sortedPoints = ai.GetSortedAlignments();
+
+  for (size_t idx = 0; idx < sortedPoints.size(); ++idx) {
+    const std::pair<size_t,size_t> &point = *sortedPoints[idx];
+    const size_t sourcePos = point.first + sourceOffset;
+    const size_t targetPos = point.second + targetOffset;
+    out << sourcePos << "-" << targetPos << " ";
+  }
+}
+
+/**
+ * Print the input phrases that were translated along the path ending in hypo,
+ * in order of their start position in the input.
+ *
+ * \param os stream to write to
+ * \param hypo last hypothesis of the path
+ */
+void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo)
+{
+  typedef std::vector<const Phrase*> PhraseSlots;
+
+  // one slot per input position, all initially null
+  const size_t slotCount = hypo->GetInput().GetSize();
+  PhraseSlots slots(slotCount);
+
+  // fill the slots at the start position of every translated phrase
+  OutputInput(slots, hypo);
+
+  for (PhraseSlots::const_iterator slot = slots.begin(); slot != slots.end(); ++slot) {
+    if (*slot) {
+      os << **slot;
+    }
+  }
+}
+
+/**
+ * Walk back through the chain of previous hypotheses and record, for every hypothesis
+ * that has a predecessor, its input phrase at the start position of its source range.
+ *
+ * \param map output: input phrases indexed by source start position
+ * \param hypo hypothesis to start from
+ */
+void Manager::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
+{
+  const Hypothesis *previous = hypo->GetPrevHypo();
+  if (!previous) {
+    // a hypothesis without predecessor records nothing
+    return;
+  }
+
+  // earlier hypotheses first, then this one
+  OutputInput(map, previous);
+
+  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
+  map[startPos] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
+}
+
+/**
+ * Collect the placeholder factors of the input phrase covered by hypo.
+ *
+ * \param hypo hypothesis whose translation option is inspected
+ * \param placeholderFactor factor type that holds the placeholder
+ * \return map of target position -> placeholder factor
+ * \throw via UTIL_THROW_IF2 if a placeholder is not aligned to exactly one target word
+ */
+std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor)
+{
+  const TranslationOption &transOpt = hypo.GetTranslationOption();
+  const Phrase &sourcePhrase = transOpt.GetInputPath().GetPhrase();
+
+  std::map<size_t, const Factor*> byTargetPos;
+
+  for (size_t srcIdx = 0; srcIdx < sourcePhrase.GetSize(); ++srcIdx) {
+    const Factor *placeholder = sourcePhrase.GetFactor(srcIdx, placeholderFactor);
+    if (!placeholder) {
+      // this source word carries no placeholder
+      continue;
+    }
+
+    std::set<size_t> alignedTargets = transOpt.GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(srcIdx);
+    UTIL_THROW_IF2(alignedTargets.size() != 1,
+                   "Placeholder should be aligned to 1, and only 1, word");
+    byTargetPos[*alignedTargets.begin()] = placeholder;
+  }
+
+  return byTargetPos;
+}
+
}
diff --git a/moses/Manager.h b/moses/Manager.h
index ef4612de1..203e6c2fd 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "WordsBitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
+#include "BaseManager.h"
namespace Moses
{
@@ -41,6 +42,7 @@ namespace Moses
class SentenceStats;
class TrellisPath;
class TranslationOptionCollection;
+class OutputCollector;
/** Used to output the search graph */
struct SearchGraphNode {
@@ -91,7 +93,7 @@ struct SearchGraphNode {
* the appropriate stack, or re-combined with existing hypotheses
**/
-class Manager
+class Manager : public BaseManager
{
Manager();
Manager(Manager const&);
@@ -126,6 +128,25 @@ protected:
std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList) const;
+ // output
+ // nbest
+ void OutputNBest(std::ostream& out
+ , const Moses::TrellisPathList &nBestList
+ , const std::vector<Moses::FactorType>& outputFactorOrder
+ , long translationId
+ , char reportSegmentation);
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors);
+ void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
+ , std::ostream &out);
+ void OutputFeatureScores( std::ostream& out
+ , const ScoreComponentCollection &features
+ , const FeatureFunction *ff
+ , std::string &lastName );
+ void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
+ void OutputInput(std::ostream& os, const Hypothesis* hypo);
+ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo);
+ std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor);
public:
InputType const& m_source; /**< source sentence to be translated */
@@ -170,6 +191,8 @@ public:
void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;
+ // outputs
+ void OutputNBest(OutputCollector *collector);
};
}
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 6b2812d0b..2ff07d1d1 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -275,14 +275,7 @@ void TranslationTask::RunPb()
additionalReportingTime.start();
// output n-best list
- if (m_ioWrapper.GetNBestOutputCollector() && !staticData.UseLatticeMBR()) {
- TrellisPathList nBestList;
- ostringstream out;
- manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source->GetTranslationId(),
- staticData.GetReportSegmentation());
- m_ioWrapper.GetNBestOutputCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
//lattice samples
if (m_ioWrapper.GetLatticeSamplesCollector()) {