Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2014-12-02 22:09:34 +0300
committerHieu Hoang <hieuhoang@gmail.com>2014-12-02 22:09:34 +0300
commit49a2ff1faa2ef3659fcfb02554278f7ea91172c4 (patch)
treef47d993a2456ea1bfdf29c8be31bcd4ced1f44ad
parent9d55ce13c08c2f5b3be58385d6ae97c1d82ca494 (diff)
parent3da84150959de4dc99edf3ee35665e81a23aae26 (diff)
Merge branch 'merge-cmd'
-rw-r--r--contrib/other-builds/moses/.project10
-rw-r--r--moses-cmd/Jamfile14
-rw-r--r--moses-cmd/LatticeMBRGrid.cpp2
-rw-r--r--moses-cmd/Main.cpp12
-rw-r--r--moses-cmd/simulate-pe.cc284
-rw-r--r--moses/BaseManager.cpp93
-rw-r--r--moses/BaseManager.h33
-rw-r--r--moses/ChartManager.cpp175
-rw-r--r--moses/ChartManager.h31
-rw-r--r--moses/IOWrapper.cpp172
-rw-r--r--moses/IOWrapper.h23
-rw-r--r--moses/Incremental.cpp42
-rw-r--r--moses/Incremental.h11
-rw-r--r--moses/Manager.cpp231
-rw-r--r--moses/Manager.h19
-rw-r--r--moses/TranslationTask.cpp44
-rw-r--r--moses/Util.cpp2
-rw-r--r--moses/Util.h2
-rw-r--r--phrase-extract/extract-ghkm/Alignment.h3
19 files changed, 687 insertions, 516 deletions
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index f35afc2ec..d26a65d86 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -61,6 +61,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
</link>
<link>
+ <name>BaseManager.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.cpp</locationURI>
+ </link>
+ <link>
+ <name>BaseManager.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
+ </link>
+ <link>
<name>BitmapContainer.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.cpp</locationURI>
diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index 7ee90850c..ee762823e 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -2,17 +2,5 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
exe moses : Main.cpp deps ;
exe lmbrgrid : LatticeMBRGrid.cpp deps ;
-
-if [ option.get "with-mm" : : "yes" ] {
-
-exe simulate-pe :
-simulate-pe.cc
-$(TOP)/moses/TranslationModel/UG/generic//generic
-$(TOP)//boost_program_options
-deps
-;
-
-alias programs : moses lmbrgrid simulate-pe ;
-} else {
alias programs : moses lmbrgrid ;
-}
+
diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp
index f00f40fd0..c4faa5776 100644
--- a/moses-cmd/LatticeMBRGrid.cpp
+++ b/moses-cmd/LatticeMBRGrid.cpp
@@ -158,8 +158,8 @@ int main(int argc, char* argv[])
StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
staticData.SetUseLatticeMBR(true);
- IOWrapper* ioWrapper = IOWrapper::GetIOWrapper(staticData);
+ IOWrapper* ioWrapper = new IOWrapper();
if (!ioWrapper) {
throw runtime_error("Failed to initialise IOWrapper");
}
diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index 319aede20..f224f2d8d 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -90,8 +90,8 @@ int main(int argc, char** argv)
}
// set number of significant decimals in output
- IOWrapper::FixPrecision(cout);
- IOWrapper::FixPrecision(cerr);
+ FixPrecision(cout);
+ FixPrecision(cerr);
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
@@ -121,8 +121,12 @@ int main(int argc, char** argv)
srand(time(NULL));
// set up read/writing class
- IOWrapper* ioWrapper = IOWrapper::GetIOWrapper(staticData);
- if (!ioWrapper) {
+ IFVERBOSE(1) {
+ PrintUserTime("Created input-output object");
+ }
+
+ IOWrapper* ioWrapper = new IOWrapper();
+ if (ioWrapper == NULL) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
}
diff --git a/moses-cmd/simulate-pe.cc b/moses-cmd/simulate-pe.cc
deleted file mode 100644
index f05c0f510..000000000
--- a/moses-cmd/simulate-pe.cc
+++ /dev/null
@@ -1,284 +0,0 @@
-// Fork of Main.cpp, to simulate post-editing sessions.
-// Written by Ulrich Germann.
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2009 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-/**
- * Moses main, for single-threaded and multi-threaded.
- **/
-#include <exception>
-#include <fstream>
-#include <sstream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include "util/usage.hh"
-#include "util/exception.hh"
-#include "moses/Util.h"
-
-#ifdef PT_UG
-#include "moses/TranslationModel/UG/mmsapt.h"
-#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
-#endif
-
-#ifdef WIN32
-// Include Visual Leak Detector
-//#include <vld.h>
-#endif
-
-#include "moses/IOWrapper.h"
-#include "moses/Hypothesis.h"
-#include "moses/HypergraphOutput.h"
-#include "moses/Manager.h"
-#include "moses/StaticData.h"
-#include "moses/TypeDef.h"
-#include "moses/Util.h"
-#include "moses/Timer.h"
-#include "moses/TranslationModel/PhraseDictionary.h"
-#include "moses/FF/StatefulFeatureFunction.h"
-#include "moses/FF/StatelessFeatureFunction.h"
-#include "moses/TranslationTask.h"
-
-#ifdef HAVE_PROTOBUF
-#include "hypergraph.pb.h"
-#endif
-
-using namespace std;
-using namespace Moses;
-
-namespace Moses
-{
-
-void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
-{
- outputSearchGraphStream.setf(std::ios::fixed);
- outputSearchGraphStream.precision(6);
- StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
-}
-
-
-} //namespace
-
-/** main function of the command line version of the decoder **/
-int main(int argc, char** argv)
-{
- try {
-
-#ifdef HAVE_PROTOBUF
- GOOGLE_PROTOBUF_VERIFY_VERSION;
-#endif
-
- // echo command line, if verbose
- IFVERBOSE(1) {
- TRACE_ERR("command: ");
- for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
- TRACE_ERR(endl);
- }
-
- // set number of significant decimals in output
- // By the way, having a static member function in an Moses-specific class
- // just to do
- // cout.setf(ios::fixed);cout.precision(3);
- // doesn't make sense.
- // 1. It makes the program harder to understand. If I see
- // cout.setf(ios::fixed);cout.precision(3);
- // I know immediately what's going on. If I see,
- // IOWrapper::FixPrecision(cout);
- // I assume that something much more elaborate is going on (side effects?)
- // and need to look it up.
- // After all, one reads programs in order to understand them.
- // 2. If you want to enforce a certain default behaviour on stdout,
- // that should happen only once. Other components should not do that.
- // 3. If you want to format locally, use
- // string IOWrapper::score_default_formatting = "%.3f";
- // cout << boost::format(IOWrapper::score_default_formatting) % my_variable;
- // This even allows you to do pretty printing by default if you wish.
- // (Actually, the format variable should be constructed
- // from an int IOWrapper::score_output_default_precision itself.)
- // 4. If output speed is an issue and you are afraid of the overhead that
- // boost::format may add, don't use streams to begin with.
- // IOWrapper::FixPrecision(cout);
- // IOWrapper::FixPrecision(cerr);
- cout.setf(ios::fixed); cout.precision(3);
- cout.setf(ios::fixed); cout.precision(3);
-
-#ifdef PT_UG
- // see my notes on the moses support mailing list, this is a hack around
- // the misguided implementation of LoadParameter(), which should leave
- // arguments it doesn't know alone.
- vector<pair<string,int> > argfilter(4);
- argfilter[0] = std::make_pair(string("--spe-src"),1);
- argfilter[1] = std::make_pair(string("--spe-trg"),1);
- argfilter[2] = std::make_pair(string("--spe-aln"),1);
-
- char** my_args; int my_acnt;
- char** mo_args; int mo_acnt;
- filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
-
- ifstream spe_src,spe_trg,spe_aln;
- for (int i = 0; i < my_acnt; i += 2)
- {
- if (!strcmp(my_args[i],"--spe-src"))
- spe_src.open(my_args[i+1]);
- else if (!strcmp(my_args[i],"--spe-trg"))
- spe_trg.open(my_args[i+1]);
- else if (!strcmp(my_args[i],"--spe-aln"))
- spe_aln.open(my_args[i+1]);
- }
-#endif
-
- // load all the settings into the Parameter class
- // (stores them as strings, or array of strings)
- Parameter params;
- if (!params.LoadParam(mo_acnt,mo_args)) {
- exit(1);
- }
-
- // initialize all "global" variables, which are stored in StaticData
- // note: this also loads models such as the language model, etc.
- if (!StaticData::LoadDataStatic(&params, argv[0])) {
- exit(1);
- }
-
- // setting "-show-weights" -> just dump out weights and exit
- if (params.isParamSpecified("show-weights")) {
- ShowWeights();
- exit(0);
- }
-
- // shorthand for accessing information in StaticData
- const StaticData& staticData = StaticData::Instance();
-
- //initialise random numbers
- srand(time(NULL));
-
- // set up read/writing class
- IOWrapper* ioWrapper = IOWrapper::GetIOWrapper(staticData);
- if (!ioWrapper) {
- cerr << "Error; Failed to create IO object" << endl;
- exit(1);
- }
-
- // check on weights
- const ScoreComponentCollection& weights = staticData.GetAllWeights();
- IFVERBOSE(2) {
- TRACE_ERR("The global weight vector looks like this: ");
- TRACE_ERR(weights);
- TRACE_ERR("\n");
- }
-
- boost::shared_ptr<HypergraphOutput<Manager> > hypergraphOutput;
- boost::shared_ptr<HypergraphOutput<ChartManager> > hypergraphOutputChart;
-
- if (staticData.GetOutputSearchGraphHypergraph()) {
- if (staticData.IsChart()) {
- hypergraphOutputChart.reset(new HypergraphOutput<ChartManager>(PRECISION));
- }
- else {
- hypergraphOutput.reset(new HypergraphOutput<Manager>(PRECISION));
- }
- }
-
-#ifdef WITH_THREADS
- ThreadPool pool(staticData.ThreadCount());
-#endif
-
- // main loop over set of input sentences
- InputType* source = NULL;
- size_t lineCount = staticData.GetStartTranslationId();
- while(ioWrapper->ReadInput(staticData.GetInputType(),source)) {
- source->SetTranslationId(lineCount);
- IFVERBOSE(1) {
- ResetUserTime();
- }
-
- FeatureFunction::CallChangeSource(source);
-
- // set up task of translating one sentence
- TranslationTask* task;
- if (staticData.IsChart()) {
- // scfg
- task = new TranslationTask(source, *ioWrapper, hypergraphOutputChart);
- }
- else {
- // pb
- task = new TranslationTask(source, *ioWrapper,
- staticData.GetOutputSearchGraphSLF(),
- hypergraphOutput);
- }
-
- // execute task
-#ifdef WITH_THREADS
-#ifdef PT_UG
- if (my_acnt)
- {
- // simulated post-editing: always run single-threaded!
- task->Run();
- delete task;
- string src,trg,aln;
- UTIL_THROW_IF2(!getline(spe_src,src), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- UTIL_THROW_IF2(!getline(spe_trg,trg), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- UTIL_THROW_IF2(!getline(spe_aln,aln), "[" << HERE << "] "
- << "missing update data for simulated post-editing.");
- BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
- {
- Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
- if (sapt) sapt->add(src,trg,aln);
- VERBOSE(1,"[" << HERE << " added src] " << src << endl);
- VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
- VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
- }
- }
- else
-#endif
- pool.Submit(task);
-#else
- task->Run();
- delete task;
-#endif
-
- source = NULL; //make sure it doesn't get deleted
- ++lineCount;
- }
-
- // we are done, finishing up
-#ifdef WITH_THREADS
- pool.Stop(true); //flush remaining jobs
-#endif
-
- delete ioWrapper;
- FeatureFunction::Destroy();
-
- } catch (const std::exception &e) {
- std::cerr << "Exception: " << e.what() << std::endl;
- return EXIT_FAILURE;
- }
-
- IFVERBOSE(1) util::PrintUsage(std::cerr);
-
-#ifndef EXIT_RETURN
- //This avoids that destructors are called (it can take a long time)
- exit(EXIT_SUCCESS);
-#else
- return EXIT_SUCCESS;
-#endif
-}
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
new file mode 100644
index 000000000..485cdd182
--- /dev/null
+++ b/moses/BaseManager.cpp
@@ -0,0 +1,93 @@
+#include <vector>
+
+#include "StaticData.h"
+#include "BaseManager.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+
+using namespace std;
+
+namespace Moses
+{
+/**
+ * Write the scores of all tuneable feature functions to `out`.
+ * The stateful feature functions are visited first (the one described as
+ * "BleuScoreFeature" is skipped), then the stateless ones. The label of the
+ * last producer written is threaded through both passes.
+ */
+void BaseManager::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
+                                         std::ostream &out) const
+{
+  // name of the most recently labelled producer, shared by both passes
+  std::string prevLabel = "";
+
+  typedef vector<const StatefulFeatureFunction*> StatefulColl;
+  const StatefulColl &stateful = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+  for (StatefulColl::const_iterator it = stateful.begin(); it != stateful.end(); ++it) {
+    const StatefulFeatureFunction *producer = *it;
+    if (producer->GetScoreProducerDescription() == "BleuScoreFeature") {
+      continue;
+    }
+    if (!producer->IsTuneable()) {
+      continue;
+    }
+    OutputFeatureScores( out, features, producer, prevLabel );
+  }
+
+  typedef vector<const StatelessFeatureFunction*> StatelessColl;
+  const StatelessColl &stateless = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+  for (StatelessColl::const_iterator it = stateless.begin(); it != stateless.end(); ++it) {
+    const StatelessFeatureFunction *producer = *it;
+    if (!producer->IsTuneable()) {
+      continue;
+    }
+    OutputFeatureScores( out, features, producer, prevLabel );
+  }
+}
+
+/**
+ * Write the scores that a single feature function contributed.
+ *
+ * @param out       stream receiving the text
+ * @param features  score collection to read from
+ * @param ff        feature function whose scores are written
+ * @param lastName  in/out: description of the last producer that was
+ *                  labelled; updated when a new label is written
+ */
+void BaseManager::OutputFeatureScores( std::ostream& out,
+                                       const ScoreComponentCollection &features,
+                                       const FeatureFunction *ff,
+                                       std::string &lastName ) const
+{
+  const bool labelled = StaticData::Instance().IsLabeledNBestList();
+
+  // dense scores: optional "Name=" label, then one value per component
+  if (ff->GetNumScoreComponents() != 0) {
+    if (labelled) {
+      if (lastName != ff->GetScoreProducerDescription()) {
+        lastName = ff->GetScoreProducerDescription();
+        out << " " << lastName << "=";
+      }
+    }
+    const vector<float> dense = features.GetScoresForProducer( ff );
+    for (vector<float>::const_iterator value = dense.begin(); value != dense.end(); ++value) {
+      out << " " << *value;
+    }
+  }
+
+  // sparse scores: written as "name= value" pairs
+  const FVector sparse = features.GetVectorForProducer( ff );
+  FVector::FNVmap::const_iterator entry = sparse.cbegin();
+  while (entry != sparse.cend()) {
+    out << " " << entry->first << "= " << entry->second;
+    ++entry;
+  }
+}
+
+/***
+ * Print the requested output factors of every word in the given phrase.
+ *
+ * @param out                stream receiving the text
+ * @param phrase             phrase to print
+ * @param outputFactorOrder  factor types to print for each word, in order;
+ *                           must not be empty
+ * @param reportAllFactors   when true the whole phrase is streamed with its
+ *                           own operator<< and outputFactorOrder is not used
+ *                           to select factors
+ *
+ * Throws (via UTIL_THROW_IF2) if outputFactorOrder is empty or if a requested
+ * factor is missing at some position.
+ */
+void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
+                                const std::vector<FactorType> &outputFactorOrder,
+                                bool reportAllFactors) const
+{
+  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+                 "Cannot be empty phrase");
+  if (reportAllFactors) {
+    out << phrase;
+    return;
+  }
+
+  const size_t size = phrase.GetSize();
+  for (size_t pos = 0 ; pos < size ; pos++) {
+    const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+    // The check must come before the dereference: the original streamed
+    // *factor first, so a missing factor was dereferenced before the check.
+    UTIL_THROW_IF2(factor == NULL,
+                   "Empty factor 0 at position " << pos);
+    out << *factor;
+
+    // remaining factors are appended, separated by '|'
+    for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+      const Factor *extraFactor = phrase.GetFactor(pos, outputFactorOrder[i]);
+      UTIL_THROW_IF2(extraFactor == NULL,
+                     "Empty factor " << i << " at position " << pos);
+
+      out << "|" << *extraFactor;
+    }
+    out << " ";
+  }
+}
+
+} // namespace
+
+
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
new file mode 100644
index 000000000..0c995a596
--- /dev/null
+++ b/moses/BaseManager.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <iostream>
+#include <string>
+#include "ScoreComponentCollection.h"
+
+namespace Moses
+{
+class ScoreComponentCollection;
+class FeatureFunction;
+class OutputCollector;
+
+/**
+ * Common base of the decoder managers. It provides shared helpers for
+ * printing phrases and feature scores, and declares the output interface
+ * that each concrete manager implements.
+ */
+class BaseManager
+{
+protected:
+  // Write the scores of all tuneable feature functions to `out`.
+  void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features,
+                              std::ostream &out) const;
+  // Write the scores of one feature function; `lastName` tracks the label
+  // written most recently and is updated in place.
+  void OutputFeatureScores( std::ostream& out,
+                            const ScoreComponentCollection &features,
+                            const FeatureFunction *ff,
+                            std::string &lastName ) const;
+  // Print the selected factors of each word of `phrase`.
+  void OutputSurface(std::ostream &out,
+                     const Phrase &phrase,
+                     const std::vector<FactorType> &outputFactorOrder,
+                     bool reportAllFactors) const;
+
+public:
+  // The class is used polymorphically (it has a pure virtual member), so
+  // the destructor is virtual to make deletion through a BaseManager
+  // pointer well defined.
+  virtual ~BaseManager() {}
+
+  // outputs
+  virtual void OutputNBest(OutputCollector *collector) const = 0;
+
+};
+
+}
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 56bc8529d..745a940de 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -30,9 +30,10 @@
#include "DecodeStep.h"
#include "TreeInput.h"
#include "moses/FF/WordPenaltyProducer.h"
+#include "moses/OutputCollector.h"
+#include "moses/ChartKBestExtractor.h"
using namespace std;
-using namespace Moses;
namespace Moses
{
@@ -297,4 +298,176 @@ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream)
WriteSearchGraph(writer);
}
+/**
+ * Extract the n-best derivations for the current sentence and hand them to
+ * the collector. Does nothing when the configured n-best size is zero.
+ */
+void ChartManager::OutputNBest(OutputCollector *collector) const
+{
+  const StaticData &config = StaticData::Instance();
+  const size_t wanted = config.GetNBestSize();
+  if (wanted == 0) {
+    // n-best output not requested
+    return;
+  }
+
+  const size_t translationId = m_source.GetTranslationId();
+
+  VERBOSE(2,"WRITING " << wanted << " TRANSLATION ALTERNATIVES TO " << config.GetNBestFilePath() << endl);
+  std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > derivations;
+  CalcNBest(wanted, derivations, config.GetDistinctNBest());
+  OutputNBestList(collector, derivations, translationId);
+  IFVERBOSE(2) {
+    PrintUserTime("N-Best Hypotheses Generation Time:");
+  }
+}
+
+/**
+ * Format an n-best list and pass it to the collector as a single string.
+ *
+ * Each derivation produces one line:
+ *   id ||| surface ||| feature scores ||| total score
+ * optionally followed by " ||| " plus word alignments and " ||| " plus the
+ * output tree, depending on the StaticData settings.
+ *
+ * @param collector      destination of the formatted text; must not be NULL
+ * @param nBestList      derivations to print
+ * @param translationId  id printed at the start of every line and passed to
+ *                       the collector
+ */
+void ChartManager::OutputNBestList(OutputCollector *collector,
+                                   const ChartKBestExtractor::KBestVec &nBestList,
+                                   long translationId) const
+{
+  // Checked at entry: the original asserted only after the collector had
+  // already been dereferenced by OutputIsCout() below.
+  assert(collector);
+
+  const StaticData &staticData = StaticData::Instance();
+  const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+
+  std::ostringstream out;
+
+  if (collector->OutputIsCout()) {
+    // Set precision only if we're writing the n-best list to cout. This is to
+    // preserve existing behaviour, but should probably be done either way.
+    FixPrecision(out);
+  }
+
+  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+
+  bool PrintNBestTrees = staticData.PrintNBestTrees();
+
+  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
+       p != nBestList.end(); ++p) {
+    const ChartKBestExtractor::Derivation &derivation = **p;
+
+    // get the derivation's target-side yield
+    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);
+
+    // delete <s> and </s>
+    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+    outputPhrase.RemoveWord(0);
+    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+
+    // print the translation ID, surface factors, and scores
+    out << translationId << " ||| ";
+    OutputSurface(out, outputPhrase, outputFactorOrder, false);
+    out << " ||| ";
+    OutputAllFeatureScores(derivation.scoreBreakdown, out);
+    out << " ||| " << derivation.score;
+
+    // optionally, print word alignments
+    if (includeWordAlignment) {
+      out << " ||| ";
+      Alignments align;
+      OutputAlignmentNBest(align, derivation, 0);
+      for (Alignments::const_iterator q = align.begin(); q != align.end();
+           ++q) {
+        out << q->first << "-" << q->second << " ";
+      }
+    }
+
+    // optionally, print tree
+    if (PrintNBestTrees) {
+      TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
+      out << " ||| " << tree->GetString();
+    }
+
+    out << std::endl;
+  }
+
+  collector->Write(translationId, out.str());
+}
+
+/**
+ * Starting from the number of source words covered by `hypo`, subtract
+ * (covered words - 1) for every previous (child) hypothesis, and return the
+ * result.
+ */
+size_t ChartManager::CalcSourceSize(const Moses::ChartHypothesis *hypo) const
+{
+  size_t total = hypo->GetCurrSourceRange().GetNumWordsCovered();
+
+  typedef std::vector<const ChartHypothesis*> ChildColl;
+  const ChildColl &children = hypo->GetPrevHypos();
+  for (ChildColl::const_iterator child = children.begin(); child != children.end(); ++child) {
+    const size_t covered = (*child)->GetCurrSourceRange().GetNumWordsCovered();
+    total -= (covered - 1);
+  }
+
+  return total;
+}
+
+/**
+ * Collect the terminal word alignments of a derivation, recursing into its
+ * sub-derivations, as absolute (source, target) positions.
+ *
+ * @param retAlign     output set; alignment points are inserted here
+ * @param derivation   derivation whose alignments are collected
+ * @param startTarget  target position at which this derivation's yield starts
+ * @return the number of target words produced by this derivation
+ *
+ * Throws (via UTIL_THROW_IF2) when the alignment information is inconsistent
+ * with the rule (index out of range) or when an alignment point is inserted
+ * twice.
+ */
+size_t ChartManager::OutputAlignmentNBest(
+  Alignments &retAlign,
+  const Moses::ChartKBestExtractor::Derivation &derivation,
+  size_t startTarget) const
+{
+  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;
+
+  size_t totalTargetSize = 0;
+  size_t startSource = hypo.GetCurrSourceRange().GetStartPos();
+
+  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();
+
+  size_t thisSourceSize = CalcSourceSize(&hypo);
+
+  // position of each terminal word in translation rule, irrespective of alignment
+  // if non-term, number is undefined
+  vector<size_t> sourceOffsets(thisSourceSize, 0);
+  vector<size_t> targetOffsets(tp.GetSize(), 0);
+
+  const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
+  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
+  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
+
+  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
+                 "Error");
+
+  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
+    if (tp.GetWord(targetPos).IsNonTerminal()) {
+      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
+      size_t sourceInd = targetPos2SourceInd[targetPos];
+      // sourceInd indexes both sourceInd2pos and subderivations (equal sizes,
+      // checked above); reject out-of-range values instead of reading past
+      // the end of the vectors
+      UTIL_THROW_IF2(sourceInd >= sourceInd2pos.size(),
+                     "Non-terminal source index " << sourceInd << " out of range");
+      size_t sourcePos = sourceInd2pos[sourceInd];
+      UTIL_THROW_IF2(sourcePos >= sourceOffsets.size(),
+                     "Non-terminal source position " << sourcePos << " out of range");
+
+      const Moses::ChartKBestExtractor::Derivation &subderivation =
+        *derivation.subderivations[sourceInd];
+
+      // calc source size
+      size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
+      sourceOffsets[sourcePos] = sourceSize;
+
+      // calc target size.
+      // Recursively look thru child hypos
+      size_t currStartTarget = startTarget + totalTargetSize;
+      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
+                          currStartTarget);
+      targetOffsets[targetPos] = targetSize;
+
+      totalTargetSize += targetSize;
+    } else {
+      ++totalTargetSize;
+    }
+  }
+
+  // convert position within translation rule to absolute position within
+  // source sentence / output sentence
+  ShiftOffsets(sourceOffsets, startSource);
+  ShiftOffsets(targetOffsets, startTarget);
+
+  // get alignments from this hypo
+  const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();
+
+  // add to output arg, offsetting by source & target
+  AlignmentInfo::const_iterator iter;
+  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
+    const std::pair<size_t,size_t> &align = *iter;
+    size_t relSource = align.first;
+    size_t relTarget = align.second;
+    // alignment points come from the rule's data; make sure they fall
+    // inside the rule before using them as indices
+    UTIL_THROW_IF2(relSource >= sourceOffsets.size(),
+                   "Aligned source position " << relSource << " out of range");
+    UTIL_THROW_IF2(relTarget >= targetOffsets.size(),
+                   "Aligned target position " << relTarget << " out of range");
+    size_t absSource = sourceOffsets[relSource];
+    size_t absTarget = targetOffsets[relTarget];
+
+    pair<size_t, size_t> alignPoint(absSource, absTarget);
+    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
+    UTIL_THROW_IF2(!ret.second, "Error");
+  }
+
+  return totalTargetSize;
+}
+
} // namespace Moses
diff --git a/moses/ChartManager.h b/moses/ChartManager.h
index 9ad4f4b85..80f6586bb 100644
--- a/moses/ChartManager.h
+++ b/moses/ChartManager.h
@@ -31,6 +31,8 @@
#include "ChartTranslationOptionList.h"
#include "ChartParser.h"
#include "ChartKBestExtractor.h"
+#include "BaseManager.h"
+#include "moses/Syntax/KBestExtractor.h"
#include <boost/shared_ptr.hpp>
@@ -42,7 +44,7 @@ class ChartSearchGraphWriter;
/** Holds everything you need to decode 1 sentence with the hierachical/syntax decoder
*/
-class ChartManager
+class ChartManager : public BaseManager
{
private:
InputType const& m_source; /**< source sentence to be translated */
@@ -60,6 +62,31 @@ private:
const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable , size_t* winners, size_t* losers) const;
void WriteSearchGraph(const ChartSearchGraphWriter& writer) const;
+ // output
+ typedef std::set< std::pair<size_t, size_t> > Alignments;
+
+ void OutputNBestList(OutputCollector *collector,
+ const ChartKBestExtractor::KBestVec &nBestList,
+ long translationId) const;
+ size_t CalcSourceSize(const Moses::ChartHypothesis *hypo) const;
+ size_t OutputAlignmentNBest(Alignments &retAlign,
+ const Moses::ChartKBestExtractor::Derivation &derivation,
+ size_t startTarget) const;
+
+  /**
+   * Rewrite `offsets` in place. Walking left to right with a running
+   * position that starts at `shift`: an entry equal to 0 is replaced by the
+   * running position, which then advances by one; a non-zero entry is left
+   * unchanged and advances the running position by its value.
+   */
+  template <class T>
+  void ShiftOffsets(std::vector<T> &offsets, T shift) const
+  {
+    T nextPos = shift;
+    for (typename std::vector<T>::iterator slot = offsets.begin();
+         slot != offsets.end(); ++slot) {
+      if (*slot != 0) {
+        // non-zero entry: skip over the span it describes
+        nextPos += *slot;
+        continue;
+      }
+      *slot = nextPos;
+      ++nextPos;
+    }
+  }
+
public:
ChartManager(InputType const& source);
~ChartManager();
@@ -108,6 +135,8 @@ public:
const ChartParser &GetParser() const { return m_parser; }
+ // outputs
+ void OutputNBest(OutputCollector *collector) const;
};
}
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index bec21fc5a..becde9b65 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -68,17 +68,8 @@ using namespace std;
namespace Moses
{
-IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
- , const std::vector<FactorType> &outputFactorOrder
- , const FactorMask &inputFactorUsed
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath)
- :m_inputFactorOrder(inputFactorOrder)
- ,m_outputFactorOrder(outputFactorOrder)
- ,m_inputFactorUsed(inputFactorUsed)
- ,m_inputFilePath(inputFilePath)
- ,m_nBestStream(NULL)
+IOWrapper::IOWrapper()
+ :m_nBestStream(NULL)
,m_outputWordGraphStream(NULL)
,m_outputSearchGraphStream(NULL)
@@ -105,12 +96,21 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
{
const StaticData &staticData = StaticData::Instance();
- if (inputFilePath.empty()) {
+ m_inputFactorOrder = &staticData.GetInputFactorOrder();
+ m_outputFactorOrder = &staticData.GetOutputFactorOrder();
+ m_inputFactorUsed = FactorMask(*m_inputFactorOrder);
+
+ size_t nBestSize = staticData.GetNBestSize();
+ string nBestFilePath = staticData.GetNBestFilePath();
+
+ staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
+ if (m_inputFilePath.empty()) {
m_inputFile = NULL;
m_inputStream = &cin;
}
else {
- m_inputFile = new InputFileStream(inputFilePath);
+ VERBOSE(2,"IO from File" << endl);
+ m_inputFile = new InputFileStream(m_inputFilePath);
m_inputStream = m_inputFile;
}
@@ -250,7 +250,7 @@ InputType*
IOWrapper::
GetInput(InputType* inputType)
{
- if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
+ if(inputType->Read(*m_inputStream, *m_inputFactorOrder)) {
return inputType;
} else {
delete inputType;
@@ -258,12 +258,6 @@ GetInput(InputType* inputType)
}
}
-void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
-{
- stream.setf(std::ios::fixed);
- stream.precision(size);
-}
-
std::map<size_t, const Factor*> IOWrapper::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor)
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
@@ -628,34 +622,6 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
}
}
-void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
-{
- std::ostringstream out;
- // wtf? copied from the original OutputNBestList
- if (m_nBestOutputCollector->OutputIsCout()) {
- FixPrecision(out);
- }
- Phrase outputPhrase;
- ScoreComponentCollection features;
- for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
- Incremental::PhraseAndFeatures(*i, outputPhrase, features);
- // <s> and </s>
- UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
- "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
-
- outputPhrase.RemoveWord(0);
- outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
- out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
- out << " ||| ";
- OutputAllFeatureScores(features, out);
- out << " ||| " << i->GetScore() << '\n';
- }
- out << std::flush;
- assert(m_nBestOutputCollector);
- m_nBestOutputCollector->Write(translationId, out.str());
-}
-
/***
* print surface factor only for the given phrase
*/
@@ -883,7 +849,7 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
+ OutputSurface(out, outputPhrase, *m_outputFactorOrder, false);
out << " ||| ";
OutputAllFeatureScores(derivation.scoreBreakdown, out);
out << " ||| " << derivation.score;
@@ -1218,7 +1184,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, c
OutputInput(cout, hypo);
cout << "||| ";
}
- OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
+ OutputBestSurface(cout, hypo, *m_outputFactorOrder, reportSegmentation, reportAllFactors);
cout << endl;
}
} else {
@@ -1251,80 +1217,6 @@ bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
return (source ? true : false);
}
-void IOWrapper::OutputNBest(std::ostream& out
- , const Moses::TrellisPathList &nBestList
- , const std::vector<Moses::FactorType>& outputFactorOrder
- , long translationId
- , char reportSegmentation)
-{
- const StaticData &staticData = StaticData::Instance();
- bool reportAllFactors = staticData.GetReportAllFactorsNBest();
- bool includeSegmentation = staticData.NBestIncludesSegmentation();
- bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
-
- TrellisPathList::const_iterator iter;
- for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
- const TrellisPath &path = **iter;
- const std::vector<const Hypothesis *> &edges = path.GetEdges();
-
- // print the surface factor of the translation
- out << translationId << " ||| ";
- for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
- }
- out << " |||";
-
- // print scores with feature names
- OutputAllFeatureScores(path.GetScoreBreakdown(), out );
-
- // total
- out << " ||| " << path.GetTotalScore();
-
- //phrase-to-phrase segmentation
- if (includeSegmentation) {
- out << " |||";
- for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- WordsRange targetRange = path.GetTargetWordsRange(edge);
- out << " " << sourceRange.GetStartPos();
- if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
- out << "-" << sourceRange.GetEndPos();
- }
- out<< "=" << targetRange.GetStartPos();
- if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
- out<< "-" << targetRange.GetEndPos();
- }
- }
- }
-
- if (includeWordAlignment) {
- out << " ||| ";
- for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
- const Hypothesis &edge = *edges[currEdge];
- const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
- WordsRange targetRange = path.GetTargetWordsRange(edge);
- const int sourceOffset = sourceRange.GetStartPos();
- const int targetOffset = targetRange.GetStartPos();
- const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
-
- OutputAlignment(out, ai, sourceOffset, targetOffset);
-
- }
- }
-
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- out << " ||| ";
- OutputInput(out, edges[0]);
- }
-
- out << endl;
- }
-
- out << std::flush;
-}
-
void IOWrapper::OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
, std::ostream &out)
{
@@ -1403,32 +1295,6 @@ void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solu
OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
}
-IOWrapper *IOWrapper::GetIOWrapper(const StaticData &staticData)
-{
- IOWrapper *ioWrapper;
- const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
- ,&outputFactorOrder = staticData.GetOutputFactorOrder();
- FactorMask inputFactorUsed(inputFactorOrder);
-
- // io
- string inputPath;
- staticData.GetParameter().SetParameter<string>(inputPath, "input-file", "");
- if (!inputPath.empty()) {
- VERBOSE(2,"IO from File" << endl);
- }
-
- ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
- , staticData.GetNBestSize()
- , staticData.GetNBestFilePath()
- , inputPath);
-
- IFVERBOSE(1)
- PrintUserTime("Created input-output object");
-
- return ioWrapper;
-}
-
-
////////////////////////////
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
@@ -1453,7 +1319,7 @@ void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
return;
}
std::ostringstream out;
- IOWrapper::FixPrecision(out);
+ FixPrecision(out);
if (best == NULL) {
VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
if (StaticData::Instance().GetOutputHypoScore()) {
@@ -1483,7 +1349,7 @@ void IOWrapper::OutputNBestList(
if (m_nBestOutputCollector->OutputIsCout()) {
// Set precision only if we're writing the n-best list to cout. This is to
// preserve existing behaviour, but should probably be done either way.
- IOWrapper::FixPrecision(out);
+ FixPrecision(out);
}
bool includeWordAlignment =
@@ -1506,7 +1372,7 @@ void IOWrapper::OutputNBestList(
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
- OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
+ OutputSurface(out, outputPhrase, *m_outputFactorOrder, false);
out << " ||| ";
OutputAllFeatureScores(derivation.scoreBreakdown, out);
out << " ||| " << derivation.score;
diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h
index 4253871b3..17781b2ac 100644
--- a/moses/IOWrapper.h
+++ b/moses/IOWrapper.h
@@ -73,9 +73,9 @@ class IOWrapper
{
protected:
- const std::vector<Moses::FactorType> &m_inputFactorOrder;
- const std::vector<Moses::FactorType> &m_outputFactorOrder;
- const Moses::FactorMask &m_inputFactorUsed;
+ const std::vector<Moses::FactorType> *m_inputFactorOrder;
+ const std::vector<Moses::FactorType> *m_outputFactorOrder;
+ Moses::FactorMask m_inputFactorUsed;
std::string m_inputFilePath;
Moses::InputFileStream *m_inputFile;
std::istream *m_inputStream;
@@ -157,15 +157,7 @@ protected:
}
public:
- static IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
- static void FixPrecision(std::ostream &, size_t size=3);
-
- IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
- , const std::vector<Moses::FactorType> &outputFactorOrder
- , const Moses::FactorMask &inputFactorUsed
- , size_t nBestSize
- , const std::string &nBestFilePath
- , const std::string &inputFilePath = "");
+ IOWrapper();
~IOWrapper();
Moses::InputType* GetInput(Moses::InputType *inputType);
@@ -215,7 +207,6 @@ public:
void OutputBestNone(long translationId);
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
- void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
void OutputNBestList(const Moses::Syntax::KBestExtractor::KBestVec &nBestList, long translationId);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
@@ -252,12 +243,6 @@ public:
static void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges);
static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset);
- void OutputNBest(std::ostream& out
- , const Moses::TrellisPathList &nBestList
- , const std::vector<Moses::FactorType>& outputFactorOrder
- , long translationId
- , char reportSegmentation);
-
static void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
, std::ostream &out);
static void OutputFeatureScores( std::ostream& out
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 06c46b786..d366065a5 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -8,6 +8,7 @@
#include "moses/StaticData.h"
#include "moses/Util.h"
#include "moses/LM/Base.h"
+#include "moses/OutputCollector.h"
#include "lm/model.hh"
#include "search/applied.hh"
@@ -278,6 +279,47 @@ const std::vector<search::Applied> &Manager::ProcessSentence()
return *completed_nbest_;
}
+/** Write the stored n-best list (completed_nbest_) for this sentence to the
+ *  given collector. Does nothing when no collector is supplied.
+ */
+void Manager::OutputNBest(OutputCollector *collector) const
+{
+  if (collector != NULL) {
+    OutputNBestList(collector, *completed_nbest_, source_.GetTranslationId());
+  }
+}
+
+/**
+ * Render an n-best list as text and pass it to the collector.
+ * Each entry becomes one line: translation id, surface string (with the first
+ * and last word, <s> and </s>, removed), feature scores and total score,
+ * separated by " ||| ".
+ *
+ * \param collector receives the finished text; must not be NULL
+ * \param nbest entries to print
+ * \param translationId id printed on every line and passed to the collector
+ */
+void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
+{
+  // Validate the collector before its first dereference below.
+  assert(collector);
+
+  const StaticData &staticData = StaticData::Instance();
+  const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+
+  std::ostringstream out;
+  // Precision is fixed only when the n-best list is written to cout. This was
+  // carried over from the original OutputNBestList to keep its output unchanged.
+  if (collector->OutputIsCout()) {
+    FixPrecision(out);
+  }
+  Phrase outputPhrase;
+  ScoreComponentCollection features;
+  for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
+    Incremental::PhraseAndFeatures(*i, outputPhrase, features);
+    // <s> and </s>
+    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
+                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
+
+    // strip the sentence markers before printing
+    outputPhrase.RemoveWord(0);
+    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
+    out << translationId << " ||| ";
+    OutputSurface(out, outputPhrase, outputFactorOrder, false);
+    out << " ||| ";
+    OutputAllFeatureScores(features, out);
+    out << " ||| " << i->GetScore() << '\n';
+  }
+  out << std::flush;
+  collector->Write(translationId, out.str());
+}
+
+
namespace
{
diff --git a/moses/Incremental.h b/moses/Incremental.h
index 20040bf45..35d418fd3 100644
--- a/moses/Incremental.h
+++ b/moses/Incremental.h
@@ -7,6 +7,8 @@
#include "moses/ChartCellCollection.h"
#include "moses/ChartParser.h"
+#include "BaseManager.h"
+
#include <vector>
#include <string>
@@ -19,7 +21,7 @@ class LanguageModel;
namespace Incremental
{
-class Manager
+class Manager : public BaseManager
{
public:
Manager(const InputType &source);
@@ -35,6 +37,10 @@ public:
return *completed_nbest_;
}
+ // output
+ void OutputNBest(OutputCollector *collector) const;
+
+
private:
template <class Model, class Best> search::History PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out);
@@ -51,6 +57,9 @@ private:
search::NBest n_best_;
const std::vector<search::Applied> *completed_nbest_;
+
+ // outputs
+ void OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const;
};
// Just get the phrase.
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 00b9ede8d..4d01d5e09 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "Timer.h"
+#include "moses/OutputCollector.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/LM/Base.h"
#include "moses/TranslationModel/PhraseDictionary.h"
@@ -1447,4 +1448,234 @@ SentenceStats& Manager::GetSentenceStats() const
}
+/** Compute the n-best list for the source sentence and write it to the
+ *  collector. Skipped when there is no collector or lattice MBR is in use.
+ */
+void Manager::OutputNBest(OutputCollector *collector) const
+{
+  const StaticData &staticData = StaticData::Instance();
+
+  if (collector == NULL || staticData.UseLatticeMBR()) {
+    return;
+  }
+
+  TrellisPathList nBestList;
+  CalcNBest(staticData.GetNBestSize(), nBestList, staticData.GetDistinctNBest());
+
+  ostringstream out;
+  OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(),
+              m_source.GetTranslationId(), staticData.GetReportSegmentation());
+
+  collector->Write(m_source.GetTranslationId(), out.str());
+}
+
+/***
+ * Write every path of an n-best list to `out`, one line per path:
+ * translation id, surface string, feature scores and total score, followed --
+ * depending on the StaticData settings -- by the phrase segmentation, the word
+ * alignment and the recovered input path.
+ */
+void Manager::OutputNBest(std::ostream& out
+                          , const Moses::TrellisPathList &nBestList
+                          , const std::vector<Moses::FactorType>& outputFactorOrder
+                          , long translationId
+                          , char reportSegmentation) const
+{
+  const StaticData &staticData = StaticData::Instance();
+  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
+  const bool includeSegmentation = staticData.NBestIncludesSegmentation();
+  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+
+  for (TrellisPathList::const_iterator pathIter = nBestList.begin();
+       pathIter != nBestList.end(); ++pathIter) {
+    const TrellisPath &path = **pathIter;
+    const std::vector<const Hypothesis *> &edges = path.GetEdges();
+    const int numEdges = (int)edges.size();
+
+    // translation id, then the surface form; edges are visited last to first
+    out << translationId << " ||| ";
+    for (int idx = numEdges - 1; idx >= 0; --idx) {
+      OutputSurface(out, *edges[idx], outputFactorOrder, reportSegmentation, reportAllFactors);
+    }
+    out << " |||";
+
+    // feature scores, labelled by feature name
+    OutputAllFeatureScores(path.GetScoreBreakdown(), out);
+
+    // overall score of the path
+    out << " ||| " << path.GetTotalScore();
+
+    // phrase-to-phrase segmentation: "src[-srcEnd]=tgt[-tgtEnd]" per edge,
+    // starting one edge below the last element of `edges`
+    if (includeSegmentation) {
+      out << " |||";
+      for (int idx = numEdges - 2; idx >= 0; --idx) {
+        const Hypothesis &edge = *edges[idx];
+        const WordsRange &srcRange = edge.GetCurrSourceWordsRange();
+        WordsRange tgtRange = path.GetTargetWordsRange(edge);
+
+        out << " " << srcRange.GetStartPos();
+        if (srcRange.GetStartPos() < srcRange.GetEndPos()) {
+          out << "-" << srcRange.GetEndPos();
+        }
+        out << "=" << tgtRange.GetStartPos();
+        if (tgtRange.GetStartPos() < tgtRange.GetEndPos()) {
+          out << "-" << tgtRange.GetEndPos();
+        }
+      }
+    }
+
+    // word alignment of each edge, shifted by the edge's source/target start
+    if (includeWordAlignment) {
+      out << " ||| ";
+      for (int idx = numEdges - 2; idx >= 0; --idx) {
+        const Hypothesis &edge = *edges[idx];
+        const WordsRange &srcRange = edge.GetCurrSourceWordsRange();
+        WordsRange tgtRange = path.GetTargetWordsRange(edge);
+        const int sourceOffset = srcRange.GetStartPos();
+        const int targetOffset = tgtRange.GetStartPos();
+        const AlignmentInfo &alignInfo = edge.GetCurrTargetPhrase().GetAlignTerm();
+
+        OutputAlignment(out, alignInfo, sourceOffset, targetOffset);
+      }
+    }
+
+    // input phrases along the path, only when path recovery is enabled
+    if (staticData.IsPathRecoveryEnabled()) {
+      out << " ||| ";
+      OutputInput(out, edges[0]);
+    }
+
+    out << endl;
+  }
+
+  out << std::flush;
+}
+
+//////////////////////////////////////////////////////////////////////////
+/***
+ * Print the target phrase of one hypothesis edge.
+ * With reportAllFactors the whole phrase is streamed as-is. Otherwise every
+ * word is printed as its output factors joined by "|" and followed by a space;
+ * the first factor may be replaced by a placeholder factor taken from the
+ * input, and "UNK" is prepended to OOV words when unknown-marking is enabled.
+ * A positive reportSegmentation on a non-empty phrase appends the source span
+ * between "|" markers; the value 2 additionally adds the word alignment, the
+ * score gained over the previous hypothesis and the feature score differences.
+ */
+void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+                            char reportSegmentation, bool reportAllFactors) const
+{
+  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
+                 "Must specify at least 1 output factor");
+  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
+  bool markUnknown = StaticData::Instance().GetMarkUnknown();
+  if (reportAllFactors) {
+    out << phrase;
+  } else {
+    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();
+
+    std::map<size_t, const Factor*> placeholders;
+    if (placeholderFactor != NOT_FOUND) {
+      // creates map of target position -> factor for placeholders
+      placeholders = GetPlaceholders(edge, placeholderFactor);
+    }
+
+    size_t size = phrase.GetSize();
+    for (size_t pos = 0 ; pos < size ; pos++) {
+      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+
+      if (placeholders.size()) {
+        // do placeholders
+        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
+        if (iter != placeholders.end()) {
+          factor = iter->second;
+        }
+      }
+
+      UTIL_THROW_IF2(factor == NULL,
+                     "No factor 0 at position " << pos);
+
+      //preface surface form with UNK if marking unknowns
+      const Word &word = phrase.GetWord(pos);
+      if(markUnknown && word.IsOOV()) {
+        out << "UNK" << *factor;
+      } else {
+        out << *factor;
+      }
+
+      // remaining output factors; a separate name avoids shadowing `factor`
+      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+        const Factor *extraFactor = phrase.GetFactor(pos, outputFactorOrder[i]);
+        UTIL_THROW_IF2(extraFactor == NULL,
+                       "No factor " << i << " at position " << pos);
+
+        out << "|" << *extraFactor;
+      }
+      out << " ";
+    }
+  }
+
+  // trace ("report segmentation") option "-t" / "-tt"
+  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
+    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+    const int sourceStart = sourceRange.GetStartPos();
+    const int sourceEnd = sourceRange.GetEndPos();
+    out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt"
+    if (reportSegmentation == 2) {
+      out << ",wa=";
+      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
+      OutputAlignment(out, ai, 0, 0);
+      // score contributed by this edge alone: difference to the previous hypothesis
+      out << ",total=";
+      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
+      out << ",";
+      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
+      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
+      OutputAllFeatureScores(scoreBreakdown, out);
+    }
+    out << "| ";
+  }
+}
+
+/** Print every alignment point of `ai`, in the order returned by
+ *  GetSortedAlignments(), as "source-target " with the given offsets added.
+ */
+void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const
+{
+  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+  const AlignVec sortedPoints = ai.GetSortedAlignments();
+
+  for (size_t idx = 0; idx < sortedPoints.size(); ++idx) {
+    const std::pair<size_t,size_t> &point = *sortedPoints[idx];
+    out << point.first + sourceOffset << "-" << point.second + targetOffset << " ";
+  }
+}
+
+/** Print the input phrases used along the hypothesis chain ending in `hypo`.
+ *  The phrases are collected into one slot per input position and the filled
+ *  slots are streamed in position order.
+ */
+void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const
+{
+  const size_t numSlots = hypo->GetInput().GetSize();
+  std::vector<const Phrase*> phrasesByStart(numSlots, 0);
+  OutputInput(phrasesByStart, hypo);
+
+  for (std::vector<const Phrase*>::const_iterator slot = phrasesByStart.begin();
+       slot != phrasesByStart.end(); ++slot) {
+    if (*slot) {
+      os << **slot;
+    }
+  }
+}
+
+/** Walk the hypothesis chain back from `hypo` and record, for every hypothesis
+ *  that has a predecessor, its input phrase at the start position of its
+ *  source range. Earlier hypotheses are recorded first.
+ */
+void Manager::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const
+{
+  const Hypothesis *previous = hypo->GetPrevHypo();
+  if (!previous) {
+    // a hypothesis without a predecessor contributes nothing
+    return;
+  }
+
+  OutputInput(map, previous);
+
+  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
+  map[startPos] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
+}
+
+/** Build a map from target position to placeholder factor for one hypothesis.
+ *  Every input word that carries the placeholder factor must be aligned to
+ *  exactly one target word; that target position is used as the key.
+ */
+std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
+{
+  const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
+  const Phrase &inputPhrase = inputPath.GetPhrase();
+
+  std::map<size_t, const Factor*> placeholders;
+
+  for (size_t srcIdx = 0; srcIdx < inputPhrase.GetSize(); ++srcIdx) {
+    const Factor *factor = inputPhrase.GetFactor(srcIdx, placeholderFactor);
+    if (!factor) {
+      // this input word is not a placeholder
+      continue;
+    }
+
+    std::set<size_t> alignedTargets = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(srcIdx);
+    UTIL_THROW_IF2(alignedTargets.size() != 1,
+                   "Placeholder should be aligned to 1, and only 1, word");
+    placeholders[*alignedTargets.begin()] = factor;
+  }
+
+  return placeholders;
+}
+
+/** Draw lattice samples for the source sentence and write them to the collector
+ *  in n-best format. Does nothing when no collector is supplied.
+ */
+void Manager::OutputLatticeSamples(OutputCollector *collector) const
+{
+  const StaticData &staticData = StaticData::Instance();
+  if (collector == NULL) {
+    return;
+  }
+
+  TrellisPathList latticeSamples;
+  CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+
+  ostringstream out;
+  OutputNBest(out, latticeSamples, staticData.GetOutputFactorOrder(),
+              m_source.GetTranslationId(), staticData.GetReportSegmentation());
+
+  collector->Write(m_source.GetTranslationId(), out.str());
+}
+
}
diff --git a/moses/Manager.h b/moses/Manager.h
index ef4612de1..b078c5c8c 100644
--- a/moses/Manager.h
+++ b/moses/Manager.h
@@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "WordsBitmap.h"
#include "Search.h"
#include "SearchCubePruning.h"
+#include "BaseManager.h"
namespace Moses
{
@@ -91,7 +92,7 @@ struct SearchGraphNode {
* the appropriate stack, or re-combined with existing hypotheses
**/
-class Manager
+class Manager : public BaseManager
{
Manager();
Manager(Manager const&);
@@ -126,6 +127,19 @@ protected:
std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList) const;
+ // output
+ // nbest
+ void OutputNBest(std::ostream& out
+ , const Moses::TrellisPathList &nBestList
+ , const std::vector<Moses::FactorType>& outputFactorOrder
+ , long translationId
+ , char reportSegmentation) const;
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ char reportSegmentation, bool reportAllFactors) const;
+ void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const;
+ void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
+ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;
+ std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const;
public:
InputType const& m_source; /**< source sentence to be translated */
@@ -170,6 +184,9 @@ public:
void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected,
std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const;
+ // outputs
+ void OutputNBest(OutputCollector *collector) const;
+ void OutputLatticeSamples(OutputCollector *collector) const;
};
}
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 6b2812d0b..a39766180 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -92,7 +92,7 @@ void TranslationTask::RunPb()
// output word graph
if (m_ioWrapper.GetWordGraphCollector()) {
ostringstream out;
- fix(out,PRECISION);
+ FixPrecision(out,PRECISION);
manager.GetWordGraph(m_source->GetTranslationId(), out);
m_ioWrapper.GetWordGraphCollector()->Write(m_source->GetTranslationId(), out.str());
}
@@ -100,7 +100,7 @@ void TranslationTask::RunPb()
// output search graph
if (m_ioWrapper.GetSearchGraphOutputCollector()) {
ostringstream out;
- fix(out,PRECISION);
+ FixPrecision(out,PRECISION);
manager.OutputSearchGraph(m_source->GetTranslationId(), out);
m_ioWrapper.GetSearchGraphOutputCollector()->Write(m_source->GetTranslationId(), out.str());
@@ -128,7 +128,7 @@ void TranslationTask::RunPb()
file->open(fileName.str().c_str());
if (file->is_open() && file->good()) {
ostringstream out;
- fix(out,PRECISION);
+ FixPrecision(out,PRECISION);
manager.OutputSearchGraphAsSLF(m_source->GetTranslationId(), out);
*file << out.str();
file -> flush();
@@ -149,7 +149,7 @@ void TranslationTask::RunPb()
if (m_ioWrapper.GetSingleBestOutputCollector()) {
ostringstream out;
ostringstream debug;
- fix(debug,PRECISION);
+ FixPrecision(debug,PRECISION);
// all derivations - send them to debug stream
if (staticData.PrintAllDerivations()) {
@@ -275,29 +275,15 @@ void TranslationTask::RunPb()
additionalReportingTime.start();
// output n-best list
- if (m_ioWrapper.GetNBestOutputCollector() && !staticData.UseLatticeMBR()) {
- TrellisPathList nBestList;
- ostringstream out;
- manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_source->GetTranslationId(),
- staticData.GetReportSegmentation());
- m_ioWrapper.GetNBestOutputCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
//lattice samples
- if (m_ioWrapper.GetLatticeSamplesCollector()) {
- TrellisPathList latticeSamples;
- ostringstream out;
- manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
- m_ioWrapper.OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source->GetTranslationId(),
- staticData.GetReportSegmentation());
- m_ioWrapper.GetLatticeSamplesCollector()->Write(m_source->GetTranslationId(), out.str());
- }
+ manager.OutputLatticeSamples(m_ioWrapper.GetLatticeSamplesCollector());
// detailed translation reporting
if (m_ioWrapper.GetDetailedTranslationCollector()) {
ostringstream out;
- fix(out,PRECISION);
+ FixPrecision(out,PRECISION);
TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
m_ioWrapper.GetDetailedTranslationCollector()->Write(m_source->GetTranslationId(),out.str());
}
@@ -362,8 +348,9 @@ void TranslationTask::RunChart()
} else {
m_ioWrapper.OutputBestNone(translationId);
}
- if (staticData.GetNBestSize() > 0)
- m_ioWrapper.OutputNBestList(nbest, translationId);
+
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
+
return;
}
@@ -412,16 +399,7 @@ void TranslationTask::RunChart()
}
// n-best
- size_t nBestSize = staticData.GetNBestSize();
- if (nBestSize > 0) {
- VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
- std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
- manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
- m_ioWrapper.OutputNBestList(nBestList, translationId);
- IFVERBOSE(2) {
- PrintUserTime("N-Best Hypotheses Generation Time:");
- }
- }
+ manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector());
if (staticData.GetOutputSearchGraph()) {
std::ostringstream out;
diff --git a/moses/Util.cpp b/moses/Util.cpp
index 9664c811e..9ad615861 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -220,7 +220,7 @@ void PrintFeatureWeight(const FeatureFunction* ff)
void ShowWeights()
{
- fix(cout,6);
+ FixPrecision(cout,6);
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
diff --git a/moses/Util.h b/moses/Util.h
index 74a130b9e..4d2ccea10 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -478,7 +478,7 @@ T log_sum (T log_a, T log_b)
}
/** Enforce rounding */
-inline void fix(std::ostream& stream, size_t size)
+inline void FixPrecision(std::ostream& stream, size_t size = 3)
{
stream.setf(std::ios::fixed);
stream.precision(size);
diff --git a/phrase-extract/extract-ghkm/Alignment.h b/phrase-extract/extract-ghkm/Alignment.h
index 5aa24a712..e8381a602 100644
--- a/phrase-extract/extract-ghkm/Alignment.h
+++ b/phrase-extract/extract-ghkm/Alignment.h
@@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
-#ifndef EXTRACT_GHKM_ALIGNMENT_H_
-#define EXTRACT_GHKM_ALIGNMENT_H_
#include <string>
#include <utility>
@@ -39,4 +37,3 @@ void FlipAlignment(Alignment &);
} // namespace GHKM
} // namespace Moses
-#endif