#include "TranslationTask.h" #include "moses/StaticData.h" #include "moses/Sentence.h" #include "moses/IOWrapper.h" #include "moses/TranslationAnalysis.h" #include "moses/TypeDef.h" #include "moses/Util.h" #include "moses/Timer.h" #include "moses/InputType.h" #include "moses/OutputCollector.h" #include "moses/Incremental.h" #include "mbr.h" #include "moses/Syntax/F2S/RuleMatcherCallback.h" #include "moses/Syntax/F2S/RuleMatcherHyperTree.h" #include "moses/Syntax/S2T/Parsers/RecursiveCYKPlusParser/RecursiveCYKPlusParser.h" #include "moses/Syntax/S2T/Parsers/Scope3Parser/Parser.h" #include "moses/Syntax/T2S/RuleMatcherSCFG.h" #include "moses/TranslationModel/PhraseDictionaryCache.h" #include "util/exception.hh" using namespace std; namespace Moses { boost::shared_ptr > TranslationTask:: GetContextWindow() const { return m_context; } void TranslationTask:: SetContextWindow(boost::shared_ptr > const& cw) { m_context = cw; } boost::shared_ptr TranslationTask ::create(boost::shared_ptr const& source) { boost::shared_ptr nix; boost::shared_ptr ret(new TranslationTask(source, nix)); ret->m_self = ret; ret->m_scope.reset(new ContextScope); return ret; } boost::shared_ptr TranslationTask ::create(boost::shared_ptr const& source, boost::shared_ptr const& ioWrapper) { boost::shared_ptr ret(new TranslationTask(source, ioWrapper)); ret->m_self = ret; ret->m_scope.reset(new ContextScope); return ret; } boost::shared_ptr TranslationTask ::create(boost::shared_ptr const& source, boost::shared_ptr const& ioWrapper, boost::shared_ptr const& scope) { boost::shared_ptr ret(new TranslationTask(source, ioWrapper)); ret->m_self = ret; ret->m_scope = scope; return ret; } TranslationTask ::TranslationTask(boost::shared_ptr const& source, boost::shared_ptr const& ioWrapper) : m_source(source) , m_ioWrapper(ioWrapper) { m_options = source->options(); } TranslationTask::~TranslationTask() { } boost::shared_ptr TranslationTask ::SetupManager(SearchAlgorithm algo) { boost::shared_ptr manager; // StaticData const& staticData = StaticData::Instance(); // if (algo == DefaultSearchAlgorithm) algo = staticData.options().search.algo; if (!is_syntax(algo)) manager.reset(new Manager(this->self())); // phrase-based else if (algo == SyntaxF2S || algo == SyntaxT2S) { // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams) typedef Syntax::F2S::RuleMatcherCallback Callback; typedef Syntax::F2S::RuleMatcherHyperTree RuleMatcher; manager.reset(new Syntax::F2S::Manager(this->self())); } else if (algo == SyntaxS2T) { // new-style string-to-tree decoding (ask Phil Williams) S2TParsingAlgorithm algorithm = m_options->syntax.s2t_parsing_algo; if (algorithm == RecursiveCYKPlus) { typedef Syntax::S2T::EagerParserCallback Callback; typedef Syntax::S2T::RecursiveCYKPlusParser Parser; manager.reset(new Syntax::S2T::Manager(this->self())); } else if (algorithm == Scope3) { typedef Syntax::S2T::StandardParserCallback Callback; typedef Syntax::S2T::Scope3Parser Parser; manager.reset(new Syntax::S2T::Manager(this->self())); } else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm"); } else if (algo == SyntaxT2S_SCFG) { // SCFG-based tree-to-string decoding (ask Phil Williams) typedef Syntax::F2S::RuleMatcherCallback Callback; typedef Syntax::T2S::RuleMatcherSCFG RuleMatcher; manager.reset(new Syntax::T2S::Manager(this->self())); } else if (algo == ChartIncremental) // Ken's incremental decoding manager.reset(new Incremental::Manager(this->self())); else // original SCFG manager manager.reset(new ChartManager(this->self())); return manager; } AllOptions::ptr const& TranslationTask:: options() const { return m_options; } /// parse document-level translation info stored on the input void TranslationTask:: interpret_dlt() { if (m_source->GetType() != SentenceInput) return; Sentence const& snt = static_cast(*m_source); typedef std::map dltmap_t; BOOST_FOREACH(dltmap_t const& M, snt.GetDltMeta()) { dltmap_t::const_iterator i = M.find("type"); if (i->second == "cache") { map::const_iterator k = M.find("id"); string id = k == M.end() ? "default" : k->second; PhraseDictionaryCache* cache; cache = PhraseDictionaryCache::InstanceNonConst(id); if (cache) cache->ExecuteDlt(M, this->GetSource()->GetTranslationId()); } if (i == M.end() || i->second != "adaptive-lm") continue; dltmap_t::const_iterator j = M.find("context-weights"); if (j == M.end()) continue; m_scope->SetContextWeights(j->second); } } void TranslationTask::Run() { UTIL_THROW_IF2(!m_source || !m_ioWrapper, "Base Instances of TranslationTask must be initialized with" << " input and iowrapper."); const size_t translationId = m_source->GetTranslationId(); // report wall time spent on translation Timer translationTime; translationTime.start(); interpret_dlt(); // parse document-level translation info stored on the input // report thread number #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl); #endif // execute the translation // note: this executes the search, resulting in a search graph // we still need to apply the decision rule (MAP, MBR, ...) Timer initTime; initTime.start(); boost::shared_ptr manager = SetupManager(m_options->search.algo); VERBOSE(1, "Line " << translationId << ": Initialize search took " << initTime << " seconds total" << endl); manager->Decode(); // new: stop here if m_ioWrapper is NULL. This means that the // owner of the TranslationTask will take care of the output // oh, and by the way, all the output should be handled by the // output wrapper along the lines of *m_iwWrapper << *manager; // Just sayin' ... if (m_ioWrapper == NULL) return; // we are done with search, let's look what we got OutputCollector* ocoll; Timer additionalReportingTime; additionalReportingTime.start(); boost::shared_ptr const& io = m_ioWrapper; manager->OutputBest(io->GetSingleBestOutputCollector()); // output word graph manager->OutputWordGraph(io->GetWordGraphCollector()); // output search graph manager->OutputSearchGraph(io->GetSearchGraphOutputCollector()); // ??? manager->OutputSearchGraphSLF(); // Output search graph in hypergraph format for Kenneth Heafield's // lazy hypergraph decoder; writes to stderr if (m_options->output.SearchGraphHG.size()) { size_t transId = manager->GetSource().GetTranslationId(); string fname = io->GetHypergraphOutputFileName(transId); manager->OutputSearchGraphAsHypergraph(fname, PRECISION); } additionalReportingTime.stop(); additionalReportingTime.start(); // output n-best list manager->OutputNBest(io->GetNBestOutputCollector()); //lattice samples manager->OutputLatticeSamples(io->GetLatticeSamplesCollector()); // detailed translation reporting ocoll = io->GetDetailedTranslationCollector(); manager->OutputDetailedTranslationReport(ocoll); ocoll = io->GetDetailTreeFragmentsOutputCollector(); manager->OutputDetailedTreeFragmentsTranslationReport(ocoll); //list of unknown words manager->OutputUnknowns(io->GetUnknownsCollector()); manager->OutputAlignment(io->GetAlignmentInfoCollector()); // report additional statistics manager->CalcDecoderStatistics(); VERBOSE(1, "Line " << translationId << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl); VERBOSE(1, "Line " << translationId << ": Translation took " << translationTime << " seconds total" << endl); IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); } } }