From c20af584e73873ce4d06b14a7ff793752fb07505 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 30 Sep 2014 12:59:31 +0100 Subject: move TranslationTask into moses/ --- moses-cmd/Jamfile | 2 +- moses-cmd/Main.cpp | 3 +- moses-cmd/TranslationTask.cpp | 300 ------------------------------------------ moses-cmd/TranslationTask.h | 63 --------- moses-cmd/mbr.cpp | 178 ------------------------- moses-cmd/mbr.h | 28 ---- 6 files changed, 2 insertions(+), 572 deletions(-) delete mode 100644 moses-cmd/TranslationTask.cpp delete mode 100644 moses-cmd/TranslationTask.h delete mode 100644 moses-cmd/mbr.cpp delete mode 100644 moses-cmd/mbr.h (limited to 'moses-cmd') diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile index 993e0df02..7ee90850c 100644 --- a/moses-cmd/Jamfile +++ b/moses-cmd/Jamfile @@ -1,4 +1,4 @@ -alias deps : mbr.cpp TranslationTask.cpp ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ; +alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ; exe moses : Main.cpp deps ; exe lmbrgrid : LatticeMBRGrid.cpp deps ; diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index 1aace5401..8328ddc78 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -45,8 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/TranslationModel/PhraseDictionary.h" #include "moses/FF/StatefulFeatureFunction.h" #include "moses/FF/StatelessFeatureFunction.h" - -#include "TranslationTask.h" +#include "moses/TranslationTask.h" #ifdef HAVE_PROTOBUF #include "hypergraph.pb.h" diff --git a/moses-cmd/TranslationTask.cpp b/moses-cmd/TranslationTask.cpp deleted file mode 100644 index 65e7e08bc..000000000 --- a/moses-cmd/TranslationTask.cpp +++ /dev/null @@ -1,300 +0,0 @@ -#include "TranslationTask.h" -#include "moses/StaticData.h" -#include "moses/Sentence.h" -#include "moses/IOWrapper.h" -#include "moses/TranslationAnalysis.h" -#include "moses/TypeDef.h" -#include "moses/Util.h" -#include "moses/InputType.h" -#include "moses/OutputCollector.h" -#include "mbr.h" - -using namespace std; -using namespace Moses; - -namespace MosesCmd -{ - -TranslationTask::TranslationTask(size_t lineNumber, - InputType* source, OutputCollector* outputCollector, OutputCollector* nbestCollector, - OutputCollector* latticeSamplesCollector, - OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector, - OutputCollector* detailedTranslationCollector, - OutputCollector* alignmentInfoCollector, - OutputCollector* unknownsCollector, - bool outputSearchGraphSLF, - boost::shared_ptr > hypergraphOutput) : - m_source(source), m_lineNumber(lineNumber), - m_outputCollector(outputCollector), m_nbestCollector(nbestCollector), - m_latticeSamplesCollector(latticeSamplesCollector), - m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector), - m_detailedTranslationCollector(detailedTranslationCollector), - m_alignmentInfoCollector(alignmentInfoCollector), - m_unknownsCollector(unknownsCollector), - m_outputSearchGraphSLF(outputSearchGraphSLF), - m_hypergraphOutput(hypergraphOutput) -{} - -TranslationTask::~TranslationTask() { - delete m_source; -} - -void TranslationTask::Run() { - // shorthand for "global data" - const StaticData &staticData = StaticData::Instance(); - - // input sentence - Sentence sentence; - - // report wall time spent on translation - Timer translationTime; - translationTime.start(); - - // report thread number -#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) - TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << endl); -#endif - - - // execute the translation - // note: this executes the search, resulting in a search graph - // we still need to apply the decision rule (MAP, MBR, ...) - Timer initTime; - initTime.start(); - Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm()); - VERBOSE(1, "Line " << m_lineNumber << ": Initialize search took " << initTime << " seconds total" << endl); - manager.ProcessSentence(); - - // we are done with search, let's look what we got - Timer additionalReportingTime; - additionalReportingTime.start(); - - // output word graph - if (m_wordGraphCollector) { - ostringstream out; - fix(out,PRECISION); - manager.GetWordGraph(m_lineNumber, out); - m_wordGraphCollector->Write(m_lineNumber, out.str()); - } - - // output search graph - if (m_searchGraphCollector) { - ostringstream out; - fix(out,PRECISION); - manager.OutputSearchGraph(m_lineNumber, out); - m_searchGraphCollector->Write(m_lineNumber, out.str()); - -#ifdef HAVE_PROTOBUF - if (staticData.GetOutputSearchGraphPB()) { - ostringstream sfn; - sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends; - string fn = sfn.str(); - VERBOSE(2, "Writing search graph to " << fn << endl); - fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out); - manager.SerializeSearchGraphPB(m_lineNumber, output); - } -#endif - } - - // Output search graph in HTK standard lattice format (SLF) - if (m_outputSearchGraphSLF) { - stringstream fileName; - fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf"; - ofstream *file = new ofstream; - file->open(fileName.str().c_str()); - if (file->is_open() && file->good()) { - ostringstream out; - fix(out,PRECISION); - manager.OutputSearchGraphAsSLF(m_lineNumber, out); - *file << out.str(); - file -> flush(); - } else { - TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << endl); - } - delete file; - } - - // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder - if (m_hypergraphOutput.get()) { - m_hypergraphOutput->Write(manager); - } - - additionalReportingTime.stop(); - - // apply decision rule and output best translation(s) - if (m_outputCollector) { - ostringstream out; - ostringstream debug; - fix(debug,PRECISION); - - // all derivations - send them to debug stream - if (staticData.PrintAllDerivations()) { - additionalReportingTime.start(); - manager.PrintAllDerivations(m_lineNumber, debug); - additionalReportingTime.stop(); - } - - Timer decisionRuleTime; - decisionRuleTime.start(); - - // MAP decoding: best hypothesis - const Hypothesis* bestHypo = NULL; - if (!staticData.UseMBR()) { - bestHypo = manager.GetBestHypothesis(); - if (bestHypo) { - if (StaticData::Instance().GetOutputHypoScore()) { - out << bestHypo->GetTotalScore() << ' '; - } - if (staticData.IsPathRecoveryEnabled()) { - OutputInput(out, bestHypo); - out << "||| "; - } - if (staticData.GetParam("print-id").size() && Scan(staticData.GetParam("print-id")[0]) ) { - out << m_source->GetTranslationId() << " "; - } - - if (staticData.GetReportSegmentation() == 2) { - manager.GetOutputLanguageModelOrder(out, bestHypo); - } - OutputBestSurface( - out, - bestHypo, - staticData.GetOutputFactorOrder(), - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors()); - if (staticData.PrintAlignmentInfo()) { - out << "||| "; - OutputAlignment(out, bestHypo); - } - - OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo); - IFVERBOSE(1) { - debug << "BEST TRANSLATION: " << *bestHypo << endl; - } - } else { - VERBOSE(1, "NO BEST TRANSLATION" << endl); - } - - out << endl; - } - - // MBR decoding (n-best MBR, lattice MBR, consensus) - else { - // we first need the n-best translations - size_t nBestSize = staticData.GetMBRSize(); - if (nBestSize <= 0) { - cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl; - exit(1); - } - TrellisPathList nBestList; - manager.CalcNBest(nBestSize, nBestList,true); - VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl); - IFVERBOSE(2) { - PrintUserTime("calculated n-best list for (L)MBR decoding"); - } - - // lattice MBR - if (staticData.UseLatticeMBR()) { - if (m_nbestCollector) { - //lattice mbr nbest - vector solutions; - size_t n = min(nBestSize, staticData.GetNBestSize()); - getLatticeMBRNBest(manager,nBestList,solutions,n); - ostringstream out; - OutputLatticeMBRNBest(out, solutions,m_lineNumber); - m_nbestCollector->Write(m_lineNumber, out.str()); - } else { - //Lattice MBR decoding - vector mbrBestHypo = doLatticeMBR(manager,nBestList); - OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); - IFVERBOSE(2) { - PrintUserTime("finished Lattice MBR decoding"); - } - } - } - - // consensus decoding - else if (staticData.UseConsensusDecoding()) { - const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList); - OutputBestHypo(conBestHypo, m_lineNumber, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); - OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo); - IFVERBOSE(2) { - PrintUserTime("finished Consensus decoding"); - } - } - - // n-best MBR decoding - else { - const TrellisPath &mbrBestHypo = doMBR(nBestList); - OutputBestHypo(mbrBestHypo, m_lineNumber, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); - OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo); - IFVERBOSE(2) { - PrintUserTime("finished MBR decoding"); - } - } - } - - // report best translation to output collector - m_outputCollector->Write(m_lineNumber,out.str(),debug.str()); - - decisionRuleTime.stop(); - VERBOSE(1, "Line " << m_lineNumber << ": Decision rule took " << decisionRuleTime << " seconds total" << endl); - } - - additionalReportingTime.start(); - - // output n-best list - if (m_nbestCollector && !staticData.UseLatticeMBR()) { - TrellisPathList nBestList; - ostringstream out; - manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest()); - OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_lineNumber, - staticData.GetReportSegmentation()); - m_nbestCollector->Write(m_lineNumber, out.str()); - } - - //lattice samples - if (m_latticeSamplesCollector) { - TrellisPathList latticeSamples; - ostringstream out; - manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); - OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_lineNumber, - staticData.GetReportSegmentation()); - m_latticeSamplesCollector->Write(m_lineNumber, out.str()); - } - - // detailed translation reporting - if (m_detailedTranslationCollector) { - ostringstream out; - fix(out,PRECISION); - TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis()); - m_detailedTranslationCollector->Write(m_lineNumber,out.str()); - } - - //list of unknown words - if (m_unknownsCollector) { - const vector& unknowns = manager.getSntTranslationOptions()->GetUnknownSources(); - ostringstream out; - for (size_t i = 0; i < unknowns.size(); ++i) { - out << *(unknowns[i]); - } - out << endl; - m_unknownsCollector->Write(m_lineNumber, out.str()); - } - - // report additional statistics - manager.CalcDecoderStatistics(); - VERBOSE(1, "Line " << m_lineNumber << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl); - VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl); - IFVERBOSE(2) { - PrintUserTime("Sentence Decoding Time:"); - } -} - - -} diff --git a/moses-cmd/TranslationTask.h b/moses-cmd/TranslationTask.h deleted file mode 100644 index 05b257a6a..000000000 --- a/moses-cmd/TranslationTask.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include -#include "moses/ThreadPool.h" -#include "moses/Manager.h" -#include "moses/HypergraphOutput.h" - -namespace Moses -{ - class InputType; - class OutputCollector; -} - -namespace MosesCmd -{ - -/** Translates a sentence. - * - calls the search (Manager) - * - applies the decision rule - * - outputs best translation and additional reporting - **/ -class TranslationTask : public Moses::Task -{ - -public: - - TranslationTask(size_t lineNumber, - Moses::InputType* source, Moses::OutputCollector* outputCollector, Moses::OutputCollector* nbestCollector, - Moses::OutputCollector* latticeSamplesCollector, - Moses::OutputCollector* wordGraphCollector, Moses::OutputCollector* searchGraphCollector, - Moses::OutputCollector* detailedTranslationCollector, - Moses::OutputCollector* alignmentInfoCollector, - Moses::OutputCollector* unknownsCollector, - bool outputSearchGraphSLF, - boost::shared_ptr > hypergraphOutput); - - ~TranslationTask(); - - /** Translate one sentence - * gets called by main function implemented at end of this source file */ - void Run(); - - -private: - Moses::InputType* m_source; - size_t m_lineNumber; - Moses::OutputCollector* m_outputCollector; - Moses::OutputCollector* m_nbestCollector; - Moses::OutputCollector* m_latticeSamplesCollector; - Moses::OutputCollector* m_wordGraphCollector; - Moses::OutputCollector* m_searchGraphCollector; - Moses::OutputCollector* m_detailedTranslationCollector; - Moses::OutputCollector* m_alignmentInfoCollector; - Moses::OutputCollector* m_unknownsCollector; - bool m_outputSearchGraphSLF; - boost::shared_ptr > m_hypergraphOutput; - std::ofstream *m_alignmentStream; - - -}; - - -} //namespace diff --git a/moses-cmd/mbr.cpp b/moses-cmd/mbr.cpp deleted file mode 100644 index 6a8dfa823..000000000 --- a/moses-cmd/mbr.cpp +++ /dev/null @@ -1,178 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "moses/TrellisPathList.h" -#include "moses/TrellisPath.h" -#include "moses/StaticData.h" -#include "moses/Util.h" -#include "mbr.h" - -using namespace std ; -using namespace Moses; - - -/* Input : - 1. a sorted n-best list, with duplicates filtered out in the following format - 0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432 - - 2. a weight vector - 3. bleu order ( default = 4) - 4. scaling factor to weigh the weight vector (default = 1.0) - - Output : - translations that minimise the Bayes Risk of the n-best list - - -*/ - -int BLEU_ORDER = 4; -int SMOOTH = 1; -float min_interval = 1e-4; -void extract_ngrams(const vector& sentence, map < vector < const Factor* >, int > & allngrams) -{ - vector< const Factor* > ngram; - for (int k = 0; k < BLEU_ORDER; k++) { - for(int i =0; i < max((int)sentence.size()-k,0); i++) { - for ( int j = i; j<= i+k; j++) { - ngram.push_back(sentence[j]); - } - ++allngrams[ngram]; - ngram.clear(); - } - } -} - -float calculate_score(const vector< vector > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats ) -{ - int comps_n = 2*BLEU_ORDER+1; - vector comps(comps_n); - float logbleu = 0.0, brevity; - - int hyp_length = sents[hyp].size(); - - for (int i =0; i ,int > & hyp_ngrams = ngram_stats[hyp] ; - map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ; - - for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin(); - it != hyp_ngrams.end(); it++) { - map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first); - if(ref_it != ref_ngrams.end()) { - comps[2* (it->first.size()-1)] += min(ref_it->second,it->second); - } - } - comps[comps_n-1] = sents[ref].size(); - - for (int i=0; i 0 ) - logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH); - else - logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]); - } - logbleu /= BLEU_ORDER; - brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length - if (brevity < 0.0) - logbleu += brevity; - return exp(logbleu); -} - -const TrellisPath doMBR(const TrellisPathList& nBestList) -{ - float marginal = 0; - - vector joint_prob_vec; - vector< vector > translations; - float joint_prob; - vector< map < vector , int > > ngram_stats; - - TrellisPathList::const_iterator iter; - - // get max score to prevent underflow - float maxScore = -1e20; - for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { - const TrellisPath &path = **iter; - float score = StaticData::Instance().GetMBRScale() - * path.GetScoreBreakdown().GetWeightedScore(); - if (maxScore < score) maxScore = score; - } - - for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { - const TrellisPath &path = **iter; - joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().GetWeightedScore() - maxScore); - marginal += joint_prob; - joint_prob_vec.push_back(joint_prob); - - // get words in translation - vector translation; - GetOutputFactors(path, translation); - - // collect n-gram counts - map < vector < const Factor *>, int > counts; - extract_ngrams(translation,counts); - - ngram_stats.push_back(counts); - translations.push_back(translation); - } - - vector mbr_loss; - float bleu, weightedLoss; - float weightedLossCumul = 0; - float minMBRLoss = 1000000; - int minMBRLossIdx = -1; - - /* Main MBR computation done here */ - iter = nBestList.begin(); - for (unsigned int i = 0; i < nBestList.GetSize(); i++) { - weightedLossCumul = 0; - for (unsigned int j = 0; j < nBestList.GetSize(); j++) { - if ( i != j) { - bleu = calculate_score(translations, j, i,ngram_stats ); - weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal); - weightedLossCumul += weightedLoss; - if (weightedLossCumul > minMBRLoss) - break; - } - } - if (weightedLossCumul < minMBRLoss) { - minMBRLoss = weightedLossCumul; - minMBRLossIdx = i; - } - iter++; - } - /* Find sentence that minimises Bayes Risk under 1- BLEU loss */ - return nBestList.at(minMBRLossIdx); - //return translations[minMBRLossIdx]; -} - -void GetOutputFactors(const TrellisPath &path, vector &translation) -{ - const std::vector &edges = path.GetEdges(); - const std::vector& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - assert (outputFactorOrder.size() == 1); - - // print the surface factor of the translation - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; - const Phrase &phrase = edge.GetCurrTargetPhrase(); - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) { - - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - translation.push_back(factor); - } - } -} - diff --git a/moses-cmd/mbr.h b/moses-cmd/mbr.h deleted file mode 100644 index d08b11a98..000000000 --- a/moses-cmd/mbr.h +++ /dev/null @@ -1,28 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#ifndef moses_cmd_mbr_h -#define moses_cmd_mbr_h - -const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList); -void GetOutputFactors(const Moses::TrellisPath &path, std::vector &translation); -float calculate_score(const std::vector< std::vector > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats ); -#endif -- cgit v1.2.3