diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2015-12-11 20:29:10 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2015-12-11 20:29:10 +0300 |
commit | e5d3306519a43dc02a85cbf2e8e67e3699bf91b8 (patch) | |
tree | a8dfaa1bf86781df1c06e624a967dba9301166d4 | |
parent | 51d21b09c0c59b2449cce2ba2f524cd3dc5ff9b1 (diff) | |
parent | 40ed3df9cb513681569a2fcde32a5215c38253f3 (diff) |
Merge ../mosesdecoder into perf_moses2
261 files changed, 1987 insertions, 2462 deletions
diff --git a/contrib/server/Jamfile b/contrib/server/Jamfile index d6f9cdc13..8a8c6406c 100644 --- a/contrib/server/Jamfile +++ b/contrib/server/Jamfile @@ -12,9 +12,9 @@ else with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ; if $(with-xmlrpc-c) { echo While building mosesserver ... ; - echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; - echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ; - echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + # echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; + # echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ; + # echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; build-moses-server = true ; xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ; diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp index 75afd27e2..118a3a62e 100644 --- a/contrib/server/mosesserver.cpp +++ b/contrib/server/mosesserver.cpp @@ -1,13 +1,5 @@ -#if 1 #include "moses/ExportInterface.h" -// The separate moses server executable is being phased out. -// Since there were problems with the migration into the main -// executable, this separate program is still included in the -// distribution for legacy reasons. Contributors are encouraged -// to add their contributions to moses/server rather than -// contrib/server. This recommendation does not apply to wrapper -// scripts. -// The future is this: +// The separate moses server executable has been phased out. /** main function of the command line version of the decoder **/ int main(int argc, char const** argv) @@ -25,747 +17,3 @@ int main(int argc, char const** argv) argv2[argc] = "--server"; return decoder_main(argc+1, argv2); } -#else - -// #include <stdexcept> -// #include <iostream> -// #include <vector> -// #include <algorithm> - - -// #include "moses/Util.h" -// #include "moses/ChartManager.h" -// #include "moses/Hypothesis.h" -// #include "moses/Manager.h" -// #include "moses/StaticData.h" -// #include "moses/ThreadPool.h" -// #include "moses/TranslationTask.h" -// #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h" -// #include "moses/FF/StatefulFeatureFunction.h" -// #if PT_UG -// #include "moses/TranslationModel/UG/mmsapt.h" -// #endif -// #include "moses/TreeInput.h" -// #include "moses/IOWrapper.h" - -// #include <boost/foreach.hpp> - -// #ifdef WITH_THREADS -// #include <boost/thread.hpp> -// #endif - -// #include <xmlrpc-c/base.hpp> -// #include <xmlrpc-c/registry.hpp> -// #include <xmlrpc-c/server_abyss.hpp> - -// // using namespace Moses; -// using namespace std; -// using namespace Moses; - -// typedef std::map<std::string, xmlrpc_c::value> params_t; - -// class Updater: public xmlrpc_c::method -// { -// public: -// Updater() { -// // signature and help strings are documentation -- the client -// // can query this information with a system.methodSignature and -// // system.methodHelp RPC. -// this->_signature = "S:S"; -// this->_help = "Updates stuff"; -// } -// void -// execute(xmlrpc_c::paramList const& paramList, -// xmlrpc_c::value * const retvalP) { -// const params_t params = paramList.getStruct(0); -// breakOutParams(params); -// #if PT_UG -// Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]); -// pdsa->add(source_,target_,alignment_); -// #else -// std::string msg; -// msg = "Server was compiled without a phrase table implementation that "; -// msg += "supports updates."; -// throw xmlrpc_c::fault(msg.c_str(), xmlrpc_c::fault::CODE_PARSE); -// #endif -// XVERBOSE(1,"Done inserting\n"); -// *retvalP = xmlrpc_c::value_string("Phrase table updated"); -// } -// string source_, target_, alignment_; -// bool bounded_; - -// void breakOutParams(const params_t& params) { -// params_t::const_iterator si = params.find("source"); -// if(si == params.end()) -// throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE); -// source_ = xmlrpc_c::value_string(si->second); -// XVERBOSE(1,"source = " << source_ << endl); -// si = params.find("target"); -// if(si == params.end()) -// throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE); -// target_ = xmlrpc_c::value_string(si->second); -// XVERBOSE(1,"target = " << target_ << endl); -// si = params.find("alignment"); -// if(si == params.end()) -// throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE); -// alignment_ = xmlrpc_c::value_string(si->second); -// XVERBOSE(1,"alignment = " << alignment_ << endl); -// si = params.find("bounded"); -// bounded_ = (si != params.end()); -// } -// }; - -// class Optimizer : public xmlrpc_c::method -// { -// public: -// Optimizer() { -// // signature and help strings are documentation -- the client -// // can query this information with a system.methodSignature and -// // system.methodHelp RPC. -// this->_signature = "S:S"; -// this->_help = "Optimizes multi-model translation model"; -// } - -// void -// execute(xmlrpc_c::paramList const& paramList, -// xmlrpc_c::value * const retvalP) { -// #ifdef WITH_DLIB -// const params_t params = paramList.getStruct(0); -// params_t::const_iterator si = params.find("model_name"); -// if (si == params.end()) { -// throw xmlrpc_c::fault( -// "Missing name of model to be optimized (e.g. PhraseDictionaryMultiModelCounts0)", -// xmlrpc_c::fault::CODE_PARSE); -// } -// const string model_name = xmlrpc_c::value_string(si->second); -// PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); - -// si = params.find("phrase_pairs"); -// if (si == params.end()) { -// throw xmlrpc_c::fault( -// "Missing list of phrase pairs", -// xmlrpc_c::fault::CODE_PARSE); -// } - -// vector<pair<string, string> > phrase_pairs; - -// xmlrpc_c::value_array phrase_pairs_array = xmlrpc_c::value_array(si->second); -// vector<xmlrpc_c::value> phrasePairValueVector(phrase_pairs_array.vectorValueValue()); -// for (size_t i=0;i < phrasePairValueVector.size();i++) { -// xmlrpc_c::value_array phrasePairArray = xmlrpc_c::value_array(phrasePairValueVector[i]); -// vector<xmlrpc_c::value> phrasePair(phrasePairArray.vectorValueValue()); -// string L1 = xmlrpc_c::value_string(phrasePair[0]); -// string L2 = xmlrpc_c::value_string(phrasePair[1]); -// phrase_pairs.push_back(make_pair(L1,L2)); -// } - -// vector<float> weight_vector; -// weight_vector = pdmm->MinimizePerplexity(phrase_pairs); - -// vector<xmlrpc_c::value> weight_vector_ret; -// for (size_t i=0;i < weight_vector.size();i++) { -// weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i])); -// } -// *retvalP = xmlrpc_c::value_array(weight_vector_ret); -// #else -// string errmsg = "Error: Perplexity minimization requires dlib (compilation option --with-dlib)"; -// cerr << errmsg << endl; -// *retvalP = xmlrpc_c::value_string(errmsg); -// #endif -// } -// }; - -// /** -// * Required so that translations can be sent to a thread pool. -// **/ -// class TranslationTask : public virtual Moses::TranslationTask { -// protected: -// TranslationTask(xmlrpc_c::paramList const& paramList, -// boost::condition_variable& cond, boost::mutex& mut) -// : m_paramList(paramList), -// m_cond(cond), -// m_mut(mut), -// m_done(false) -// {} - -// public: -// static boost::shared_ptr<TranslationTask> -// create(xmlrpc_c::paramList const& paramList, -// boost::condition_variable& cond, boost::mutex& mut) -// { -// boost::shared_ptr<TranslationTask> ret(new TranslationTask(paramList, cond, mut)); -// ret->m_self = ret; -// return ret; -// } - -// virtual bool DeleteAfterExecution() {return false;} - -// bool IsDone() const {return m_done;} - -// const map<string, xmlrpc_c::value>& GetRetData() { return m_retData;} - -// virtual void -// Run() -// { -// using namespace xmlrpc_c; -// const params_t params = m_paramList.getStruct(0); -// m_paramList.verifyEnd(1); -// params_t::const_iterator si = params.find("text"); -// if (si == params.end()) { -// throw fault("Missing source text", fault::CODE_PARSE); -// } -// const string source = value_string(si->second); - -// XVERBOSE(1,"Input: " << source << endl); -// si = params.find("align"); -// bool addAlignInfo = (si != params.end()); -// si = params.find("word-align"); -// bool addWordAlignInfo = (si != params.end()); -// si = params.find("sg"); -// bool addGraphInfo = (si != params.end()); -// si = params.find("topt"); -// bool addTopts = (si != params.end()); -// si = params.find("report-all-factors"); -// bool reportAllFactors = (si != params.end()); -// si = params.find("nbest"); -// int nbest_size = (si == params.end()) ? 0 : int(value_int(si->second)); -// si = params.find("nbest-distinct"); -// bool nbest_distinct = (si != params.end()); - -// si = params.find("add-score-breakdown"); -// bool addScoreBreakdown = (si != params.end()); - -// vector<float> multiModelWeights; -// si = params.find("lambda"); -// if (si != params.end()) -// { -// value_array multiModelArray = value_array(si->second); -// vector<value> multiModelValueVector(multiModelArray.vectorValueValue()); -// for (size_t i=0;i < multiModelValueVector.size();i++) -// { -// multiModelWeights.push_back(value_double(multiModelValueVector[i])); -// } -// } - -// si = params.find("model_name"); -// if (si != params.end() && multiModelWeights.size() > 0) -// { -// const string model_name = value_string(si->second); -// PhraseDictionaryMultiModel* pdmm -// = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); -// pdmm->SetTemporaryMultiModelWeightsVector(multiModelWeights); -// } - -// const StaticData &staticData = StaticData::Instance(); - -// //Make sure alternative paths are retained, if necessary -// // if (addGraphInfo || nbest_size>0) { -// // (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(true); -// // } - - -// stringstream out, graphInfo, transCollOpts; - -// if (staticData.IsSyntax()) -// { -// boost::shared_ptr<TreeInput> tinput(new TreeInput); -// const vector<FactorType>& IFO = staticData.GetInputFactorOrder(); -// istringstream in(source + "\n"); -// tinput->Read(in,IFO,staticData.options()); -// ttasksptr task = Moses::TranslationTask::create(tinput); -// ChartManager manager(task); -// manager.Decode(); -// const ChartHypothesis *hypo = manager.GetBestHypothesis(); -// outputChartHypo(out,hypo); -// if (addGraphInfo) { -// // const size_t translationId = tinput.GetTranslationId(); -// std::ostringstream sgstream; -// manager.OutputSearchGraphMoses(sgstream); -// m_retData["sg"] = value_string(sgstream.str()); -// } -// } -// else -// { -// // size_t lineNumber = 0; // TODO: Include sentence request number here? -// boost::shared_ptr<Sentence> sentence; -// sentence.reset(new Sentence(0,source,staticData.options())); -// ttasksptr task = Moses::TranslationTask::create(sentence); -// Manager manager(task); -// manager.Decode(); -// const Hypothesis* hypo = manager.GetBestHypothesis(); - -// vector<xmlrpc_c::value> alignInfo; -// outputHypo(out,hypo,addAlignInfo,alignInfo,reportAllFactors); -// if (addAlignInfo) m_retData["align"] = value_array(alignInfo); -// if (addWordAlignInfo) -// { -// stringstream wordAlignment; -// hypo->OutputAlignment(wordAlignment); -// vector<xmlrpc_c::value> alignments; -// string alignmentPair; -// while (wordAlignment >> alignmentPair) -// { -// int pos = alignmentPair.find('-'); -// map<string, xmlrpc_c::value> wordAlignInfo; -// wordAlignInfo["source-word"] -// = value_int(atoi(alignmentPair.substr(0, pos).c_str())); -// wordAlignInfo["target-word"] -// = value_int(atoi(alignmentPair.substr(pos + 1).c_str())); -// alignments.push_back(value_struct(wordAlignInfo)); -// } -// m_retData["word-align"] = value_array(alignments); -// } - -// if (addGraphInfo) insertGraphInfo(manager,m_retData); -// if (addTopts) insertTranslationOptions(manager,m_retData); -// if (nbest_size > 0) -// { -// outputNBest(manager, m_retData, nbest_size, nbest_distinct, -// reportAllFactors, addAlignInfo, addScoreBreakdown); -// } -// // (const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false); -// } -// m_retData["text"] = value_string(out.str()); -// XVERBOSE(1,"Output: " << out.str() << endl); -// { -// boost::lock_guard<boost::mutex> lock(m_mut); -// m_done = true; -// } -// m_cond.notify_one(); - -// } - -// void outputHypo(ostream& out, const Hypothesis* hypo, -// bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo, -// bool reportAllFactors = false) { -// if (hypo->GetPrevHypo() != NULL) { -// outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, -// alignInfo, reportAllFactors); -// Phrase p = hypo->GetCurrTargetPhrase(); -// if(reportAllFactors) { -// out << p << " "; -// } else { -// for (size_t pos = 0 ; pos < p.GetSize() ; pos++) { -// const Factor *factor = p.GetFactor(pos, 0); -// out << *factor << " "; -// } -// } - -// if (addAlignmentInfo) { -// /** -// * Add the alignment info to the array. This is in target -// * order and consists of (tgt-start, src-start, src-end) -// * triples. -// **/ -// map<string, xmlrpc_c::value> phraseAlignInfo; -// phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos()); -// phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); -// phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); -// alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo)); -// } -// } -// } - -// void outputChartHypo(ostream& out, const ChartHypothesis* hypo) { -// Phrase outPhrase(20); -// hypo->GetOutputPhrase(outPhrase); - -// // delete 1st & last -// assert(outPhrase.GetSize() >= 2); -// outPhrase.RemoveWord(0); -// outPhrase.RemoveWord(outPhrase.GetSize() - 1); -// for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++) { -// const Factor *factor = outPhrase.GetFactor(pos, 0); -// out << *factor << " "; -// } - -// } - -// bool compareSearchGraphNode(const SearchGraphNode& a, const SearchGraphNode b) { -// return a.hypo->GetId() < b.hypo->GetId(); -// } - -// void insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData) { -// vector<xmlrpc_c::value> searchGraphXml; -// vector<SearchGraphNode> searchGraph; -// manager.GetSearchGraph(searchGraph); -// std::sort(searchGraph.begin(), searchGraph.end()); -// for (vector<SearchGraphNode>::const_iterator i = searchGraph.begin(); i != searchGraph.end(); ++i) { -// map<string, xmlrpc_c::value> searchGraphXmlNode; -// searchGraphXmlNode["forward"] = xmlrpc_c::value_double(i->forward); -// searchGraphXmlNode["fscore"] = xmlrpc_c::value_double(i->fscore); -// const Hypothesis* hypo = i->hypo; -// searchGraphXmlNode["hyp"] = xmlrpc_c::value_int(hypo->GetId()); -// searchGraphXmlNode["stack"] = xmlrpc_c::value_int(hypo->GetWordsBitmap().GetNumWordsCovered()); -// if (hypo->GetId() != 0) { -// const Hypothesis *prevHypo = hypo->GetPrevHypo(); -// searchGraphXmlNode["back"] = xmlrpc_c::value_int(prevHypo->GetId()); -// searchGraphXmlNode["score"] = xmlrpc_c::value_double(hypo->GetScore()); -// searchGraphXmlNode["transition"] = xmlrpc_c::value_double(hypo->GetScore() - prevHypo->GetScore()); -// if (i->recombinationHypo) { -// searchGraphXmlNode["recombined"] = xmlrpc_c::value_int(i->recombinationHypo->GetId()); -// } -// searchGraphXmlNode["cover-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); -// searchGraphXmlNode["cover-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); -// searchGraphXmlNode["out"] = -// xmlrpc_c::value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder())); -// } -// searchGraphXml.push_back(xmlrpc_c::value_struct(searchGraphXmlNode)); -// } -// retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_array(searchGraphXml))); -// } - -// void outputNBest(const Manager& manager, -// map<string, xmlrpc_c::value>& retData, -// const int n=100, -// const bool distinct=false, -// const bool reportAllFactors=false, -// const bool addAlignmentInfo=false, -// const bool addScoreBreakdown=false) -// { -// TrellisPathList nBestList; -// manager.CalcNBest(n, nBestList, distinct); - -// vector<xmlrpc_c::value> nBestXml; -// TrellisPathList::const_iterator iter; -// for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { -// const TrellisPath &path = **iter; -// const std::vector<const Hypothesis *> &edges = path.GetEdges(); -// map<string, xmlrpc_c::value> nBestXMLItem; - -// // output surface -// ostringstream out; -// vector<xmlrpc_c::value> alignInfo; -// for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { -// const Hypothesis &edge = *edges[currEdge]; -// const Phrase& phrase = edge.GetCurrTargetPhrase(); -// if(reportAllFactors) { -// out << phrase << " "; -// } else { -// for (size_t pos = 0 ; pos < phrase.GetSize() ; pos++) { -// const Factor *factor = phrase.GetFactor(pos, 0); -// out << *factor << " "; -// } -// } - -// if (addAlignmentInfo && currEdge != (int)edges.size() - 1) { -// map<string, xmlrpc_c::value> phraseAlignInfo; -// phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(edge.GetCurrTargetWordsRange().GetStartPos()); -// phraseAlignInfo["src-start"] = xmlrpc_c::value_int(edge.GetCurrSourceWordsRange().GetStartPos()); -// phraseAlignInfo["src-end"] = xmlrpc_c::value_int(edge.GetCurrSourceWordsRange().GetEndPos()); -// alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo)); -// } -// } -// nBestXMLItem["hyp"] = xmlrpc_c::value_string(out.str()); - -// if (addAlignmentInfo) { -// nBestXMLItem["align"] = xmlrpc_c::value_array(alignInfo); - -// if ((int)edges.size() > 0) { -// stringstream wordAlignment; -// const Hypothesis *edge = edges[0]; -// edge->OutputAlignment(wordAlignment); -// vector<xmlrpc_c::value> alignments; -// string alignmentPair; -// while (wordAlignment >> alignmentPair) { -// int pos = alignmentPair.find('-'); -// map<string, xmlrpc_c::value> wordAlignInfo; -// wordAlignInfo["source-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(0, pos).c_str())); -// wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str())); -// alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo)); -// } -// nBestXMLItem["word-align"] = xmlrpc_c::value_array(alignments); -// } -// } - -// if (addScoreBreakdown) -// { -// // should the score breakdown be reported in a more structured manner? -// ostringstream buf; -// bool with_labels -// = StaticData::Instance().options().nbest.include_feature_labels; -// path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); -// nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str()); -// } - -// // weighted score -// nBestXMLItem["totalScore"] = xmlrpc_c::value_double(path.GetFutureScore()); -// nBestXml.push_back(xmlrpc_c::value_struct(nBestXMLItem)); -// } -// retData.insert(pair<string, xmlrpc_c::value>("nbest", xmlrpc_c::value_array(nBestXml))); -// } - -// void -// insertTranslationOptions(Manager& manager, map<string, xmlrpc_c::value>& retData) -// { -// const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions(); -// vector<xmlrpc_c::value> toptsXml; -// size_t const stop = toptsColl->GetSource().GetSize(); -// TranslationOptionList const* tol; -// for (size_t s = 0 ; s < stop ; ++s) -// { -// for (size_t e = s; (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; ++e) -// { -// BOOST_FOREACH(TranslationOption const* topt, *tol) -// { -// map<string, xmlrpc_c::value> toptXml; -// TargetPhrase const& tp = topt->GetTargetPhrase(); -// StaticData const& GLOBAL = StaticData::Instance(); -// string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder()); -// toptXml["phrase"] = xmlrpc_c::value_string(tphrase); -// toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore()); -// toptXml["start"] = xmlrpc_c::value_int(s); -// toptXml["end"] = xmlrpc_c::value_int(e); -// vector<xmlrpc_c::value> scoresXml; -// const std::valarray<FValue> &scores -// = topt->GetScoreBreakdown().getCoreFeatures(); -// for (size_t j = 0; j < scores.size(); ++j) -// scoresXml.push_back(xmlrpc_c::value_double(scores[j])); - -// toptXml["scores"] = xmlrpc_c::value_array(scoresXml); -// toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); -// } -// } -// } -// retData.insert(pair<string, xmlrpc_c::value>("topt", xmlrpc_c::value_array(toptsXml))); -// } - -// private: -// xmlrpc_c::paramList const& m_paramList; -// map<string, xmlrpc_c::value> m_retData; -// boost::condition_variable& m_cond; -// boost::mutex& m_mut; -// bool m_done; -// }; - -// class Translator : public xmlrpc_c::method -// { -// public: -// Translator(size_t numThreads = 10) : m_threadPool(numThreads) { -// // signature and help strings are documentation -- the client -// // can query this information with a system.methodSignature and -// // system.methodHelp RPC. -// this->_signature = "S:S"; -// this->_help = "Does translation"; -// } - -// void -// execute(xmlrpc_c::paramList const& paramList, -// xmlrpc_c::value * const retvalP) { -// boost::condition_variable cond; -// boost::mutex mut; -// typedef ::TranslationTask TTask; -// boost::shared_ptr<TTask> task = TTask::create(paramList,cond,mut); -// m_threadPool.Submit(task); -// boost::unique_lock<boost::mutex> lock(mut); -// while (!task->IsDone()) { -// cond.wait(lock); -// } -// *retvalP = xmlrpc_c::value_struct(task->GetRetData()); -// } -// private: -// Moses::ThreadPool m_threadPool; -// }; - -// static -// void -// PrintFeatureWeight(ostream& out, const FeatureFunction* ff) -// { -// out << ff->GetScoreProducerDescription() << "="; -// size_t numScoreComps = ff->GetNumScoreComponents(); -// vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); -// for (size_t i = 0; i < numScoreComps; ++i) { -// out << " " << values[i]; -// } -// out << endl; -// } - -// static -// void -// ShowWeights(ostream& out) -// { -// // adapted from moses-cmd/Main.cpp -// std::ios::fmtflags old_flags = out.setf(std::ios::fixed); -// size_t old_precision = out.precision(6); -// const vector<const StatelessFeatureFunction*>& -// slf = StatelessFeatureFunction::GetStatelessFeatureFunctions(); -// const vector<const StatefulFeatureFunction*>& -// sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); - -// for (size_t i = 0; i < sff.size(); ++i) { -// const StatefulFeatureFunction *ff = sff[i]; -// if (ff->IsTuneable()) { -// PrintFeatureWeight(out,ff); -// } -// else { -// out << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; -// } -// } -// for (size_t i = 0; i < slf.size(); ++i) { -// const StatelessFeatureFunction *ff = slf[i]; -// if (ff->IsTuneable()) { -// PrintFeatureWeight(out,ff); -// } -// else { -// out << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; -// } -// } -// if (! (old_flags & std::ios::fixed)) -// out.unsetf(std::ios::fixed); -// out.precision(old_precision); -// } - -// int main(int argc, char** argv) -// { - -// //Extract port and log, send other args to moses -// char** mosesargv = new char*[argc+2]; // why "+2" [UG] -// int mosesargc = 0; -// int port = 8080; -// const char* logfile = "/dev/null"; -// bool isSerial = false; -// size_t numThreads = 10; //for translation tasks - -// //Abyss server configuration: initial values reflect hard-coded default -// //-> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn -// size_t maxConn = 15; -// size_t maxConnBacklog = 15; -// size_t keepaliveTimeout = 15; -// size_t keepaliveMaxConn = 30; -// size_t timeout = 15; - -// for (int i = 0; i < argc; ++i) { -// if (!strcmp(argv[i],"--server-port")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-port" << endl; -// exit(1); -// } else { -// port = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i],"--server-log")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-log" << endl; -// exit(1); -// } else { -// logfile = argv[i]; -// } -// } else if (!strcmp(argv[i],"--server-maxconn")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-maxconn" << endl; -// exit(1); -// } else { -// maxConn = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i],"--server-maxconn-backlog")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-maxconn-backlog" << endl; -// exit(1); -// } else { -// maxConnBacklog = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i],"--server-keepalive-timeout")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-keepalive-timeout" << endl; -// exit(1); -// } else { -// keepaliveTimeout = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i],"--server-keepalive-maxconn")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-keepalive-maxconn" << endl; -// exit(1); -// } else { -// keepaliveMaxConn = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i],"--server-timeout")) { -// ++i; -// if (i >= argc) { -// cerr << "Error: Missing argument to --server-timeout" << endl; -// exit(1); -// } else { -// timeout = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i], "--threads")) { -// ++i; -// if (i>=argc) { -// cerr << "Error: Missing argument to --threads" << endl; -// exit(1); -// } else { -// numThreads = atoi(argv[i]); -// } -// } else if (!strcmp(argv[i], "--serial")) { -// cerr << "Running single-threaded server" << endl; -// isSerial = true; -// } else { -// mosesargv[mosesargc] = new char[strlen(argv[i])+1]; -// strcpy(mosesargv[mosesargc],argv[i]); -// ++mosesargc; -// } -// } - -// Parameter* params = new Parameter(); -// if (!params->LoadParam(mosesargc,mosesargv)) { -// params->Explain(); -// exit(1); -// } -// if (!StaticData::LoadDataStatic(params, argv[0])) { -// exit(1); -// } - -// if (params->isParamSpecified("show-weights")) { -// ShowWeights(cout); -// exit(0); -// } - -// //512 MB data limit (512KB is not enough for optimization) -// xmlrpc_limit_set(XMLRPC_XML_SIZE_LIMIT_ID, 512*1024*1024); - -// xmlrpc_c::registry myRegistry; - -// xmlrpc_c::methodPtr const translator(new Translator(numThreads)); -// xmlrpc_c::methodPtr const updater(new Updater); -// xmlrpc_c::methodPtr const optimizer(new Optimizer); - -// myRegistry.addMethod("translate", translator); -// myRegistry.addMethod("updater", updater); -// myRegistry.addMethod("optimize", optimizer); - -// /* CODE FOR old xmlrpc-c v. 1.32 or lower -// xmlrpc_c::serverAbyss myAbyssServer( -// myRegistry, -// port, // TCP port on which to listen -// logfile -// ); -// */ - -// /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04 */ -// xmlrpc_c::serverAbyss myAbyssServer( -// xmlrpc_c::serverAbyss::constrOpt() -// .registryP(&myRegistry) -// .portNumber(port) // TCP port on which to listen -// .logFileName(logfile) -// .allowOrigin("*") -// .maxConn((unsigned int)maxConn) -// .maxConnBacklog((unsigned int)maxConnBacklog) -// .keepaliveTimeout((unsigned int)keepaliveTimeout) -// .keepaliveMaxConn((unsigned int)keepaliveMaxConn) -// .timeout((unsigned int)timeout) -// ); - -// XVERBOSE(1,"Listening on port " << port << endl); -// if (isSerial) { -// while(1) myAbyssServer.runOnce(); -// } else { -// myAbyssServer.run(); -// } -// std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl; -// return 1; -// } - -#endif diff --git a/defer/ExternalFeature.cpp b/defer/ExternalFeature.cpp index 602971d2b..0bbcfbfd0 100644 --- a/defer/ExternalFeature.cpp +++ b/defer/ExternalFeature.cpp @@ -15,7 +15,7 @@ ExternalFeatureState::ExternalFeatureState(int stateSize, void *data) memcpy(m_data, data, stateSize); } -void ExternalFeature::Load() +void ExternalFeature::Load(AllOptions const& opts) { string nparam = "testing"; diff --git a/defer/ExternalFeature.h b/defer/ExternalFeature.h index 3755bf4ff..403d3c6c4 100644 --- a/defer/ExternalFeature.h +++ b/defer/ExternalFeature.h @@ -43,7 +43,7 @@ public: } ~ExternalFeature(); - void Load(); + void Load(AllOptions const& opts); bool IsUseable(const FactorMask &mask) const { return true; diff --git a/defer/Joint.h b/defer/Joint.h index 1dbdb019e..88547585a 100644 --- a/defer/Joint.h +++ b/defer/Joint.h @@ -58,7 +58,7 @@ public: delete m_lmImpl; } - bool Load(const std::string &filePath + bool Load(AllOptions const& opts, const std::string &filePath , const std::vector<FactorType> &factorTypes , size_t nGramOrder) { m_factorTypes = FactorMask(factorTypes); @@ -77,7 +77,7 @@ public: m_sentenceEndWord[factorType] = factorCollection.AddFactor(Output, factorType, EOS_); } - m_lmImpl->Load(); + m_lmImpl->Load(AllOptions const& opts); } LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const { diff --git a/jam-files/xmlrpc-c.jam b/jam-files/xmlrpc-c.jam index 401bcc006..0a51f4f10 100644 --- a/jam-files/xmlrpc-c.jam +++ b/jam-files/xmlrpc-c.jam @@ -55,8 +55,28 @@ else } local prefix = [ shell_or_die "$(config) --prefix" ] ; local version = [ shell_or_die "$(config) --version" ] ; - echo "XMLRPC-C: USING VERSION $(version) FROM $(prefix)" ; + # Check the version is recent enough: + # If anyone knows a better way to do this, go ahead and fix this. + minversion = 1.32.0 ; + local cmd = "perl -e '@a = split(/\\./,@ARGV[0]); @b = split(/\\./,$ARGV[1]); ++$i while $i < $#a && $a[$i] == $b[$i]; die if $a[$i] < $b[$i];'" ; + local check = [ SHELL "$(cmd) $(version) $(minversion) 2>/dev/null" : exit-status ] ; + if $(check[2]) = 0 + { + echo "XMLRPC-C: USING VERSION $(version) FROM $(prefix)" ; + } + else + { + echo "\nCOMPILATION FAILED! YOUR VERSION OF THE XMLRPC-C LIBRARY IS TOO OLD.\n" ; + echo "Moses server functionality requires XMLRPC-C version $(minversion) or newer." ; + echo "The version provided in $(prefix) is $(version). " ; + echo "Install a suitable version if necessary and provide its location with\n " ; + echo " --with-xmlrpc-c=/path/to/correct/version\n" ; + echo "You can also use --no-xmlrpc-c to compile without the xmlrpc-c library, " ; + echo "in which case Moses will work in batch mode, but can't be run as a server. " ; + exit : 1 ; + } + # now add stuff to the requirements local xmlrpc-cxxflags = [ shell_or_die "$(config) c++2 abyss-server --cflags" ] ; requirements += <define>HAVE_XMLRPC_C ; diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp index 71efcc79c..caca838ee 100644 --- a/misc/queryPhraseTableMin.cpp +++ b/misc/queryPhraseTableMin.cpp @@ -9,6 +9,7 @@ #include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h" #include "moses/Util.h" #include "moses/Phrase.h" +#include "moses/parameters/AllOptions.h" void usage(); @@ -50,7 +51,8 @@ int main(int argc, char **argv) std::stringstream ss; ss << nscores; PhraseDictionaryCompact pdc("PhraseDictionaryCompact input-factor=0 output-factor=0 num-features=" + ss.str() + " path=" + ttable); - pdc.Load(); + AllOptions::ptr opts(new AllOptions); + pdc.Load(opts); std::string line; while(getline(std::cin, line)) { diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp index 302db4204..3cb801342 100644 --- a/moses-cmd/LatticeMBRGrid.cpp +++ b/moses-cmd/LatticeMBRGrid.cpp @@ -163,11 +163,12 @@ int main(int argc, char const* argv[]) } StaticData& SD = const_cast<StaticData&>(StaticData::Instance()); - LMBR_Options& lmbr = SD.options().lmbr; - MBR_Options& mbr = SD.options().mbr; + boost::shared_ptr<AllOptions> opts(new AllOptions(*SD.options())); + LMBR_Options& lmbr = opts->lmbr; + MBR_Options& mbr = opts->mbr; lmbr.enabled = true; - boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper); + boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts)); if (!ioWrapper) { throw runtime_error("Failed to initialise IOWrapper"); } @@ -205,10 +206,7 @@ int main(int argc, char const* argv[]) << r << " " << size_t(prune_i) << " " << scale_i << " ||| "; vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList); - manager.OutputBestHypo(mbrBestHypo, lineCount, - manager.options().output.ReportSegmentation, - manager.options().output.ReportAllFactors, - cout); + manager.OutputBestHypo(mbrBestHypo, cout); } } } diff --git a/moses-cmd/MainVW.cpp b/moses-cmd/MainVW.cpp index 94eee66d9..2f313df01 100644 --- a/moses-cmd/MainVW.cpp +++ b/moses-cmd/MainVW.cpp @@ -125,8 +125,8 @@ int main(int argc, char const** argv) IFVERBOSE(1) { PrintUserTime("Created input-output object"); } - - boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper()); + AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options())); + boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts)); if (ioWrapper == NULL) { cerr << "Error; Failed to create IO object" << endl; exit(1); diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp index 9806efd39..b4fdee865 100644 --- a/moses/BaseManager.cpp +++ b/moses/BaseManager.cpp @@ -98,10 +98,10 @@ void BaseManager:: OutputSurface(std::ostream &out, Phrase const& phrase) const { - std::vector<FactorType> const& factor_order = options().output.factor_order; + std::vector<FactorType> const& factor_order = options()->output.factor_order; - bool markUnknown = options().unk.mark; - std::string const& fd = options().output.FactorDelimiter; + bool markUnknown = options()->unk.mark; + std::string const& fd = options()->output.FactorDelimiter; size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { @@ -110,7 +110,7 @@ OutputSurface(std::ostream &out, Phrase const& phrase) const const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { - out << options().unk.prefix; + out << options()->unk.prefix; } out << *factor; @@ -122,7 +122,7 @@ OutputSurface(std::ostream &out, Phrase const& phrase) const } if(markUnknown && word.IsOOV()) { - out << options().unk.suffix; + out << options()->unk.suffix; } out << " "; @@ -147,7 +147,7 @@ void BaseManager::WriteApplicationContext(std::ostream &out, } } -AllOptions const& +AllOptions::ptr const& BaseManager:: options() const { diff --git a/moses/BaseManager.h b/moses/BaseManager.h index 65b78d434..3d858c18a 100644 --- a/moses/BaseManager.h +++ b/moses/BaseManager.h @@ -49,7 +49,7 @@ public: //! the input sentence being decoded const InputType& GetSource() const; const ttasksptr GetTtask() const; - AllOptions const& options() const; + AllOptions::ptr const& options() const; virtual void Decode() = 0; // outputs diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index c1823b33d..e8d095e3b 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -61,7 +61,7 @@ public: , m_transOptRange(transOptRange) { m_totalWeightDistortion = 0; const StaticData &staticData = StaticData::Instance(); - + const std::vector<const DistortionScoreProducer*> &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions(); std::vector<const DistortionScoreProducer*>::const_iterator iter; for (iter = ffs.begin(); iter != ffs.end(); ++iter) { @@ -139,7 +139,8 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer } // Fetch the things we need for distortion cost computation. - int maxDistortion = StaticData::Instance().GetMaxDistortion(); + // int maxDistortion = StaticData::Instance().GetMaxDistortion(); + int maxDistortion = itype.options()->reordering.max_distortion; if (maxDistortion == -1) { for (HypothesisSet::const_iterator iter = m_prevBitmapContainer.GetHypotheses().begin(); iter != m_prevBitmapContainer.GetHypotheses().end(); ++iter) { diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp index b84bb9eaa..780c9d93f 100644 --- a/moses/ChartCell.cpp +++ b/moses/ChartCell.cpp @@ -50,7 +50,7 @@ ChartCellBase::~ChartCellBase() {} ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) : ChartCellBase(startPos, endPos), m_manager(manager) { - m_nBestIsEnabled = manager.options().nbest.enabled; + m_nBestIsEnabled = manager.options()->nbest.enabled; } ChartCell::~ChartCell() {} @@ -66,7 +66,7 @@ bool ChartCell::AddHypothesis(ChartHypothesis *hypo) MapType::iterator m = m_hypoColl.find(targetLHS); if (m == m_hypoColl.end()) { std::pair<Word, ChartHypothesisCollection> - e(targetLHS, ChartHypothesisCollection(m_manager.options())); + e(targetLHS, ChartHypothesisCollection(*m_manager.options())); m = m_hypoColl.insert(e).first; } return m->second.AddHypothesis(hypo, m_manager); @@ -101,7 +101,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList } // pluck things out of queue and add to hypo collection - const size_t popLimit = m_manager.options().cube.pop_limit; + const size_t popLimit = m_manager.options()->cube.pop_limit; for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) { ChartHypothesis *hypo = queue.Pop(); AddHypothesis(hypo); diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index c99aec45e..40aaecd46 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -102,7 +102,7 @@ ChartHypothesis::~ChartHypothesis() */ void ChartHypothesis::GetOutputPhrase(Phrase &outPhrase) const { - FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor; + FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) { const Word &word = GetCurrTargetPhrase().GetWord(pos); @@ -256,7 +256,7 @@ void ChartHypothesis::CleanupArcList() * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ - AllOptions const& opts = StaticData::Instance().options(); + AllOptions const& opts = *StaticData::Instance().options(); size_t nBestSize = opts.nbest.nbest_size; bool distinctNBest = (opts.nbest.only_distinct || opts.mbr.enabled @@ -336,7 +336,7 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo) out << "->" << hypo.GetWinningHypothesis()->GetId(); } - if (StaticData::Instance().GetIncludeLHSInSearchGraph()) { + if (hypo.GetManager().options()->output.include_lhs_in_search_graph) { out << " " << hypo.GetTargetLHS() << "=>"; } out << " " << hypo.GetCurrTargetPhrase() diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h index 6f8a578f7..9ed4f2f12 100644 --- a/moses/ChartHypothesis.h +++ b/moses/ChartHypothesis.h @@ -52,7 +52,7 @@ protected: boost::shared_ptr<ChartTranslationOption> m_transOpt; - Range m_currSourceWordsRange; + Range m_currSourceWordsRange; std::vector<const FFState*> m_ffStates; /*! stateful feature function states */ /*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */ mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown; @@ -62,8 +62,8 @@ protected: ,m_lmPrefix; float m_totalScore; - ChartArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */ - const ChartHypothesis *m_winningHypo; + ChartArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */ + const ChartHypothesis *m_winningHypo; std::vector<const ChartHypothesis*> m_prevHypos; // always sorted by source position? diff --git a/moses/ChartKBestExtractor.cpp b/moses/ChartKBestExtractor.cpp index 8d5997f21..ff766d6e1 100644 --- a/moses/ChartKBestExtractor.cpp +++ b/moses/ChartKBestExtractor.cpp @@ -82,7 +82,7 @@ void ChartKBestExtractor::Extract( // Generate the target-side yield of the derivation d. Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d) { - FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor; + FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; Phrase ret(ARRAY_SIZE_INCR); diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index 082f3f05e..d83bc3a7d 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -128,16 +128,15 @@ void ChartManager::Decode() */ void ChartManager::AddXmlChartOptions() { - // const StaticData &staticData = StaticData::Instance(); - - const std::vector <ChartTranslationOptions*> xmlChartOptionsList = m_source.GetXmlChartTranslationOptions(); + const std::vector <ChartTranslationOptions*> xmlChartOptionsList + = m_source.GetXmlChartTranslationOptions(); IFVERBOSE(2) { cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl; } if (xmlChartOptionsList.size() == 0) return; - for(std::vector<ChartTranslationOptions*>::const_iterator i = xmlChartOptionsList.begin(); - i != xmlChartOptionsList.end(); ++i) { + typedef std::vector<ChartTranslationOptions*>::const_iterator citer; + for(citer i = xmlChartOptionsList.begin(); i != xmlChartOptionsList.end(); ++i) { ChartTranslationOptions* opt = *i; const Range &range = opt->GetSourceWordsRange(); @@ -204,8 +203,7 @@ void ChartManager::CalcNBest( // than n. The n-best factor determines how much bigger the limit should be, // with 0 being 'unlimited.' This actually sets a large-ish limit in case // too many translations are identical. - const StaticData &staticData = StaticData::Instance(); - const std::size_t nBestFactor = staticData.options().nbest.factor; + const std::size_t nBestFactor = options()->nbest.factor; std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor; // Extract the derivations. @@ -315,15 +313,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const void ChartManager::OutputNBest(OutputCollector *collector) const { - // const StaticData &staticData = StaticData::Instance(); - size_t nBestSize = options().nbest.nbest_size; + size_t nBestSize = options()->nbest.nbest_size; if (nBestSize > 0) { const size_t translationId = m_source.GetTranslationId(); VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " - << options().nbest.output_file_path << endl); + << options()->nbest.output_file_path << endl); std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList; - CalcNBest(nBestSize, nBestList, options().nbest.only_distinct); + CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct); OutputNBestList(collector, nBestList, translationId); IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); @@ -336,9 +333,6 @@ void ChartManager::OutputNBestList(OutputCollector *collector, const ChartKBestExtractor::KBestVec &nBestList, long translationId) const { - // const StaticData &staticData = StaticData::Instance(); - // const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder(); - std::ostringstream out; if (collector->OutputIsCout()) { @@ -347,7 +341,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector, FixPrecision(out); } - NBestOptions const& nbo = options().nbest; + NBestOptions const& nbo = options()->nbest; bool includeWordAlignment = nbo.include_alignment_info; bool PrintNBestTrees = nbo.print_trees; @@ -369,7 +363,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector, OutputSurface(out, outputPhrase); // , outputFactorOrder, false); out << " ||| "; boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation); - bool with_labels = options().nbest.include_feature_labels; + bool with_labels = options()->nbest.include_feature_labels; scoreBreakdown->OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; @@ -615,13 +609,11 @@ void ChartManager::OutputDetailedTranslationReport( collector->Write(translationId, out.str()); //DIMw - const StaticData &staticData = StaticData::Instance(); - - if (options().output.detailed_all_transrep_filepath.size()) { + if (options()->output.detailed_all_transrep_filepath.size()) { const Sentence &sentence = static_cast<const Sentence &>(m_source); - size_t nBestSize = staticData.options().nbest.nbest_size; + size_t nBestSize = options()->nbest.nbest_size; std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList; - CalcNBest(nBestSize, nBestList, staticData.options().nbest.nbest_size); + CalcNBest(nBestSize, nBestList, options()->nbest.only_distinct); OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId); } @@ -725,7 +717,8 @@ void ChartManager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId); //Tree of full sentence - const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure(); + const StatefulFeatureFunction* treeStructure; + treeStructure = StaticData::Instance().GetTreeStructure(); if (treeStructure != NULL) { const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); for( size_t i=0; i<sff.size(); i++ ) { @@ -813,15 +806,6 @@ void ChartManager::OutputDetailedAllTranslationReport( collector->Write(translationId, out.str()); } -// void ChartManager::OutputSearchGraphHypergraph() const -// { -// const StaticData &staticData = StaticData::Instance(); -// if (staticData.GetOutputSearchGraphHypergraph()) { -// HypergraphOutput<ChartManager> hypergraphOutputChart(PRECISION); -// hypergraphOutputChart.Write(*this); -// } -// } - void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const { if (!collector) @@ -834,11 +818,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe Backtrack(hypo); VERBOSE(3,"0" << std::endl); - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << hypo->GetFutureScore() << " "; } - if (options().output.RecoverPath) { + if (options()->output.RecoverPath) { out << "||| "; } Phrase outPhrase(ARRAY_SIZE_INCR); @@ -851,14 +835,12 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); - const std::vector<FactorType> outputFactorOrder - = StaticData::Instance().GetOutputFactorOrder(); - string output = outPhrase.GetStringRep(outputFactorOrder); + string output = outPhrase.GetStringRep(options()->output.factor_order); out << output << endl; } else { VERBOSE(1, "NO BEST TRANSLATION" << endl); - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << "0 "; } diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index dafffc33a..60899c02d 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -44,17 +44,26 @@ ChartParserUnknown ChartParserUnknown::~ChartParserUnknown() { RemoveAllInColl(m_unksrcs); - // RemoveAllInColl(m_cacheTargetPhraseCollection); } -void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, ChartParserCallback &to) +AllOptions::ptr const& +ChartParserUnknown:: +options() const +{ + return m_ttask.lock()->options(); +} + +void +ChartParserUnknown:: +Process(const Word &sourceWord, const Range &range, ChartParserCallback &to) { // unknown word, add as trans opt const StaticData &staticData = StaticData::Instance(); - const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); + const UnknownWordPenaltyProducer &unknownWordPenaltyProducer + = UnknownWordPenaltyProducer::Instance(); size_t isDigit = 0; - if (staticData.options().unk.drop) { + if (options()->unk.drop) { const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface const StringPiece s = f->GetString(); isDigit = s.find_first_of("0123456789"); @@ -79,9 +88,9 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha } //TranslationOption *transOpt; - if (! staticData.options().unk.drop || isDigit) { + if (! options()->unk.drop || isDigit) { // loop - const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); + const UnknownLHSList &lhsList = options()->syntax.unknown_lhs; // staticData.GetUnknownLHS(); UnknownLHSList::const_iterator iterLHS; for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) { const string &targetLHSStr = iterLHS->first; @@ -91,7 +100,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal(); Word *targetLHS = new Word(true); - targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true); + targetLHS->CreateFromString(Output, options()->output.factor_order, + targetLHSStr, true); UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS"); // add to dictionary @@ -107,9 +117,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha targetPhrase->SetAlignmentInfo("0-0"); targetPhrase->EvaluateInIsolation(*unksrc); - AllOptions const& opts = staticData.options(); - if (!opts.output.detailed_tree_transrep_filepath.empty() || - opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) { + if (!options()->output.detailed_tree_transrep_filepath.empty() || + options()->nbest.print_trees || staticData.GetTreeStructure() != NULL) { std::string prop = "[ "; prop += (*targetLHS)[0]->GetString().as_string() + " "; prop += sourceWord[0]->GetString().as_string() + " ]"; @@ -125,14 +134,15 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha TargetPhrase *targetPhrase = new TargetPhrase(firstPt); // loop - const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); + const UnknownLHSList &lhsList = options()->syntax.unknown_lhs;//staticData.GetUnknownLHS(); UnknownLHSList::const_iterator iterLHS; for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) { const string &targetLHSStr = iterLHS->first; //float prob = iterLHS->second; Word *targetLHS = new Word(true); - targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true); + targetLHS->CreateFromString(Output, staticData.options()->output.factor_order, + targetLHSStr, true); UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS"); targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore); @@ -209,9 +219,11 @@ void ChartParser::Create(const Range &range, ChartParserCallback &to) } } - if (range.GetNumWordsCovered() == 1 && range.GetStartPos() != 0 && range.GetStartPos() != m_source.GetSize()-1) { - bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); - if (to.Empty() || alwaysCreateDirectTranslationOption) { + if (range.GetNumWordsCovered() == 1 + && range.GetStartPos() != 0 + && range.GetStartPos() != m_source.GetSize()-1) { + bool always = options()->unk.always_create_direct_transopt; + if (to.Empty() || always) { // create unknown words for 1 word coverage where we don't have any trans options const Word &sourceWord = m_source.GetWord(range.GetStartPos()); m_unknown.Process(sourceWord, range, to); @@ -285,4 +297,14 @@ long ChartParser::GetTranslationId() const { return m_source.GetTranslationId(); } + + +AllOptions::ptr const& +ChartParser:: +options() const +{ + return m_ttask.lock()->options(); +} + + } // namespace Moses diff --git a/moses/ChartParser.h b/moses/ChartParser.h index d6f20b6d6..b9d756abb 100644 --- a/moses/ChartParser.h +++ b/moses/ChartParser.h @@ -57,6 +57,7 @@ public: private: std::vector<Phrase*> m_unksrcs; std::list<TargetPhraseCollection::shared_ptr> m_cacheTargetPhraseCollection; + AllOptions::ptr const& options() const; }; class ChartParser @@ -78,6 +79,8 @@ public: return m_unknown.GetUnknownSources(); } + AllOptions::ptr const& options() const; + private: ChartParserUnknown m_unknown; std::vector <DecodeGraph*> m_decodeGraphList; diff --git a/moses/ChartTranslationOptions.cpp b/moses/ChartTranslationOptions.cpp index fefad3680..b02a694ba 100644 --- a/moses/ChartTranslationOptions.cpp +++ b/moses/ChartTranslationOptions.cpp @@ -68,7 +68,7 @@ void ChartTranslationOptions::EvaluateWithSourceContext(const InputType &input, { SetInputPath(&inputPath); // if (StaticData::Instance().GetPlaceholderFactor() != NOT_FOUND) { - if (inputPath.ttask.lock()->options().input.placeholder_factor != NOT_FOUND) { + if (inputPath.ttask.lock()->options()->input.placeholder_factor != NOT_FOUND) { CreateSourceRuleFromInputPath(); } diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 188c57438..f60b7907f 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -11,7 +11,7 @@ #include "Sentence.h" #include "moses/FF/InputFeature.h" #include "util/exception.hh" - +#include "moses/TranslationTask.h" namespace Moses { struct CNStats { @@ -62,13 +62,12 @@ GetColumnIncrement(size_t i, size_t j) const } ConfusionNet:: -ConfusionNet() : InputType() +ConfusionNet(AllOptions::ptr const& opts) : InputType(opts) { stats.createOne(); - const StaticData& SD = StaticData::Instance(); - if (SD.IsSyntax()) { - m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal()); + if (is_syntax(opts->search.algo)) { + m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal); } UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified"); } @@ -80,7 +79,7 @@ ConfusionNet:: } ConfusionNet:: -ConfusionNet(Sentence const& s) : InputType() +ConfusionNet(Sentence const& s) : InputType(s.options()) { data.resize(s.GetSize()); for(size_t i=0; i<s.GetSize(); ++i) { @@ -92,14 +91,14 @@ ConfusionNet(Sentence const& s) : InputType() bool ConfusionNet:: -ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format) +ReadF(std::istream& in, int format) { VERBOSE(2, "read confusion net with format "<<format<<"\n"); switch(format) { case 0: - return ReadFormat0(in,factorOrder); + return ReadFormat0(in); case 1: - return ReadFormat1(in,factorOrder); + return ReadFormat1(in); default: std::cerr << "ERROR: unknown format '"<<format <<"' in ConfusionNet::Read"; @@ -109,22 +108,20 @@ ReadF(std::istream& in, const std::vector<FactorType>& factorOrder, int format) int ConfusionNet:: -Read(std::istream& in, - const std::vector<FactorType>& factorOrder, - AllOptions const& opts) +Read(std::istream& in) { - int rv=ReadF(in,factorOrder,0); + int rv=ReadF(in,0); if(rv) stats.collect(*this); return rv; } bool ConfusionNet:: -ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder) +ReadFormat0(std::istream& in) { Clear(); + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; - // const StaticData &staticData = StaticData::Instance(); const InputFeature *inputFeature = InputFeature::InstancePtr(); size_t numInputScores = inputFeature->GetNumInputScores(); size_t numRealWordCount = inputFeature->GetNumRealWordsInInput(); @@ -140,7 +137,6 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder) Column col; while(is>>word) { Word w; - // String2Word(word,w,factorOrder); w.CreateFromString(Input,factorOrder,StringPiece(word),false,false); std::vector<float> probs(totalCount, 0.0); for(size_t i=0; i < numInputScores; i++) { @@ -179,9 +175,10 @@ ReadFormat0(std::istream& in, const std::vector<FactorType>& factorOrder) bool ConfusionNet:: -ReadFormat1(std::istream& in, const std::vector<FactorType>& factorOrder) +ReadFormat1(std::istream& in) { Clear(); + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; std::string line; if(!getline(in,line)) return 0; size_t s; @@ -282,9 +279,11 @@ ConfusionNet:: CreateTranslationOptionCollection(ttasksptr const& ttask) const { size_t maxNoTransOptPerCoverage - = StaticData::Instance().GetMaxNoTransOptPerCoverage(); + = ttask->options()->search.max_trans_opt_per_cov; + // StaticData::Instance().GetMaxNoTransOptPerCoverage(); float translationOptionThreshold - = StaticData::Instance().GetTranslationOptionThreshold(); + = ttask->options()->search.trans_opt_threshold; + // StaticData::Instance().GetTranslationOptionThreshold(); TranslationOptionCollection *rv = new TranslationOptionCollectionConfusionNet (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold); diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h index ef3e0294b..4d15f37b6 100644 --- a/moses/ConfusionNet.h +++ b/moses/ConfusionNet.h @@ -30,12 +30,12 @@ protected: std::vector<Column> data; NonTerminalSet m_defaultLabelSet; - bool ReadFormat0(std::istream&,const std::vector<FactorType>& factorOrder); - bool ReadFormat1(std::istream&,const std::vector<FactorType>& factorOrder); + bool ReadFormat0(std::istream&); + bool ReadFormat1(std::istream&); void String2Word(const std::string& s,Word& w,const std::vector<FactorType>& factorOrder); public: - ConfusionNet(); + ConfusionNet(AllOptions::ptr const& opts); virtual ~ConfusionNet(); ConfusionNet(Sentence const& s); @@ -46,7 +46,8 @@ public: const Column& GetColumn(size_t i) const { UTIL_THROW_IF2(i >= data.size(), - "Out of bounds. Trying to access " << i << " when vector only contains " << data.size()); + "Out of bounds. Trying to access " << i + << " when vector only contains " << data.size()); return data[i]; } const Column& operator[](size_t i) const { @@ -64,11 +65,10 @@ public: data.clear(); } - bool ReadF(std::istream&,const std::vector<FactorType>& factorOrder,int format=0); + bool ReadF(std::istream&, int format=0); virtual void Print(std::ostream&) const; - int Read(std::istream& in,const std::vector<FactorType>& factorOrder, - AllOptions const& opts); + int Read(std::istream& in); Phrase GetSubString(const Range&) const; //TODO not defined std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp index 270cc1baf..1c0336187 100644 --- a/moses/ExportInterface.cpp +++ b/moses/ExportInterface.cpp @@ -64,6 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #ifdef HAVE_XMLRPC_C #include "moses/server/Server.h" #endif +#include <signal.h> using namespace std; using namespace Moses; @@ -102,7 +103,7 @@ SimpleTranslationInterface::~SimpleTranslationInterface() //the simplified version of string input/output translation string SimpleTranslationInterface::translate(const string &inputString) { - boost::shared_ptr<Moses::IOWrapper> ioWrapper(new IOWrapper); + boost::shared_ptr<Moses::IOWrapper> ioWrapper(new IOWrapper(*StaticData::Instance().options())); // main loop over set of input sentences size_t sentEnd = inputString.rfind('\n'); //find the last \n, the input stream has to be appended with \n to be translated const string &newString = sentEnd != string::npos ? inputString : inputString + '\n'; @@ -142,11 +143,25 @@ void SimpleTranslationInterface::DestroyFeatureFunctionStatic() Parameter params; +void +signal_handler(int signum) +{ + if (signum == SIGALRM) { + exit(0); // that's what we expected from the child process after forking + } else if (signum == SIGTERM || signum == SIGKILL) { + exit(0); + } else { + std::cerr << "Unexpected signal " << signum << std::endl; + exit(signum); + } +} + //! run moses in server mode int run_as_server() { #ifdef HAVE_XMLRPC_C + kill(getppid(),SIGALRM); MosesServer::Server server(params); return server.run(); // actually: don't return. see Server::run() #else @@ -167,7 +182,7 @@ batch_run() IFVERBOSE(1) PrintUserTime("Created input-output object"); // set up read/writing class: - boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper); + boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*staticData.options())); UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object" << " [" << HERE << "]"); @@ -326,17 +341,28 @@ int decoder_main(int argc, char const** argv) if (!StaticData::LoadDataStatic(¶ms, argv[0])) exit(1); + // +#if 1 + pid_t pid; + if (params.GetParam("daemon")) { + pid = fork(); + if (pid) { + pause(); // parent process + exit(0); + } + } +#endif // setting "-show-weights" -> just dump out weights and exit if (params.isParamSpecified("show-weights")) { ShowWeights(); exit(0); } - if (params.GetParam("server")) + if (params.GetParam("server")) { + std::cerr << "RUN SERVER at pid " << pid << std::endl; return run_as_server(); - else + } else return batch_run(); - } #ifdef NDEBUG catch (const std::exception &e) { diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp index 2ae2bffcc..08feb0876 100644 --- a/moses/FF/ConstrainedDecoding.cpp +++ b/moses/FF/ConstrainedDecoding.cpp @@ -46,12 +46,12 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line) ReadParameters(); } -void ConstrainedDecoding::Load() +void ConstrainedDecoding::Load(AllOptions::ptr const& opts) { + m_options = opts; const StaticData &staticData = StaticData::Instance(); bool addBeginEndWord - = ((staticData.options().search.algo == CYKPlus) - || (staticData.options().search.algo == ChartIncremental)); + = ((opts->search.algo == CYKPlus) || (opts->search.algo == ChartIncremental)); for(size_t i = 0; i < m_paths.size(); ++i) { InputFileStream constraintFile(m_paths[i]); @@ -63,12 +63,10 @@ void ConstrainedDecoding::Load() Phrase phrase(0); if (vecStr.size() == 1) { sentenceID++; - // phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[0], staticData.GetFactorDelimiter(), NULL); - phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[0], NULL); + phrase.CreateFromString(Output, opts->output.factor_order, vecStr[0], NULL); } else if (vecStr.size() == 2) { sentenceID = Scan<long>(vecStr[0]); - // phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[1], staticData.GetFactorDelimiter(), NULL); - phrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), vecStr[1], NULL); + phrase.CreateFromString(Output, opts->output.factor_order, vecStr[1], NULL); } else { UTIL_THROW(util::Exception, "Reference file not loaded"); } diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index 769edd80f..a695e3ce8 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -36,7 +36,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction public: ConstrainedDecoding(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const { return true; diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp index f49aecfc8..cab05de92 100644 --- a/moses/FF/CountNonTerms.cpp +++ b/moses/FF/CountNonTerms.cpp @@ -1,7 +1,6 @@ #include "CountNonTerms.h" #include "moses/Util.h" #include "moses/TargetPhrase.h" -#include "moses/StaticData.h" using namespace std; @@ -21,8 +20,6 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedScores) const { - const StaticData &staticData = StaticData::Instance(); - vector<float> scores(m_numScoreComponents, 0); size_t indScore = 0; @@ -39,7 +36,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase if (m_targetSyntax) { for (size_t i = 0; i < targetPhrase.GetSize(); ++i) { const Word &word = targetPhrase.GetWord(i); - if (word.IsNonTerminal() && word != staticData.GetOutputDefaultNonTerminal()) { + if (word.IsNonTerminal() && word != m_options->syntax.output_default_non_terminal) { ++scores[indScore]; } } @@ -49,7 +46,7 @@ void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase if (m_sourceSyntax) { for (size_t i = 0; i < sourcePhrase.GetSize(); ++i) { const Word &word = sourcePhrase.GetWord(i); - if (word.IsNonTerminal() && word != staticData.GetInputDefaultNonTerminal()) { + if (word.IsNonTerminal() && word != m_options->syntax.input_default_non_terminal) { ++scores[indScore]; } } @@ -72,5 +69,12 @@ void CountNonTerms::SetParameter(const std::string& key, const std::string& valu } } +void +CountNonTerms:: +Load(AllOptions::ptr const& opts) +{ + m_options = opts; +} + } diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h index a9f6e884b..d2e697173 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -41,6 +41,7 @@ public: void SetParameter(const std::string& key, const std::string& value); + void Load(AllOptions::ptr const& opts); protected: bool m_all, m_sourceSyntax, m_targetSyntax; }; diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index 5e4ada5b0..4d058c863 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -52,8 +52,9 @@ void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input estimatedScores->Assign(this, scores); } -void CoveredReferenceFeature::Load() +void CoveredReferenceFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; InputFileStream refFile(m_path); std::string line; const StaticData &staticData = StaticData::Instance(); diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index aedfb3793..bb21b719f 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -44,7 +44,7 @@ public: ReadParameters(); } - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const { return true; diff --git a/moses/FF/DeleteRules.cpp b/moses/FF/DeleteRules.cpp index 714e0e8a8..32473b2cf 100644 --- a/moses/FF/DeleteRules.cpp +++ b/moses/FF/DeleteRules.cpp @@ -16,8 +16,9 @@ DeleteRules::DeleteRules(const std::string &line) ReadParameters(); } -void DeleteRules::Load() +void DeleteRules::Load(AllOptions::ptr const& opts) { + m_options = opts; std::vector<FactorType> factorOrder; factorOrder.push_back(0); // unfactored for now diff --git a/moses/FF/DeleteRules.h b/moses/FF/DeleteRules.h index 1070aaa51..2decce4b3 100644 --- a/moses/FF/DeleteRules.h +++ b/moses/FF/DeleteRules.h @@ -15,7 +15,7 @@ protected: public: DeleteRules(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const { return true; diff --git a/moses/FF/DynamicCacheBasedLanguageModel.cpp b/moses/FF/DynamicCacheBasedLanguageModel.cpp index 6f183eeec..f45e02bc1 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.cpp +++ b/moses/FF/DynamicCacheBasedLanguageModel.cpp @@ -323,8 +323,9 @@ void DynamicCacheBasedLanguageModel::Clear() m_cache.clear(); } -void DynamicCacheBasedLanguageModel::Load() +void DynamicCacheBasedLanguageModel::Load(AllOptions::ptr const& opts) { + m_options = opts; // SetPreComputedScores(); VERBOSE(2,"DynamicCacheBasedLanguageModel::Load()" << std::endl); Load(m_initfiles); diff --git a/moses/FF/DynamicCacheBasedLanguageModel.h b/moses/FF/DynamicCacheBasedLanguageModel.h index 497283b44..be3d07269 100644 --- a/moses/FF/DynamicCacheBasedLanguageModel.h +++ b/moses/FF/DynamicCacheBasedLanguageModel.h @@ -119,7 +119,7 @@ public: return true; } - void Load(); + void Load(AllOptions::ptr const& opts); void Load(const std::string filestr); void Execute(std::string command); void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index c9ad7d2c8..9d6aa6597 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -7,12 +7,13 @@ #include <string> #include "moses/FeatureVector.h" #include "moses/TypeDef.h" - +#include "moses/parameters/AllOptions.h" #include <boost/shared_ptr.hpp> namespace Moses { +class AllOptions; class Phrase; class TargetPhrase; class TranslationOptionList; @@ -46,6 +47,7 @@ protected: size_t m_index; // index into vector covering ALL feature function values std::vector<bool> m_tuneableComponents; size_t m_numTuneableComponents; + AllOptions::ptr m_options; //In case there's multiple producers with the same description static std::multiset<std::string> description_counts; @@ -69,7 +71,13 @@ public: virtual ~FeatureFunction(); //! override to load model files - virtual void Load() { + virtual void Load(AllOptions::ptr const& opts) { + m_options = opts; + } + + AllOptions::ptr const& + options() const { + return m_options; } static void ResetDescriptionCounts() { diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp index 06e26b417..89f27f4a1 100644 --- a/moses/FF/GlobalLexicalModel.cpp +++ b/moses/FF/GlobalLexicalModel.cpp @@ -51,8 +51,9 @@ GlobalLexicalModel::~GlobalLexicalModel() } } -void GlobalLexicalModel::Load() +void GlobalLexicalModel::Load(AllOptions::ptr const& opts) { + m_options = opts; FactorCollection &factorCollection = FactorCollection::Instance(); const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter(); diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h index d126f21f2..1701195f7 100644 --- a/moses/FF/GlobalLexicalModel.h +++ b/moses/FF/GlobalLexicalModel.h @@ -57,7 +57,7 @@ private: std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec; std::string m_filePath; - void Load(); + void Load(AllOptions::ptr const& opts); float ScorePhrase( const TargetPhrase& targetPhrase ) const; float GetFromCacheOrScorePhrase( const TargetPhrase& targetPhrase ) const; diff --git a/moses/FF/HyperParameterAsWeight.cpp b/moses/FF/HyperParameterAsWeight.cpp index 37516af52..043631356 100644 --- a/moses/FF/HyperParameterAsWeight.cpp +++ b/moses/FF/HyperParameterAsWeight.cpp @@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line) vector<float> weights = staticData.GetWeights(this); - staticData.m_options.search.stack_size = weights[0] * 1000; - staticData.m_options.search.beam_width = weights[1] * 10; + staticData.m_options->search.stack_size = weights[0] * 1000; + staticData.m_options->search.beam_width = weights[1] * 10; } diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp index 21bae61e1..790c8cc61 100644 --- a/moses/FF/InputFeature.cpp +++ b/moses/FF/InputFeature.cpp @@ -23,9 +23,9 @@ InputFeature::InputFeature(const std::string &line) s_instance = this; } -void InputFeature::Load() +void InputFeature::Load(AllOptions::ptr const& opts) { - + m_options = opts; const PhraseDictionary *pt = PhraseDictionary::GetColl()[0]; const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast<const PhraseDictionaryTreeAdaptor*>(pt); diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index e26eb1317..b2b3b4ff4 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -23,7 +23,7 @@ public: InputFeature(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/FF/InternalTree.cpp b/moses/FF/InternalTree.cpp index c38fc5747..dde0ad268 100644 --- a/moses/FF/InternalTree.cpp +++ b/moses/FF/InternalTree.cpp @@ -7,8 +7,11 @@ namespace Moses InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal) { + std::vector<FactorType> const& oFactors + = StaticData::Instance().options()->output.factor_order; if (len > 0) { - m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(start, len), nonterminal); + m_value.CreateFromString(Output, oFactors, StringPiece(line).substr(start, len), + nonterminal); } } @@ -18,7 +21,9 @@ InternalTree::InternalTree(const std::string & line, const bool nonterminal) size_t found = line.find_first_of("[] "); if (found == line.npos) { - m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), line, nonterminal); + m_value.CreateFromString(Output, + StaticData::Instance().options()->output.factor_order, + line, nonterminal); } else { AddSubTree(line, 0); } @@ -44,14 +49,18 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos) pos = m_children.back()->AddSubTree(line, pos+1); } else { if (len > 0) { - m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), false); + m_value.CreateFromString(Output, + StaticData::Instance().options()->output.factor_order, + StringPiece(line).substr(oldpos, len), false); has_value = true; } pos = AddSubTree(line, pos+1); } } else if (token == ' ' || token == ']') { if (len > 0 && !has_value) { - m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), true); + m_value.CreateFromString(Output, + StaticData::Instance().options()->output.factor_order, + StringPiece(line).substr(oldpos, len), true); has_value = true; } else if (len > 0) { m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false)); @@ -81,7 +90,7 @@ std::string InternalTree::GetString(bool start) const ret += "["; } - ret += m_value.GetString(StaticData::Instance().GetOutputFactorOrder(), false); + ret += m_value.GetString(StaticData::Instance().options()->output.factor_order, false); for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) { ret += (*it)->GetString(false); } @@ -189,4 +198,4 @@ bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>: return false; } -}
\ No newline at end of file +} diff --git a/moses/FF/InternalTree.h b/moses/FF/InternalTree.h index 29db0241e..165355d06 100644 --- a/moses/FF/InternalTree.h +++ b/moses/FF/InternalTree.h @@ -6,6 +6,7 @@ #include <vector> #include "FFState.h" #include "moses/Word.h" +#include "moses/StaticData.h" #include <boost/shared_ptr.hpp> #include <boost/make_shared.hpp> #include "util/generator.hh" diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 168d7a8ec..a3fc2c1d6 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -84,8 +84,9 @@ LexicalReordering:: void LexicalReordering:: -Load() +Load(AllOptions::ptr const& opts) { + m_options = opts; typedef LexicalReorderingTable LRTable; if (m_filePath.size()) m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF, diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index fa2747c82..bc21e6f32 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -33,7 +33,7 @@ class LexicalReordering : public StatefulFeatureFunction public: LexicalReordering(const std::string &line); virtual ~LexicalReordering(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual bool diff --git a/moses/FF/Model1Feature.cpp b/moses/FF/Model1Feature.cpp index 23a1fc0a3..b5d23bd73 100644 --- a/moses/FF/Model1Feature.cpp +++ b/moses/FF/Model1Feature.cpp @@ -159,8 +159,9 @@ void Model1Feature::SetParameter(const std::string& key, const std::string& valu } } -void Model1Feature::Load() +void Model1Feature::Load(AllOptions::ptr const& opts) { + m_options = opts; FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading source vocabulary from file " << m_fileNameVcbS << " ..."); Model1Vocabulary vcbS; vcbS.Load(m_fileNameVcbS); diff --git a/moses/FF/Model1Feature.h b/moses/FF/Model1Feature.h index 703443385..df7db4e2f 100644 --- a/moses/FF/Model1Feature.h +++ b/moses/FF/Model1Feature.h @@ -99,7 +99,7 @@ private: Model1LexicalTable m_model1; const Factor* m_emptyWord; - void Load(); + void Load(AllOptions::ptr const& opts); // cache mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache; diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index d12ddcd52..4df2cbba6 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -35,8 +35,9 @@ void OpSequenceModel :: readLanguageModel(const char *lmFile) } -void OpSequenceModel::Load() +void OpSequenceModel::Load(AllOptions::ptr const& opts) { + m_options = opts; readLanguageModel(m_lmPath.c_str()); } diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index 36a901974..925f9c83a 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -25,7 +25,7 @@ public: ~OpSequenceModel(); void readLanguageModel(const char *); - void Load(); + void Load(AllOptions::ptr const& opts); FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp index d51a8c290..332be1d2a 100644 --- a/moses/FF/PhraseOrientationFeature.cpp +++ b/moses/FF/PhraseOrientationFeature.cpp @@ -15,7 +15,6 @@ #include "moses/Hypothesis.h" #include "moses/ChartHypothesis.h" #include "moses/ChartManager.h" -#include "phrase-extract/extract-ghkm/Alignment.h" #include <boost/shared_ptr.hpp> @@ -23,15 +22,59 @@ namespace Moses { size_t PhraseOrientationFeatureState::hash() const { - UTIL_THROW2("TODO:Haven't figure this out yet"); + if (!m_distinguishStates) { + return 0; + } + + size_t ret = 0; + + if (m_leftBoundaryIsSet) { + HashCombineLeftBoundaryRecursive(ret, *this, m_useSparseNT); + } + if (m_rightBoundaryIsSet) { + boost::hash_combine(ret, 42); + HashCombineRightBoundaryRecursive(ret, *this, m_useSparseNT); + } + + return 0; } bool PhraseOrientationFeatureState::operator==(const FFState& other) const { - UTIL_THROW2("TODO:Haven't figure this out yet"); + if (!m_distinguishStates) { + return true; + } + + const PhraseOrientationFeatureState &otherState = static_cast<const PhraseOrientationFeatureState&>(other); + + if (!m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet && + !m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) { + return true; + } + if (m_leftBoundaryIsSet != otherState.m_leftBoundaryIsSet) { + return false; + } + if (m_rightBoundaryIsSet != otherState.m_rightBoundaryIsSet) { + return false; + } + + if (m_leftBoundaryIsSet) { + int compareLeft = CompareLeftBoundaryRecursive(*this, otherState, m_useSparseNT); + if (compareLeft != 0) { + return false; + } + } + if (m_rightBoundaryIsSet) { + int compareRight = CompareRightBoundaryRecursive(*this, otherState, m_useSparseNT); + if (compareRight != 0) { + return false; + } + } + + return true; } -//////////////////////////////////////////////////////////////////////////////// + const std::string PhraseOrientationFeature::MORIENT("M"); const std::string PhraseOrientationFeature::SORIENT("S"); const std::string PhraseOrientationFeature::DORIENT("D"); @@ -75,8 +118,9 @@ void PhraseOrientationFeature::SetParameter(const std::string& key, const std::s } -void PhraseOrientationFeature::Load() +void PhraseOrientationFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if ( !m_filenameTargetWordList.empty() ) { LoadWordList(m_filenameTargetWordList,m_targetWordList); m_useTargetWordList = true; @@ -143,7 +187,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, if (targetPhrase.GetAlignNonTerm().GetSize() != 0) { // Initialize phrase orientation scoring object - MosesTraining::Syntax::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(), + MosesTraining::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(), targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm()); PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData(); @@ -159,7 +203,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, // LEFT-TO-RIGHT DIRECTION - MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_L2R); + MosesTraining::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::PhraseOrientation::REO_DIR_L2R); if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary) && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule @@ -179,7 +223,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned && reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) { // discontinuous - l2rOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT; + l2rOrientation = MosesTraining::PhraseOrientation::REO_CLASS_DLEFT; } else { reoClassData->firstNonTerminalIsBoundary = true; } @@ -189,7 +233,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, // RIGHT-TO-LEFT DIRECTION - MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_R2L); + MosesTraining::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::PhraseOrientation::REO_DIR_R2L); if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary) && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule @@ -209,7 +253,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned && reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) { // discontinuous - r2lOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT; + r2lOrientation = MosesTraining::PhraseOrientation::REO_CLASS_DLEFT; } else { reoClassData->lastNonTerminalIsBoundary = true; } @@ -344,25 +388,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( // LEFT-TO-RIGHT DIRECTION - MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT]; + MosesTraining::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT]; IFFEATUREVERBOSE(2) { FEATUREVERBOSE(2, "l2rOrientation "); switch (l2rOrientation) { - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT: + case MosesTraining::PhraseOrientation::REO_CLASS_LEFT: FEATUREVERBOSE2(2, "mono" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT: + case MosesTraining::PhraseOrientation::REO_CLASS_RIGHT: FEATUREVERBOSE2(2, "swap" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT: + case MosesTraining::PhraseOrientation::REO_CLASS_DLEFT: FEATUREVERBOSE2(2, "dleft" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT: + case MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT: FEATUREVERBOSE2(2, "dright" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN: - // modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR + case MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN: + // modelType == MosesTraining::PhraseOrientation::REO_MSLR FEATUREVERBOSE2(2, "unknown->dleft" << std::endl); break; default: @@ -405,23 +449,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( } else { - if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { + if ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) { newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()); // if sub-derivation has left-boundary non-terminal: // add recursive actual score of boundary non-terminal from subderivation LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator); - } else if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { + } else if ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) { newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap()); // if sub-derivation has left-boundary non-terminal: // add recursive actual score of boundary non-terminal from subderivation LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator); - } else if ( ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || - ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || - ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { + } else if ( ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) || + ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) || + ( l2rOrientation == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous()); // if sub-derivation has left-boundary non-terminal: @@ -446,25 +490,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( // RIGHT-TO-LEFT DIRECTION - MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT]; + MosesTraining::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT]; IFFEATUREVERBOSE(2) { FEATUREVERBOSE(2, "r2lOrientation "); switch (r2lOrientation) { - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT: + case MosesTraining::PhraseOrientation::REO_CLASS_LEFT: FEATUREVERBOSE2(2, "mono" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT: + case MosesTraining::PhraseOrientation::REO_CLASS_RIGHT: FEATUREVERBOSE2(2, "swap" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT: + case MosesTraining::PhraseOrientation::REO_CLASS_DLEFT: FEATUREVERBOSE2(2, "dleft" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT: + case MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT: FEATUREVERBOSE2(2, "dright" << std::endl); break; - case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN: - // modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR + case MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN: + // modelType == MosesTraining::PhraseOrientation::REO_MSLR FEATUREVERBOSE2(2, "unknown->dleft" << std::endl); break; default: @@ -507,23 +551,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( } else { - if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { + if ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) { newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono()); // if sub-derivation has right-boundary non-terminal: // add recursive actual score of boundary non-terminal from subderivation RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator); - } else if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { + } else if ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) { newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap()); // if sub-derivation has right-boundary non-terminal: // add recursive actual score of boundary non-terminal from subderivation RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator); - } else if ( ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || - ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || - ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { + } else if ( ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) || + ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) || + ( r2lOrientation == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous()); // if sub-derivation has right-boundary non-terminal: @@ -871,17 +915,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin } -const std::string* PhraseOrientationFeature::ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const +const std::string* PhraseOrientationFeature::ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const { - if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) { + if ( o == MosesTraining::PhraseOrientation::REO_CLASS_LEFT ) { return &MORIENT; - } else if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) { + } else if ( o == MosesTraining::PhraseOrientation::REO_CLASS_RIGHT ) { return &SORIENT; - } else if ( ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) || - ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) || - ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { + } else if ( ( o == MosesTraining::PhraseOrientation::REO_CLASS_DLEFT ) || + ( o == MosesTraining::PhraseOrientation::REO_CLASS_DRIGHT ) || + ( o == MosesTraining::PhraseOrientation::REO_CLASS_UNKNOWN ) ) { return &DORIENT; } else { diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h index 4d9dc20dc..c8bb8ceba 100644 --- a/moses/FF/PhraseOrientationFeature.h +++ b/moses/FF/PhraseOrientationFeature.h @@ -16,7 +16,7 @@ #include "StatefulFeatureFunction.h" #include "FFState.h" #include "moses/Factor.h" -#include "phrase-extract/extract-ghkm/PhraseOrientation.h" +#include "phrase-extract/PhraseOrientation.h" #include "moses/PP/OrientationPhraseProperty.h" #include <boost/unordered_set.hpp> @@ -226,6 +226,54 @@ protected: return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT); }; + + static void HashCombineLeftBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) { + if (useSparseNT) { + boost::hash_combine(hash, state.m_leftBoundaryNonTerminalSymbol); + } + // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex); + // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations); + + for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) { + if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) { + boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RScores[i]); + } else { + boost::hash_combine(hash, 0); + } + } + + if (!state.m_leftBoundaryRecursionGuard) { + const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState; + if (prevState->m_leftBoundaryIsSet) { + HashCombineLeftBoundaryRecursive(hash, *prevState, useSparseNT); + } + } + }; + + static void HashCombineRightBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) { + if (useSparseNT) { + boost::hash_combine(hash, state.m_rightBoundaryNonTerminalSymbol); + } + // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex); + // boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations); + + for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) { + if (state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) { + boost::hash_combine(hash, state.m_rightBoundaryNonTerminalR2LScores[i]); + } else { + boost::hash_combine(hash, 0); + } + } + + if (!state.m_rightBoundaryRecursionGuard) { + const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState; + if (prevState->m_rightBoundaryIsSet) { + HashCombineRightBoundaryRecursive(hash, *prevState, useSparseNT); + } + } + }; + + template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) { for (size_t i=0; i<N; ++i) { if (x[i] ^ y[i]) @@ -264,8 +312,8 @@ public: struct ReoClassData { public: - std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R; - std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L; + std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R; + std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L; bool firstNonTerminalIsBoundary; bool firstNonTerminalPreviousSourceSpanIsAligned; bool firstNonTerminalFollowingSourceSpanIsAligned; @@ -289,7 +337,7 @@ public: void SetParameter(const std::string& key, const std::string& value); - void Load(); + void Load(AllOptions::ptr const& opts); void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase @@ -351,7 +399,7 @@ protected: ScoreComponentCollection* scoreBreakdown, const std::string* o) const; - const std::string* ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const; + const std::string* ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const; static const std::string MORIENT; static const std::string SORIENT; diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp index 5cee6d437..04878e2c6 100644 --- a/moses/FF/PhrasePairFeature.cpp +++ b/moses/FF/PhrasePairFeature.cpp @@ -65,8 +65,9 @@ void PhrasePairFeature::SetParameter(const std::string& key, const std::string& } } -void PhrasePairFeature::Load() +void PhrasePairFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if (m_domainTrigger) { // domain trigger terms for each input document ifstream inFileSource(m_filePathSource.c_str()); diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h index a817c2d6c..53bf27628 100644 --- a/moses/FF/PhrasePairFeature.h +++ b/moses/FF/PhrasePairFeature.h @@ -44,7 +44,7 @@ class PhrasePairFeature: public StatelessFeatureFunction public: PhrasePairFeature(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); void SetParameter(const std::string& key, const std::string& value); bool IsUseable(const FactorMask &mask) const; diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp index 8a18e0a28..38583e0ff 100644 --- a/moses/FF/RuleScope.cpp +++ b/moses/FF/RuleScope.cpp @@ -14,11 +14,11 @@ RuleScope::RuleScope(const std::string &line) { } -bool IsAmbiguous(const Word &word, bool sourceSyntax) -{ - const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal(); - return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal); -} +// bool IsAmbiguous(const Word &word, bool sourceSyntax) +// { +// const Word &inputDefaultNonTerminal = StaticData::Instance().GetInputDefaultNonTerminal(); +// return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal); +// } void RuleScope::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp index 1bdfe70c2..e1eed9df2 100644 --- a/moses/FF/SoftMatchingFeature.cpp +++ b/moses/FF/SoftMatchingFeature.cpp @@ -38,7 +38,7 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string bool SoftMatchingFeature::Load(const std::string& filePath) { - StaticData &staticData = StaticData::InstanceNonConst(); + StaticData &SD = StaticData::InstanceNonConst(); InputFileStream inStream(filePath); std::string line; @@ -52,14 +52,14 @@ bool SoftMatchingFeature::Load(const std::string& filePath) } Word LHS, RHS; - LHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[0], true); - RHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), tokens[1], true); + LHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[0], true); + RHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[1], true); m_softMatches[RHS[0]->GetId()].push_back(LHS); GetOrSetFeatureName(RHS, LHS); } - staticData.SetSoftMatches(m_softMatches); + SD.SetSoftMatches(m_softMatches); return true; } @@ -124,9 +124,10 @@ const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, con boost::unique_lock<boost::shared_mutex> lock(m_accessLock); #endif std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()]; - const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - std::string LHS_string = LHS.GetString(outputFactorOrder, false); - std::string RHS_string = RHS.GetString(outputFactorOrder, false); + const std::vector<FactorType> & oFactors + = StaticData::Instance().options()->output.factor_order; + std::string LHS_string = LHS.GetString(oFactors, false); + std::string RHS_string = RHS.GetString(oFactors, false); name = LHS_string + "->" + RHS_string; return name; } diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp index 3802162bc..7e6121879 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp @@ -88,8 +88,9 @@ void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key, } } -void SoftSourceSyntacticConstraintsFeature::Load() +void SoftSourceSyntacticConstraintsFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; // don't change the loading order! LoadSourceLabelSet(); if (!m_coreSourceLabelSetFile.empty()) { @@ -311,8 +312,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu std::vector<float> newScores(m_numScoreComponents,0); const TreeInput& treeInput = static_cast<const TreeInput&>(input); - const StaticData& staticData = StaticData::Instance(); - const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal(); + // const StaticData& staticData = StaticData::Instance(); + // const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal(); size_t nNTs = 1; bool treeInputMismatchLHSBinary = true; @@ -365,7 +366,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin(); treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) { - if (*treeInputLabelsIt != outputDefaultNonTerminal) { + if (*treeInputLabelsIt != m_options->syntax.output_default_non_terminal) { boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]); if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) { @@ -387,7 +388,7 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu for (NonTerminalSet::const_iterator treeInputLabelsIt = treeInputLabels.begin(); treeInputLabelsIt != treeInputLabels.end(); ++treeInputLabelsIt) { - if (*treeInputLabelsIt != outputDefaultNonTerminal) { + if (*treeInputLabelsIt != m_options->syntax.output_default_non_terminal) { boost::unordered_map<const Factor*,size_t>::const_iterator foundTreeInputLabel = m_sourceLabelIndexesByFactor.find((*treeInputLabelsIt)[0]); if (foundTreeInputLabel != m_sourceLabelIndexesByFactor.end()) { @@ -568,7 +569,9 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu } if ( treeInputLabelsLHS.size() == 0 ) { scoreBreakdown.PlusEquals(this, - "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + outputDefaultNonTerminal[0]->GetString().as_string(), + "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + + m_options->syntax.output_default_non_terminal[0] + ->GetString().as_string(), 1); if (!m_targetSourceLHSJointCountFile.empty()) { t2sLabelsScore = TransformScore(m_floor); diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h index 53b6c678e..e62189734 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h @@ -31,7 +31,7 @@ public: void SetParameter(const std::string& key, const std::string& value); - void Load(); + void Load(AllOptions::ptr const& opts); void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp index 148eb3013..22b160989 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp @@ -47,11 +47,12 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType const Word& lhsLabel = targetPhrase.GetTargetLHS(); const StaticData& staticData = StaticData::Instance(); - const Word& outputDefaultNonTerminal = staticData.GetOutputDefaultNonTerminal(); - std::vector<float> newScores(m_numScoreComponents,0.0); // m_numScoreComponents == 2 // first fires for matches, second for mismatches + std::vector<float> newScores(m_numScoreComponents,0.0); + // m_numScoreComponents == 2 // first fires for matches, second for mismatches - if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end()) && (lhsLabel != outputDefaultNonTerminal) ) { + if ( (treeInputLabels.find(lhsLabel) != treeInputLabels.end()) + && (lhsLabel != m_options->syntax.output_default_non_terminal) ) { // match newScores[0] = 1.0; } else { @@ -62,6 +63,13 @@ void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType scoreBreakdown.PlusEquals(this, newScores); } +void +SourceGHKMTreeInputMatchFeature:: +Load(AllOptions::ptr const& opts) +{ + m_options = opts; + // m_output_default_nonterminal = opts->syntax.output_default_non_terminal; +} } diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h index b67649b26..403dca716 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.h +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h @@ -1,6 +1,7 @@ #pragma once #include "StatelessFeatureFunction.h" +#include "moses/parameters/AllOptions.h" namespace Moses { @@ -40,6 +41,7 @@ public: void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {}; + void Load(AllOptions::ptr const& opts); }; diff --git a/moses/FF/SourceWordDeletionFeature.cpp b/moses/FF/SourceWordDeletionFeature.cpp index f8103e1ee..e4b1babec 100644 --- a/moses/FF/SourceWordDeletionFeature.cpp +++ b/moses/FF/SourceWordDeletionFeature.cpp @@ -36,8 +36,9 @@ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std:: } } -void SourceWordDeletionFeature::Load() +void SourceWordDeletionFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if (m_filename.empty()) return; diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h index a290b83de..0257621ea 100644 --- a/moses/FF/SourceWordDeletionFeature.h +++ b/moses/FF/SourceWordDeletionFeature.h @@ -23,7 +23,7 @@ private: public: SourceWordDeletionFeature(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const; diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp index b440c08b4..a6f3249e6 100644 --- a/moses/FF/TargetBigramFeature.cpp +++ b/moses/FF/TargetBigramFeature.cpp @@ -48,8 +48,9 @@ void TargetBigramFeature::SetParameter(const std::string& key, const std::string } } -void TargetBigramFeature::Load() +void TargetBigramFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if (m_filePath == "*") return ; //allow all ifstream inFile(m_filePath.c_str()); diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index eacd27656..a12f3d25d 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -34,7 +34,7 @@ class TargetBigramFeature : public StatefulFeatureFunction public: TargetBigramFeature(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const; diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp index d181a8a37..42d014f85 100644 --- a/moses/FF/TargetNgramFeature.cpp +++ b/moses/FF/TargetNgramFeature.cpp @@ -74,8 +74,9 @@ void TargetNgramFeature::SetParameter(const std::string& key, const std::string& } } -void TargetNgramFeature::Load() +void TargetNgramFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if (m_file == "") return; //allow all, for now if (m_file == "*") return; //allow all diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 0a4b4aa25..830a73657 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -203,7 +203,7 @@ class TargetNgramFeature : public StatefulFeatureFunction public: TargetNgramFeature(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); bool IsUseable(const FactorMask &mask) const; diff --git a/moses/FF/TargetWordInsertionFeature.cpp b/moses/FF/TargetWordInsertionFeature.cpp index 73dbcd539..dd097f8e3 100644 --- a/moses/FF/TargetWordInsertionFeature.cpp +++ b/moses/FF/TargetWordInsertionFeature.cpp @@ -34,8 +34,9 @@ void TargetWordInsertionFeature::SetParameter(const std::string& key, const std: } } -void TargetWordInsertionFeature::Load() +void TargetWordInsertionFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; if (m_filename.empty()) return; diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h index b30e4302c..d06f32481 100644 --- a/moses/FF/TargetWordInsertionFeature.h +++ b/moses/FF/TargetWordInsertionFeature.h @@ -25,7 +25,7 @@ public: bool IsUseable(const FactorMask &mask) const; - void Load(); + void Load(AllOptions::ptr const& opts); virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp index 75b6d8b57..b83ef81ea 100644 --- a/moses/FF/TreeStructureFeature.cpp +++ b/moses/FF/TreeStructureFeature.cpp @@ -8,8 +8,9 @@ namespace Moses { -void TreeStructureFeature::Load() +void TreeStructureFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; // syntactic constraints can be hooked in here. m_constraints = NULL; diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index 353328466..366b84fd2 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -74,7 +74,7 @@ public: int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; - void Load(); + void Load(AllOptions::ptr const& opts); }; diff --git a/moses/FF/VW/TrainingLoss.h b/moses/FF/VW/TrainingLoss.h index daf914a89..16571e66e 100644 --- a/moses/FF/VW/TrainingLoss.h +++ b/moses/FF/VW/TrainingLoss.h @@ -72,7 +72,7 @@ private: std::string MakeNGram(const TargetPhrase &phrase, size_t start, size_t end) const { std::vector<std::string> words; while (start != end) { - words.push_back(phrase.GetWord(start).GetString(StaticData::Instance().GetOutputFactorOrder(), false)); + words.push_back(phrase.GetWord(start).GetString(StaticData::Instance().options().output.factor_order, false)); start++; } return Join(" ", words); diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h index ba5406df5..75cea1a65 100644 --- a/moses/FF/VW/VW.h +++ b/moses/FF/VW/VW.h @@ -168,7 +168,7 @@ public: const std::vector<VWFeatureBase*>& targetFeatures = VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription()); - const WordsRange &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange(); + const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange(); const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath(); if (m_train) { @@ -323,7 +323,7 @@ public: Phrase *target = new Phrase(); target->CreateFromString( Output - , StaticData::Instance().GetOutputFactorOrder() + , StaticData::Instance().options().output.factor_order , tabbedSentence.GetColumns()[0] , NULL); diff --git a/moses/FF/VW/VWFeatureBase.h b/moses/FF/VW/VWFeatureBase.h index 29b689af8..c8bd60a81 100644 --- a/moses/FF/VW/VWFeatureBase.h +++ b/moses/FF/VW/VWFeatureBase.h @@ -81,7 +81,7 @@ public: // source sentence word range. virtual void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const = 0; // Overload to process target-dependent features, create features once for diff --git a/moses/FF/VW/VWFeatureSourceBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h index 12bcaecb8..97a1cc6c3 100644 --- a/moses/FF/VW/VWFeatureSourceBagOfWords.h +++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h @@ -19,7 +19,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { for (size_t i = 0; i < input.GetSize(); i++) { classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)); diff --git a/moses/FF/VW/VWFeatureSourceBigrams.h b/moses/FF/VW/VWFeatureSourceBigrams.h index 9d853d938..ce5430ab8 100644 --- a/moses/FF/VW/VWFeatureSourceBigrams.h +++ b/moses/FF/VW/VWFeatureSourceBigrams.h @@ -19,7 +19,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { for (size_t i = 1; i < input.GetSize(); i++) { classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)); diff --git a/moses/FF/VW/VWFeatureSourceExternalFeatures.h b/moses/FF/VW/VWFeatureSourceExternalFeatures.h index 4596f7106..bacc5d231 100644 --- a/moses/FF/VW/VWFeatureSourceExternalFeatures.h +++ b/moses/FF/VW/VWFeatureSourceExternalFeatures.h @@ -24,7 +24,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { const Features& features = *m_tls.GetStored(); for (size_t i = 0; i < features.size(); i++) { diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h index 784f2657e..fda929f13 100644 --- a/moses/FF/VW/VWFeatureSourceIndicator.h +++ b/moses/FF/VW/VWFeatureSourceIndicator.h @@ -21,7 +21,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { size_t begin = sourceRange.GetStartPos(); size_t end = sourceRange.GetEndPos() + 1; diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h index 6b6f6f933..4e7f6e8d1 100644 --- a/moses/FF/VW/VWFeatureSourcePhraseInternal.h +++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h @@ -21,7 +21,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { size_t begin = sourceRange.GetStartPos(); size_t end = sourceRange.GetEndPos() + 1; diff --git a/moses/FF/VW/VWFeatureSourceSenseWindow.h b/moses/FF/VW/VWFeatureSourceSenseWindow.h index 5add76c09..614f7ff52 100644 --- a/moses/FF/VW/VWFeatureSourceSenseWindow.h +++ b/moses/FF/VW/VWFeatureSourceSenseWindow.h @@ -52,7 +52,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { int begin = sourceRange.GetStartPos(); int end = sourceRange.GetEndPos() + 1; diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h index 844b7efb1..5205e4f2f 100644 --- a/moses/FF/VW/VWFeatureSourceWindow.h +++ b/moses/FF/VW/VWFeatureSourceWindow.h @@ -21,7 +21,7 @@ public: void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { int begin = sourceRange.GetStartPos(); int end = sourceRange.GetEndPos() + 1; diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h index d56306aa8..2935b2b4e 100644 --- a/moses/FF/VW/VWFeatureTarget.h +++ b/moses/FF/VW/VWFeatureTarget.h @@ -24,7 +24,7 @@ public: virtual void operator()(const InputType &input , const InputPath &inputPath - , const WordsRange &sourceRange + , const Range &sourceRange , Discriminative::Classifier &classifier) const { } diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp index 2231a2941..6fcffff1a 100644 --- a/moses/FF/WordTranslationFeature.cpp +++ b/moses/FF/WordTranslationFeature.cpp @@ -87,8 +87,9 @@ void WordTranslationFeature::SetParameter(const std::string& key, const std::str } } -void WordTranslationFeature::Load() +void WordTranslationFeature::Load(AllOptions::ptr const& opts) { + m_options = opts; // load word list for restricted feature set if (m_filePathSource.empty()) { return; diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index 85d1710ce..b3c3c18e2 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -40,7 +40,7 @@ public: void SetParameter(const std::string& key, const std::string& value); bool IsUseable(const FactorMask &mask) const; - void Load(); + void Load(AllOptions::ptr const& opts); void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath diff --git a/moses/ForestInput.cpp b/moses/ForestInput.cpp index 57b8fa472..68fec17ed 100644 --- a/moses/ForestInput.cpp +++ b/moses/ForestInput.cpp @@ -18,9 +18,7 @@ namespace Moses //! populate this InputType with data from in stream int ForestInput:: -Read(std::istream &in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts) +Read(std::istream &in) { using Syntax::F2S::Forest; @@ -48,7 +46,7 @@ Read(std::istream &in, std::getline(in, line); } else { do { - ParseHyperedgeLine(line, factorOrder); + ParseHyperedgeLine(line); std::getline(in, line); } while (line != ""); } @@ -58,7 +56,7 @@ Read(std::istream &in, // not sure ForestInput needs to. std::stringstream strme; strme << "<s> " << sentence << " </s>" << std::endl; - Sentence::Read(strme, factorOrder, opts); + Sentence::Read(strme); // Find the maximum end position of any vertex (0 if forest is empty). std::size_t maxEnd = FindMaxEnd(*m_forest); @@ -70,6 +68,9 @@ Read(std::istream &in, assert(topVertices.size() >= 1); } + + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; + // Add <s> vertex. Forest::Vertex *startSymbol = NULL; { @@ -122,7 +123,9 @@ Read(std::istream &in, return 1; } -Syntax::F2S::Forest::Vertex *ForestInput::AddOrDeleteVertex(Forest::Vertex *v) +Syntax::F2S::Forest::Vertex* +ForestInput:: +AddOrDeleteVertex(Forest::Vertex *v) { std::pair<VertexSet::iterator, bool> ret = m_vertexSet.insert(v); if (ret.second) { @@ -172,14 +175,16 @@ void ForestInput::FindTopVertices(Forest &forest, std::back_inserter(topVertices)); } -void ForestInput::ParseHyperedgeLine( - const std::string &line, const std::vector<FactorType>& factorOrder) +void +ForestInput:: +ParseHyperedgeLine(const std::string &line) { + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; using Syntax::F2S::Forest; const util::AnyCharacter delimiter(" \t"); util::TokenIter<util::AnyCharacter, true> p(line, delimiter); - Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p, factorOrder)); + Forest::Vertex *v = AddOrDeleteVertex(ParseVertex(*p)); Forest::Hyperedge *e = new Forest::Hyperedge(); e->head = v; ++p; @@ -188,7 +193,7 @@ void ForestInput::ParseHyperedgeLine( //throw Exception(""); } for (++p; *p != "|||"; ++p) { - v = ParseVertex(*p, factorOrder); + v = ParseVertex(*p); if (!v->pvertex.symbol.IsNonTerminal()) { // Egret does not give start/end for terminals. v->pvertex.span = Range(e->head->pvertex.span.GetStartPos(), @@ -203,11 +208,11 @@ void ForestInput::ParseHyperedgeLine( e->head->incoming.push_back(e); } -Syntax::F2S::Forest::Vertex *ForestInput::ParseVertex( - const StringPiece &s, const std::vector<FactorType>& factorOrder) +Syntax::F2S::Forest::Vertex* +ForestInput::ParseVertex(const StringPiece &s) { using Syntax::F2S::Forest; - + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; Word symbol; std::size_t pos = s.rfind('['); if (pos == std::string::npos) { diff --git a/moses/ForestInput.h b/moses/ForestInput.h index 5080b3f5f..88d2ecbbd 100644 --- a/moses/ForestInput.h +++ b/moses/ForestInput.h @@ -21,7 +21,7 @@ class ForestInput : public Sentence public: friend std::ostream &operator<<(std::ostream&, const ForestInput &); - ForestInput() : Sentence(), m_rootVertex(NULL) {} + ForestInput(AllOptions::ptr const& opts) : Sentence(opts), m_rootVertex(NULL) {} InputTypeEnum GetType() const { return ForestInputType; @@ -29,9 +29,7 @@ public: //! populate this InputType with data from in stream virtual int - Read(std::istream& in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts); + Read(std::istream& in); //! Output debugging info to stream out virtual void Print(std::ostream&) const; @@ -76,11 +74,9 @@ private: void FindTopVertices(Forest &, std::vector<Forest::Vertex *> &); - void ParseHyperedgeLine(const std::string &, - const std::vector<FactorType> &); + void ParseHyperedgeLine(const std::string &); - Forest::Vertex *ParseVertex(const StringPiece &, - const std::vector<FactorType> &); + Forest::Vertex *ParseVertex(const StringPiece &); boost::shared_ptr<Forest> m_forest; Forest::Vertex *m_rootVertex; diff --git a/moses/GenerationDictionary.cpp b/moses/GenerationDictionary.cpp index 40ff28177..29a4fa2b3 100644 --- a/moses/GenerationDictionary.cpp +++ b/moses/GenerationDictionary.cpp @@ -44,8 +44,9 @@ GenerationDictionary::GenerationDictionary(const std::string &line) ReadParameters(); } -void GenerationDictionary::Load() +void GenerationDictionary::Load(AllOptions::ptr const& opts) { + m_options = opts; FactorCollection &factorCollection = FactorCollection::Instance(); const size_t numFeatureValuesInConfig = this->GetNumScoreComponents(); diff --git a/moses/GenerationDictionary.h b/moses/GenerationDictionary.h index 82aa82426..11ebd5e27 100644 --- a/moses/GenerationDictionary.h +++ b/moses/GenerationDictionary.h @@ -62,7 +62,7 @@ public: virtual ~GenerationDictionary(); //! load data file - void Load(); + void Load(AllOptions::ptr const& opts); /** number of unique input entries in the generation table. * NOT the number of lines in the generation table diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp index 373c2109f..9f94a6cf3 100644 --- a/moses/HypergraphOutput.cpp +++ b/moses/HypergraphOutput.cpp @@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { ChartHypothesis &mainHypo = **iter; - if (StaticData::Instance().options().output.DontPruneSearchGraph || + if (StaticData::Instance().options()->output.DontPruneSearchGraph || reachable.find(mainHypo.GetId()) != reachable.end()) { (*m_out) << m_lineNumber << " " << mainHypo << endl; } @@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { const ChartHypothesis* mainHypo = *iter; - if (!StaticData::Instance().options().output.DontPruneSearchGraph && + if (!StaticData::Instance().options()->output.DontPruneSearchGraph && reachable.find(mainHypo->GetId()) == reachable.end()) { //Ignore non reachable nodes continue; diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index 879acc52f..0dc591ab3 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -337,57 +337,22 @@ GetTargetPhraseStringRep() const return GetTargetPhraseStringRep(allFactors); } -void +size_t Hypothesis:: -OutputAlignment(std::ostream &out, WordAlignmentSort sortOrder) const +OutputAlignment(std::ostream &out, bool recursive=true) const { - std::vector<const Hypothesis *> edges; - const Hypothesis *currentHypo = this; - while (currentHypo) { - edges.push_back(currentHypo); - currentHypo = currentHypo->GetPrevHypo(); - } - - OutputAlignment(out, edges, sortOrder); - -} - -void -Hypothesis:: -OutputAlignment(ostream &out, - vector<const Hypothesis *> const& edges, - WordAlignmentSort waso) -{ - size_t targetOffset = 0; - - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; - const TargetPhrase &tp = edge.GetCurrTargetPhrase(); - size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - - OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso); - - targetOffset += tp.GetSize(); - } - // Used by --print-alignment-info, so no endl -} - -void -Hypothesis:: -OutputAlignment(ostream &out, const AlignmentInfo &ai, - size_t sourceOffset, size_t targetOffset, - WordAlignmentSort waso) -{ - typedef std::vector< const std::pair<size_t,size_t>* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(waso); - - AlignVec::const_iterator it; - for (it = alignments.begin(); it != alignments.end(); ++it) { - const std::pair<size_t,size_t> &alignment = **it; - out << alignment.first + sourceOffset << "-" - << alignment.second + targetOffset << " "; - } - + WordAlignmentSort const& waso = m_manager.options()->output.WA_SortOrder; + TargetPhrase const& tp = GetCurrTargetPhrase(); + + // call with head recursion to output things in the right order + size_t trg_off = recursive && m_prevHypo ? m_prevHypo->OutputAlignment(out) : 0; + size_t src_off = GetCurrSourceWordsRange().GetStartPos(); + + typedef std::pair<size_t,size_t> const* entry; + std::vector<entry> alnvec = tp.GetAlignTerm().GetSortedAlignments(waso); + BOOST_FOREACH(entry e, alnvec) + out << e->first + src_off << "-" << e->second + trg_off << " "; + return trg_off + tp.GetSize(); } void @@ -411,102 +376,6 @@ OutputInput(std::ostream& os) const if (inp_phrases[i]) os << *inp_phrases[i]; } -void -Hypothesis:: -OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder, - const ReportingOptions &options) const -{ - if (m_prevHypo) { - // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence - m_prevHypo->OutputBestSurface(out, outputFactorOrder, options); - } - OutputSurface(out, *this, outputFactorOrder, options); -} - -////////////////////////////////////////////////////////////////////////// -/*** - * print surface factor only for the given phrase - */ -void -Hypothesis:: -OutputSurface(std::ostream &out, const Hypothesis &edge, - const std::vector<FactorType> &outputFactorOrder, - const ReportingOptions &options) const -{ - UTIL_THROW_IF2(outputFactorOrder.size() == 0, - "Must specific at least 1 output factor"); - const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); - // TODO: slay the rest of StaticData here and move stuff into ReportingOptions - bool markUnknown = GetManager().options().unk.mark; - bool featureLabels = StaticData::Instance().options().nbest.include_feature_labels; - if (options.ReportAllFactors == true) { - out << phrase; - } else { - FactorType placeholderFactor - = StaticData::Instance().options().input.placeholder_factor; - - std::map<size_t, const Factor*> placeholders; - if (placeholderFactor != NOT_FOUND) { - // creates map of target position -> factor for placeholders - placeholders = GetPlaceholders(edge, placeholderFactor); - } - - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - - if (placeholders.size()) { - // do placeholders - std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); - if (iter != placeholders.end()) { - factor = iter->second; - } - } - - UTIL_THROW_IF2(factor == NULL, - "No factor 0 at position " << pos); - - //preface surface form with UNK if marking unknowns - const Word &word = phrase.GetWord(pos); - if(markUnknown && word.IsOOV()) { - out << GetManager().options().unk.prefix << *factor - << GetManager().options().unk.suffix; - } else { - out << *factor; - } - - for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); - UTIL_THROW_IF2(factor == NULL, - "No factor " << i << " at position " << pos); - - out << "|" << *factor; - } - out << " "; - } - } - - // trace ("report segmentation") option "-t" / "-tt" - if (options.ReportSegmentation > 0 && phrase.GetSize() > 0) { - const Range &sourceRange = edge.GetCurrSourceWordsRange(); - const int sourceStart = sourceRange.GetStartPos(); - const int sourceEnd = sourceRange.GetEndPos(); - out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" - if (options.ReportSegmentation == 2) { - out << ",wa="; - const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); - Hypothesis::OutputAlignment(out, ai, 0, 0, options.WA_SortOrder); - out << ",total="; - out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); - out << ","; - ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); - scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out, featureLabels); - } - out << "| "; - } -} - std::map<size_t, const Factor*> Hypothesis:: GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h index 129d241f3..82c1fca79 100644 --- a/moses/Hypothesis.h +++ b/moses/Hypothesis.h @@ -209,8 +209,8 @@ public: return m_arcList; } const ScoreComponentCollection& GetScoreBreakdown() const { - if (!m_scoreBreakdown.get()) { - m_scoreBreakdown.reset(new ScoreComponentCollection()); + if (!m_scoreBreakdown) { + m_scoreBreakdown.reset(new ScoreComponentCollection); m_scoreBreakdown->PlusEquals(m_currScoreBreakdown); if (m_prevHypo) { m_scoreBreakdown->PlusEquals(m_prevHypo->GetScoreBreakdown()); @@ -239,26 +239,11 @@ public: return m_transOpt; } - void - OutputAlignment(std::ostream &out, WordAlignmentSort sortOrder) const; - - static void - OutputAlignment(std::ostream &out, - const std::vector<const Hypothesis *> &edges, - WordAlignmentSort waso); - - static void - OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, - size_t sourceOffset, size_t targetOffset, - WordAlignmentSort waso); + size_t OutputAlignment(std::ostream &out, bool recursive) const; void OutputInput(std::ostream& os) const; static void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo); - void OutputBestSurface(std::ostream &out, const std::vector<Moses::FactorType> &outputFactorOrder, const ReportingOptions &options) const; - void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, - const ReportingOptions &options) const; - // creates a map of TARGET positions which should be replaced by word using placeholder std::map<size_t, const Moses::Factor*> GetPlaceholders(const Moses::Hypothesis &hypo, Moses::FactorType placeholderFactor) const; diff --git a/moses/HypothesisStackCubePruning.cpp b/moses/HypothesisStackCubePruning.cpp index 8094925ae..1f86e0fa0 100644 --- a/moses/HypothesisStackCubePruning.cpp +++ b/moses/HypothesisStackCubePruning.cpp @@ -36,10 +36,10 @@ namespace Moses HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) : HypothesisStack(manager) { - m_nBestIsEnabled = manager.options().nbest.enabled; + m_nBestIsEnabled = manager.options()->nbest.enabled; m_bestScore = -std::numeric_limits<float>::infinity(); m_worstScore = -std::numeric_limits<float>::infinity(); - m_deterministic = manager.options().cube.deterministic_search; + m_deterministic = manager.options()->cube.deterministic_search; } /** remove all hypotheses from the collection */ @@ -244,7 +244,7 @@ void HypothesisStackCubePruning::CleanupArcList() iterator iter; for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { Hypothesis *mainHypo = *iter; - mainHypo->CleanupArcList(this->m_manager.options().nbest.nbest_size, this->m_manager.options().NBestDistinct()); + mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct()); } } diff --git a/moses/HypothesisStackNormal.cpp b/moses/HypothesisStackNormal.cpp index 5a0382029..d2ccdb52f 100644 --- a/moses/HypothesisStackNormal.cpp +++ b/moses/HypothesisStackNormal.cpp @@ -25,7 +25,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "HypothesisStackNormal.h" #include "TypeDef.h" #include "Util.h" -#include "StaticData.h" #include "Manager.h" #include "util/exception.hh" @@ -36,7 +35,7 @@ namespace Moses HypothesisStackNormal::HypothesisStackNormal(Manager& manager) : HypothesisStack(manager) { - m_nBestIsEnabled = manager.options().nbest.enabled; + m_nBestIsEnabled = manager.options()->nbest.enabled; m_bestScore = -std::numeric_limits<float>::infinity(); m_worstScore = -std::numeric_limits<float>::infinity(); } @@ -75,8 +74,12 @@ pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesi // prune only if stack is twice as big as needed (lazy pruning) size_t toleratedSize = 2*m_maxHypoStackSize-1; // add in room for stack diversity - if (m_minHypoStackDiversity) - toleratedSize += m_minHypoStackDiversity << StaticData::Instance().GetMaxDistortion(); + if (m_minHypoStackDiversity) { + // so what happens if maxdistortion is negative? + toleratedSize += m_minHypoStackDiversity + << m_manager.options()->reordering.max_distortion; + } + if (m_hypos.size() > toleratedSize) { PruneToSize(m_maxHypoStackSize); } else { @@ -97,8 +100,8 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo) } // too bad for stack. don't bother adding hypo into collection - if (!StaticData::Instance().GetDisableDiscarding() && - hypo->GetFutureScore() < m_worstScore + if (m_manager.options()->search.disable_discarding == false + && hypo->GetFutureScore() < m_worstScore && ! ( m_minHypoStackDiversity > 0 && hypo->GetFutureScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) ) { m_manager.GetSentenceStats().AddDiscarded(); @@ -266,7 +269,7 @@ void HypothesisStackNormal::CleanupArcList() iterator iter; for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { Hypothesis *mainHypo = *iter; - mainHypo->CleanupArcList(this->m_manager.options().nbest.nbest_size, this->m_manager.options().NBestDistinct()); + mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct()); } } diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index 297391a99..8049a2893 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -63,8 +63,9 @@ using namespace std; namespace Moses { -IOWrapper::IOWrapper() - : m_nBestStream(NULL) +IOWrapper::IOWrapper(AllOptions const& opts) + : m_options(new AllOptions(opts)) + , m_nBestStream(NULL) , m_surpressSingleBestOutput(false) , m_look_ahead(0) , m_look_back(0) @@ -77,20 +78,19 @@ IOWrapper::IOWrapper() Parameter const& P = staticData.GetParameter(); // context buffering for context-sensitive decoding - m_look_ahead = staticData.options().context.look_ahead; - m_look_back = staticData.options().context.look_back; - - m_inputType = staticData.options().input.input_type; + m_look_ahead = staticData.options()->context.look_ahead; + m_look_back = staticData.options()->context.look_back; + m_inputType = staticData.options()->input.input_type; UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput, "Context-sensitive decoding currently works only with sentence input."); m_currentLine = staticData.GetStartTranslationId(); - m_inputFactorOrder = &staticData.GetInputFactorOrder(); + m_inputFactorOrder = &staticData.options()->input.factor_order; - size_t nBestSize = staticData.options().nbest.nbest_size; - string nBestFilePath = staticData.options().nbest.output_file_path; + size_t nBestSize = staticData.options()->nbest.nbest_size; + string nBestFilePath = staticData.options()->nbest.output_file_path; staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", ""); if (m_inputFilePath.empty()) { @@ -129,8 +129,8 @@ IOWrapper::IOWrapper() P.SetParameter<string>(path, "output-word-graph", ""); if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path)); - size_t latticeSamplesSize = staticData.GetLatticeSamplesSize(); - string latticeSamplesFile = staticData.GetLatticeSamplesFilePath(); + size_t latticeSamplesSize = staticData.options()->output.lattice_sample_size; + string latticeSamplesFile = staticData.options()->output.lattice_sample_filepath; if (latticeSamplesSize) { m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile)); if (m_latticeSamplesCollector->OutputIsCout()) { diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index 02c3470bb..1301bc087 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -61,8 +61,10 @@ POSSIBILITY OF SUCH DAMAGE. #include "moses/LatticeMBR.h" #include "moses/ChartKBestExtractor.h" #include "moses/Syntax/KBestExtractor.h" +#include "moses/parameters/AllOptions.h" #include <boost/format.hpp> +#include <boost/shared_ptr.hpp> namespace Moses { @@ -81,6 +83,7 @@ struct SHyperedge; class IOWrapper { protected: + boost::shared_ptr<AllOptions const> m_options; const std::vector<Moses::FactorType> *m_inputFactorOrder; std::string m_inputFilePath; Moses::InputFileStream *m_inputFile; @@ -124,7 +127,7 @@ protected: std::string m_hypergraph_output_filepattern; public: - IOWrapper(); + IOWrapper(AllOptions const& opts); ~IOWrapper(); // Moses::InputType* GetInput(Moses::InputType *inputType); @@ -216,7 +219,6 @@ boost::shared_ptr<InputType> IOWrapper:: BufferInput() { - AllOptions const& opts = StaticData::Instance().options(); boost::shared_ptr<itype> source; boost::shared_ptr<InputType> ret; if (m_future_input.size()) { @@ -224,14 +226,14 @@ BufferInput() m_future_input.pop_front(); m_buffered_ahead -= ret->GetSize(); } else { - source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) + source.reset(new itype(m_options)); + if (!source->Read(*m_inputStream)) return ret; ret = source; } while (m_buffered_ahead < m_look_ahead) { - source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) + source.reset(new itype(m_options)); + if (!source->Read(*m_inputStream)) break; m_future_input.push_back(source); m_buffered_ahead += source->GetSize(); diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 07096fc18..d6f589b15 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -208,7 +208,7 @@ Manager::Manager(ttasksptr const& ttask) : BaseManager(ttask) , cells_(m_source, ChartCellBaseFactory(), parser_) , parser_(ttask, cells_) - , n_best_(search::NBestConfig(StaticData::Instance().options().nbest.nbest_size)) + , n_best_(search::NBestConfig(StaticData::Instance().options()->nbest.nbest_size)) { } Manager::~Manager() @@ -232,8 +232,8 @@ PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best & const StaticData &data = StaticData::Instance(); const float lm_weight = data.GetWeights(&abstract)[0]; const float oov_weight = abstract.OOVFeatureEnabled() ? data.GetWeights(&abstract)[1] : 0.0; - size_t cpl = data.options().cube.pop_limit; - size_t nbs = data.options().nbest.nbest_size; + size_t cpl = data.options()->cube.pop_limit; + size_t nbs = data.options()->nbest.nbest_size; search::Config config(lm_weight * log_10, cpl, search::NBestConfig(nbs)); search::Context<Model> context(config, model); @@ -261,7 +261,7 @@ PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best & template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words) { - std::size_t nbest = StaticData::Instance().options().nbest.nbest_size; + std::size_t nbest = StaticData::Instance().options()->nbest.nbest_size; if (nbest <= 1) { search::History ret = PopulateBest(model, words, single_best_); if (ret) { @@ -329,7 +329,7 @@ OutputNBestList(OutputCollector *collector, { const StaticData &staticData = StaticData::Instance(); const std::vector<Moses::FactorType> &outputFactorOrder - = staticData.GetOutputFactorOrder(); + = options()->output.factor_order; std::ostringstream out; // wtf? copied from the original OutputNBestList @@ -351,7 +351,7 @@ OutputNBestList(OutputCollector *collector, out << translationId << " ||| "; OutputSurface(out, outputPhrase); // , outputFactorOrder, false); out << " ||| "; - bool with_labels = options().nbest.include_feature_labels; + bool with_labels = options()->nbest.include_feature_labels; features.OutputAllFeatureScores(out, with_labels); out << " ||| " << i->GetScore() << '\n'; } @@ -509,7 +509,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied if (collector == NULL) return; std::ostringstream out; FixPrecision(out); - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << applied.GetScore() << ' '; } Phrase outPhrase; @@ -519,7 +519,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outPhrase.RemoveWord(0); outPhrase.RemoveWord(outPhrase.GetSize() - 1); - out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); + out << outPhrase.GetStringRep(options()->output.factor_order); out << '\n'; collector->Write(translationId, out.str()); @@ -531,7 +531,7 @@ Manager:: OutputBestNone(OutputCollector *collector, long translationId) const { if (collector == NULL) return; - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { collector->Write(translationId, "0 \n"); } else { collector->Write(translationId, "\n"); diff --git a/moses/InputType.cpp b/moses/InputType.cpp index db269ea8b..34894523f 100644 --- a/moses/InputType.cpp +++ b/moses/InputType.cpp @@ -29,8 +29,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA namespace Moses { -InputType::InputType(long translationId) - : m_translationId(translationId) +InputType::InputType(AllOptions::ptr const& opts, long translationId) + : m_options(opts) + , m_translationId(translationId) + , m_reorderingConstraint(opts->reordering.max_distortion) { m_frontSpanCoveredLength = 0; m_sourceCompleted.resize(0); diff --git a/moses/InputType.h b/moses/InputType.h index 9501f8c8a..80080a3f2 100644 --- a/moses/InputType.h +++ b/moses/InputType.h @@ -45,6 +45,7 @@ class TranslationTask; class InputType { protected: + AllOptions::ptr m_options; long m_translationId; //< contiguous Id long m_documentId; long m_topicId; @@ -67,11 +68,15 @@ public: size_t m_frontSpanCoveredLength; // how many words from the beginning are covered - InputType(long translationId = 0); + InputType(AllOptions::ptr const& opts, long translationId = 0); virtual ~InputType(); virtual InputTypeEnum GetType() const = 0; + AllOptions::ptr const& options() const { + return m_options; + } + long GetTranslationId() const { return m_translationId; } @@ -185,9 +190,10 @@ public: //! populate this InputType with data from in stream virtual int - Read(std::istream& in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts) =0; + Read(std::istream& in) = 0; + // , + // std::vector<FactorType> const& factorOrder, + // AllOptions const& opts) =0; //! Output debugging info to stream out virtual void Print(std::ostream&) const =0; diff --git a/moses/Jamfile b/moses/Jamfile index 9349d98f9..f7e05b85b 100644 --- a/moses/Jamfile +++ b/moses/Jamfile @@ -105,7 +105,7 @@ lib moses : TranslationModel/RuleTable/*.cpp TranslationModel/Scope3Parser/*.cpp TranslationModel/CYKPlusParser/*.cpp - ../phrase-extract/extract-ghkm/PhraseOrientation.cpp + ../phrase-extract/PhraseOrientation.cpp FF/*.cpp FF/bilingual-lm/*.cpp FF/OSM-Feature/*.cpp diff --git a/moses/LM/BackwardTest.cpp b/moses/LM/BackwardTest.cpp index bd9c74379..ef45d6e60 100644 --- a/moses/LM/BackwardTest.cpp +++ b/moses/LM/BackwardTest.cpp @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/TypeDef.h" #include "moses/StaticData.h" +#include "moses/parameters/AllOptions.h" //#include "BackwardLMState.h" #include "moses/LM/Backward.h" @@ -61,12 +62,14 @@ namespace Moses // Apparently some Boost versions use templates and are pretty strict about types matching. #define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol)); +AllOptions::ptr DefaultOptions(new AllOptions); + class BackwardLanguageModelTest { public: BackwardLanguageModelTest() : - dummyInput(new Sentence), + dummyInput(new Sentence(DefaultOptions)), backwardLM( static_cast< BackwardLanguageModel<lm::ngram::ProbingModel> * >( ConstructBackwardLM( diff --git a/moses/LM/BilingualLM.cpp b/moses/LM/BilingualLM.cpp index f7c36a4e8..8fc88c597 100644 --- a/moses/LM/BilingualLM.cpp +++ b/moses/LM/BilingualLM.cpp @@ -20,8 +20,9 @@ BilingualLM::BilingualLM(const std::string &line) } -void BilingualLM::Load() +void BilingualLM::Load(AllOptions::ptr const& opts) { + m_options = opts; ReadParameters(); loadModel(); } diff --git a/moses/LM/BilingualLM.h b/moses/LM/BilingualLM.h index cb5075fd1..ed9d99489 100644 --- a/moses/LM/BilingualLM.h +++ b/moses/LM/BilingualLM.h @@ -117,7 +117,7 @@ public: return new BilingualLMState(0); } - void Load(); + void Load(AllOptions::ptr const& opts); FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index 60eee0250..ae21995a9 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -204,7 +204,7 @@ LanguageModelDALM::~LanguageModelDALM() delete m_lm; } -void LanguageModelDALM::Load() +void LanguageModelDALM::Load(AllOptions const& opts) { ///////////////////// // READING INIFILE // diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h index fe724e7c8..4898dd66c 100644 --- a/moses/LM/DALMWrapper.h +++ b/moses/LM/DALMWrapper.h @@ -28,7 +28,7 @@ public: LanguageModelDALM(const std::string &line); virtual ~LanguageModelDALM(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const; diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp index be0213c97..10fcdcd9f 100644 --- a/moses/LM/IRST.cpp +++ b/moses/LM/IRST.cpp @@ -96,7 +96,7 @@ bool LanguageModelIRST::IsUseable(const FactorMask &mask) const return ret; } -void LanguageModelIRST::Load() + void LanguageModelIRST::Load(AllOptions::ptr const& opts) { FactorCollection &factorCollection = FactorCollection::Instance(); diff --git a/moses/LM/IRST.h b/moses/LM/IRST.h index 820031faf..b4c080378 100644 --- a/moses/LM/IRST.h +++ b/moses/LM/IRST.h @@ -88,7 +88,7 @@ public: bool IsUseable(const FactorMask &mask) const; - void Load(); + void Load(AllOptions::ptr const& opts); const FFState *EmptyHypothesisState(const InputType &/*input*/) const; virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const; diff --git a/moses/LM/MaxEntSRI.cpp b/moses/LM/MaxEntSRI.cpp index 3e7f4df44..18fa4415f 100644 --- a/moses/LM/MaxEntSRI.cpp +++ b/moses/LM/MaxEntSRI.cpp @@ -66,7 +66,7 @@ LanguageModelMaxEntSRI::~LanguageModelMaxEntSRI() delete m_srilmVocab; } -void LanguageModelMaxEntSRI::Load() +void LanguageModelMaxEntSRI::Load(AllOptions const& opts) { m_srilmVocab = new ::Vocab(); m_srilmModel = new MEModel(*m_srilmVocab, m_nGramOrder); diff --git a/moses/LM/MaxEntSRI.h b/moses/LM/MaxEntSRI.h index c53a879b8..1f3004e0f 100644 --- a/moses/LM/MaxEntSRI.h +++ b/moses/LM/MaxEntSRI.h @@ -54,7 +54,7 @@ protected: public: LanguageModelMaxEntSRI(const std::string &line); ~LanguageModelMaxEntSRI(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const; }; diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp index 22ff90bb9..f19eb97c4 100644 --- a/moses/LM/NeuralLMWrapper.cpp +++ b/moses/LM/NeuralLMWrapper.cpp @@ -22,7 +22,7 @@ NeuralLMWrapper::~NeuralLMWrapper() } -void NeuralLMWrapper::Load() +void NeuralLMWrapper::Load(AllOptions const& opts) { // Set parameters required by ancestor classes diff --git a/moses/LM/NeuralLMWrapper.h b/moses/LM/NeuralLMWrapper.h index bd6635a7c..66a0278a9 100644 --- a/moses/LM/NeuralLMWrapper.h +++ b/moses/LM/NeuralLMWrapper.h @@ -27,7 +27,7 @@ public: virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const; - virtual void Load(); + virtual void Load(AllOptions::ptr const& opts); }; diff --git a/moses/LM/RDLM.cpp b/moses/LM/RDLM.cpp index 374274790..992233923 100644 --- a/moses/LM/RDLM.cpp +++ b/moses/LM/RDLM.cpp @@ -39,7 +39,7 @@ RDLM::~RDLM() delete lm_label_base_instance_; } -void RDLM::Load() +void RDLM::Load(AllOptions const& opts) { lm_head_base_instance_ = new nplm::neuralTM(); diff --git a/moses/LM/RDLM.h b/moses/LM/RDLM.h index 963c1e8d5..8fdc9d641 100644 --- a/moses/LM/RDLM.h +++ b/moses/LM/RDLM.h @@ -208,7 +208,7 @@ public: int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; - void Load(); + void Load(AllOptions::ptr const& opts); // Iterator-class that yields all children of a node; if child is virtual node of binarized tree, its children are yielded instead. class UnbinarizedChildren diff --git a/moses/LM/Rand.cpp b/moses/LM/Rand.cpp index edf06fd05..00474deee 100644 --- a/moses/LM/Rand.cpp +++ b/moses/LM/Rand.cpp @@ -52,7 +52,7 @@ LanguageModelRandLM::~LanguageModelRandLM() delete m_lm; } -void LanguageModelRandLM::Load() +void LanguageModelRandLM::Load(AllOptions const& opts) { cerr << "Loading LanguageModelRandLM..." << endl; FactorCollection &factorCollection = FactorCollection::Instance(); diff --git a/moses/LM/Rand.h b/moses/LM/Rand.h index caf367c8c..54b5738b6 100644 --- a/moses/LM/Rand.h +++ b/moses/LM/Rand.h @@ -39,7 +39,7 @@ public: LanguageModelRandLM(const std::string &line); ~LanguageModelRandLM(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const; void InitializeForInput(ttasksptr const& ttask); void CleanUpAfterSentenceProcessing(const InputType& source); diff --git a/moses/LM/SRI.cpp b/moses/LM/SRI.cpp index fb60a4adb..2741cfa1e 100644 --- a/moses/LM/SRI.cpp +++ b/moses/LM/SRI.cpp @@ -66,7 +66,7 @@ LanguageModelSRI::~LanguageModelSRI() delete m_srilmVocab; } -void LanguageModelSRI::Load() +void LanguageModelSRI::Load(AllOptions const& opts) { m_srilmVocab = new ::Vocab(); m_srilmModel = new Ngram(*m_srilmVocab, m_nGramOrder); diff --git a/moses/LM/SRI.h b/moses/LM/SRI.h index 12d5a9626..0a6139832 100644 --- a/moses/LM/SRI.h +++ b/moses/LM/SRI.h @@ -54,7 +54,7 @@ protected: public: LanguageModelSRI(const std::string &line); ~LanguageModelSRI(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const; }; diff --git a/moses/LM/oxlm/OxLM.cpp b/moses/LM/oxlm/OxLM.cpp index 8fde54713..963795bf3 100644 --- a/moses/LM/oxlm/OxLM.cpp +++ b/moses/LM/oxlm/OxLM.cpp @@ -70,7 +70,7 @@ void OxLM<Model>::SetParameter(const string& key, const string& value) } template<class Model> -void OxLM<Model>::Load() +void OxLM<Model>::Load(AllOptions const& opts) { model.load(m_filePath); diff --git a/moses/LM/oxlm/OxLM.h b/moses/LM/oxlm/OxLM.h index 4056ccab9..5c73cd6c7 100644 --- a/moses/LM/oxlm/OxLM.h +++ b/moses/LM/oxlm/OxLM.h @@ -24,7 +24,7 @@ public: void SetParameter(const std::string& key, const std::string& value); - void Load(); + void Load(AllOptions::ptr const& opts); virtual LMResult GetValue( const std::vector<const Word*> &contextFactor, diff --git a/moses/LatticeMBR.cpp b/moses/LatticeMBR.cpp index 77e99a722..c9c775bbe 100644 --- a/moses/LatticeMBR.cpp +++ b/moses/LatticeMBR.cpp @@ -515,8 +515,8 @@ void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList vector< float> estimatedScores; manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores); - LMBR_Options const& lmbr = manager.options().lmbr; - MBR_Options const& mbr = manager.options().mbr; + LMBR_Options const& lmbr = manager.options()->lmbr; + MBR_Options const& mbr = manager.options()->mbr; pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale); calcNgramExpectations(connectedList, incomingEdges, ngramPosteriors,true); @@ -577,8 +577,8 @@ const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathL map<const Hypothesis*, vector<Edge> > incomingEdges; vector< float> estimatedScores; manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores); - LMBR_Options const& lmbr = manager.options().lmbr; - MBR_Options const& mbr = manager.options().mbr; + LMBR_Options const& lmbr = manager.options()->lmbr; + MBR_Options const& mbr = manager.options()->mbr; pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), lmbr.pruning_factor, mbr.scale); calcNgramExpectations(connectedList, incomingEdges, ngramExpectations,false); diff --git a/moses/Manager.cpp b/moses/Manager.cpp index ce1fadddd..f4dc03584 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -47,7 +47,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/HypergraphOutput.h" #include "moses/mbr.h" #include "moses/LatticeMBR.h" - +#include "moses/SearchNormal.h" +#include "moses/SearchCubePruning.h" #include <boost/foreach.hpp> #ifdef HAVE_PROTOBUF @@ -72,10 +73,16 @@ Manager::Manager(ttasksptr const& ttask) boost::shared_ptr<InputType> source = ttask->GetSource(); m_transOptColl = source->CreateTranslationOptionCollection(ttask); - const StaticData &staticData = StaticData::Instance(); - SearchAlgorithm searchAlgorithm = staticData.options().search.algo; - m_search = Search::CreateSearch(*this, *source, searchAlgorithm, - *m_transOptColl); + switch(options()->search.algo) { + case Normal: + m_search = new SearchNormal(*this, *m_transOptColl); + break; + case CubePruning: + m_search = new SearchCubePruning(*this, *m_transOptColl); + break; + default: + UTIL_THROW2("ERROR: search. Aborting\n"); + } StaticData::Instance().InitializeForInput(ttask); } @@ -246,7 +253,7 @@ printThisHypothesis(long translationId, const Hypothesis* hypo, * \param count the number of n-best translations to produce * \param ret holds the n-best list that was calculated */ -void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) const +void Manager::CalcNBest(size_t count, TrellisPathList &ret, bool onlyDistinct) const { if (count <= 0) return; @@ -270,8 +277,9 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co contenders.Add(new TrellisPath(*iterBestHypo)); } - // factor defines stopping point for distinct n-best list if too many candidates identical - size_t nBestFactor = StaticData::Instance().options().nbest.factor; + // factor defines stopping point for distinct n-best list if too + // many candidates identical + size_t nBestFactor = options()->nbest.factor; if (nBestFactor < 1) nBestFactor = 1000; // 0 = unlimited // MAIN loop @@ -295,7 +303,7 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co if(onlyDistinct) { - const size_t nBestFactor = StaticData::Instance().options().nbest.factor; + const size_t nBestFactor = options()->nbest.factor; if (nBestFactor > 0) contenders.Prune(count * nBestFactor); } else { @@ -552,26 +560,9 @@ void Manager::OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothe } } - // lexicalised re-ordering - /* - const std::vector<LexicalReordering*> &lexOrderings = StaticData::Instance().GetReorderModels(); - std::vector<LexicalReordering*>::const_iterator iterLexOrdering; - for (iterLexOrdering = lexOrderings.begin() ; iterLexOrdering != lexOrderings.end() ; ++iterLexOrdering) { - LexicalReordering *lexicalReordering = *iterLexOrdering; - vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lexicalReordering); - - outputWordGraphStream << scores[0]; - vector<float>::const_iterator iterScore; - for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) { - outputWordGraphStream << ", " << *iterScore; - } - } - */ - // words !! -// outputWordGraphStream << "\tw=" << hypo->GetCurrTargetPhrase(); - // output both source and target phrases in the word graph - outputWordGraphStream << "\tw=" << hypo->GetSourcePhraseStringRep() << "|" << hypo->GetCurrTargetPhrase(); + outputWordGraphStream << "\tw=" << hypo->GetSourcePhraseStringRep() + << "|" << hypo->GetCurrTargetPhrase(); outputWordGraphStream << endl; } @@ -830,50 +821,24 @@ size_t Manager::OutputFeatureWeightsForSLF(size_t index, const FeatureFunction* } } -size_t Manager::OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypothesis* hypo, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const +size_t +Manager:: +OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypothesis* hypo, + const FeatureFunction* ff, std::ostream &out) const { - - // { const FeatureFunction* sp = ff; - // const FVector& m_scores = scoreCollection.GetScoresVector(); - // FVector& scores = const_cast<FVector&>(m_scores); - // std::string prefix = sp->GetScoreProducerDescription() + FName::SEP; - // // std::cout << "prefix==" << prefix << endl; - // // cout << "m_scores==" << m_scores << endl; - // // cout << "m_scores.size()==" << m_scores.size() << endl; - // // cout << "m_scores.coreSize()==" << m_scores.coreSize() << endl; - // // cout << "m_scores.cbegin() ?= m_scores.cend()\t" << (m_scores.cbegin() == m_scores.cend()) << endl; - - - // // for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) { - // // std::cout<<prefix << "\t" << (i->first) << "\t" << (i->second) << std::endl; - // // } - // for(int i=0, n=v.size(); i<n; i+=1) { - // // outputSearchGraphStream << prefix << i << "==" << v[i] << std::endl; - - // } - // } - - // FVector featureValues = scoreCollection.GetVectorForProducer(ff); - // outputSearchGraphStream << featureValues << endl; const ScoreComponentCollection& scoreCollection = hypo->GetScoreBreakdown(); - vector<float> featureValues = scoreCollection.GetScoresForProducer(ff); - size_t numScoreComps = featureValues.size();//featureValues.coreSize(); - // if (numScoreComps != ScoreProducer::unlimited) { - // vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); + size_t numScoreComps = featureValues.size(); for (size_t i = 0; i < numScoreComps; ++i) { - outputSearchGraphStream << "x" << (index+i) << "=" << ((zeros) ? 0.0 : featureValues[i]) << " "; - } - return index+numScoreComps; - // } else { - // cerr << "Sparse features are not supported when outputting HTK standard lattice format" << endl; - // assert(false); - // return 0; - // } + out << "x" << (index+i) << "=" << ((zeros) ? 0.0 : featureValues[i]) << " "; + } + return index + numScoreComps; } /**! Output search graph in hypergraph format of Kenneth Heafield's lazy hypergraph decoder */ -void Manager::OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const +void +Manager:: +OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const { VERBOSE(2,"Getting search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl) @@ -1118,22 +1083,23 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea } + void OutputSearchNode(AllOptions const& opts, long translationId, - std::ostream &outputSearchGraphStream, + std::ostream &out, SearchGraphNode const& searchNode) { - const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); + const vector<FactorType> &outputFactorOrder = opts.output.factor_order; bool extendedFormat = opts.output.SearchGraphExtended.size(); - outputSearchGraphStream << translationId; + out << translationId; // special case: initial hypothesis if ( searchNode.hypo->GetId() == 0 ) { - outputSearchGraphStream << " hyp=0 stack=0"; + out << " hyp=0 stack=0"; if (extendedFormat) { - outputSearchGraphStream << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore; + out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore; } - outputSearchGraphStream << endl; + out << endl; return; } @@ -1141,50 +1107,42 @@ OutputSearchNode(AllOptions const& opts, long translationId, // output in traditional format if (!extendedFormat) { - outputSearchGraphStream << " hyp=" << searchNode.hypo->GetId() - << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered() - << " back=" << prevHypo->GetId() - << " score=" << searchNode.hypo->GetScore() - << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore()); + out << " hyp=" << searchNode.hypo->GetId() + << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered() + << " back=" << prevHypo->GetId() + << " score=" << searchNode.hypo->GetScore() + << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore()); if (searchNode.recombinationHypo != NULL) - outputSearchGraphStream << " recombined=" << searchNode.recombinationHypo->GetId(); + out << " recombined=" << searchNode.recombinationHypo->GetId(); - outputSearchGraphStream << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore - << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos() - << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos() - << " out=" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) - << endl; + out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore + << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos() + << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos() + << " out=" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) + << endl; return; } - // output in extended format -// if (searchNode.recombinationHypo != NULL) -// outputSearchGraphStream << " hyp=" << searchNode.recombinationHypo->GetId(); -// else - outputSearchGraphStream << " hyp=" << searchNode.hypo->GetId(); - - outputSearchGraphStream << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered() - << " back=" << prevHypo->GetId() - << " score=" << searchNode.hypo->GetScore() - << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore()); + out << " hyp=" << searchNode.hypo->GetId(); + out << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered() + << " back=" << prevHypo->GetId() + << " score=" << searchNode.hypo->GetScore() + << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore()); if (searchNode.recombinationHypo != NULL) - outputSearchGraphStream << " recombined=" << searchNode.recombinationHypo->GetId(); + out << " recombined=" << searchNode.recombinationHypo->GetId(); - outputSearchGraphStream << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore - << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos() - << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos(); + out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore + << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos() + << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos(); // Modified so that -osgx is a superset of -osg (GST Oct 2011) ScoreComponentCollection scoreBreakdown = searchNode.hypo->GetScoreBreakdown(); scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() ); - //outputSearchGraphStream << " scores = [ " << StaticData::Instance().GetAllWeights(); - outputSearchGraphStream << " scores=\"" << scoreBreakdown << "\""; - - outputSearchGraphStream << " out=\"" << searchNode.hypo->GetSourcePhraseStringRep() << "|" << - searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << "\"" << endl; -// outputSearchGraphStream << " out=" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << endl; + out << " scores=\"" << scoreBreakdown << "\"" + << " out=\"" << searchNode.hypo->GetSourcePhraseStringRep() + << "|" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << "\"" << endl; } void Manager::GetConnectedGraph( @@ -1195,7 +1153,8 @@ void Manager::GetConnectedGraph( std::vector< const Hypothesis *>& connectedList = *pConnectedList; // start with the ones in the final stack - const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks(); + const std::vector < HypothesisStack* > &hypoStackColl + = m_search->GetHypothesisStacks(); const HypothesisStack &finalStack = *hypoStackColl.back(); HypothesisStack::const_iterator iterHypo; for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) { @@ -1384,7 +1343,7 @@ OutputSearchGraph(long translationId, std::ostream &out) const vector<SearchGraphNode> searchGraph; GetSearchGraph(searchGraph); for (size_t i = 0; i < searchGraph.size(); ++i) { - OutputSearchNode(options(),translationId,out,searchGraph[i]); + OutputSearchNode(*options(),translationId,out,searchGraph[i]); } } @@ -1508,7 +1467,7 @@ void Manager::OutputBest(OutputCollector *collector) const FixPrecision(debug,PRECISION); // all derivations - send them to debug stream - if (staticData.PrintAllDerivations()) { + if (options()->output.PrintAllDerivations) { additionalReportingTime.start(); PrintAllDerivations(translationId, debug); additionalReportingTime.stop(); @@ -1519,38 +1478,34 @@ void Manager::OutputBest(OutputCollector *collector) const // MAP decoding: best hypothesis const Hypothesis* bestHypo = NULL; - if (!options().mbr.enabled) { + if (!options()->mbr.enabled) { bestHypo = GetBestHypothesis(); if (bestHypo) { - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << bestHypo->GetFutureScore() << ' '; } - if (options().output.RecoverPath) { + if (options()->output.RecoverPath) { bestHypo->OutputInput(out); out << "||| "; } - // const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); - if (options().output.PrintID) { + if (options()->output.PrintID) { out << translationId << " "; } // VN : I put back the code for OutputPassthroughInformation - if (options().output.PrintPassThrough) { + if (options()->output.PrintPassThrough) { OutputPassthroughInformation(out, bestHypo); } // end of add back - if (options().output.ReportSegmentation == 2) { + if (options()->output.ReportSegmentation == 2) { GetOutputLanguageModelOrder(out, bestHypo); } - bestHypo->OutputBestSurface( - out, - staticData.GetOutputFactorOrder(), - options().output); - if (options().output.PrintAlignmentInfo) { + OutputSurface(out,*bestHypo, true); + if (options()->output.PrintAlignmentInfo) { out << "||| "; - bestHypo->OutputAlignment(out, options().output.WA_SortOrder); + bestHypo->OutputAlignment(out, options()->output.WA_SortOrder); } IFVERBOSE(1) { @@ -1566,32 +1521,30 @@ void Manager::OutputBest(OutputCollector *collector) const // MBR decoding (n-best MBR, lattice MBR, consensus) else { // we first need the n-best translations - size_t nBestSize = options().mbr.size; + size_t nBestSize = options()->mbr.size; if (nBestSize <= 0) { cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl; exit(1); } TrellisPathList nBestList; - CalcNBest(nBestSize, nBestList,true); + CalcNBest(nBestSize, nBestList, true); VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl); IFVERBOSE(2) { PrintUserTime("calculated n-best list for (L)MBR decoding"); } // lattice MBR - if (options().lmbr.enabled) { - if (staticData.options().nbest.enabled) { + if (options()->lmbr.enabled) { + if (options()->nbest.enabled) { //lattice mbr nbest vector<LatticeMBRSolution> solutions; - size_t n = min(nBestSize, options().nbest.nbest_size); + size_t n = min(nBestSize, options()->nbest.nbest_size); getLatticeMBRNBest(*this,nBestList,solutions,n); OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId); } else { //Lattice MBR decoding vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList); - OutputBestHypo(mbrBestHypo, translationId, - options().output.ReportSegmentation, - options().output.ReportAllFactors, out); + OutputBestHypo(mbrBestHypo, out); IFVERBOSE(2) { PrintUserTime("finished Lattice MBR decoding"); } @@ -1599,11 +1552,9 @@ void Manager::OutputBest(OutputCollector *collector) const } // consensus decoding - else if (options().search.consensus) { + else if (options()->search.consensus) { const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList); - OutputBestHypo(conBestHypo, translationId, - options().output.ReportSegmentation, - options().output.ReportAllFactors, out); + OutputBestHypo(conBestHypo, out); OutputAlignment(m_alignmentOut, conBestHypo); IFVERBOSE(2) { PrintUserTime("finished Consensus decoding"); @@ -1612,10 +1563,8 @@ void Manager::OutputBest(OutputCollector *collector) const // n-best MBR decoding else { - const TrellisPath &mbrBestHypo = doMBR(nBestList); - OutputBestHypo(mbrBestHypo, translationId, - options().output.ReportSegmentation, - options().output.ReportAllFactors, out); + const TrellisPath &mbrBestHypo = doMBR(nBestList, *options()); + OutputBestHypo(mbrBestHypo, out); OutputAlignment(m_alignmentOut, mbrBestHypo); IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); @@ -1638,22 +1587,16 @@ void Manager::OutputNBest(OutputCollector *collector) const return; } - const StaticData &staticData = StaticData::Instance(); - long translationId = m_source.GetTranslationId(); - - if (options().lmbr.enabled) { - if (options().nbest.enabled) { - collector->Write(translationId, m_latticeNBestOut.str()); + if (options()->lmbr.enabled) { + if (options()->nbest.enabled) { + collector->Write(m_source.GetTranslationId(), m_latticeNBestOut.str()); } } else { TrellisPathList nBestList; ostringstream out; - CalcNBest(options().nbest.nbest_size, nBestList, - options().nbest.only_distinct); - OutputNBest(out, nBestList, - staticData.GetOutputFactorOrder(), - m_source.GetTranslationId(), - options().output.ReportSegmentation); + NBestOptions const& nbo = options()->nbest; + CalcNBest(nbo.nbest_size, nBestList, nbo.only_distinct); + OutputNBest(out, nBestList); collector->Write(m_source.GetTranslationId(), out.str()); } @@ -1661,13 +1604,9 @@ void Manager::OutputNBest(OutputCollector *collector) const void Manager:: -OutputNBest(std::ostream& out, - const Moses::TrellisPathList &nBestList, - const std::vector<Moses::FactorType>& outputFactorOrder, - long translationId, char reportSegmentation) const +OutputNBest(std::ostream& out, Moses::TrellisPathList const& nBestList) const { - // const StaticData &staticData = StaticData::Instance(); - NBestOptions const& nbo = options().nbest; + NBestOptions const& nbo = options()->nbest; bool reportAllFactors = nbo.include_all_factors; bool includeSegmentation = nbo.include_segmentation; bool includeWordAlignment = nbo.include_alignment_info; @@ -1678,15 +1617,15 @@ OutputNBest(std::ostream& out, const std::vector<const Hypothesis *> &edges = path.GetEdges(); // print the surface factor of the translation - out << translationId << " ||| "; + out << m_source.GetTranslationId() << " ||| "; for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { const Hypothesis &edge = *edges[currEdge]; - OutputSurface(out, edge); //, outputFactorOrder, reportSegmentation, reportAllFactors); + OutputSurface(out, edge); } out << " |||"; // print scores with feature names - bool with_labels = options().nbest.include_feature_labels; + bool with_labels = options()->nbest.include_feature_labels; path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels); // total @@ -1725,7 +1664,7 @@ OutputNBest(std::ostream& out, } } - if (options().output.RecoverPath) { + if (options()->output.RecoverPath) { out << " ||| "; OutputInput(out, edges[0]); } @@ -1742,21 +1681,25 @@ OutputNBest(std::ostream& out, */ void Manager:: -OutputSurface(std::ostream &out, const Hypothesis &edge) const +OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) const { - std::vector<FactorType> outputFactorOrder = options().output.factor_order; + if (recursive && edge.GetPrevHypo()) { + OutputSurface(out,*edge.GetPrevHypo(), true); + } + + std::vector<FactorType> outputFactorOrder = options()->output.factor_order; UTIL_THROW_IF2(outputFactorOrder.size() == 0, "Must specific at least 1 output factor"); - FactorType placeholderFactor = options().input.placeholder_factor; + FactorType placeholderFactor = options()->input.placeholder_factor; std::map<size_t, const Factor*> placeholders; if (placeholderFactor != NOT_FOUND) { // creates map of target position -> factor for placeholders placeholders = GetPlaceholders(edge, placeholderFactor); } - bool markUnknown = options().unk.mark; - std::string const& fd = options().output.FactorDelimiter; + bool markUnknown = options()->unk.mark; + std::string const& fd = options()->output.FactorDelimiter; TargetPhrase const& phrase = edge.GetCurrTargetPhrase(); size_t size = phrase.GetSize(); @@ -1775,7 +1718,7 @@ OutputSurface(std::ostream &out, const Hypothesis &edge) const //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { - out << options().unk.prefix; + out << options()->unk.prefix; } out << *factor; @@ -1786,7 +1729,7 @@ OutputSurface(std::ostream &out, const Hypothesis &edge) const } if(markUnknown && word.IsOOV()) { - out << options().unk.suffix; + out << options()->unk.suffix; } out << " "; @@ -1794,7 +1737,7 @@ OutputSurface(std::ostream &out, const Hypothesis &edge) const } // trace ("report segmentation") option "-t" / "-tt" - int reportSegmentation = options().output.ReportSegmentation; + int reportSegmentation = options()->output.ReportSegmentation; if (reportSegmentation > 0 && phrase.GetSize() > 0) { const Range &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); @@ -1809,7 +1752,7 @@ OutputSurface(std::ostream &out, const Hypothesis &edge) const out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - bool with_labels = options().nbest.include_feature_labels; + bool with_labels = options()->nbest.include_feature_labels; scoreBreakdown.OutputAllFeatureScores(out, with_labels); } out << "| "; @@ -1822,7 +1765,7 @@ OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const { typedef std::vector< const std::pair<size_t,size_t>* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder); + AlignVec alignments = ai.GetSortedAlignments(options()->output.WA_SortOrder); AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { @@ -1862,7 +1805,8 @@ std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) { const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor); if (factor) { - std::set<size_t> targetPos = hypo.GetTranslationOption().GetTargetPhrase().GetAlignTerm().GetAlignmentsForSource(sourcePos); + TargetPhrase const& tp = hypo.GetTranslationOption().GetTargetPhrase(); + std::set<size_t> targetPos = tp.GetAlignTerm().GetAlignmentsForSource(sourcePos); UTIL_THROW_IF2(targetPos.size() != 1, "Placeholder should be aligned to 1, and only 1, word"); ret[*targetPos.begin()] = factor; @@ -1874,15 +1818,11 @@ std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, void Manager::OutputLatticeSamples(OutputCollector *collector) const { - const StaticData &staticData = StaticData::Instance(); if (collector) { TrellisPathList latticeSamples; ostringstream out; - CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); - OutputNBest(out,latticeSamples, - staticData.GetOutputFactorOrder(), - m_source.GetTranslationId(), - options().output.ReportSegmentation); + CalcLatticeSamples(options()->output.lattice_sample_size, latticeSamples); + OutputNBest(out,latticeSamples); collector->Write(m_source.GetTranslationId(), out.str()); } @@ -1903,37 +1843,23 @@ void Manager::OutputAlignment(OutputCollector *collector) const edges.push_back(currentHypo); currentHypo = currentHypo->GetPrevHypo(); } + ostringstream out; + size_t targetOffset = 0; + BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) { + const TargetPhrase &tp = e->GetCurrTargetPhrase(); + size_t sourceOffset = e->GetCurrSourceWordsRange().GetStartPos(); + OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); + targetOffset += tp.GetSize(); + } + out << std::endl; // Used by --alignment-output-file so requires endl + collector->Write(m_source.GetTranslationId(), out.str()); - OutputAlignment(collector,m_source.GetTranslationId(), edges); - } -} - -void Manager::OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges) const -{ - ostringstream out; - OutputAlignment(out, edges); - - collector->Write(lineNo,out.str()); -} - -void Manager::OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges) const -{ - size_t targetOffset = 0; - - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; - const TargetPhrase &tp = edge.GetCurrTargetPhrase(); - size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - - OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); - - targetOffset += tp.GetSize(); } - // Used by --alignment-output-file so requires endl - out << std::endl; } -void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const +void +Manager:: +OutputDetailedTranslationReport(OutputCollector *collector) const { if (collector) { ostringstream out; @@ -1944,7 +1870,9 @@ void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const } -void Manager::OutputUnknowns(OutputCollector *collector) const +void +Manager:: +OutputUnknowns(OutputCollector *collector) const { if (collector) { long translationId = m_source.GetTranslationId(); @@ -1959,7 +1887,9 @@ void Manager::OutputUnknowns(OutputCollector *collector) const } -void Manager::OutputWordGraph(OutputCollector *collector) const +void +Manager:: +OutputWordGraph(OutputCollector *collector) const { if (collector) { long translationId = m_source.GetTranslationId(); @@ -1970,7 +1900,9 @@ void Manager::OutputWordGraph(OutputCollector *collector) const } } -void Manager::OutputSearchGraph(OutputCollector *collector) const +void +Manager:: +OutputSearchGraph(OutputCollector *collector) const { if (collector) { long translationId = m_source.GetTranslationId(); @@ -1996,11 +1928,11 @@ void Manager::OutputSearchGraph(OutputCollector *collector) const void Manager::OutputSearchGraphSLF() const { - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); long translationId = m_source.GetTranslationId(); // Output search graph in HTK standard lattice format (SLF) - std::string const& slf = options().output.SearchGraphSLF; + std::string const& slf = options()->output.SearchGraphSLF; if (slf.size()) { util::StringStream fileName; fileName << slf << "/" << translationId << ".slf"; @@ -2020,20 +1952,6 @@ void Manager::OutputSearchGraphSLF() const } -// void Manager::OutputSearchGraphHypergraph() const -// { -// const StaticData &staticData = StaticData::Instance(); -// if (!staticData.GetOutputSearchGraphHypergraph()) return; - -// static char const* key = "output-search-graph-hypergraph"; -// PARAM_VEC const* p = staticData.GetParameter().GetParam(key); -// ScoreComponentCollection const& weights = staticData.GetAllWeights(); -// string const& nBestFile = staticData.GetNBestFilePath(); -// HypergraphOutput<Manager> hypergraphOutput(PRECISION, p, nBestFile, weights); -// hypergraphOutput.Write(*this); - -// } - void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const { for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) { @@ -2041,7 +1959,7 @@ void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSo out << " |||"; const vector<Word> mbrHypo = si->GetWords(); for (size_t i = 0 ; i < mbrHypo.size() ; i++) { - const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]); + const Factor *factor = mbrHypo[i].GetFactor(options()->output.factor_order[0]); if (i>0) out << " " << *factor; else out << *factor; } @@ -2058,31 +1976,28 @@ void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSo } } -void Manager::OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, char /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out) const +void +Manager:: +OutputBestHypo(const std::vector<Word>& mbrBestHypo, ostream& out) const { - + FactorType f = options()->output.factor_order[0]; for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) { - const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]); - UTIL_THROW_IF2(factor == NULL, - "No factor 0 at position " << i); - if (i>0) out << " " << *factor; - else out << *factor; + const Factor *factor = mbrBestHypo[i].GetFactor(f); + UTIL_THROW_IF2(factor == NULL, "No factor " << f << " at position " << i); + if (i) out << " "; + out << *factor; } out << endl; } void Manager:: -OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, - char reportSegmentation, bool reportAllFactors, - std::ostream &out) const +OutputBestHypo(const Moses::TrellisPath &path, std::ostream &out) const { - const std::vector<const Hypothesis *> &edges = path.GetEdges(); - + std::vector<const Hypothesis *> const& edges = path.GetEdges(); for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; + Hypothesis const& edge = *edges[currEdge]; OutputSurface(out, edge); - // , StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors); } out << endl; } @@ -2091,8 +2006,10 @@ void Manager:: OutputAlignment(std::ostringstream &out, const TrellisPath &path) const { - WordAlignmentSort waso = options().output.WA_SortOrder; - Hypothesis::OutputAlignment(out, path.GetEdges(), waso); + WordAlignmentSort waso = options()->output.WA_SortOrder; + BOOST_REVERSE_FOREACH(Hypothesis const* e, path.GetEdges()) + e->OutputAlignment(out, false); + // Hypothesis::OutputAlignment(out, path.GetEdges(), waso); // Used by --alignment-output-file so requires endl out << std::endl; } diff --git a/moses/Manager.h b/moses/Manager.h index 2d36afe55..e125903e1 100644 --- a/moses/Manager.h +++ b/moses/Manager.h @@ -132,21 +132,18 @@ protected: mutable std::ostringstream m_latticeNBestOut; mutable std::ostringstream m_alignmentOut; public: - void OutputNBest(std::ostream& out - , const Moses::TrellisPathList &nBestList - , const std::vector<Moses::FactorType>& outputFactorOrder - , long translationId - , char reportSegmentation) const; - - void OutputSurface(std::ostream &out, const Hypothesis &edge) const; + void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList) const; + void OutputSurface(std::ostream &out, + Hypothesis const& edge, + bool const recursive=false) const; void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const; void OutputInput(std::ostream& os, const Hypothesis* hypo) const; void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const; void OutputPassthroughInformation(std::ostream& os, const Hypothesis* hypo) const; - std::map<size_t, const Factor*> GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const; - void OutputAlignment(OutputCollector* collector, size_t lineNo , const std::vector<const Hypothesis *> &edges) const; - void OutputAlignment(std::ostream &out, const std::vector<const Hypothesis *> &edges) const; + + std::map<size_t, const Factor*> + GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const; void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const; @@ -171,9 +168,8 @@ public: int GetNextHypoId(); void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId) const; - void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/, - char reportSegmentation, bool reportAllFactors, std::ostream& out) const; - void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,char reportSegmentation, bool reportAllFactors, std::ostream &out) const; + void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, std::ostream& out) const; + void OutputBestHypo(const Moses::TrellisPath &path, std::ostream &out) const; #ifdef HAVE_PROTOBUF void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const; @@ -196,8 +192,12 @@ public: /*** *For Lattice MBR */ - void GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, - std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const; + void + GetForwardBackwardSearchGraph + ( std::map< int, bool >* pConnected, + std::vector< const Hypothesis* >* pConnectedList, + std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, + std::vector< float>* pFwdBwdScores) const; // outputs void OutputBest(OutputCollector *collector) const; diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp index 25f319ea1..438484083 100644 --- a/moses/MockHypothesis.cpp +++ b/moses/MockHypothesis.cpp @@ -38,9 +38,8 @@ MockHypothesisGuard m_uwp("UnknownWordPenalty"), m_dist("Distortion") { BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size()); - std::vector<Moses::FactorType> factors(1,0); - AllOptions const& opts = StaticData::Instance().options(); - m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors)); + AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options())); + m_sentence.reset(new Sentence(opts, 0, sourceSentence)); m_ttask = TranslationTask::create(m_sentence); m_manager.reset(new Manager(m_ttask)); @@ -59,16 +58,14 @@ MockHypothesisGuard for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) { Hypothesis* prevHypo = m_hypothesis; Range range(ai->first,ai->second); - const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), - range); - + const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range); m_targetPhrases.push_back(TargetPhrase(NULL)); - // m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL); + vector<FactorType> const& factors = opts->output.factor_order; m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL); m_toptions.push_back(new TranslationOption (range,m_targetPhrases.back())); - m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap, m_manager->GetNextHypoId()); - + m_hypothesis = new Hypothesis(*prevHypo, *m_toptions.back(), newBitmap, + m_manager->GetNextHypoId()); } diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index e9969dceb..348f8bb31 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -218,6 +218,7 @@ Parameter::Parameter() // server options po::options_description server_opts("Moses Server Options"); AddParam(server_opts,"server", "Run moses as a translation server."); + AddParam(server_opts,"daemon", "Run moses as a translation server in the background."); AddParam(server_opts,"server-port", "Port for moses server"); AddParam(server_opts,"server-log", "Log destination for moses server"); AddParam(server_opts,"serial", "Run server in serial mode, processing only one request at a time."); @@ -232,7 +233,6 @@ Parameter::Parameter() "Max. number of seconds the server will keep a persistent connection alive."); AddParam(server_opts,"server-timeout", "Max. number of seconds the server will wait for a client to submit a request once a connection has been established."); - // session timeout and session cache size are for moses translation session handling // they have nothing to do with the abyss server (but relate to the moses server) AddParam(server_opts,"session-timeout", diff --git a/moses/PartialTranslOptColl.cpp b/moses/PartialTranslOptColl.cpp index b0f906f2e..6d8db54d5 100644 --- a/moses/PartialTranslOptColl.cpp +++ b/moses/PartialTranslOptColl.cpp @@ -28,11 +28,11 @@ using namespace std; namespace Moses { /** constructor, intializes counters and thresholds */ -PartialTranslOptColl::PartialTranslOptColl() +PartialTranslOptColl::PartialTranslOptColl(size_t const maxSize) { m_bestScore = -std::numeric_limits<float>::infinity(); m_worstScore = -std::numeric_limits<float>::infinity(); - m_maxSize = StaticData::Instance().GetMaxNoPartTransOpt(); + m_maxSize = maxSize; // StaticData::Instance().GetMaxNoPartTransOpt(); m_totalPruned = 0; } diff --git a/moses/PartialTranslOptColl.h b/moses/PartialTranslOptColl.h index 2507891c7..81776f1ae 100644 --- a/moses/PartialTranslOptColl.h +++ b/moses/PartialTranslOptColl.h @@ -64,7 +64,7 @@ public: return m_list.end(); } - PartialTranslOptColl(); + PartialTranslOptColl(size_t const maxSize); /** destructor, cleans out list */ ~PartialTranslOptColl() { diff --git a/moses/Phrase.cpp b/moses/Phrase.cpp index 052918295..eba0ed9c5 100644 --- a/moses/Phrase.cpp +++ b/moses/Phrase.cpp @@ -119,7 +119,7 @@ Phrase:: GetStringRep(vector<FactorType> const& factorsToPrint, AllOptions const* opts) const { - if (!opts) opts = &StaticData::Instance().options(); + if (!opts) opts = StaticData::Instance().options().get(); bool markUnk = opts->unk.mark; util::StringStream strme; for (size_t pos = 0 ; pos < GetSize() ; pos++) { diff --git a/moses/ReorderingConstraint.cpp b/moses/ReorderingConstraint.cpp index 7e3831efa..82899260e 100644 --- a/moses/ReorderingConstraint.cpp +++ b/moses/ReorderingConstraint.cpp @@ -187,7 +187,9 @@ bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t // check, if we are setting us up for a dead end due to distortion limits - size_t distortionLimit = (size_t)StaticData::Instance().GetMaxDistortion(); + + // size_t distortionLimit = (size_t)StaticData::Instance().GetMaxDistortion(); + size_t distortionLimit = m_max_distortion; if (startPos != firstGapPos && endZone-firstGapPos >= distortionLimit) { VERBOSE(3," dead end due to distortion limit" << std::endl); return false; diff --git a/moses/ReorderingConstraint.h b/moses/ReorderingConstraint.h index 575e64789..ea7458a33 100644 --- a/moses/ReorderingConstraint.h +++ b/moses/ReorderingConstraint.h @@ -53,11 +53,16 @@ protected: size_t *m_localWall; /**< flag for each word if it is a local wall */ std::vector< std::vector< size_t > > m_zone; /** zones that limit reordering */ bool m_active; /**< flag indicating, if there are any active constraints */ - + int m_max_distortion; public: //! create ReorderingConstraint of length size and initialise to zero - ReorderingConstraint() :m_wall(NULL),m_localWall(NULL),m_active(false) {} + ReorderingConstraint(int max_distortion) + : m_wall(NULL) + , m_localWall(NULL) + , m_active(false) + , m_max_distortion(max_distortion) + {} //! destructer ~ReorderingConstraint() { diff --git a/moses/RuleCube.cpp b/moses/RuleCube.cpp index 7ecd9611a..874a1f57d 100644 --- a/moses/RuleCube.cpp +++ b/moses/RuleCube.cpp @@ -27,7 +27,6 @@ #include "StaticData.h" #include "Util.h" #include "Range.h" - #include <boost/functional/hash.hpp> using namespace std; @@ -43,7 +42,7 @@ RuleCube::RuleCube(const ChartTranslationOptions &transOpt, { RuleCubeItem *item = new RuleCubeItem(transOpt, allChartCells); m_covered.insert(item); - if (StaticData::Instance().options().cube.lazy_scoring) { + if (StaticData::Instance().options()->cube.lazy_scoring) { item->EstimateScore(); } else { item->CreateHypothesis(transOpt, manager); @@ -91,7 +90,7 @@ void RuleCube::CreateNeighbor(const RuleCubeItem &item, int dimensionIndex, if (!result.second) { delete newItem; // already seen it } else { - if (StaticData::Instance().options().cube.lazy_scoring) { + if (StaticData::Instance().options()->cube.lazy_scoring) { newItem->EstimateScore(); } else { newItem->CreateHypothesis(m_transOpt, manager); diff --git a/moses/RuleCubeQueue.cpp b/moses/RuleCubeQueue.cpp index e4a13c528..1942a29f7 100644 --- a/moses/RuleCubeQueue.cpp +++ b/moses/RuleCubeQueue.cpp @@ -50,7 +50,7 @@ ChartHypothesis *RuleCubeQueue::Pop() // pop the most promising item from the cube and get the corresponding // hypothesis RuleCubeItem *item = cube->Pop(m_manager); - if (StaticData::Instance().options().cube.lazy_scoring) { + if (StaticData::Instance().options()->cube.lazy_scoring) { item->CreateHypothesis(cube->GetTranslationOption(), m_manager); } ChartHypothesis *hypo = item->ReleaseHypothesis(); diff --git a/moses/Search.cpp b/moses/Search.cpp index 8579ad152..2d8c74b5f 100644 --- a/moses/Search.cpp +++ b/moses/Search.cpp @@ -7,36 +7,19 @@ namespace Moses { -Search::Search(Manager& manager, const InputType &source) +Search::Search(Manager& manager) : m_manager(manager) - , m_source(source) - , m_options(manager.options()) + , m_source(manager.GetSource()) + , m_options(*manager.options()) , m_inputPath() , m_initialTransOpt() - , m_bitmaps(source.GetSize(), source.m_sourceCompleted) + , m_bitmaps(manager.GetSource().GetSize(), manager.GetSource().m_sourceCompleted) , interrupted_flag(0) { m_initialTransOpt.SetInputPath(m_inputPath); } -Search * -Search:: -CreateSearch(Manager& manager, const InputType &source, - SearchAlgorithm searchAlgorithm, - const TranslationOptionCollection &transOptColl) -{ - switch(searchAlgorithm) { - case Normal: - return new SearchNormal(manager,source, transOptColl); - case CubePruning: - return new SearchCubePruning(manager, source, transOptColl); - default: - UTIL_THROW2("ERROR: search. Aborting\n"); - return NULL; - } -} - bool Search:: out_of_time() diff --git a/moses/Search.h b/moses/Search.h index 9da16f1f5..a0e07870d 100644 --- a/moses/Search.h +++ b/moses/Search.h @@ -33,13 +33,9 @@ public: //! Decode the sentence according to the specified search algorithm. virtual void Decode() = 0; - explicit Search(Manager& manager, const InputType &source); + explicit Search(Manager& manager); virtual ~Search() {} - // Factory method - static Search *CreateSearch(Manager& manager, const InputType &source, SearchAlgorithm searchAlgorithm, - const TranslationOptionCollection &transOptColl); - protected: Manager& m_manager; const InputType &m_source; diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp index 11dba2f7c..9984ecadb 100644 --- a/moses/SearchCubePruning.cpp +++ b/moses/SearchCubePruning.cpp @@ -50,10 +50,9 @@ public: }; SearchCubePruning:: -SearchCubePruning(Manager& manager, const InputType &source, - const TranslationOptionCollection &transOptColl) - : Search(manager, source) - , m_hypoStackColl(source.GetSize() + 1) +SearchCubePruning(Manager& manager, TranslationOptionCollection const& transOptColl) + : Search(manager) + , m_hypoStackColl(manager.GetSource().GetSize() + 1) , m_transOptColl(transOptColl) { std::vector < HypothesisStackCubePruning >::iterator iterStack; @@ -88,13 +87,13 @@ void SearchCubePruning::Decode() firstStack.CleanupArcList(); CreateForwardTodos(firstStack); - const size_t PopLimit = m_manager.options().cube.pop_limit; + const size_t PopLimit = m_manager.options()->cube.pop_limit; VERBOSE(2,"Cube Pruning pop limit is " << PopLimit << std::endl); - const size_t Diversity = m_manager.options().cube.diversity; + const size_t Diversity = m_manager.options()->cube.diversity; VERBOSE(2,"Cube Pruning diversity is " << Diversity << std::endl); VERBOSE(2,"Max Phrase length is " - << m_manager.options().search.max_phrase_length << std::endl); + << m_manager.options()->search.max_phrase_length << std::endl); // go through each stack size_t stackNo = 1; @@ -227,7 +226,7 @@ void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack) } size_t maxSize = size - startPos; - size_t maxSizePhrase = m_manager.options().search.max_phrase_length; + size_t maxSizePhrase = m_manager.options()->search.max_phrase_length; maxSize = std::min(maxSize, maxSizePhrase); for (endPos = startPos+1; endPos < startPos + maxSize; endPos++) { if (bitmap.GetValue(endPos)) @@ -268,7 +267,7 @@ SearchCubePruning:: CheckDistortion(const Bitmap &hypoBitmap, const Range &range) const { // since we check for reordering limits, its good to have that limit handy - int maxDistortion = m_manager.options().reordering.max_distortion; + int maxDistortion = m_manager.options()->reordering.max_distortion; if (maxDistortion < 0) return true; // if there are reordering limits, make sure it is not violated diff --git a/moses/SearchCubePruning.h b/moses/SearchCubePruning.h index 205713925..6cb471aaa 100644 --- a/moses/SearchCubePruning.h +++ b/moses/SearchCubePruning.h @@ -31,7 +31,7 @@ protected: void PrintBitmapContainerGraph(); public: - SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl); + SearchCubePruning(Manager& manager, const TranslationOptionCollection &transOptColl); ~SearchCubePruning(); void Decode(); diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp index bb798c061..7bd89a7f2 100644 --- a/moses/SearchNormal.cpp +++ b/moses/SearchNormal.cpp @@ -16,10 +16,9 @@ namespace Moses * /param transOptColl collection of translation options to be used for this sentence */ SearchNormal:: -SearchNormal(Manager& manager, const InputType &source, - const TranslationOptionCollection &transOptColl) - : Search(manager, source) - , m_hypoStackColl(source.GetSize() + 1) +SearchNormal(Manager& manager, const TranslationOptionCollection &transOptColl) + : Search(manager) + , m_hypoStackColl(manager.GetSource().GetSize() + 1) , m_transOptColl(transOptColl) { VERBOSE(1, "Translating: " << m_source << endl); diff --git a/moses/SearchNormal.h b/moses/SearchNormal.h index 2e3321d29..35ed27199 100644 --- a/moses/SearchNormal.h +++ b/moses/SearchNormal.h @@ -49,7 +49,7 @@ protected: const Bitmap &bitmap); public: - SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl); + SearchNormal(Manager& manager, const TranslationOptionCollection &transOptColl); ~SearchNormal(); void Decode(); diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index e5a64b9f8..c9c69b648 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Util.h" #include "XmlOption.h" #include "FactorCollection.h" +#include "TranslationTask.h" using namespace std; @@ -40,11 +41,10 @@ namespace Moses { Sentence:: -Sentence() : Phrase(0) , InputType() +Sentence(AllOptions::ptr const& opts) : Phrase(0) , InputType(opts) { - const StaticData& SD = StaticData::Instance(); - if (SD.IsSyntax()) - m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal()); + if (is_syntax(opts->search.algo)) + m_defaultLabelSet.insert(opts->syntax.input_default_non_terminal); } Sentence:: @@ -145,37 +145,30 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG void Sentence:: -aux_interpret_xml(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls, +aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls, std::vector<std::pair<size_t, std::string> >& placeholders) { // parse XML markup in translation line - - const StaticData &SD = StaticData::Instance(); - using namespace std; - if (opts.input.xml_policy != XmlPassThrough) { - int offset = SD.IsSyntax() ? 1 : 0; - bool OK = ProcessAndStripXMLTags(opts, line, m_xmlOptions, + if (m_options->input.xml_policy != XmlPassThrough) { + bool OK = ProcessAndStripXMLTags(*m_options, line, + m_xmlOptions, m_reorderingConstraint, - xmlWalls, placeholders, offset, - SD.GetXmlBrackets().first, - SD.GetXmlBrackets().second); + xmlWalls, placeholders); UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line); } } void Sentence:: -init(string line, std::vector<FactorType> const& factorOrder, - AllOptions const& opts) +init(string line) { using namespace std; - const StaticData &SD = StaticData::Instance(); m_frontSpanCoveredLength = 0; m_sourceCompleted.resize(0); - if (SD.ContinuePartialTranslation()) + if (m_options->input.continue_partial_translation) aux_init_partial_translation(line); line = Trim(line); @@ -183,28 +176,28 @@ init(string line, std::vector<FactorType> const& factorOrder, aux_interpret_dlt(line); // some poorly documented cache-based stuff // if sentences is specified as "<passthrough tag1=""/>" - if (SD.options().output.PrintPassThrough || - SD.options().nbest.include_passthrough) { + if (m_options->output.PrintPassThrough ||m_options->nbest.include_passthrough) { string pthru = PassthroughSGML(line,"passthrough"); this->SetPassthroughInformation(pthru); } vector<size_t> xmlWalls; vector<pair<size_t, string> >placeholders; - aux_interpret_xml(opts, line, xmlWalls, placeholders); + aux_interpret_xml(line, xmlWalls, placeholders); - Phrase::CreateFromString(Input, factorOrder, line, NULL); + Phrase::CreateFromString(Input, m_options->input.factor_order, line, NULL); ProcessPlaceholders(placeholders); - if (SD.IsSyntax()) InitStartEndWord(); + if (is_syntax(m_options->search.algo)) + InitStartEndWord(); // now that we have final word positions in phrase (from // CreateFromString), we can make input phrase objects to go with // our XmlOptions and create TranslationOptions // only fill the vector if we are parsing XML - if (opts.input.xml_policy != XmlPassThrough) { + if (m_options->input.xml_policy != XmlPassThrough) { m_xmlCoverageMap.assign(GetSize(), false); BOOST_FOREACH(XmlOption const* o, m_xmlOptions) { Range const& r = o->range; @@ -217,7 +210,7 @@ init(string line, std::vector<FactorType> const& factorOrder, m_reorderingConstraint.InitializeWalls(GetSize()); // set reordering walls, if "-monotone-at-punction" is set - if (SD.UseReorderingConstraint() && GetSize()) { + if (m_options->reordering.monotone_at_punct && GetSize()) { Range r(0, GetSize()-1); m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r)); } @@ -232,14 +225,12 @@ init(string line, std::vector<FactorType> const& factorOrder, int Sentence:: -Read(std::istream& in, - const std::vector<FactorType>& factorOrder, - AllOptions const& opts) +Read(std::istream& in) { std::string line; if (getline(in, line, '\n').eof()) return 0; - init(line, factorOrder, opts); + init(line); return 1; } @@ -247,7 +238,7 @@ void Sentence:: ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders) { - FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor; + FactorType placeholderFactor = m_options->input.placeholder_factor; if (placeholderFactor == NOT_FOUND) { return; } @@ -265,10 +256,13 @@ TranslationOptionCollection* Sentence:: CreateTranslationOptionCollection(ttasksptr const& ttask) const { - size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage(); - float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold(); + size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov; + // StaticData::Instance().GetMaxNoTransOptPerCoverage(); + float transOptThreshold = ttask->options()->search.trans_opt_threshold; + // StaticData::Instance().GetTranslationOptionThreshold(); TranslationOptionCollection *rv - = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage, + = new TranslationOptionCollectionText(ttask, *this, + maxNoTransOptPerCoverage, transOptThreshold); assert(rv); return rv; @@ -322,7 +316,7 @@ void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list, std::vector <ChartTranslationOptions*> Sentence:: -GetXmlChartTranslationOptions(AllOptions const& opts) const +GetXmlChartTranslationOptions() const { std::vector <ChartTranslationOptions*> ret; @@ -330,7 +324,7 @@ GetXmlChartTranslationOptions(AllOptions const& opts) const // this code is a copy of the 1 in Sentence. //only fill the vector if we are parsing XML - if (opts.input.xml_policy != XmlPassThrough ) { + if (m_options->input.xml_policy != XmlPassThrough ) { //TODO: needed to handle exclusive //for (size_t i=0; i<GetSize(); i++) { // m_xmlCoverageMap.push_back(false); @@ -371,14 +365,10 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString) } Sentence:: -Sentence(size_t const transId, - string const& stext, - AllOptions const& opts, - vector<FactorType> const* IFO) - : InputType(transId) +Sentence(AllOptions::ptr const& opts, size_t const transId, string stext) + : InputType(opts, transId) { - if (IFO) init(stext, *IFO, opts); - else init(stext, StaticData::Instance().GetInputFactorOrder(), opts); + init(stext); } } diff --git a/moses/Sentence.h b/moses/Sentence.h index 518350d24..c2a846762 100644 --- a/moses/Sentence.h +++ b/moses/Sentence.h @@ -63,10 +63,9 @@ protected: std::vector<std::map<std::string,std::string> > m_dlt_meta; public: - Sentence(); - Sentence(size_t const transId, std::string const& stext, - AllOptions const& opts, - std::vector<FactorType> const* IFO = NULL); + Sentence(AllOptions::ptr const& opts); + Sentence(AllOptions::ptr const& opts, size_t const transId, std::string stext); + // std::vector<FactorType> const* IFO = NULL); // Sentence(size_t const transId, std::string const& stext); ~Sentence(); @@ -95,11 +94,11 @@ public: //! populates vector argument with XML force translation options for the specific range passed void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const; void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const; - std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions(AllOptions const& opts) const; + std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const; virtual int - Read(std::istream& in, const std::vector<FactorType>& factorOrder, - AllOptions const& opts); + Read(std::istream& in); + // , const std::vector<FactorType>& factorOrder, AllOptions const& opts); void Print(std::ostream& out) const; @@ -116,9 +115,7 @@ public: } - void - init(std::string line, std::vector<FactorType> const& factorOrder, - AllOptions const& opts); + void init(std::string line); std::vector<std::map<std::string,std::string> > const& GetDltMeta() const { @@ -140,7 +137,7 @@ private: void aux_interpret_xml - (AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls, + (std::string& line, std::vector<size_t> & xmlWalls, std::vector<std::pair<size_t, std::string> >& placeholders); void diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index b4fa1c03a..08f757915 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -59,16 +59,11 @@ namespace Moses StaticData StaticData::s_instance; StaticData::StaticData() - : m_sourceStartPosMattersForRecombination(false) + : m_options(new AllOptions) , m_requireSortingAfterSourceContext(false) - , m_isAlwaysCreateDirectTranslationOption(false) , m_currentWeightSetting("default") , m_treeStructure(NULL) { - m_xmlBrackets.first="<"; - m_xmlBrackets.second=">"; - - // memory pools Phrase::InitializeMemPool(); } @@ -121,37 +116,6 @@ StaticData } -void -StaticData -::ini_input_options() -{ - const PARAM_VEC *params; - - m_parameter->SetParameter(m_continuePartialTranslation, - "continue-partial-translation", false ); - - // use of xml in input - // m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough); - - // specify XML tags opening and closing brackets for XML option - params = m_parameter->GetParam("xml-brackets"); - if (params && params->size()) { - std::vector<std::string> brackets = Tokenize(params->at(0)); - if(brackets.size()!=2) { - cerr << "invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets" << endl; - exit(1); - } - m_xmlBrackets.first= brackets[0]; - m_xmlBrackets.second=brackets[1]; - VERBOSE(1,"XML tags opening and closing brackets for XML input are: " - << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl); - } - - m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange, - "default-non-term-for-empty-range-only", false ); - -} - bool StaticData ::ini_output_options() @@ -162,51 +126,22 @@ StaticData m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1); - m_parameter->SetParameter(m_includeLHSInSearchGraph, - "include-lhs-in-search-graph", false ); m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", ""); - //Print Translation Options - m_parameter->SetParameter(m_printTranslationOptions, - "print-translation-option", false ); - - //Print All Derivations - m_parameter->SetParameter(m_printAllDerivations , - "print-all-derivations", false ); - m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0); - //lattice samples return true; } -void -StaticData:: -ini_compact_table_options() -{ - // Compact phrase table and reordering model - m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false ); - m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false ); -} - -void -StaticData:: -ini_lm_options() -{ - m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1); -} - // threads, timeouts, etc. bool StaticData ::ini_performance_options() { const PARAM_VEC *params; - // m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1); - // m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true; m_threadCount = 1; params = m_parameter->GetParam("threads"); @@ -240,98 +175,36 @@ StaticData return true; } -void -StaticData:: -ini_factor_maps() -{ - const PARAM_VEC *params; - // factor delimiter - m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|"); - if (m_factorDelimiter == "none") { - m_factorDelimiter = ""; - } - - // //input factors - // params = m_parameter->GetParam("input-factors"); - // if (params) { - // m_inputFactorOrder = Scan<FactorType>(*params); - // } - // if(m_inputFactorOrder.empty()) { - // m_inputFactorOrder.push_back(0); - // } - - //output factors - // params = m_parameter->GetParam("output-factors"); - // if (params) { - // m_outputFactorOrder = Scan<FactorType>(*params); - // } - // if(m_outputFactorOrder.empty()) { - // // default. output factor 0 - // m_outputFactorOrder.push_back(0); - // } -} - -void -StaticData:: -ini_oov_options() -{ - // unknown word processing - // m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); - // m_parameter->SetParameter(m_markUnknown, "mark-unknown", false ); - // m_parameter->SetParameter<string>(m_unknownWordPrefix, "unknown-word-prefix", "UNK" ); - // m_parameter->SetParameter<string>(m_unknownWordSuffix, "unknown-word-suffix", "" ); - - //source word deletion - m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false ); - - m_parameter->SetParameter(m_isAlwaysCreateDirectTranslationOption, "always-create-direct-transopt", false ); -} - -void -StaticData:: -ini_zombie_options() -{ - //Disable discarding - m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false); - -} - bool StaticData::LoadData(Parameter *parameter) { m_parameter = parameter; const PARAM_VEC *params; - m_options.init(*parameter); + m_options->init(*parameter); + if (is_syntax(m_options->search.algo)) + m_options->syntax.LoadNonTerminals(*parameter, FactorCollection::Instance()); if (IsSyntax()) LoadChartDecodingParameters(); // ORDER HERE MATTERS, SO DON'T CHANGE IT UNLESS YOU KNOW WHAT YOU ARE DOING! // input, output - ini_factor_maps(); - ini_input_options(); + + m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|"); + m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1); + m_bookkeeping_options.init(*parameter); if (!ini_output_options()) return false; // threading etc. if (!ini_performance_options()) return false; - // model loading - ini_compact_table_options(); - - // search - ini_oov_options(); + // Compact phrase table and reordering model + m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false ); + m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false ); // S2T decoder - m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", - RecursiveCYKPlus); - - - ini_zombie_options(); // probably dead, or maybe not - - // m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", - // NOT_FOUND); // FEATURE FUNCTION INITIALIZATION HAPPENS HERE =============================== initialize_features(); @@ -740,7 +613,7 @@ void StaticData::LoadFeatureFunctions() if (doLoad) { VERBOSE(1, "Loading " << ff->GetScoreProducerDescription() << endl); - ff->Load(); + ff->Load(options()); } } @@ -748,7 +621,7 @@ void StaticData::LoadFeatureFunctions() for (size_t i = 0; i < pts.size(); ++i) { PhraseDictionary *pt = pts[i]; VERBOSE(1, "Loading " << pt->GetScoreProducerDescription() << endl); - pt->Load(); + pt->Load(options()); } CheckLEGACYPT(); @@ -967,7 +840,7 @@ StaticData // FIXME Does this make sense for F2S? Perhaps it should be changed once // FIXME the pipeline uses RuleTable consistently. - SearchAlgorithm algo = m_options.search.algo; + SearchAlgorithm algo = m_options->search.algo; if (algo == SyntaxS2T || algo == SyntaxT2S || algo == SyntaxT2S_SCFG || algo == SyntaxF2S) { // Automatically override PhraseDictionary{Memory,Scope3}. This will diff --git a/moses/StaticData.h b/moses/StaticData.h index 66388d052..d648b53bc 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -72,7 +72,7 @@ private: static StaticData s_instance; protected: Parameter *m_parameter; - AllOptions m_options; + boost::shared_ptr<AllOptions> m_options; mutable ScoreComponentCollection m_allWeights; @@ -95,31 +95,22 @@ protected: size_t m_latticeSamplesSize; std::string m_latticeSamplesFilePath; - // bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them - // bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them - // std::string m_unknownWordPrefix; - // std::string m_unknownWordSuffix; - bool m_wordDeletionEnabled; + // bool m_wordDeletionEnabled; - bool m_disableDiscarding; bool m_printAllDerivations; bool m_printTranslationOptions; - bool m_sourceStartPosMattersForRecombination; + // bool m_sourceStartPosMattersForRecombination; bool m_requireSortingAfterSourceContext; mutable size_t m_verboseLevel; std::string m_factorDelimiter; //! by default, |, but it can be changed - // XmlInputType m_xmlInputType; //! method for handling sentence XML input std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">" size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1) - bool m_isAlwaysCreateDirectTranslationOption; - //! constructor. only the 1 static variable can be created - bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph std::string m_outputUnknownsFile; //! output unknowns in this file @@ -144,7 +135,6 @@ protected: std::map< std::string, std::set< std::string > > m_weightSettingIgnoreFF; // feature function std::map< std::string, std::set< size_t > > m_weightSettingIgnoreDP; // decoding path - // FactorType m_placeHolderFactor; bool m_useLegacyPT; bool m_defaultNonTermOnlyForEmptyRange; S2TParsingAlgorithm m_s2tParsingAlgorithm; @@ -174,29 +164,13 @@ protected: const StatefulFeatureFunction* m_treeStructure; - void ini_compact_table_options(); - void ini_consensus_decoding_options(); - void ini_cube_pruning_options(); - void ini_distortion_options(); - void ini_factor_maps(); - void ini_input_options(); - void ini_lm_options(); - void ini_lmbr_options(); - void ini_mbr_options(); - void ini_mira_options(); void ini_oov_options(); bool ini_output_options(); bool ini_performance_options(); - void ini_phrase_lookup_options(); - bool ini_stack_decoding_options(); - void ini_zombie_options(); void initialize_features(); public: - bool IsAlwaysCreateDirectTranslationOption() const { - return m_isAlwaysCreateDirectTranslationOption; - } //! destructor ~StaticData(); @@ -231,100 +205,60 @@ public: return *m_parameter; } - AllOptions const& + AllOptions::ptr const options() const { return m_options; } - AllOptions& - options() { - return m_options; - } - - const std::vector<FactorType> &GetInputFactorOrder() const { - return m_options.input.factor_order; - } - - const std::vector<FactorType> &GetOutputFactorOrder() const { - return m_options.output.factor_order; - } - - inline bool GetSourceStartPosMattersForRecombination() const { - return m_sourceStartPosMattersForRecombination; - } - // inline bool GetDropUnknown() const { - // return m_dropUnknown; - // } - // inline bool GetMarkUnknown() const { - // return m_markUnknown; - // } - // inline std::string GetUnknownWordPrefix() const { - // return m_unknownWordPrefix; + // AllOptions& + // options() { + // return m_options; // } - // inline std::string GetUnknownWordSuffix() const { - // return m_unknownWordSuffix; - // } - inline bool GetDisableDiscarding() const { - return m_disableDiscarding; - } - inline size_t GetMaxNoTransOptPerCoverage() const { - return m_options.search.max_trans_opt_per_cov; - } - inline size_t GetMaxNoPartTransOpt() const { - return m_options.search.max_partial_trans_opt; - } - inline size_t GetMaxPhraseLength() const { - return m_options.search.max_phrase_length; - } - bool IsWordDeletionEnabled() const { - return m_wordDeletionEnabled; - } - int GetMaxDistortion() const { - return m_options.reordering.max_distortion; - } - bool UseReorderingConstraint() const { - return m_reorderingConstraint; - } + // inline bool + // GetSourceStartPosMattersForRecombination() const { + // return m_sourceStartPosMattersForRecombination; + // } - bool UseEarlyDiscarding() const { - return m_options.search.early_discarding_threshold + bool + UseEarlyDiscarding() const { + return m_options->search.early_discarding_threshold != -std::numeric_limits<float>::infinity(); } - bool UseEarlyDistortionCost() const { - return m_options.reordering.use_early_distortion_cost; - // return m_useEarlyDistortionCost; + + bool + UseEarlyDistortionCost() const { + return m_options->reordering.use_early_distortion_cost; } - float GetTranslationOptionThreshold() const { - return m_options.search.trans_opt_threshold; + + float + GetTranslationOptionThreshold() const { + return m_options->search.trans_opt_threshold; } - size_t GetVerboseLevel() const { + size_t + GetVerboseLevel() const { return m_verboseLevel; } - void SetVerboseLevel(int x) const { + + void + SetVerboseLevel(int x) const { m_verboseLevel = x; } - bool UseMinphrInMemory() const { + bool + UseMinphrInMemory() const { return m_minphrMemory; } - bool UseMinlexrInMemory() const { + bool + UseMinlexrInMemory() const { return m_minlexrMemory; } - size_t GetLatticeSamplesSize() const { - return m_latticeSamplesSize; - } - - const std::string& GetLatticeSamplesFilePath() const { - return m_latticeSamplesFilePath; - } - bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const { if (algo == DefaultSearchAlgorithm) - algo = m_options.search.algo; + algo = m_options->search.algo; return (algo == CYKPlus || algo == ChartIncremental || algo == SyntaxS2T || algo == SyntaxT2S || @@ -369,41 +303,14 @@ public: return m_outputUnknownsFile; } - bool GetIncludeLHSInSearchGraph() const { - return m_includeLHSInSearchGraph; - } - - // XmlInputType GetXmlInputType() const { - // return m_xmlInputType; + // bool GetIncludeLHSInSearchGraph() const { + // return m_includeLHSInSearchGraph; // } - std::pair<std::string,std::string> GetXmlBrackets() const { - return m_xmlBrackets; - } - - bool PrintTranslationOptions() const { - return m_printTranslationOptions; - } - - bool PrintAllDerivations() const { - return m_printAllDerivations; - } - const UnknownLHSList &GetUnknownLHS() const { return m_unknownLHS; } - const Word &GetInputDefaultNonTerminal() const { - return m_inputDefaultNonTerminal; - } - const Word &GetOutputDefaultNonTerminal() const { - return m_outputDefaultNonTerminal; - } - - SourceLabelOverlap GetSourceLabelOverlap() const { - return m_sourceLabelOverlap; - } - size_t GetRuleLimit() const { return m_ruleLimit; } @@ -513,7 +420,6 @@ public: } //sentence (and thread) specific initialisationn and cleanup - // void InitializeForInput(const InputType& source, ttaskptr const& ttask) const; void InitializeForInput(ttasksptr const& ttask) const; void CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const; @@ -526,10 +432,6 @@ public: std::map<std::string, std::string> OverrideFeatureNames(); void OverrideFeatures(); - // FactorType GetPlaceholderFactor() const { - // return m_placeHolderFactor; - // } - const FeatureRegistry &GetFeatureRegistry() const { return m_registry; } @@ -569,10 +471,6 @@ public: return m_defaultNonTermOnlyForEmptyRange; } - S2TParsingAlgorithm GetS2TParsingAlgorithm() const { - return m_s2tParsingAlgorithm; - } - bool RequireSortingAfterSourceContext() const { return m_requireSortingAfterSourceContext; } diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp index 3ba5a26d3..930138750 100644 --- a/moses/Syntax/F2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/F2S/GlueRuleSynthesizer.cpp @@ -3,9 +3,8 @@ #include <sstream> #include "moses/FF/UnknownWordPenaltyProducer.h" -#include "moses/StaticData.h" #include "util/string_stream.hh" - +#include "moses/parameters/AllOptions.h" namespace Moses { namespace Syntax @@ -13,13 +12,14 @@ namespace Syntax namespace F2S { -GlueRuleSynthesizer::GlueRuleSynthesizer(HyperTree &trie) - : m_hyperTree(trie) +GlueRuleSynthesizer:: +GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &trie) + : m_input_default_nonterminal(opts.syntax.input_default_non_terminal) + , m_output_default_nonterminal(opts.syntax.output_default_non_terminal) + , m_hyperTree(trie) { - const std::vector<FactorType> &inputFactorOrder = - StaticData::Instance().GetInputFactorOrder(); Word *lhs = NULL; - m_dummySourcePhrase.CreateFromString(Input, inputFactorOrder, "hello", &lhs); + m_dummySourcePhrase.CreateFromString(Input, opts.input.factor_order, "hello", &lhs); delete lhs; } @@ -46,11 +46,10 @@ void GlueRuleSynthesizer::SynthesizeHyperPath(const Forest::Hyperedge &e, } } -TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( - const Forest::Hyperedge &e) +TargetPhrase* +GlueRuleSynthesizer:: +SynthesizeTargetPhrase(const Forest::Hyperedge &e) { - const StaticData &staticData = StaticData::Instance(); - const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); @@ -60,7 +59,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( for (std::size_t i = 0; i < e.tail.size(); ++i) { const Word &symbol = e.tail[i]->pvertex.symbol; if (symbol.IsNonTerminal()) { - targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal()); + targetPhrase->AddWord(m_output_default_nonterminal); } else { // TODO Check this Word &targetWord = targetPhrase->AddWord(); @@ -74,7 +73,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( float score = LOWEST_SCORE; targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score); targetPhrase->EvaluateInIsolation(m_dummySourcePhrase); - Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal()); + Word *targetLhs = new Word(m_output_default_nonterminal); targetPhrase->SetTargetLHS(targetLhs); targetPhrase->SetAlignmentInfo(alignmentSS.str()); diff --git a/moses/Syntax/F2S/GlueRuleSynthesizer.h b/moses/Syntax/F2S/GlueRuleSynthesizer.h index fa271796f..a07c111b0 100644 --- a/moses/Syntax/F2S/GlueRuleSynthesizer.h +++ b/moses/Syntax/F2S/GlueRuleSynthesizer.h @@ -9,6 +9,7 @@ namespace Moses { +class AllOptions; namespace Syntax { namespace F2S @@ -16,8 +17,10 @@ namespace F2S class GlueRuleSynthesizer : public HyperTreeCreator { + Word m_input_default_nonterminal; + Word m_output_default_nonterminal; public: - GlueRuleSynthesizer(HyperTree &); + GlueRuleSynthesizer(Moses::AllOptions const& opts, HyperTree &); // Synthesize the minimal, monotone rule that can be applied to the given // hyperedge and add it to the rule trie. diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp index 03305a017..907c733e3 100644 --- a/moses/Syntax/F2S/HyperTreeLoader.cpp +++ b/moses/Syntax/F2S/HyperTreeLoader.cpp @@ -20,6 +20,7 @@ #include "moses/ChartTranslationOptionList.h" #include "moses/FactorCollection.h" #include "moses/Syntax/RuleTableFF.h" +#include "moses/parameters/AllOptions.h" #include "util/file_piece.hh" #include "util/string_piece.hh" #include "util/tokenize_piece.hh" @@ -32,12 +33,14 @@ namespace Moses { + namespace Syntax { namespace F2S { -bool HyperTreeLoader::Load(const std::vector<FactorType> &input, +bool HyperTreeLoader::Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &ff, diff --git a/moses/Syntax/F2S/HyperTreeLoader.h b/moses/Syntax/F2S/HyperTreeLoader.h index eebf1185a..42d1ef774 100644 --- a/moses/Syntax/F2S/HyperTreeLoader.h +++ b/moses/Syntax/F2S/HyperTreeLoader.h @@ -14,6 +14,7 @@ namespace Moses { +class AllOptions; namespace Syntax { namespace F2S @@ -22,7 +23,8 @@ namespace F2S class HyperTreeLoader : public HyperTreeCreator { public: - bool Load(const std::vector<FactorType> &input, + bool Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &, diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h index 9417b90f8..29b1f2ba9 100644 --- a/moses/Syntax/F2S/Manager-inl.h +++ b/moses/Syntax/F2S/Manager-inl.h @@ -42,7 +42,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask) m_rootVertex = p->GetRootVertex(); m_sentenceLength = p->GetSize(); } else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) { - T2S::InputTreeBuilder builder; + T2S::InputTreeBuilder builder(options()->output.factor_order); T2S::InputTree tmpTree; builder.Build(*p, "Q", tmpTree); boost::shared_ptr<Forest> forest = boost::make_shared<Forest>(); @@ -60,9 +60,9 @@ void Manager<RuleMatcher>::Decode() const StaticData &staticData = StaticData::Instance(); // Get various pruning-related constants. - const std::size_t popLimit = staticData.options().cube.pop_limit; + const std::size_t popLimit = staticData.options()->cube.pop_limit; const std::size_t ruleLimit = staticData.GetRuleLimit(); - const std::size_t stackLimit = staticData.options().search.stack_size; + const std::size_t stackLimit = staticData.options()->search.stack_size; // Initialize the stacks. InitializeStacks(); @@ -74,7 +74,7 @@ void Manager<RuleMatcher>::Decode() RuleMatcherCallback callback(m_stackMap, ruleLimit); // Create a glue rule synthesizer. - GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie); + GlueRuleSynthesizer glueRuleSynthesizer(*options(), *m_glueRuleTrie); // Sort the input forest's vertices into bottom-up topological order. std::vector<const Forest::Vertex *> sortedVertices; @@ -255,7 +255,7 @@ void Manager<RuleMatcher>::ExtractKBest( // with 0 being 'unlimited.' This actually sets a large-ish limit in case // too many translations are identical. const StaticData &staticData = StaticData::Instance(); - const std::size_t nBestFactor = staticData.options().nbest.factor; + const std::size_t nBestFactor = staticData.options()->nbest.factor; std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor; // Extract the derivations. diff --git a/moses/Syntax/KBestExtractor.cpp b/moses/Syntax/KBestExtractor.cpp index 741d8ce82..21d15cd78 100644 --- a/moses/Syntax/KBestExtractor.cpp +++ b/moses/Syntax/KBestExtractor.cpp @@ -75,7 +75,7 @@ void KBestExtractor::Extract( // Generate the target-side yield of the derivation d. Phrase KBestExtractor::GetOutputPhrase(const Derivation &d) { - FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor; + FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; Phrase ret(ARRAY_SIZE_INCR); diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp index e30e818cf..27c5beda8 100644 --- a/moses/Syntax/Manager.cpp +++ b/moses/Syntax/Manager.cpp @@ -1,11 +1,8 @@ -#include "Manager.h" - #include <sstream> - -#include "moses/OutputCollector.h" -#include "moses/StaticData.h" - +#include "Manager.h" #include "PVertex.h" +#include "moses/OutputCollector.h" +#include "moses/Util.h" namespace Moses { @@ -26,12 +23,12 @@ void Manager::OutputBest(OutputCollector *collector) const const SHyperedge *best = GetBestSHyperedge(); if (best == NULL) { VERBOSE(1, "NO BEST TRANSLATION" << std::endl); - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << "0 "; } out << '\n'; } else { - if (options().output.ReportHypoScore) { + if (options()->output.ReportHypoScore) { out << best->label.futureScore << " "; } Phrase yield = GetOneBestTargetYield(*best); @@ -40,7 +37,7 @@ void Manager::OutputBest(OutputCollector *collector) const "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); yield.RemoveWord(0); yield.RemoveWord(yield.GetSize()-1); - out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); + out << yield.GetStringRep(options()->output.factor_order); out << '\n'; } collector->Write(m_source.GetTranslationId(), out.str()); @@ -51,8 +48,8 @@ void Manager::OutputNBest(OutputCollector *collector) const if (collector) { long translationId = m_source.GetTranslationId(); KBestExtractor::KBestVec nBestList; - ExtractKBest(options().nbest.nbest_size, nBestList, - options().nbest.only_distinct); + ExtractKBest(options()->nbest.nbest_size, nBestList, + options()->nbest.only_distinct); OutputNBestList(collector, nBestList, translationId); } } @@ -76,10 +73,7 @@ void Manager::OutputNBestList(OutputCollector *collector, const KBestExtractor::KBestVec &nBestList, long translationId) const { - const StaticData &staticData = StaticData::Instance(); - - const std::vector<FactorType> &outputFactorOrder = - staticData.GetOutputFactorOrder(); + const std::vector<FactorType> &outputFactorOrder = options()->output.factor_order; std::ostringstream out; @@ -89,8 +83,8 @@ void Manager::OutputNBestList(OutputCollector *collector, FixPrecision(out); } - bool includeWordAlignment = staticData.options().nbest.include_alignment_info; - bool PrintNBestTrees = staticData.options().nbest.print_trees; // PrintNBestTrees(); + bool includeWordAlignment = options()->nbest.include_alignment_info; + bool PrintNBestTrees = options()->nbest.print_trees; // PrintNBestTrees(); for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); p != nBestList.end(); ++p) { @@ -109,7 +103,7 @@ void Manager::OutputNBestList(OutputCollector *collector, out << translationId << " ||| "; OutputSurface(out, outputPhrase); // , outputFactorOrder, false); out << " ||| "; - bool with_labels = options().nbest.include_feature_labels; + bool with_labels = options()->nbest.include_feature_labels; derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; diff --git a/moses/Syntax/RuleTableFF.cpp b/moses/Syntax/RuleTableFF.cpp index fd203b2fc..beee34a41 100644 --- a/moses/Syntax/RuleTableFF.cpp +++ b/moses/Syntax/RuleTableFF.cpp @@ -1,6 +1,5 @@ #include "RuleTableFF.h" - -#include "moses/StaticData.h" +#include "moses/parameters/AllOptions.h" #include "moses/Syntax/F2S/HyperTree.h" #include "moses/Syntax/F2S/HyperTreeLoader.h" #include "moses/Syntax/S2T/RuleTrieCYKPlus.h" @@ -8,7 +7,6 @@ #include "moses/Syntax/S2T/RuleTrieScope3.h" #include "moses/Syntax/T2S/RuleTrie.h" #include "moses/Syntax/T2S/RuleTrieLoader.h" - namespace Moses { namespace Syntax @@ -26,37 +24,35 @@ RuleTableFF::RuleTableFF(const std::string &line) s_instances.push_back(this); } -void RuleTableFF::Load() +void RuleTableFF::Load(Moses::AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); - const StaticData &staticData = StaticData::Instance(); - if (staticData.options().search.algo == SyntaxF2S || - staticData.options().search.algo == SyntaxT2S) { + if (opts->search.algo == SyntaxF2S || opts->search.algo == SyntaxT2S) { F2S::HyperTree *trie = new F2S::HyperTree(this); F2S::HyperTreeLoader loader; - loader.Load(m_input, m_output, m_filePath, *this, *trie, - m_sourceTerminalSet); + loader.Load(*opts, m_input, m_output, m_filePath, *this, *trie, m_sourceTerminalSet); m_table = trie; - } else if (staticData.options().search.algo == SyntaxS2T) { - S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm(); + } else if (opts->search.algo == SyntaxS2T) { + S2TParsingAlgorithm algorithm = opts->syntax.s2t_parsing_algo; // staticData.GetS2TParsingAlgorithm(); if (algorithm == RecursiveCYKPlus) { S2T::RuleTrieCYKPlus *trie = new S2T::RuleTrieCYKPlus(this); S2T::RuleTrieLoader loader; - loader.Load(m_input, m_output, m_filePath, *this, *trie); + loader.Load(*opts,m_input, m_output, m_filePath, *this, *trie); m_table = trie; } else if (algorithm == Scope3) { S2T::RuleTrieScope3 *trie = new S2T::RuleTrieScope3(this); S2T::RuleTrieLoader loader; - loader.Load(m_input, m_output, m_filePath, *this, *trie); + loader.Load(*opts, m_input, m_output, m_filePath, *this, *trie); m_table = trie; } else { UTIL_THROW2("ERROR: unhandled S2T parsing algorithm"); } - } else if (staticData.options().search.algo == SyntaxT2S_SCFG) { + } else if (opts->search.algo == SyntaxT2S_SCFG) { T2S::RuleTrie *trie = new T2S::RuleTrie(this); T2S::RuleTrieLoader loader; - loader.Load(m_input, m_output, m_filePath, *this, *trie); + loader.Load(*opts, m_input, m_output, m_filePath, *this, *trie); m_table = trie; } else { UTIL_THROW2( diff --git a/moses/Syntax/RuleTableFF.h b/moses/Syntax/RuleTableFF.h index 25e7d8428..9f394373a 100644 --- a/moses/Syntax/RuleTableFF.h +++ b/moses/Syntax/RuleTableFF.h @@ -9,7 +9,7 @@ namespace Moses class ChartParser; class ChartCellCollectionBase; - +class AllOptions; namespace Syntax { @@ -27,7 +27,7 @@ public: // FIXME Delete m_table? ~RuleTableFF() {} - void Load(); + void Load(AllOptions::ptr const& opts); const RuleTable *GetTable() const { return m_table; diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index afb1f04c6..6bfc4a61c 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -163,9 +163,9 @@ void Manager<Parser>::Decode() const StaticData &staticData = StaticData::Instance(); // Get various pruning-related constants. - const std::size_t popLimit = staticData.options().cube.pop_limit; + const std::size_t popLimit = staticData.options()->cube.pop_limit; const std::size_t ruleLimit = staticData.GetRuleLimit(); - const std::size_t stackLimit = staticData.options().search.stack_size; + const std::size_t stackLimit = staticData.options()->search.stack_size; // Initialise the PChart and SChart. InitializeCharts(); @@ -303,7 +303,7 @@ void Manager<Parser>::ExtractKBest( // with 0 being 'unlimited.' This actually sets a large-ish limit in case // too many translations are identical. const StaticData &staticData = StaticData::Instance(); - const std::size_t nBestFactor = staticData.options().nbest.factor; + const std::size_t nBestFactor = staticData.options()->nbest.factor; std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor; // Extract the derivations. diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h index ab9e3b07f..5b67080ec 100644 --- a/moses/Syntax/S2T/OovHandler-inl.h +++ b/moses/Syntax/S2T/OovHandler-inl.h @@ -52,11 +52,12 @@ Phrase *OovHandler<RuleTrie>::SynthesizeSourcePhrase(const Word &sourceWord) } template<typename RuleTrie> -Word *OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr) +Word * +OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr) { Word *targetLhs = new Word(true); targetLhs->CreateFromString(Output, - StaticData::Instance().GetOutputFactorOrder(), + StaticData::Instance().options()->output.factor_order, lhsStr, true); UTIL_THROW_IF2(targetLhs->GetFactor(0) == NULL, "Null factor for target LHS"); return targetLhs; @@ -82,7 +83,7 @@ TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase( targetPhrase->EvaluateInIsolation(srcPhrase); targetPhrase->SetTargetLHS(&targetLhs); targetPhrase->SetAlignmentInfo("0-0"); - if (!SD.options().output.detailed_tree_transrep_filepath.empty() || + if (!SD.options()->output.detailed_tree_transrep_filepath.empty() || SD.GetTreeStructure() != NULL) { std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " + oov[0]->GetString().as_string() + " ]"; @@ -95,7 +96,7 @@ TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase( template<typename RuleTrie> bool OovHandler<RuleTrie>::ShouldDrop(const Word &oov) { - if (!StaticData::Instance().options().unk.drop) { + if (!StaticData::Instance().options()->unk.drop) { return false; } const Factor *f = oov[0]; // TODO hack. shouldn't know which factor is surface diff --git a/moses/Syntax/S2T/RuleTrieLoader.cpp b/moses/Syntax/S2T/RuleTrieLoader.cpp index 12181f2a3..e4dd8f5c8 100644 --- a/moses/Syntax/S2T/RuleTrieLoader.cpp +++ b/moses/Syntax/S2T/RuleTrieLoader.cpp @@ -27,6 +27,7 @@ #include "util/exception.hh" #include "RuleTrie.h" +#include "moses/parameters/AllOptions.h" namespace Moses { @@ -35,7 +36,8 @@ namespace Syntax namespace S2T { -bool RuleTrieLoader::Load(const std::vector<FactorType> &input, +bool RuleTrieLoader::Load(Moses::AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &ff, @@ -43,7 +45,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input, { PrintUserTime(std::string("Start loading text phrase table. Moses format")); - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); std::size_t count = 0; @@ -76,7 +78,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input, } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos); - if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { + if (isLHSEmpty && !opts.unk.word_deletion_enabled) { // staticData.IsWordDeletionEnabled()) { TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } diff --git a/moses/Syntax/S2T/RuleTrieLoader.h b/moses/Syntax/S2T/RuleTrieLoader.h index 855f1d2a8..0165a0e5b 100644 --- a/moses/Syntax/S2T/RuleTrieLoader.h +++ b/moses/Syntax/S2T/RuleTrieLoader.h @@ -11,6 +11,8 @@ namespace Moses { +class AllOptions; + namespace Syntax { namespace S2T @@ -19,7 +21,8 @@ namespace S2T class RuleTrieLoader : public RuleTrieCreator { public: - bool Load(const std::vector<FactorType> &input, + bool Load(Moses::AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &, diff --git a/moses/Syntax/SHyperedge.cpp b/moses/Syntax/SHyperedge.cpp index 976b0f0e3..d554af231 100644 --- a/moses/Syntax/SHyperedge.cpp +++ b/moses/Syntax/SHyperedge.cpp @@ -11,7 +11,7 @@ namespace Syntax Phrase GetOneBestTargetYield(const SHyperedge &h) { - FactorType placeholderFactor = StaticData::Instance().options().input.placeholder_factor; + FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; Phrase ret(ARRAY_SIZE_INCR); diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp index f50f84629..6f3082654 100644 --- a/moses/Syntax/T2S/GlueRuleSynthesizer.cpp +++ b/moses/Syntax/T2S/GlueRuleSynthesizer.cpp @@ -3,7 +3,7 @@ #include <sstream> #include "moses/FF/UnknownWordPenaltyProducer.h" -#include "moses/StaticData.h" +#include <boost/scoped_ptr.hpp> namespace Moses { @@ -12,7 +12,9 @@ namespace Syntax namespace T2S { -void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node) +void +GlueRuleSynthesizer:: +SynthesizeRule(const InputTree::Node &node) { const Word &sourceLhs = node.pvertex.symbol; boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node)); @@ -22,7 +24,9 @@ void GlueRuleSynthesizer::SynthesizeRule(const InputTree::Node &node) tpc->Add(tp); } -Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node) +Phrase* +GlueRuleSynthesizer:: +SynthesizeSourcePhrase(const InputTree::Node &node) { Phrase *phrase = new Phrase(node.children.size()); for (std::vector<InputTree::Node*>::const_iterator p = node.children.begin(); @@ -37,11 +41,10 @@ Phrase *GlueRuleSynthesizer::SynthesizeSourcePhrase(const InputTree::Node &node) return phrase; } -TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( - const InputTree::Node &node, const Phrase &sourceRhs) +TargetPhrase* +GlueRuleSynthesizer:: +SynthesizeTargetPhrase(const InputTree::Node &node, const Phrase &sourceRhs) { - const StaticData &staticData = StaticData::Instance(); - const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); @@ -51,7 +54,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( for (std::size_t i = 0; i < node.children.size(); ++i) { const Word &symbol = node.children[i]->pvertex.symbol; if (symbol.IsNonTerminal()) { - targetPhrase->AddWord(staticData.GetOutputDefaultNonTerminal()); + targetPhrase->AddWord(m_output_default_nonterminal); } else { // TODO Check this Word &targetWord = targetPhrase->AddWord(); @@ -65,7 +68,7 @@ TargetPhrase *GlueRuleSynthesizer::SynthesizeTargetPhrase( float score = LOWEST_SCORE; targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score); targetPhrase->EvaluateInIsolation(sourceRhs); - Word *targetLhs = new Word(staticData.GetOutputDefaultNonTerminal()); + Word *targetLhs = new Word(m_output_default_nonterminal); targetPhrase->SetTargetLHS(targetLhs); targetPhrase->SetAlignmentInfo(alignmentSS.str()); diff --git a/moses/Syntax/T2S/GlueRuleSynthesizer.h b/moses/Syntax/T2S/GlueRuleSynthesizer.h index 3e51c08a4..3930db8c4 100644 --- a/moses/Syntax/T2S/GlueRuleSynthesizer.h +++ b/moses/Syntax/T2S/GlueRuleSynthesizer.h @@ -16,8 +16,12 @@ namespace T2S class GlueRuleSynthesizer : public RuleTrieCreator { + Word m_output_default_nonterminal; public: - GlueRuleSynthesizer(RuleTrie &trie) : m_ruleTrie(trie) {} + GlueRuleSynthesizer(RuleTrie &trie, Word dflt_nonterm) + : m_ruleTrie(trie) + , m_output_default_nonterminal(dflt_nonterm) + {} // Synthesize the minimal, montone rule that can be applied to the given node // and add it to the rule trie. diff --git a/moses/Syntax/T2S/InputTreeBuilder.cpp b/moses/Syntax/T2S/InputTreeBuilder.cpp index 927c6b4e3..140b578d6 100644 --- a/moses/Syntax/T2S/InputTreeBuilder.cpp +++ b/moses/Syntax/T2S/InputTreeBuilder.cpp @@ -9,8 +9,8 @@ namespace Syntax namespace T2S { -InputTreeBuilder::InputTreeBuilder() - : m_outputFactorOrder(StaticData::Instance().GetOutputFactorOrder()) +InputTreeBuilder::InputTreeBuilder(std::vector<FactorType> const& oFactors) + : m_outputFactorOrder(oFactors) { } diff --git a/moses/Syntax/T2S/InputTreeBuilder.h b/moses/Syntax/T2S/InputTreeBuilder.h index d789d06a2..e14f20a6b 100644 --- a/moses/Syntax/T2S/InputTreeBuilder.h +++ b/moses/Syntax/T2S/InputTreeBuilder.h @@ -17,7 +17,7 @@ namespace T2S class InputTreeBuilder { public: - InputTreeBuilder(); + InputTreeBuilder(std::vector<FactorType> const& oFactors); // Constructs a Moses::T2S::InputTree given a Moses::TreeInput and a label // for the top-level node (which covers <s> and </s>). diff --git a/moses/Syntax/T2S/Manager-inl.h b/moses/Syntax/T2S/Manager-inl.h index 873d36d7c..ec97e76de 100644 --- a/moses/Syntax/T2S/Manager-inl.h +++ b/moses/Syntax/T2S/Manager-inl.h @@ -33,7 +33,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask) { if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) { // Construct the InputTree. - InputTreeBuilder builder; + InputTreeBuilder builder(options()->output.factor_order); builder.Build(*p, "Q", m_inputTree); } else { UTIL_THROW2("ERROR: T2S::Manager requires input to be a tree"); @@ -97,9 +97,9 @@ void Manager<RuleMatcher>::Decode() const StaticData &staticData = StaticData::Instance(); // Get various pruning-related constants. - const std::size_t popLimit = this->options().cube.pop_limit; + const std::size_t popLimit = this->options()->cube.pop_limit; const std::size_t ruleLimit = staticData.GetRuleLimit(); - const std::size_t stackLimit = this->options().search.stack_size; + const std::size_t stackLimit = this->options()->search.stack_size; // Initialize the stacks. InitializeStacks(); @@ -111,7 +111,8 @@ void Manager<RuleMatcher>::Decode() F2S::RuleMatcherCallback callback(m_stackMap, ruleLimit); // Create a glue rule synthesizer. - GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie); + Word dflt_nonterm = options()->syntax.output_default_non_terminal; + GlueRuleSynthesizer glueRuleSynthesizer(*m_glueRuleTrie, dflt_nonterm); // Visit each node of the input tree in post-order. for (std::vector<InputTree::Node>::const_iterator p = @@ -215,7 +216,7 @@ void Manager<RuleMatcher>::ExtractKBest( // with 0 being 'unlimited.' This actually sets a large-ish limit in case // too many translations are identical. const StaticData &staticData = StaticData::Instance(); - const std::size_t nBestFactor = staticData.options().nbest.factor; + const std::size_t nBestFactor = staticData.options()->nbest.factor; std::size_t numDerivations = (nBestFactor == 0) ? k*1000 : k*nBestFactor; // Extract the derivations. diff --git a/moses/Syntax/T2S/RuleTrieLoader.cpp b/moses/Syntax/T2S/RuleTrieLoader.cpp index bcfcc819e..026237128 100644 --- a/moses/Syntax/T2S/RuleTrieLoader.cpp +++ b/moses/Syntax/T2S/RuleTrieLoader.cpp @@ -26,6 +26,7 @@ #include "util/exception.hh" #include "RuleTrie.h" +#include "moses/parameters/AllOptions.h" namespace Moses { @@ -34,7 +35,8 @@ namespace Syntax namespace T2S { -bool RuleTrieLoader::Load(const std::vector<FactorType> &input, +bool RuleTrieLoader::Load(Moses::AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &ff, @@ -42,7 +44,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input, { PrintUserTime(std::string("Start loading text phrase table. Moses format")); - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); // const std::string &factorDelimiter = staticData.GetFactorDelimiter(); std::size_t count = 0; @@ -80,7 +82,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input, ++pipes; // counts bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos); - if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { + if (isLHSEmpty && !opts.unk.word_deletion_enabled) { // staticData.IsWordDeletionEnabled()) { TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } diff --git a/moses/Syntax/T2S/RuleTrieLoader.h b/moses/Syntax/T2S/RuleTrieLoader.h index a7465d8eb..275e6b9ef 100644 --- a/moses/Syntax/T2S/RuleTrieLoader.h +++ b/moses/Syntax/T2S/RuleTrieLoader.h @@ -11,6 +11,7 @@ namespace Moses { +class AllOptions; namespace Syntax { namespace T2S @@ -19,7 +20,8 @@ namespace T2S class RuleTrieLoader : public RuleTrieCreator { public: - bool Load(const std::vector<FactorType> &input, + bool Load(Moses::AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, const RuleTableFF &, diff --git a/moses/TabbedSentence.cpp b/moses/TabbedSentence.cpp index a94674cef..6339ad596 100644 --- a/moses/TabbedSentence.cpp +++ b/moses/TabbedSentence.cpp @@ -47,9 +47,7 @@ void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder int TabbedSentence:: -Read(std::istream& in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts) +Read(std::istream& in) { TabbedColumns allColumns; @@ -60,17 +58,14 @@ Read(std::istream& in, boost::split(allColumns, line, boost::is_any_of("\t")); if(allColumns.size() < 2) { - std::stringstream dummyStream; - dummyStream << line << std::endl; - return Sentence::Read(dummyStream, factorOrder, opts); + Sentence::init(line); } else { m_columns.resize(allColumns.size() - 1); std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin()); - - std::stringstream dummyStream; - dummyStream << allColumns[0] << std::endl; - return Sentence::Read(dummyStream, factorOrder, opts); + Sentence::init(allColumns[0]); } + return 1; + } } diff --git a/moses/TabbedSentence.h b/moses/TabbedSentence.h index 02da4cae0..973fa4563 100644 --- a/moses/TabbedSentence.h +++ b/moses/TabbedSentence.h @@ -53,7 +53,7 @@ class TabbedSentence : public Sentence { public: - TabbedSentence() : Sentence() {} + TabbedSentence(AllOptions::ptr const& opts) : Sentence(opts) {} ~TabbedSentence() {} InputTypeEnum GetType() const { @@ -68,8 +68,7 @@ public: , const std::string &tabbedString); virtual int - Read(std::istream& in,const std::vector<FactorType>& factorOrder, - AllOptions const& opts); + Read(std::istream& in); const TabbedColumns& GetColumns() const { return m_columns; diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index f416fdaa7..7cd3afc3b 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -52,7 +52,7 @@ TargetPhrase::TargetPhrase( std::string out_string, const PhraseDictionary *pt) //ACAT const StaticData &staticData = StaticData::Instance(); // XXX should this really be InputFactorOrder??? - CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, + CreateFromString(Output, staticData.options()->input.factor_order, out_string, // staticData.GetFactorDelimiter(), // eliminated [UG] NULL); } @@ -72,8 +72,7 @@ TargetPhrase::TargetPhrase(ttasksptr& ttask, std::string out_string, const Phras //ACAT const StaticData &staticData = StaticData::Instance(); // XXX should this really be InputFactorOrder??? - CreateFromString(Output, staticData.GetInputFactorOrder(), out_string, - // staticData.GetFactorDelimiter(), // eliminated [UG] + CreateFromString(Output, ttask->options()->input.factor_order, out_string, NULL); } diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp index e640ea6d3..04f8db219 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp @@ -51,6 +51,7 @@ ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk( size_t sourceSize = parser.GetSize(); m_expandableDottedRuleListVec.resize(sourceSize); + m_input_default_nonterminal = parser.options()->syntax.input_default_non_terminal; for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) { DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode()); @@ -81,7 +82,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( ChartParserCallback &outColl) { const StaticData &staticData = StaticData::Instance(); - const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal(); + // const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal(); const Range &range = inputPath.GetWordsRange(); size_t relEndPos = range.GetEndPos() - range.GetStartPos(); @@ -178,7 +179,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( if (m_dictionary.m_maxSpanDefault != NOT_FOUND) { // for Hieu's source syntax - bool isSourceSyntaxNonTerm = sourceLHS != defaultSourceNonTerm; + bool isSourceSyntaxNonTerm = sourceLHS != m_input_default_nonterminal; // defaultSourceNonTerm; size_t nonTermNumWordsCovered = endPos - startPos + 1; doSearch = isSourceSyntaxNonTerm ? diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h index dee9cc202..5402d5a54 100644 --- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h +++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h @@ -57,6 +57,7 @@ private: std::vector<DottedRuleStackOnDisk*> m_expandableDottedRuleListVec; std::map<uint64_t, TargetPhraseCollection::shared_ptr > m_cache; std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode; + Word m_input_default_nonterminal; }; } // namespace Moses diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 2992cccdc..e30f5760f 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -57,8 +57,9 @@ PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line) ReadParameters(); } -void PhraseDictionaryCompact::Load() +void PhraseDictionaryCompact::Load(AllOptions::ptr const& opts) { + m_options = opts; const StaticData &staticData = StaticData::Instance(); SetFeaturesToApply(); diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h index f1c3dd1d8..ec11a1a4a 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h @@ -67,7 +67,7 @@ public: ~PhraseDictionaryCompact(); - void Load(); + void Load(AllOptions::ptr const& opts); TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &source) const; TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const; diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp index 11346f0b6..cf051ff36 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.cpp @@ -60,8 +60,9 @@ PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased() Clear(); } -void PhraseDictionaryDynamicCacheBased::Load() +void PhraseDictionaryDynamicCacheBased::Load(AllOptions::ptr const& opts) { + m_options = opts; VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl); SetFeaturesToApply(); @@ -336,14 +337,16 @@ void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseStr //target targetPhrase.Clear(); VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL); + targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order, + targetPhraseString, /*factorDelimiter,*/ NULL); VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl); //TODO: Would be better to reuse source phrases, but ownership has to be //consistent across phrase table implementations sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL); + sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, + sourcePhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); ClearEntries(sourcePhrase, targetPhrase); @@ -433,7 +436,8 @@ void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> ent sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), *it, /*factorDelimiter,*/ NULL); + sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, + *it, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); ClearSource(sourcePhrase); @@ -526,14 +530,15 @@ void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, s //target targetPhrase.Clear(); VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); - targetPhrase.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetPhraseString, /*factorDelimiter,*/ NULL); + targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order, + targetPhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl); //TODO: Would be better to reuse source phrases, but ownership has to be //consistent across phrase table implementations sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); - sourcePhrase.CreateFromString(Input, staticData.GetInputFactorOrder(), sourcePhraseString, /*factorDelimiter,*/ NULL); + sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, sourcePhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); diff --git a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h index 44488d719..09527debc 100644 --- a/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h +++ b/moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h @@ -108,7 +108,7 @@ public: return *s_instance; } - void Load(); + void Load(AllOptions::ptr const& opts); void Load(const std::string files); TargetPhraseCollection::shared_ptr diff --git a/moses/TranslationModel/PhraseDictionaryGroup.cpp b/moses/TranslationModel/PhraseDictionaryGroup.cpp index 160321ab1..125ddaa64 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.cpp +++ b/moses/TranslationModel/PhraseDictionaryGroup.cpp @@ -73,8 +73,9 @@ void PhraseDictionaryGroup::SetParameter(const string& key, const string& value) } } -void PhraseDictionaryGroup::Load() +void PhraseDictionaryGroup::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); m_pdFeature.push_back(const_cast<PhraseDictionaryGroup*>(this)); size_t numScoreComponents = 0; diff --git a/moses/TranslationModel/PhraseDictionaryGroup.h b/moses/TranslationModel/PhraseDictionaryGroup.h index e483269c0..b4909fc72 100644 --- a/moses/TranslationModel/PhraseDictionaryGroup.h +++ b/moses/TranslationModel/PhraseDictionaryGroup.h @@ -63,7 +63,7 @@ class PhraseDictionaryGroup: public PhraseDictionary public: PhraseDictionaryGroup(const std::string& line); - void Load(); + void Load(AllOptions::ptr const& opts); TargetPhraseCollection::shared_ptr CreateTargetPhraseCollection(const ttasksptr& ttask, const Phrase& src) const; diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp index e8ffa9d1d..4100f9b07 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp @@ -79,8 +79,9 @@ PhraseDictionaryMultiModel:: ~PhraseDictionaryMultiModel() { } -void PhraseDictionaryMultiModel::Load() +void PhraseDictionaryMultiModel::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); for(size_t i = 0; i < m_numModels; ++i) { diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.h b/moses/TranslationModel/PhraseDictionaryMultiModel.h index bdb399ddb..4a516ef6e 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.h +++ b/moses/TranslationModel/PhraseDictionaryMultiModel.h @@ -70,7 +70,7 @@ public: PhraseDictionaryMultiModel(const std::string &line); PhraseDictionaryMultiModel(int type, const std::string &line); ~PhraseDictionaryMultiModel(); - void Load(); + void Load(AllOptions::ptr const& opts); virtual void CollectSufficientStatistics diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp index bd2552d9d..dbc99cbbf 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp @@ -83,8 +83,9 @@ PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts() } -void PhraseDictionaryMultiModelCounts::Load() +void PhraseDictionaryMultiModelCounts::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); for(size_t i = 0; i < m_numModels; ++i) { diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h index 5f59d826b..65a09fa81 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.h @@ -79,7 +79,7 @@ class PhraseDictionaryMultiModelCounts: public PhraseDictionaryMultiModel public: PhraseDictionaryMultiModelCounts(const std::string &line); ~PhraseDictionaryMultiModelCounts(); - void Load(); + void Load(AllOptions::ptr const& opts); TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const; void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const; float GetTargetCount(const Phrase& target, size_t modelIndex) const; diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp index 13ca1d1d1..3d1664822 100644 --- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp +++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp @@ -22,8 +22,9 @@ PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::stri m_outputLang.empty(), "Must specify all arguments"); } -void PhraseDictionaryTransliteration::Load() +void PhraseDictionaryTransliteration::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); } diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.h b/moses/TranslationModel/PhraseDictionaryTransliteration.h index 9d2b0d768..d4a5d0919 100644 --- a/moses/TranslationModel/PhraseDictionaryTransliteration.h +++ b/moses/TranslationModel/PhraseDictionaryTransliteration.h @@ -18,7 +18,7 @@ class PhraseDictionaryTransliteration : public PhraseDictionary public: PhraseDictionaryTransliteration(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); virtual void CleanUpAfterSentenceProcessing(const InputType& source); diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp index d2007e523..c375a2fcf 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.cpp @@ -37,8 +37,9 @@ PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor() { } -void PhraseDictionaryTreeAdaptor::Load() +void PhraseDictionaryTreeAdaptor::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); } diff --git a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h index 90c8f12ff..f3b91a553 100644 --- a/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h +++ b/moses/TranslationModel/PhraseDictionaryTreeAdaptor.h @@ -46,7 +46,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary public: PhraseDictionaryTreeAdaptor(const std::string &line); virtual ~PhraseDictionaryTreeAdaptor(); - void Load(); + void Load(AllOptions::ptr const& opts); // enable/disable caching // you enable caching if you request the target candidates for a source phrase multiple times diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT/ProbingPT.cpp index 391391ab7..aa7cc1efe 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp +++ b/moses/TranslationModel/ProbingPT/ProbingPT.cpp @@ -25,8 +25,9 @@ ProbingPT::~ProbingPT() delete m_engine; } -void ProbingPT::Load() +void ProbingPT::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); m_engine = new QueryEngine(m_filePath.c_str()); @@ -76,7 +77,7 @@ void ProbingPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQue InputPath &inputPath = **iter; const Phrase &sourcePhrase = inputPath.GetPhrase(); - if (sourcePhrase.GetSize() > StaticData::Instance().GetMaxPhraseLength()) { + if (sourcePhrase.GetSize() > StaticData::Instance().options()->search.max_phrase_length) { continue; } diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.h b/moses/TranslationModel/ProbingPT/ProbingPT.h index 48f4441f9..4e7ab02c6 100644 --- a/moses/TranslationModel/ProbingPT/ProbingPT.h +++ b/moses/TranslationModel/ProbingPT/ProbingPT.h @@ -21,7 +21,7 @@ public: ProbingPT(const std::string &line); ~ProbingPT(); - void Load(); + void Load(AllOptions::ptr const& opts); void InitializeForInput(ttasksptr const& ttask); diff --git a/moses/TranslationModel/RuleTable/Loader.h b/moses/TranslationModel/RuleTable/Loader.h index b3aed9dc0..3ff8f2cc4 100644 --- a/moses/TranslationModel/RuleTable/Loader.h +++ b/moses/TranslationModel/RuleTable/Loader.h @@ -21,6 +21,7 @@ #include "Trie.h" #include "moses/TypeDef.h" +#include "moses/parameters/AllOptions.h" #include <istream> #include <vector> @@ -35,7 +36,8 @@ class RuleTableLoader public: virtual ~RuleTableLoader() {} - virtual bool Load(const std::vector<FactorType> &input, + virtual bool Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.cpp b/moses/TranslationModel/RuleTable/LoaderCompact.cpp index 49e469e4f..f69616525 100644 --- a/moses/TranslationModel/RuleTable/LoaderCompact.cpp +++ b/moses/TranslationModel/RuleTable/LoaderCompact.cpp @@ -32,7 +32,8 @@ namespace Moses { -bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input, +bool RuleTableLoaderCompact::Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t /* tableLimit */, diff --git a/moses/TranslationModel/RuleTable/LoaderCompact.h b/moses/TranslationModel/RuleTable/LoaderCompact.h index 26e19fce6..05a99cf98 100644 --- a/moses/TranslationModel/RuleTable/LoaderCompact.h +++ b/moses/TranslationModel/RuleTable/LoaderCompact.h @@ -36,7 +36,8 @@ class RuleTableTrie; class RuleTableLoaderCompact : public RuleTableLoader { public: - bool Load(const std::vector<FactorType> &input, + bool Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, diff --git a/moses/TranslationModel/RuleTable/LoaderFactory.cpp b/moses/TranslationModel/RuleTable/LoaderFactory.cpp index 5569f952c..e7ec1d48d 100644 --- a/moses/TranslationModel/RuleTable/LoaderFactory.cpp +++ b/moses/TranslationModel/RuleTable/LoaderFactory.cpp @@ -35,8 +35,9 @@ namespace Moses // Determines the rule table type by peeking inside the file then creates // a suitable RuleTableLoader object. -std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create( - const std::string &path) +std::auto_ptr<RuleTableLoader> +RuleTableLoaderFactory:: +Create(const std::string &path) { InputFileStream input(path); std::string line; @@ -51,9 +52,7 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create( std::cerr << "Unsupported compact rule table format: " << tokens[0]; return std::auto_ptr<RuleTableLoader>(); } else if (tokens[0] == "[X]" && tokens[1] == "|||") { - return std::auto_ptr<RuleTableLoader>(new - RuleTableLoaderHiero()); - + return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderHiero()); } return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard()); diff --git a/moses/TranslationModel/RuleTable/LoaderHiero.cpp b/moses/TranslationModel/RuleTable/LoaderHiero.cpp index 81289d9b2..eb81d5677 100644 --- a/moses/TranslationModel/RuleTable/LoaderHiero.cpp +++ b/moses/TranslationModel/RuleTable/LoaderHiero.cpp @@ -14,13 +14,14 @@ using namespace std; namespace Moses { -bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input, +bool RuleTableLoaderHiero::Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, RuleTableTrie &ruleTable) { - bool ret = RuleTableLoaderStandard::Load(HieroFormat + bool ret = RuleTableLoaderStandard::Load(opts, HieroFormat ,input, output ,inFile ,tableLimit diff --git a/moses/TranslationModel/RuleTable/LoaderHiero.h b/moses/TranslationModel/RuleTable/LoaderHiero.h index 099787281..dfa405c44 100644 --- a/moses/TranslationModel/RuleTable/LoaderHiero.h +++ b/moses/TranslationModel/RuleTable/LoaderHiero.h @@ -18,7 +18,8 @@ namespace Moses class RuleTableLoaderHiero : public RuleTableLoaderStandard { public: - bool Load(const std::vector<FactorType> &input, + bool Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/moses/TranslationModel/RuleTable/LoaderStandard.cpp index 2c4d6f3e9..c84286588 100644 --- a/moses/TranslationModel/RuleTable/LoaderStandard.cpp +++ b/moses/TranslationModel/RuleTable/LoaderStandard.cpp @@ -47,19 +47,17 @@ using namespace boost::algorithm; namespace Moses { -bool RuleTableLoaderStandard::Load(const std::vector<FactorType> &input - , const std::vector<FactorType> &output - , const std::string &inFile - , size_t tableLimit - , RuleTableTrie &ruleTable) -{ - bool ret = Load(MosesFormat - ,input, output - ,inFile - ,tableLimit - ,ruleTable); - return ret; +bool +RuleTableLoaderStandard:: +Load(AllOptions const& opts + , const std::vector<FactorType> &input + , const std::vector<FactorType> &output + , const std::string &inFile + , size_t tableLimit + , RuleTableTrie &ruleTable) +{ + return Load(opts, MosesFormat,input, output ,inFile ,tableLimit ,ruleTable); } void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t, size_t> > &ntAlign) @@ -142,7 +140,7 @@ void ReformatHieroRule(const string &lineOrig, string &out) out = ret.str(); } -bool RuleTableLoaderStandard::Load(FormatType format +bool RuleTableLoaderStandard::Load(AllOptions const& opts, FormatType format , const std::vector<FactorType> &input , const std::vector<FactorType> &output , const std::string &inFile @@ -151,7 +149,7 @@ bool RuleTableLoaderStandard::Load(FormatType format { PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses":"Hiero") + " format"); - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); string lineOrig; size_t count = 0; @@ -192,7 +190,7 @@ bool RuleTableLoaderStandard::Load(FormatType format } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos); - if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { + if (isLHSEmpty && !opts.unk.word_deletion_enabled) { TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } diff --git a/moses/TranslationModel/RuleTable/LoaderStandard.h b/moses/TranslationModel/RuleTable/LoaderStandard.h index b47f7c00b..4e3977f5e 100644 --- a/moses/TranslationModel/RuleTable/LoaderStandard.h +++ b/moses/TranslationModel/RuleTable/LoaderStandard.h @@ -29,14 +29,16 @@ class RuleTableLoaderStandard : public RuleTableLoader { protected: - bool Load(FormatType format, + bool Load(AllOptions const& opts, + FormatType format, const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, RuleTableTrie &); public: - bool Load(const std::vector<FactorType> &input, + bool Load(AllOptions const& opts, + const std::vector<FactorType> &input, const std::vector<FactorType> &output, const std::string &inFile, size_t tableLimit, diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp index b1f684124..cb322a830 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.cpp @@ -32,8 +32,9 @@ PhraseDictionaryALSuffixArray::PhraseDictionaryALSuffixArray(const std::string & ReadParameters(); } -void PhraseDictionaryALSuffixArray::Load() +void PhraseDictionaryALSuffixArray::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); } @@ -47,11 +48,11 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(ttasksptr const& ttask) std::auto_ptr<RuleTableLoader> loader = RuleTableLoaderFactory::Create(grammarFile); - bool ret = loader->Load(m_input, m_output, grammarFile, m_tableLimit, - *this); + AllOptions::ptr const& opts = ttask->options(); + bool ret = loader->Load(*opts, m_input, m_output, grammarFile, m_tableLimit, *this); - UTIL_THROW_IF2(!ret, - "Rules not successfully loaded for sentence id " << translationId); + UTIL_THROW_IF2(!ret, "Rules not successfully loaded for sentence id " + << translationId); } void PhraseDictionaryALSuffixArray::CleanUpAfterSentenceProcessing(const InputType &source) diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h index 79c8e11c5..ae4abfeaf 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h @@ -23,7 +23,7 @@ class PhraseDictionaryALSuffixArray : public PhraseDictionaryMemory { public: PhraseDictionaryALSuffixArray(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); void InitializeForInput(ttasksptr const& ttask); void CleanUpAfterSentenceProcessing(const InputType& source); diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp index 435478d35..50dd4bb8a 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp @@ -93,8 +93,9 @@ PhraseDictionaryFuzzyMatch::~PhraseDictionaryFuzzyMatch() delete m_FuzzyMatchWrapper; } -void PhraseDictionaryFuzzyMatch::Load() +void PhraseDictionaryFuzzyMatch::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]); @@ -241,7 +242,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask) , &alignString = tokens[3]; bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos); - if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { + if (isLHSEmpty && !ttask->options()->unk.word_deletion_enabled) { TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h index 5c710021a..e5a5f0704 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h @@ -44,7 +44,7 @@ class PhraseDictionaryFuzzyMatch : public PhraseDictionary public: PhraseDictionaryFuzzyMatch(const std::string &line); ~PhraseDictionaryFuzzyMatch(); - void Load(); + void Load(AllOptions::ptr const& opts); const PhraseDictionaryNodeMemory &GetRootNode(long translationId) const; diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index 3eba6e5de..adb3f36c1 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -47,8 +47,9 @@ PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk() { } -void PhraseDictionaryOnDisk::Load() +void PhraseDictionaryOnDisk::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); } diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h index c3449e972..1bd357d05 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h @@ -67,7 +67,7 @@ protected: public: PhraseDictionaryOnDisk(const std::string &line); ~PhraseDictionaryOnDisk(); - void Load(); + void Load(AllOptions::ptr const& opts); // PhraseDictionary impl virtual ChartRuleLookupManager *CreateRuleLookupManager( diff --git a/moses/TranslationModel/RuleTable/Trie.cpp b/moses/TranslationModel/RuleTable/Trie.cpp index 758b9a3cf..5f1bf99b4 100644 --- a/moses/TranslationModel/RuleTable/Trie.cpp +++ b/moses/TranslationModel/RuleTable/Trie.cpp @@ -34,8 +34,9 @@ RuleTableTrie::~RuleTableTrie() { } -void RuleTableTrie::Load() +void RuleTableTrie::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); std::auto_ptr<Moses::RuleTableLoader> loader = @@ -44,8 +45,7 @@ void RuleTableTrie::Load() throw runtime_error("Error: Loading " + m_filePath); } - bool ret = loader->Load(m_input, m_output, m_filePath, m_tableLimit, - *this); + bool ret = loader->Load(*opts, m_input, m_output, m_filePath, m_tableLimit, *this); if (!ret) { throw runtime_error("Error: Loading " + m_filePath); } diff --git a/moses/TranslationModel/RuleTable/Trie.h b/moses/TranslationModel/RuleTable/Trie.h index 76cbc16e5..7a9e12e8d 100644 --- a/moses/TranslationModel/RuleTable/Trie.h +++ b/moses/TranslationModel/RuleTable/Trie.h @@ -46,7 +46,7 @@ public: virtual ~RuleTableTrie(); - void Load(); + void Load(AllOptions::ptr const& opts); private: friend class RuleTableLoader; diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/SkeletonPT.cpp index 1b8e0cfa1..6b42212f9 100644 --- a/moses/TranslationModel/SkeletonPT.cpp +++ b/moses/TranslationModel/SkeletonPT.cpp @@ -12,8 +12,9 @@ SkeletonPT::SkeletonPT(const std::string &line) ReadParameters(); } -void SkeletonPT::Load() +void SkeletonPT::Load(AllOptions::ptr const& opts) { + m_options = opts; SetFeaturesToApply(); } diff --git a/moses/TranslationModel/SkeletonPT.h b/moses/TranslationModel/SkeletonPT.h index c18571080..443f1cc8e 100644 --- a/moses/TranslationModel/SkeletonPT.h +++ b/moses/TranslationModel/SkeletonPT.h @@ -16,7 +16,7 @@ class SkeletonPT : public PhraseDictionary public: SkeletonPT(const std::string &line); - void Load(); + void Load(AllOptions::ptr const& opts); void InitializeForInput(ttasksptr const& ttask); diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index a67951fd3..ed60771ae 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -417,9 +417,9 @@ namespace Moses void Mmsapt:: - Load() + Load(AllOptions::ptr const& opts) { - Load(true); + Load(opts, true); } void @@ -474,8 +474,9 @@ namespace Moses void Mmsapt:: - Load(bool with_checks) + Load(AllOptions::ptr const& opts, bool with_checks) { + m_options = opts; boost::unique_lock<boost::shared_mutex> lock(m_lock); // load feature functions (i.e., load underlying data bases, if any) BOOST_FOREACH(SPTR<pscorer>& ff, m_active_ff_fix) ff->load(); diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index 9dbd869df..4a8393c11 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -211,8 +211,8 @@ namespace Moses // Mmsapt(std::string const& description, std::string const& line); Mmsapt(std::string const& line); - void Load(); - void Load(bool with_checks); + void Load(AllOptions::ptr const& opts); + void Load(AllOptions::ptr const& opts, bool with_checks); size_t SetTableLimit(size_t limit); // returns the prior table limit std::string const& GetName() const; diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 1e729b6ad..a85144655 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -60,6 +60,7 @@ TranslationOptionCollection(ttasksptr const& ttask, , m_estimatedScores(src.GetSize()) , m_maxNoTransOptPerCoverage(maxNoTransOptPerCoverage) , m_translationOptionThreshold(translationOptionThreshold) + , m_max_phrase_length(ttask->options()->search.max_phrase_length) { // create 2-d vector size_t size = src.GetSize(); @@ -67,8 +68,7 @@ TranslationOptionCollection(ttasksptr const& ttask, m_collection.push_back( vector< TranslationOptionList >() ); size_t maxSize = size - sPos; - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); - maxSize = std::min(maxSize, maxSizePhrase); + maxSize = std::min(maxSize, m_max_phrase_length); for (size_t ePos = 0 ; ePos < maxSize ; ++ePos) { m_collection[sPos].push_back( TranslationOptionList() ); @@ -145,12 +145,14 @@ ProcessUnknownWord() } } - bool alwaysCreateDirectTranslationOption - = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); + // bool alwaysCreateDirectTranslationOption + // = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); + bool always = m_ttask.lock()->options()->unk.always_create_direct_transopt; + // create unknown words for 1 word coverage where we don't have any trans options for (size_t pos = 0 ; pos < size ; ++pos) { TranslationOptionList* fullList = GetTranslationOptionList(pos, pos); - if (!fullList || fullList->size() == 0 || alwaysCreateDirectTranslationOption) + if (!fullList || fullList->size() == 0 || always) ProcessUnknownWord(pos); } } @@ -192,7 +194,7 @@ ProcessOneUnknownWord(const InputPath &inputPath, size_t sourcePos, const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface const StringPiece s = f->GetString(); bool isEpsilon = (s=="" || s==EPSILON); - bool dropUnk = GetTranslationTask()->options().unk.drop; + bool dropUnk = GetTranslationTask()->options()->unk.drop; if (dropUnk) { isDigit = s.find_first_of("0123456789"); if (isDigit == string::npos) @@ -354,8 +356,8 @@ CreateTranslationOptions() // iterate over spans for (size_t sPos = 0 ; sPos < size; sPos++) { size_t maxSize = size - sPos; // don't go over end of sentence - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); - maxSize = std::min(maxSize, maxSizePhrase); + // size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + maxSize = std::min(maxSize, m_max_phrase_length); for (size_t ePos = sPos ; ePos < sPos + maxSize ; ePos++) { if (gidx && backoff && @@ -386,12 +388,12 @@ CreateTranslationOptionsForRange { typedef DecodeStepTranslation Tstep; typedef DecodeStepGeneration Gstep; - XmlInputType xml_policy = m_ttask.lock()->options().input.xml_policy; + XmlInputType xml_policy = m_ttask.lock()->options()->input.xml_policy; if ((xml_policy != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos)) { // partial trans opt stored in here - PartialTranslOptColl* oldPtoc = new PartialTranslOptColl; + PartialTranslOptColl* oldPtoc = new PartialTranslOptColl(m_max_phrase_length); size_t totalEarlyPruned = 0; // initial translation step @@ -411,7 +413,7 @@ CreateTranslationOptionsForRange for (++d ; d != dgraph.end() ; ++d) { const DecodeStep *dstep = *d; - PartialTranslOptColl* newPtoc = new PartialTranslOptColl; + PartialTranslOptColl* newPtoc = new PartialTranslOptColl(m_max_phrase_length); // go thru each intermediate trans opt just created const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList(); diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h index 382cc29ef..4d56fb144 100644 --- a/moses/TranslationOptionCollection.h +++ b/moses/TranslationOptionCollection.h @@ -67,10 +67,11 @@ class TranslationOptionCollection protected: ttaskwptr m_ttask; // that is and must be a weak pointer! std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */ - InputType const &m_source; /*< reference to the input */ - SquareMatrix m_estimatedScores; /*< matrix of future costs for contiguous parts (span) of the input */ - const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */ - const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */ + InputType const &m_source; /*< reference to the input */ + SquareMatrix m_estimatedScores; /*< matrix of future costs for contiguous parts (span) of the input */ + const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */ + const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */ + size_t m_max_phrase_length; std::vector<const Phrase*> m_unksrcs; InputPathList m_inputPathQueue; diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index dc8c66b9d..2fac458ac 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, size_t inputSize = input.GetSize(); m_inputPathMatrix.resize(inputSize); - size_t maxSizePhrase = ttask->options().search.max_phrase_length; + size_t maxSizePhrase = ttask->options()->search.max_phrase_length; maxSizePhrase = std::min(inputSize, maxSizePhrase); // 1-word phrases @@ -218,16 +218,19 @@ CreateTranslationOptionsForRangeNew bool TranslationOptionCollectionConfusionNet:: -CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t startPos, - size_t endPos, bool adhereTableLimit, size_t graphInd) +CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, + size_t startPos, size_t endPos, + bool adhereTableLimit, size_t graphInd) { bool retval = true; - XmlInputType intype = m_ttask.lock()->options().input.xml_policy; + size_t const max_phrase_length + = StaticData::Instance().options()->search.max_phrase_length; + XmlInputType intype = m_ttask.lock()->options()->input.xml_policy; if ((intype != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) { InputPathList &inputPathList = GetInputPathList(startPos, endPos); // partial trans opt stored in here - PartialTranslOptColl* oldPtoc = new PartialTranslOptColl; + PartialTranslOptColl* oldPtoc = new PartialTranslOptColl(max_phrase_length); size_t totalEarlyPruned = 0; // initial translation step @@ -248,7 +251,7 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st const DecodeStepTranslation *transStep =dynamic_cast<const DecodeStepTranslation*>(decodeStep); const DecodeStepGeneration *genStep =dynamic_cast<const DecodeStepGeneration*>(decodeStep); - PartialTranslOptColl* newPtoc = new PartialTranslOptColl; + PartialTranslOptColl* newPtoc = new PartialTranslOptColl(max_phrase_length); // go thru each intermediate trans opt just created const vector<TranslationOption*>& partTransOptList = oldPtoc->GetList(); diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp index 10e61b5d4..9346d839d 100644 --- a/moses/TranslationOptionCollectionLattice.cpp +++ b/moses/TranslationOptionCollectionLattice.cpp @@ -11,6 +11,7 @@ #include "FF/InputFeature.h" #include "TranslationModel/PhraseDictionaryTreeAdaptor.h" #include "util/exception.hh" +#include "TranslationTask.h" using namespace std; @@ -31,7 +32,7 @@ TranslationOptionCollectionLattice const InputFeature *inputFeature = InputFeature::InstancePtr(); UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified"); - size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength(); + size_t maxPhraseLength = ttask->options()->search.max_phrase_length; //StaticData::Instance().GetMaxPhraseLength(); size_t size = input.GetSize(); // 1-word phrases @@ -68,13 +69,16 @@ TranslationOptionCollectionLattice m_inputPathQueue.push_back(path); // recursive - Extend(*path, input); + Extend(*path, input, ttask->options()->search.max_phrase_length); } } } -void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const WordLattice &input) +void +TranslationOptionCollectionLattice:: +Extend(const InputPath &prevPath, const WordLattice &input, + size_t const maxPhraseLength) { size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1; if (nextPos >= input.GetSize()) { @@ -100,7 +104,7 @@ void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const Range range(startPos, endPos); - size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength(); + // size_t maxPhraseLength = StaticData::Instance().GetMaxPhraseLength(); if (range.GetNumWordsCovered() > maxPhraseLength) { continue; } @@ -121,7 +125,7 @@ void TranslationOptionCollectionLattice::Extend(const InputPath &prevPath, const m_inputPathQueue.push_back(path); // recursive - Extend(*path, input); + Extend(*path, input, maxPhraseLength); } } diff --git a/moses/TranslationOptionCollectionLattice.h b/moses/TranslationOptionCollectionLattice.h index 0b03157ea..e2c710a01 100644 --- a/moses/TranslationOptionCollectionLattice.h +++ b/moses/TranslationOptionCollectionLattice.h @@ -32,7 +32,8 @@ public: bool adhereTableLimit, size_t graphInd); // do not implement protected: - void Extend(const InputPath &prevPath, const WordLattice &input); + void Extend(const InputPath &prevPath, const WordLattice &input, + size_t const maxPhraseLength); }; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 0375a1e70..75df7443b 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -31,19 +31,6 @@ GetContextWindow() const return m_context; } -// SPTR<std::map<std::string, float> const> -// TranslationTask::GetContextWeights() const -// { -// return m_context_weights; -// } - -// void -// TranslationTask -// ::ReSetContextWeights(std::map<std::string, float> const& new_weights) -// { -// m_context_weights.reset(new std::map<string,float>(new_weights)); -// } - void TranslationTask:: SetContextWindow(boost::shared_ptr<std::vector<std::string> > const& cw) @@ -90,7 +77,7 @@ TranslationTask boost::shared_ptr<IOWrapper> const& ioWrapper) : m_source(source) , m_ioWrapper(ioWrapper) { - m_options = StaticData::Instance().options(); + m_options = source->options(); } TranslationTask::~TranslationTask() @@ -102,8 +89,8 @@ TranslationTask ::SetupManager(SearchAlgorithm algo) { boost::shared_ptr<BaseManager> manager; - StaticData const& staticData = StaticData::Instance(); - if (algo == DefaultSearchAlgorithm) algo = staticData.options().search.algo; + // StaticData const& staticData = StaticData::Instance(); + // if (algo == DefaultSearchAlgorithm) algo = staticData.options().search.algo; if (!is_syntax(algo)) manager.reset(new Manager(this->self())); // phrase-based @@ -117,7 +104,7 @@ TranslationTask else if (algo == SyntaxS2T) { // new-style string-to-tree decoding (ask Phil Williams) - S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm(); + S2TParsingAlgorithm algorithm = m_options->syntax.s2t_parsing_algo; if (algorithm == RecursiveCYKPlus) { typedef Syntax::S2T::EagerParserCallback Callback; typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser; @@ -145,7 +132,7 @@ TranslationTask return manager; } -AllOptions const& +AllOptions::ptr const& TranslationTask:: options() const { @@ -198,7 +185,7 @@ void TranslationTask::Run() Timer initTime; initTime.start(); - boost::shared_ptr<BaseManager> manager = SetupManager(); + boost::shared_ptr<BaseManager> manager = SetupManager(m_options->search.algo); VERBOSE(1, "Line " << translationId << ": Initialize search took " << initTime << " seconds total" << endl); @@ -231,7 +218,7 @@ void TranslationTask::Run() // Output search graph in hypergraph format for Kenneth Heafield's // lazy hypergraph decoder; writes to stderr - if (options().output.SearchGraphHG.size()) { + if (m_options->output.SearchGraphHG.size()) { size_t transId = manager->GetSource().GetTranslationId(); string fname = io->GetHypergraphOutputFileName(transId); manager->OutputSearchGraphAsHypergraph(fname, PRECISION); diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h index 82e4d0156..9c951ef03 100644 --- a/moses/TranslationTask.h +++ b/moses/TranslationTask.h @@ -44,7 +44,7 @@ class TranslationTask : public Moses::Task return *this; } protected: - AllOptions m_options; + AllOptions::ptr m_options; boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself boost::shared_ptr<ContextScope> m_scope; // sores local info // pointer to ContextScope, which stores context-specific information @@ -115,7 +115,7 @@ public: } boost::shared_ptr<BaseManager> - SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm); + SetupManager(SearchAlgorithm algo); // = DefaultSearchAlgorithm); boost::shared_ptr<ContextScope> const& @@ -134,7 +134,7 @@ public: // void SetContextWeights(std::string const& context_weights); // void ReSetContextWeights(std::map<std::string, float> const& new_weights); - AllOptions const& options() const; + AllOptions::ptr const& options() const; protected: boost::shared_ptr<Moses::InputType> m_source; diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp index 95cd5ed2e..a6a7f80d6 100644 --- a/moses/TreeInput.cpp +++ b/moses/TreeInput.cpp @@ -30,6 +30,8 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, { //parse XML markup in translation line + vector<FactorType> const& oFactors = opts.output.factor_order; + // no xml tag? we're done. if (line.find_first_of('<') == string::npos) { return true; @@ -53,10 +55,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, string cleanLine; // return string (text without xml) size_t wordPos = 0; // position in sentence (in terms of number of words) - // keep this handy for later - const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - // const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter(); - // loop through the tokens for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) { // not a xml tag, but regular text (may contain many words) @@ -184,8 +182,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, for (size_t i=0; i<altTexts.size(); ++i) { // set target phrase TargetPhrase targetPhrase(firstPt); - // targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL); - targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL); + targetPhrase.CreateFromString(Output, oFactors, altTexts[i], NULL); // set constituent label string targetLHSstr; @@ -197,7 +194,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, targetLHSstr = iterLHS->first; } Word *targetLHS = new Word(true); - targetLHS->CreateFromString(Output, outputFactorOrder, targetLHSstr, true); + targetLHS->CreateFromString(Output, oFactors, targetLHSstr, true); UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor left-hand-side"); targetPhrase.SetTargetLHS(targetLHS); @@ -243,25 +240,19 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, //! populate this InputType with data from in stream int TreeInput:: -Read(std::istream& in, const std::vector<FactorType>& factorOrder, - AllOptions const& opts) +Read(std::istream& in) { - const StaticData &staticData = StaticData::Instance(); - string line; if (getline(in, line, '\n').eof()) return 0; - // remove extra spaces - //line = Trim(line); - m_labelledSpans.clear(); - ProcessAndStripXMLTags(opts, line, m_labelledSpans, m_xmlOptions); + ProcessAndStripXMLTags(*m_options, line, m_labelledSpans, m_xmlOptions); // do words 1st - hack stringstream strme; strme << line << endl; - Sentence::Read(strme, factorOrder, opts); + Sentence::Read(strme); // size input chart size_t sourceSize = GetSize(); @@ -273,19 +264,21 @@ Read(std::istream& in, const std::vector<FactorType>& factorOrder, // do source labels vector<XMLParseOutput>::const_iterator iterLabel; - for (iterLabel = m_labelledSpans.begin(); iterLabel != m_labelledSpans.end(); ++iterLabel) { + for (iterLabel = m_labelledSpans.begin(); + iterLabel != m_labelledSpans.end(); ++iterLabel) { const XMLParseOutput &labelItem = *iterLabel; const Range &range = labelItem.m_range; const string &label = labelItem.m_label; - AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label, factorOrder); + AddChartLabel(range.GetStartPos() + 1, range.GetEndPos() + 1, label); } // default label + bool only4empty = m_options->syntax.default_non_term_only_for_empty_range; for (size_t startPos = 0; startPos < sourceSize; ++startPos) { for (size_t endPos = startPos; endPos < sourceSize; ++endPos) { NonTerminalSet &list = GetLabelSet(startPos, endPos); - if (list.size() == 0 || !staticData.GetDefaultNonTermOnlyForEmptyRange()) { - AddChartLabel(startPos, endPos, staticData.GetInputDefaultNonTerminal(), factorOrder); + if (list.size() == 0 || ! only4empty ) { + AddChartLabel(startPos, endPos, m_options->syntax.input_default_non_terminal); } } } @@ -306,13 +299,13 @@ TranslationOptionCollection* TreeInput::CreateTranslationOptionCollection() cons return NULL; } -void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label - , const std::vector<FactorType>& /* factorOrder */) +void +TreeInput:: +AddChartLabel(size_t startPos, size_t endPos, const Word &label) { UTIL_THROW_IF2(!label.IsNonTerminal(), "Label must be a non-terminal"); - - SourceLabelOverlap overlapType = StaticData::Instance().GetSourceLabelOverlap(); + SourceLabelOverlap overlapType = m_options->syntax.source_label_overlap; NonTerminalSet &list = GetLabelSet(startPos, endPos); switch (overlapType) { case SourceLabelOverlapAdd: @@ -330,14 +323,17 @@ void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const Word &label } } -void TreeInput::AddChartLabel(size_t startPos, size_t endPos, const string &label - , const std::vector<FactorType>& factorOrder) +void +TreeInput:: +AddChartLabel(size_t startPos, size_t endPos, const string &label) { + const std::vector<FactorType>& fOrder = m_options->input.factor_order; Word word(true); - const Factor *factor = FactorCollection::Instance().AddFactor(Input, factorOrder[0], label, true); // TODO - no factors + const Factor *factor + = FactorCollection::Instance().AddFactor(Input, fOrder[0], label, true); + // TODO - no factors word.SetFactor(0, factor); - - AddChartLabel(startPos, endPos, word, factorOrder); + AddChartLabel(startPos, endPos, word); } std::ostream& operator<<(std::ostream &out, const TreeInput &input) diff --git a/moses/TreeInput.h b/moses/TreeInput.h index d6838af8c..0d79516ab 100644 --- a/moses/TreeInput.h +++ b/moses/TreeInput.h @@ -35,10 +35,9 @@ protected: std::vector<std::vector<NonTerminalSet> > m_sourceChart; std::vector<XMLParseOutput> m_labelledSpans; - void AddChartLabel(size_t startPos, size_t endPos, const std::string &label - ,const std::vector<FactorType>& factorOrder); - void AddChartLabel(size_t startPos, size_t endPos, const Word &label - ,const std::vector<FactorType>& factorOrder); + void AddChartLabel(size_t startPos, size_t endPos, const std::string &label); + void AddChartLabel(size_t startPos, size_t endPos, const Word &label); + NonTerminalSet &GetLabelSet(size_t startPos, size_t endPos) { return m_sourceChart[startPos][endPos - startPos]; } @@ -48,7 +47,7 @@ protected: std::vector<XmlOption const*> &res); public: - TreeInput() : Sentence() { } + TreeInput(AllOptions::ptr const& opts) : Sentence(opts) { } InputTypeEnum GetType() const { return TreeInputType; @@ -56,9 +55,7 @@ public: //! populate this InputType with data from in stream virtual int - Read(std::istream& in, - const std::vector<FactorType>& factorOrder, - AllOptions const& opts); + Read(std::istream& in); //! Output debugging info to stream out virtual void Print(std::ostream&) const; diff --git a/moses/TrellisPath.cpp b/moses/TrellisPath.cpp index 20c7f5338..1f09b2eed 100644 --- a/moses/TrellisPath.cpp +++ b/moses/TrellisPath.cpp @@ -164,21 +164,27 @@ void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const } } -const boost::shared_ptr<ScoreComponentCollection> TrellisPath::GetScoreBreakdown() const +boost::shared_ptr<ScoreComponentCollection> const +TrellisPath:: +GetScoreBreakdown() const { if (!m_scoreBreakdown) { - float totalScore = m_path[0]->GetWinningHypo()->GetFutureScore(); // calculated for sanity check only + float totalScore = m_path[0]->GetWinningHypo()->GetFutureScore(); + // calculated for sanity check only - m_scoreBreakdown = boost::shared_ptr<ScoreComponentCollection>(new ScoreComponentCollection()); - m_scoreBreakdown->PlusEquals(ScoreComponentCollection(m_path[0]->GetWinningHypo()->GetScoreBreakdown())); + m_scoreBreakdown.reset(new ScoreComponentCollection()); + m_scoreBreakdown->PlusEquals(m_path[0]->GetWinningHypo()->GetScoreBreakdown()); - //calc score + // adjust score + // I assume things are done this way on the assumption that most hypothesis edges + // are shared with the winning path, so that score adjustments are cheaper than + // recomputing the score from scratch. UG size_t sizePath = m_path.size(); for (size_t pos = 0 ; pos < sizePath ; pos++) { const Hypothesis *hypo = m_path[pos]; const Hypothesis *winningHypo = hypo->GetWinningHypo(); if (hypo != winningHypo) { - totalScore = totalScore - winningHypo->GetFutureScore() + hypo->GetFutureScore(); + totalScore += hypo->GetFutureScore() - winningHypo->GetFutureScore(); m_scoreBreakdown->MinusEquals(winningHypo->GetScoreBreakdown()); m_scoreBreakdown->PlusEquals(hypo->GetScoreBreakdown()); } @@ -208,16 +214,14 @@ Phrase TrellisPath::GetTargetPhrase() const Phrase TrellisPath::GetSurfacePhrase() const { - const std::vector<FactorType> &outputFactor - = manager().options().output.factor_order; - // = StaticData::Instance().GetOutputFactorOrder(); + std::vector<FactorType> const& oFactor = manager().options()->output.factor_order; Phrase targetPhrase = GetTargetPhrase(); Phrase ret(targetPhrase.GetSize()); for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) { Word &newWord = ret.AddWord(); - for (size_t i = 0 ; i < outputFactor.size() ; i++) { - FactorType factorType = outputFactor[i]; + for (size_t i = 0 ; i < oFactor.size() ; i++) { + FactorType factorType = oFactor[i]; const Factor *factor = targetPhrase.GetFactor(pos, factorType); UTIL_THROW_IF2(factor == NULL, "No factor " << factorType << " at position " << pos); diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp index 09866f812..01f8eac9f 100644 --- a/moses/WordLattice.cpp +++ b/moses/WordLattice.cpp @@ -7,10 +7,11 @@ #include "TranslationOptionCollectionLattice.h" #include "TranslationOptionCollectionConfusionNet.h" #include "moses/FF/InputFeature.h" +#include "moses/TranslationTask.h" namespace Moses { -WordLattice::WordLattice() : ConfusionNet() +WordLattice::WordLattice(AllOptions::ptr const& opts) : ConfusionNet(opts) { UTIL_THROW_IF2(InputFeature::InstancePtr() == NULL, "Input feature must be specified"); @@ -51,18 +52,15 @@ void WordLattice::Print(std::ostream& out) const int WordLattice:: -InitializeFromPCNDataType -(const PCN::CN& cn, - const std::vector<FactorType>& factorOrder, - const std::string& debug_line) +InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line) { - // const StaticData &staticData = StaticData::Instance(); + const std::vector<FactorType>& factorOrder = m_options->input.factor_order; + size_t const maxPhraseLength = m_options->search.max_phrase_length; + const InputFeature *inputFeature = InputFeature::InstancePtr(); size_t numInputScores = inputFeature->GetNumInputScores(); size_t numRealWordCount = inputFeature->GetNumRealWordsInInput(); - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); - bool addRealWordCount = (numRealWordCount > 0); //when we have one more weight than params, we add a word count feature @@ -118,8 +116,8 @@ InitializeFromPCNDataType // String2Word(alt.m_word, data[i][j]. first, factorOrder); next_nodes[i][j] = alt.m_next; - if(next_nodes[i][j] > maxSizePhrase) { - TRACE_ERR("ERROR: Jump length " << next_nodes[i][j] << " in word lattice exceeds maximum phrase length " << maxSizePhrase << ".\n"); + if(next_nodes[i][j] > maxPhraseLength) { + TRACE_ERR("ERROR: Jump length " << next_nodes[i][j] << " in word lattice exceeds maximum phrase length " << maxPhraseLength << ".\n"); TRACE_ERR("ERROR: Increase max-phrase-length to process this lattice.\n"); return false; } @@ -149,9 +147,7 @@ InitializeFromPCNDataType int WordLattice:: -Read(std::istream& in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts) +Read(std::istream& in) { Clear(); std::string line; @@ -162,7 +158,7 @@ Read(std::istream& in, } PCN::CN cn = PCN::parsePCN(line); - return InitializeFromPCNDataType(cn, factorOrder, line); + return InitializeFromPCNDataType(cn, line); } void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const @@ -226,11 +222,10 @@ TranslationOptionCollection* WordLattice ::CreateTranslationOptionCollection(ttasksptr const& ttask) const { - size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage(); - float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold(); + size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov; + float translationOptionThreshold = ttask->options()->search.trans_opt_threshold; TranslationOptionCollection *rv = NULL; - //rv = new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold); if (StaticData::Instance().GetUseLegacyPT()) { rv = new TranslationOptionCollectionConfusionNet(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold); diff --git a/moses/WordLattice.h b/moses/WordLattice.h index 1b3ff889b..f36569e78 100644 --- a/moses/WordLattice.h +++ b/moses/WordLattice.h @@ -23,7 +23,7 @@ private: std::vector<std::vector<int> > distances; public: - WordLattice(); + WordLattice(AllOptions::ptr const& opts); InputTypeEnum GetType() const { return WordLatticeInput; @@ -40,12 +40,11 @@ public: /** Given a lattice represented using the PCN::CN data type (topologically sorted agency list * representation), initialize the WordLattice object */ - int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line = ""); + int InitializeFromPCNDataType(const PCN::CN& cn, const std::string& debug_line = ""); + /** Read from PLF format (1 lattice per line) */ - int Read(std::istream& in, - std::vector<FactorType> const& factorOrder, - AllOptions const& opts); + int Read(std::istream& in); /** Convert internal representation into an edge matrix * @note edges[1][2] means there is an edge from 1 to 2 diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp index 1befb8234..8a517386f 100644 --- a/moses/XmlOption.cpp +++ b/moses/XmlOption.cpp @@ -159,16 +159,19 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons * \param rbrackStr xml tag's right bracket string, typically ">" */ bool -ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption const*> &res, +ProcessAndStripXMLTags(AllOptions const& opts, string &line, + vector<XmlOption const*> &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls, - std::vector< std::pair<size_t, std::string> > &placeholders, - int offset, const std::string& lbrackStr, - const std::string& rbrackStr) + std::vector< std::pair<size_t, std::string> > &placeholders) { //parse XML markup in translation line - const StaticData &staticData = StaticData::Instance(); + const std::string& lbrackStr = opts.input.xml_brackets.first; + const std::string& rbrackStr = opts.input.xml_brackets.second; + int offset = is_syntax(opts.search.algo) ? 1 : 0; + + // const StaticData &staticData = StaticData::Instance(); // hack. What pt should XML trans opt be assigned to? PhraseDictionary *firstPt = NULL; @@ -177,7 +180,6 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co } // no xml tag? we're done. -//if (line.find_first_of('<') == string::npos) { if (line.find(lbrackStr) == string::npos) { return true; } @@ -194,8 +196,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co string cleanLine; // return string (text without xml) size_t wordPos = 0; // position in sentence (in terms of number of words) - const vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder(); - // const string &factorDelimiter = staticData.GetFactorDelimiter(); + const vector<FactorType> &outputFactorOrder = opts.output.factor_order; // loop through the tokens for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) { @@ -459,7 +460,7 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption co targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL); // lhs - const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); + const UnknownLHSList &lhsList = opts.syntax.unknown_lhs; // staticData.GetUnknownLHS(); if (!lhsList.empty()) { const Factor *factor = FactorCollection::Instance().AddFactor(lhsList[0].first, true); Word *targetLHS = new Word(true); diff --git a/moses/XmlOption.h b/moses/XmlOption.h index e35cb7c80..00bebc2e6 100644 --- a/moses/XmlOption.h +++ b/moses/XmlOption.h @@ -32,10 +32,10 @@ std::vector<std::string> TokenizeXml(const std::string& str, const std::string& bool ProcessAndStripXMLTags(AllOptions const& opts, std::string &line, std::vector<XmlOption const*> &res, - ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls, - std::vector< std::pair<size_t, std::string> > &placeholders, - int offset, - const std::string& lbrackStr="<", const std::string& rbrackStr=">"); + ReorderingConstraint &reorderingConstraint, + std::vector< size_t > &walls, + std::vector< std::pair<size_t, std::string> > &placeholders); + } diff --git a/moses/mbr.cpp b/moses/mbr.cpp index e49b1c5d9..9a2f91124 100644 --- a/moses/mbr.cpp +++ b/moses/mbr.cpp @@ -10,7 +10,7 @@ #include <cstdio> #include "moses/TrellisPathList.h" #include "moses/TrellisPath.h" -#include "moses/StaticData.h" +// #include "moses/StaticData.h" #include "moses/Util.h" #include "mbr.h" @@ -89,10 +89,10 @@ float calculate_score(const vector< vector<const Factor*> > & sents, int ref, in return exp(logbleu); } -const TrellisPath doMBR(const TrellisPathList& nBestList) +const TrellisPath doMBR(const TrellisPathList& nBestList, AllOptions const& opts) { float marginal = 0; - float mbr_scale = StaticData::Instance().options().mbr.scale; + float mbr_scale = opts.mbr.scale; vector<float> joint_prob_vec; vector< vector<const Factor*> > translations; float joint_prob; @@ -108,6 +108,8 @@ const TrellisPath doMBR(const TrellisPathList& nBestList) if (maxScore < score) maxScore = score; } + vector<FactorType> const& oFactors = opts.output.factor_order; + UTIL_THROW_IF2(oFactors.size() != 1, "Need exactly one output factor!"); for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { const TrellisPath &path = **iter; joint_prob = UntransformScore(mbr_scale * path.GetScoreBreakdown()->GetWeightedScore() - maxScore); @@ -116,7 +118,7 @@ const TrellisPath doMBR(const TrellisPathList& nBestList) // get words in translation vector<const Factor*> translation; - GetOutputFactors(path, translation); + GetOutputFactors(path, oFactors[0], translation); // collect n-gram counts map < vector < const Factor *>, int > counts; @@ -156,11 +158,11 @@ const TrellisPath doMBR(const TrellisPathList& nBestList) //return translations[minMBRLossIdx]; } -void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation) +void +GetOutputFactors(const TrellisPath &path, FactorType const oFactor, + vector <const Factor*> &translation) { const std::vector<const Hypothesis *> &edges = path.GetEdges(); - const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - assert (outputFactorOrder.size() == 1); // print the surface factor of the translation for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { @@ -168,8 +170,7 @@ void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translati const Phrase &phrase = edge.GetCurrTargetPhrase(); size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { - - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); + const Factor *factor = phrase.GetFactor(pos, oFactor); translation.push_back(factor); } } diff --git a/moses/mbr.h b/moses/mbr.h index d08b11a98..a7707281c 100644 --- a/moses/mbr.h +++ b/moses/mbr.h @@ -21,8 +21,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #ifndef moses_cmd_mbr_h #define moses_cmd_mbr_h +#include "moses/parameters/AllOptions.h" + +Moses::TrellisPath const +doMBR(Moses::TrellisPathList const& nBestList, Moses::AllOptions const& opts); + +void +GetOutputFactors(const Moses::TrellisPath &path, Moses::FactorType const f, + std::vector <const Moses::Factor*> &translation); + +float +calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, + int ref, int hyp, + std::vector<std::map<std::vector<const Moses::Factor*>,int> > & + ngram_stats ); -const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList); -void GetOutputFactors(const Moses::TrellisPath &path, std::vector <const Moses::Factor*> &translation); -float calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats ); #endif diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp index a7acdadfa..868b8e4fb 100644 --- a/moses/parameters/AllOptions.cpp +++ b/moses/parameters/AllOptions.cpp @@ -4,6 +4,12 @@ namespace Moses { AllOptions:: + AllOptions() + : mira(false) + , use_legacy_pt(false) + { } + + AllOptions:: AllOptions(Parameter const& param) { init(param); @@ -23,6 +29,7 @@ namespace Moses if (!lmbr.init(param)) return false; if (!output.init(param)) return false; if (!unk.init(param)) return false; + if (!syntax.init(param)) return false; param.SetParameter(mira, "mira", false); @@ -91,6 +98,7 @@ namespace Moses if (!lmbr.update(param)) return false; if (!output.update(param)) return false; if (!unk.update(param)) return false; + if (!syntax.update(param)) return false; return sanity_check(); } #endif diff --git a/moses/parameters/AllOptions.h b/moses/parameters/AllOptions.h index a691d5d46..3b8abf188 100644 --- a/moses/parameters/AllOptions.h +++ b/moses/parameters/AllOptions.h @@ -13,12 +13,15 @@ #include "LMBR_Options.h" #include "ReportingOptions.h" #include "OOVHandlingOptions.h" +#include "SyntaxOptions.h" +#include <boost/shared_ptr.hpp> namespace Moses { struct AllOptions : public OptionsBaseClass { + typedef boost::shared_ptr<AllOptions const> ptr; SearchOptions search; CubePruningOptions cube; NBestOptions nbest; @@ -29,13 +32,14 @@ namespace Moses LMBR_Options lmbr; ReportingOptions output; OOVHandlingOptions unk; + SyntaxOptions syntax; bool mira; - + bool use_legacy_pt; // StackOptions stack; // BeamSearchOptions beam; bool init(Parameter const& param); bool sanity_check(); - AllOptions() {} + AllOptions(); AllOptions(Parameter const& param); bool update(std::map<std::string,xmlrpc_c::value>const& param); diff --git a/moses/parameters/BookkeepingOptions.cpp b/moses/parameters/BookkeepingOptions.cpp index db8fbd909..891a424c0 100644 --- a/moses/parameters/BookkeepingOptions.cpp +++ b/moses/parameters/BookkeepingOptions.cpp @@ -2,6 +2,7 @@ namespace Moses { + bool BookkeepingOptions:: init(Parameter const& P) @@ -15,4 +16,10 @@ init(Parameter const& P) } return true; } + +BookkeepingOptions:: +BookkeepingOptions() + : need_alignment_info(false) +{ } + } diff --git a/moses/parameters/BookkeepingOptions.h b/moses/parameters/BookkeepingOptions.h index 0fd046766..d54bccc0a 100644 --- a/moses/parameters/BookkeepingOptions.h +++ b/moses/parameters/BookkeepingOptions.h @@ -9,6 +9,7 @@ namespace Moses { bool need_alignment_info; bool init(Parameter const& param); + BookkeepingOptions(); }; diff --git a/moses/parameters/CubePruningOptions.cpp b/moses/parameters/CubePruningOptions.cpp index b4ebb0d5e..793ae2db6 100644 --- a/moses/parameters/CubePruningOptions.cpp +++ b/moses/parameters/CubePruningOptions.cpp @@ -4,6 +4,14 @@ namespace Moses { + CubePruningOptions:: + CubePruningOptions() + : pop_limit(DEFAULT_CUBE_PRUNING_POP_LIMIT) + , diversity(DEFAULT_CUBE_PRUNING_DIVERSITY) + , lazy_scoring(false) + , deterministic_search(false) + {} + bool CubePruningOptions:: init(Parameter const& param) diff --git a/moses/parameters/CubePruningOptions.h b/moses/parameters/CubePruningOptions.h index 9f85520ea..0545b6610 100644 --- a/moses/parameters/CubePruningOptions.h +++ b/moses/parameters/CubePruningOptions.h @@ -16,7 +16,7 @@ namespace Moses bool init(Parameter const& param); CubePruningOptions(Parameter const& param); - CubePruningOptions() {}; + CubePruningOptions(); bool update(std::map<std::string,xmlrpc_c::value>const& params); diff --git a/moses/parameters/InputOptions.cpp b/moses/parameters/InputOptions.cpp index 728adb020..98735ea66 100644 --- a/moses/parameters/InputOptions.cpp +++ b/moses/parameters/InputOptions.cpp @@ -2,17 +2,21 @@ #include "InputOptions.h" #include <vector> #include <iostream> -#include "moses/StaticData.h" +// #include "moses/StaticData.h" #include "moses/TypeDef.h" namespace Moses { InputOptions:: InputOptions() + : continue_partial_translation(false) + , input_type(SentenceInput) + , xml_policy(XmlPassThrough) + , placeholder_factor(NOT_FOUND) { xml_brackets.first = "<"; xml_brackets.second = ">"; - input_type = SentenceInput; + factor_order.assign(1,0); } bool @@ -20,6 +24,7 @@ namespace Moses { init(Parameter const& param) { param.SetParameter(input_type, "inputtype", SentenceInput); +#if 0 if (input_type == SentenceInput) { VERBOSE(2, "input type is: text input"); } else if (input_type == ConfusionNetworkInput) @@ -32,12 +37,12 @@ namespace Moses { { VERBOSE(2, "input type is: tabbed sentence"); } else if (input_type == ForestInputType) { VERBOSE(2, "input type is: forest"); } +#endif + param.SetParameter(continue_partial_translation, - "continue-partial-translation", false); - param.SetParameter(default_non_term_only_for_empty_range, - "default-non-term-for-empty-range-only", false); - + "continue-partial-translation", false); + param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough); // specify XML tags opening and closing brackets for XML option @@ -59,9 +64,11 @@ namespace Moses { xml_brackets.first= brackets[0]; xml_brackets.second=brackets[1]; +#if 0 VERBOSE(1,"XML tags opening and closing brackets for XML input are: " << xml_brackets.first << " and " << xml_brackets.second << std::endl); +#endif } pspec = param.GetParam("input-factors"); @@ -69,6 +76,8 @@ namespace Moses { if (factor_order.empty()) factor_order.assign(1,0); param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND); + param.SetParameter<std::string>(input_file_path,"input-file",""); + return true; } diff --git a/moses/parameters/InputOptions.cpp.orig b/moses/parameters/InputOptions.cpp.orig new file mode 100644 index 000000000..5337be2fa --- /dev/null +++ b/moses/parameters/InputOptions.cpp.orig @@ -0,0 +1,105 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- +#include "InputOptions.h" +#include <vector> +#include <iostream> +#include "moses/StaticData.h" +#include "moses/TypeDef.h" + +namespace Moses { + + InputOptions:: + InputOptions() + { + xml_brackets.first = "<"; + xml_brackets.second = ">"; + input_type = SentenceInput; + } + + bool + InputOptions:: + init(Parameter const& param) + { + param.SetParameter(input_type, "inputtype", SentenceInput); + if (input_type == SentenceInput) + { VERBOSE(2, "input type is: text input"); } + else if (input_type == ConfusionNetworkInput) + { VERBOSE(2, "input type is: confusion net"); } + else if (input_type == WordLatticeInput) + { VERBOSE(2, "input type is: word lattice"); } + else if (input_type == TreeInputType) + { VERBOSE(2, "input type is: tree"); } + else if (input_type == TabbedSentenceInput) + { VERBOSE(2, "input type is: tabbed sentence"); } + else if (input_type == ForestInputType) + { VERBOSE(2, "input type is: forest"); } + + param.SetParameter(continue_partial_translation, + "continue-partial-translation", false); + param.SetParameter(default_non_term_only_for_empty_range, + "default-non-term-for-empty-range-only", false); + + + param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough); + + // specify XML tags opening and closing brackets for XML option + // Do we really want this to be configurable???? UG + const PARAM_VEC *pspec; + pspec = param.GetParam("xml-brackets"); + if (pspec && pspec->size()) + { + std::vector<std::string> brackets = Tokenize(pspec->at(0)); + if(brackets.size()!=2) + { + std::cerr << "invalid xml-brackets value, " + << "must specify exactly 2 blank-delimited strings " +<<<<<<< HEAD + << "for XML tags opening and closing brackets" << std::endl; + exit(1); + } + xml_brackets.first= brackets[0]; + xml_brackets.second=brackets[1]; +======= + << "for XML tags opening and closing brackets" + << std::endl; + exit(1); + } + + xml_brackets.first= brackets[0]; + xml_brackets.second=brackets[1]; + +>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d + VERBOSE(1,"XML tags opening and closing brackets for XML input are: " + << xml_brackets.first << " and " + << xml_brackets.second << std::endl); + } + +<<<<<<< HEAD + param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND); + + return true; + } + + +#ifdef HAVE_XMLRPC_C + bool + InputOptions:: + update(std::map<std::string,xmlrpc_c::value>const& param) + { + typedef std::map<std::string, xmlrpc_c::value> params_t; + params_t::const_iterator si = param.find("xml-input"); + if (si != param.end()) + xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second)); + return true; + } +#endif + +======= + pspec = param.GetParam("input-factors"); + if (pspec) factor_order = Scan<FactorType>(*pspec); + if (factor_order.empty()) factor_order.assign(1,0); + + return true; + } + +>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d +} diff --git a/moses/parameters/InputOptions.h b/moses/parameters/InputOptions.h index 1ef433721..c767d9e98 100644 --- a/moses/parameters/InputOptions.h +++ b/moses/parameters/InputOptions.h @@ -11,13 +11,12 @@ namespace Moses InputOptions : public OptionsBaseClass { bool continue_partial_translation; - bool default_non_term_only_for_empty_range; // whatever that means InputTypeEnum input_type; XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive std::vector<FactorType> factor_order; // input factor order FactorType placeholder_factor; // where to store original text for placeholders - + std::string input_file_path; std::pair<std::string,std::string> xml_brackets; // strings to use as XML tags' opening and closing brackets. diff --git a/moses/parameters/InputOptions.h.orig b/moses/parameters/InputOptions.h.orig new file mode 100644 index 000000000..89320700d --- /dev/null +++ b/moses/parameters/InputOptions.h.orig @@ -0,0 +1,38 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- +#pragma once +#include <string> +#include "moses/Parameter.h" +#include <string> +#include "OptionsBaseClass.h" + +namespace Moses +{ + struct + InputOptions : public OptionsBaseClass + { + bool continue_partial_translation; + bool default_non_term_only_for_empty_range; // whatever that means + InputTypeEnum input_type; + XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive +<<<<<<< HEAD + + FactorType placeholder_factor; // where to store original text for placeholders + + +======= + std::vector<FactorType> factor_order; // input factor order + +>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d + std::pair<std::string,std::string> xml_brackets; + // strings to use as XML tags' opening and closing brackets. + // Default are "<" and ">" + + InputOptions(); + + bool init(Parameter const& param); + bool update(std::map<std::string,xmlrpc_c::value>const& param); + + }; + +} + diff --git a/moses/parameters/LMBR_Options.cpp b/moses/parameters/LMBR_Options.cpp index 808be5f18..e3b5d038d 100644 --- a/moses/parameters/LMBR_Options.cpp +++ b/moses/parameters/LMBR_Options.cpp @@ -3,6 +3,16 @@ namespace Moses { + LMBR_Options:: + LMBR_Options() + : enabled(false) + , use_lattice_hyp_set(false) + , precision(0.8f) + , ratio(0.6f) + , map_weight(0.8f) + , pruning_factor(30) + { } + bool LMBR_Options:: init(Parameter const& param) @@ -20,5 +30,8 @@ namespace Moses { return true; } - + + + + } diff --git a/moses/parameters/LMBR_Options.h b/moses/parameters/LMBR_Options.h index 54fd0fcd0..2f33c36d8 100644 --- a/moses/parameters/LMBR_Options.h +++ b/moses/parameters/LMBR_Options.h @@ -19,7 +19,7 @@ namespace Moses size_t pruning_factor; //! average number of nodes per word wanted in pruned lattice std::vector<float> theta; //! theta(s) for lattice mbr calculation bool init(Parameter const& param); - LMBR_Options() {} + LMBR_Options(); }; } diff --git a/moses/parameters/MBR_Options.cpp b/moses/parameters/MBR_Options.cpp index 9a9fbc966..a1a897399 100644 --- a/moses/parameters/MBR_Options.cpp +++ b/moses/parameters/MBR_Options.cpp @@ -3,14 +3,22 @@ namespace Moses { -bool -MBR_Options:: -init(Parameter const& param) -{ - param.SetParameter(enabled, "minimum-bayes-risk", false); - param.SetParameter<size_t>(size, "mbr-size", 200); - param.SetParameter(scale, "mbr-scale", 1.0f); - return true; -} + MBR_Options:: + MBR_Options() + : enabled(false) + , size(200) + , scale(1.0f) + {} + + + bool + MBR_Options:: + init(Parameter const& param) + { + param.SetParameter(enabled, "minimum-bayes-risk", false); + param.SetParameter<size_t>(size, "mbr-size", 200); + param.SetParameter(scale, "mbr-scale", 1.0f); + return true; + } } diff --git a/moses/parameters/MBR_Options.h b/moses/parameters/MBR_Options.h index 0462ebc0f..69863f6b5 100644 --- a/moses/parameters/MBR_Options.h +++ b/moses/parameters/MBR_Options.h @@ -13,9 +13,9 @@ namespace Moses bool enabled; size_t size; //! number of translation candidates considered float scale; /*! scaling factor for computing marginal probability - * of candidate translation */ + * of candidate translation */ bool init(Parameter const& param); - MBR_Options() {} + MBR_Options(); }; } diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp index c65f9e852..cf5857d00 100644 --- a/moses/parameters/NBestOptions.cpp +++ b/moses/parameters/NBestOptions.cpp @@ -5,6 +5,21 @@ namespace Moses { + NBestOptions:: + NBestOptions() + : nbest_size(0) + , factor(20) + , enabled(false) + , print_trees(false) + , only_distinct(false) + , include_alignment_info(false) + , include_feature_labels(true) + , include_segmentation(false) + , include_passthrough(false) + , include_all_factors(false) + {} + + bool NBestOptions:: init(Parameter const& P) diff --git a/moses/parameters/NBestOptions.h b/moses/parameters/NBestOptions.h index 0fe77dc3a..66aea35a9 100644 --- a/moses/parameters/NBestOptions.h +++ b/moses/parameters/NBestOptions.h @@ -25,7 +25,8 @@ struct NBestOptions : public OptionsBaseClass bool init(Parameter const& param); bool update(std::map<std::string,xmlrpc_c::value>const& param); - + + NBestOptions(); }; } diff --git a/moses/parameters/OOVHandlingOptions.cpp b/moses/parameters/OOVHandlingOptions.cpp index 8635db908..154074664 100644 --- a/moses/parameters/OOVHandlingOptions.cpp +++ b/moses/parameters/OOVHandlingOptions.cpp @@ -14,6 +14,8 @@ namespace Moses { mark = false; prefix = "UNK"; suffix = ""; + word_deletion_enabled = false; + always_create_direct_transopt = false; } bool @@ -22,6 +24,8 @@ namespace Moses { { param.SetParameter(drop,"drop-unknown",false); param.SetParameter(mark,"mark-unknown",false); + param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false); + param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false); param.SetParameter<std::string>(prefix,"unknown-word-prefix","UNK"); param.SetParameter<std::string>(suffix,"unknown-word-suffix",""); return true; diff --git a/moses/parameters/OOVHandlingOptions.h b/moses/parameters/OOVHandlingOptions.h index 1f34dcd4b..73ce88d07 100644 --- a/moses/parameters/OOVHandlingOptions.h +++ b/moses/parameters/OOVHandlingOptions.h @@ -15,7 +15,8 @@ namespace Moses std::string prefix; std::string suffix; - + bool word_deletion_enabled; + bool always_create_direct_transopt; OOVHandlingOptions(); bool init(Parameter const& param); diff --git a/moses/parameters/ReorderingOptions.cpp b/moses/parameters/ReorderingOptions.cpp index 016c4ab0d..2b8cafd6a 100644 --- a/moses/parameters/ReorderingOptions.cpp +++ b/moses/parameters/ReorderingOptions.cpp @@ -4,6 +4,14 @@ namespace Moses { ReorderingOptions:: + ReorderingOptions() + : max_distortion(-1) + , monotone_at_punct(false) + , use_early_distortion_cost(false) + {} + + + ReorderingOptions:: ReorderingOptions(Parameter const& param) { init(param); diff --git a/moses/parameters/ReorderingOptions.h b/moses/parameters/ReorderingOptions.h index f10fc4973..867569f6b 100644 --- a/moses/parameters/ReorderingOptions.h +++ b/moses/parameters/ReorderingOptions.h @@ -14,7 +14,7 @@ namespace Moses bool use_early_distortion_cost; bool init(Parameter const& param); ReorderingOptions(Parameter const& param); - ReorderingOptions() {} + ReorderingOptions(); }; } diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index d96ea33b9..f1cb89b06 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -4,10 +4,34 @@ namespace Moses { using namespace std; + + ReportingOptions:: + ReportingOptions() + : start_translation_id(0) + , ReportAllFactors(false) + , ReportSegmentation(0) + , PrintAlignmentInfo(false) + , PrintAllDerivations(false) + , PrintTranslationOptions(false) + , WA_SortOrder(NoSort) + , WordGraph(false) + , DontPruneSearchGraph(false) + , RecoverPath(false) + , ReportHypoScore(false) + , PrintID(false) + , PrintPassThrough(false) + , include_lhs_in_search_graph(false) + , lattice_sample_size(0) + { + factor_order.assign(1,0); + } + bool ReportingOptions:: init(Parameter const& param) { + param.SetParameter<long>(start_translation_id, "start-translation-id", 0); + // including factors in the output param.SetParameter(ReportAllFactors, "report-all-factors", false); @@ -21,12 +45,16 @@ namespace Moses { param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort); std::string e; // hack to save us param.SetParameter<string>(...) param.SetParameter(AlignmentOutputFile,"alignment-output-file", e); - + + + param.SetParameter(PrintAllDerivations, "print-all-derivations", false); + param.SetParameter(PrintTranslationOptions, "print-translation-option", false); + // output a word graph PARAM_VEC const* params; params = param.GetParam("output-word-graph"); WordGraph = (params && params->size() == 2); // what are the two options? - + // dump the search graph param.SetParameter(SearchGraph, "output-search-graph", e); param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e); @@ -35,9 +63,11 @@ namespace Moses { #ifdef HAVE_PROTOBUF param.SetParameter(SearchGraphPB, "output-search-graph-pb", e); #endif - - param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); + param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); + param.SetParameter(include_lhs_in_search_graph, + "include-lhs-in-search-graph", false ); + // miscellaneous param.SetParameter(RecoverPath, "recover-input-path",false); @@ -59,8 +89,6 @@ namespace Moses { std::cerr <<"wrong format for switch -lattice-samples file size"; return false; } - } else { - lattice_sample_size = 0; } params= param.GetParam("output-factors"); @@ -77,7 +105,7 @@ namespace Moses { return true; } - + #ifdef HAVE_XMLRPC_C bool ReportingOptions:: @@ -88,22 +116,33 @@ namespace Moses { std::map<std::string, xmlrpc_c::value>::const_iterator m; m = param.find("output-factors"); - if (m != param.end()) - factor_order = Tokenize<FactorType>(xmlrpc_c::value_string(m->second), ","); - + if (m != param.end()) { + factor_order=Tokenize<FactorType>(xmlrpc_c::value_string(m->second),","); + } + if (ReportAllFactors) { factor_order.clear(); for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) factor_order.push_back(i); } + + m = param.find("align"); + if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second))) + ReportSegmentation = 1; + + PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo); m = param.find("factor-delimiter"); - if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second)); - m = param.find("output-factor-delimiter"); - if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second)); + if (m != param.end()) { + FactorDelimiter = Trim(xmlrpc_c::value_string(m->second)); + } + m = param.find("output-factor-delimiter"); + if (m != param.end()) { + FactorDelimiter = Trim(xmlrpc_c::value_string(m->second)); + } + return true; } #endif - } diff --git a/moses/parameters/ReportingOptions.h b/moses/parameters/ReportingOptions.h index 2685c0380..76f8fdae4 100644 --- a/moses/parameters/ReportingOptions.h +++ b/moses/parameters/ReportingOptions.h @@ -10,13 +10,17 @@ namespace Moses struct ReportingOptions : public OptionsBaseClass { + long start_translation_id; + std::vector<FactorType> factor_order; bool ReportAllFactors; // m_reportAllFactors; - int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched bool PrintAlignmentInfo; // m_PrintAlignmentInfo + bool PrintAllDerivations; + bool PrintTranslationOptions; + WordAlignmentSort WA_SortOrder; // 0: no, 1: target order std::string AlignmentOutputFile; std::string FactorDelimiter; @@ -36,13 +40,12 @@ namespace Moses bool PrintID; bool PrintPassThrough; - // print .. - bool aln_info; // m_PrintAlignmentInfo; - // transrep = translation reporting std::string detailed_transrep_filepath; std::string detailed_tree_transrep_filepath; std::string detailed_all_transrep_filepath; + bool include_lhs_in_search_graph; + std::string lattice_sample_filepath; size_t lattice_sample_size; @@ -57,7 +60,10 @@ namespace Moses #ifdef HAVE_XMLRPC_C bool update(std::map<std::string, xmlrpc_c::value>const& param); #endif - }; + + ReportingOptions(); + }; + } diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp index 39ac64515..678f9bfe0 100644 --- a/moses/parameters/SearchOptions.cpp +++ b/moses/parameters/SearchOptions.cpp @@ -3,6 +3,23 @@ namespace Moses { + + SearchOptions:: + SearchOptions() + : algo(Normal) + , stack_size(DEFAULT_MAX_HYPOSTACK_SIZE) + , stack_diversity(0) + , disable_discarding(false) + , max_phrase_length(DEFAULT_MAX_PHRASE_LENGTH) + , max_trans_opt_per_cov(DEFAULT_MAX_TRANS_OPT_SIZE) + , max_partial_trans_opt(DEFAULT_MAX_PART_TRANS_OPT_SIZE) + , beam_width(DEFAULT_BEAM_WIDTH) + , timeout(0) + , consensus(false) + , early_discarding_threshold(DEFAULT_EARLY_DISCARDING_THRESHOLD) + , trans_opt_threshold(DEFAULT_TRANSLATION_OPTION_THRESHOLD) + { } + SearchOptions:: SearchOptions(Parameter const& param) : stack_diversity(0) @@ -31,7 +48,8 @@ namespace Moses DEFAULT_MAX_PART_TRANS_OPT_SIZE); param.SetParameter(consensus, "consensus-decoding", false); - + param.SetParameter(disable_discarding, "disable-discarding", false); + // transformation to log of a few scores beam_width = TransformScore(beam_width); trans_opt_threshold = TransformScore(trans_opt_threshold); @@ -85,6 +103,4 @@ namespace Moses } #endif - - } diff --git a/moses/parameters/SearchOptions.h b/moses/parameters/SearchOptions.h index 875678cc9..46c53e95b 100644 --- a/moses/parameters/SearchOptions.h +++ b/moses/parameters/SearchOptions.h @@ -14,9 +14,10 @@ namespace Moses SearchAlgorithm algo; // stack decoding - size_t stack_size; // maxHypoStackSize; - size_t stack_diversity; // minHypoStackDiversity; - + size_t stack_size; // maxHypoStackSize; + size_t stack_diversity; // minHypoStackDiversity; + bool disable_discarding; + // Disable discarding of bad hypotheses from HypothesisStackNormal size_t max_phrase_length; size_t max_trans_opt_per_cov; size_t max_partial_trans_opt; @@ -26,7 +27,7 @@ namespace Moses int timeout; bool consensus; //! Use Consensus decoding (DeNero et al 2009) - + // reordering options // bool reorderingConstraint; //! use additional reordering constraints // bool useEarlyDistortionCost; @@ -36,7 +37,7 @@ namespace Moses bool init(Parameter const& param); SearchOptions(Parameter const& param); - SearchOptions() {} + SearchOptions(); bool UseEarlyDiscarding() const { diff --git a/moses/parameters/ServerOptions.cpp b/moses/parameters/ServerOptions.cpp index 83cddc78c..ab71a26b1 100644 --- a/moses/parameters/ServerOptions.cpp +++ b/moses/parameters/ServerOptions.cpp @@ -35,6 +35,20 @@ parse_timespec(std::string const& spec) } ServerOptions:: +ServerOptions() + : is_serial(false) + , numThreads(15) // why 15? + , sessionTimeout(1800) // = 30 min + , sessionCacheSize(25) + , port(8080) + , maxConn(15) + , maxConnBacklog(15) + , keepaliveTimeout(15) + , keepaliveMaxConn(30) + , timeout(15) +{ } + +ServerOptions:: ServerOptions(Parameter const& P) { init(P); diff --git a/moses/parameters/ServerOptions.h b/moses/parameters/ServerOptions.h index 7bbef2612..f39a02e6a 100644 --- a/moses/parameters/ServerOptions.h +++ b/moses/parameters/ServerOptions.h @@ -24,6 +24,7 @@ namespace Moses bool init(Parameter const& param); ServerOptions(Parameter const& param); + ServerOptions(); }; } diff --git a/moses/parameters/SyntaxOptions.cpp b/moses/parameters/SyntaxOptions.cpp new file mode 100644 index 000000000..f76c187ec --- /dev/null +++ b/moses/parameters/SyntaxOptions.cpp @@ -0,0 +1,82 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- +#include "SyntaxOptions.h" +#include <vector> +#include <iostream> +#include "moses/StaticData.h" +#include "moses/TypeDef.h" +#include "moses/Factor.h" +#include "moses/InputFileStream.h" + +namespace Moses { + + SyntaxOptions:: + SyntaxOptions() + : s2t_parsing_algo(RecursiveCYKPlus) + , default_non_term_only_for_empty_range(false) + , source_label_overlap(SourceLabelOverlapAdd) + , rule_limit(DEFAULT_MAX_TRANS_OPT_SIZE) + { } + + bool + SyntaxOptions:: + init(Parameter const& param) + { + param.SetParameter(rule_limit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE); + param.SetParameter(s2t_parsing_algo, "s2t-parsing-algorithm", + RecursiveCYKPlus); + param.SetParameter(default_non_term_only_for_empty_range, + "default-non-term-for-empty-range-only", false); + param.SetParameter(source_label_overlap, "source-label-overlap", + SourceLabelOverlapAdd); + return true; + } + + void + SyntaxOptions:: + LoadNonTerminals(Parameter const& param, FactorCollection& factorCollection) + { + using namespace std; + string dfltNonTerm; + param.SetParameter<string>(dfltNonTerm, "non-terminals", "X"); + + const Factor *srcFactor = factorCollection.AddFactor(Input, 0, dfltNonTerm, true); + input_default_non_terminal.SetFactor(0, srcFactor); + input_default_non_terminal.SetIsNonTerminal(true); + + const Factor *trgFactor = factorCollection.AddFactor(Output, 0, dfltNonTerm, true); + output_default_non_terminal.SetFactor(0, trgFactor); + output_default_non_terminal.SetIsNonTerminal(true); + + // for unknown words + const PARAM_VEC *params = param.GetParam("unknown-lhs"); + if (params == NULL || params->size() == 0) { + UnknownLHSEntry entry(dfltNonTerm, 0.0f); + unknown_lhs.push_back(entry); + } else { + const string &filePath = params->at(0); + InputFileStream inStream(filePath); + string line; + while(getline(inStream, line)) { + vector<string> tokens = Tokenize(line); + UTIL_THROW_IF2(tokens.size() != 2, "Incorrect unknown LHS format: " << line); + UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1])); + unknown_lhs.push_back(entry); + factorCollection.AddFactor(Output, 0, tokens[0], true); + } + } + } + +#ifdef HAVE_XMLRPC_C + bool + SyntaxOptions:: + update(std::map<std::string,xmlrpc_c::value>const& param) + { + typedef std::map<std::string, xmlrpc_c::value> params_t; + // params_t::const_iterator si = param.find("xml-input"); + // if (si != param.end()) + // xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second)); + return true; + } +#endif + +} diff --git a/moses/parameters/SyntaxOptions.h b/moses/parameters/SyntaxOptions.h new file mode 100644 index 000000000..e66a24cd1 --- /dev/null +++ b/moses/parameters/SyntaxOptions.h @@ -0,0 +1,34 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- +#pragma once +#include <string> +#include "moses/Parameter.h" +#include "moses/Word.h" +#include <string> +#include "OptionsBaseClass.h" +#include "moses/FactorCollection.h" + +namespace Moses +{ + typedef std::pair<std::string, float> UnknownLHSEntry; + typedef std::vector<UnknownLHSEntry> UnknownLHSList; + + struct + SyntaxOptions : public OptionsBaseClass + { + S2TParsingAlgorithm s2t_parsing_algo; + Word input_default_non_terminal; + Word output_default_non_terminal; + bool default_non_term_only_for_empty_range; // whatever that means + UnknownLHSList unknown_lhs; + SourceLabelOverlap source_label_overlap; // m_sourceLabelOverlap; + size_t rule_limit; + + SyntaxOptions(); + + bool init(Parameter const& param); + bool update(std::map<std::string,xmlrpc_c::value>const& param); + void LoadNonTerminals(Parameter const& param, FactorCollection& factorCollection); + }; + +} + diff --git a/moses/server/Hypothesis_4server.cpp b/moses/server/Hypothesis_4server.cpp index af2b6392b..9ace9c967 100644 --- a/moses/server/Hypothesis_4server.cpp +++ b/moses/server/Hypothesis_4server.cpp @@ -11,7 +11,7 @@ namespace Moses { Range const& src = this->GetCurrSourceWordsRange(); Range const& trg = this->GetCurrTargetWordsRange(); - WordAlignmentSort waso = m_manager.options().output.WA_SortOrder; + WordAlignmentSort waso = m_manager.options()->output.WA_SortOrder; vector<pair<size_t,size_t> const* > a = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso); typedef pair<size_t,size_t> item; diff --git a/moses/server/Server.cpp b/moses/server/Server.cpp index 8a78dd7bc..19073873f 100644 --- a/moses/server/Server.cpp +++ b/moses/server/Server.cpp @@ -1,5 +1,7 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #include "Server.h" +#include <sstream> + namespace MosesServer { Server:: @@ -16,6 +18,12 @@ namespace MosesServer m_registry.addMethod("close_session", m_close_session); } + Server:: + ~Server() + { + unlink(m_pidfile.c_str()); + } + int Server:: run() @@ -32,7 +40,12 @@ namespace MosesServer .keepaliveMaxConn(m_server_options.keepaliveMaxConn) .timeout(m_server_options.timeout) ); - + std::ostringstream pidfilename; + pidfilename << "/tmp/moses-server." << m_server_options.port << ".pid"; + m_pidfile = pidfilename.str(); + std::ofstream pidfile(m_pidfile.c_str()); + pidfile << getpid() << std::endl; + pidfile.close(); XVERBOSE(1,"Listening on port " << m_server_options.port << std::endl); if (m_server_options.is_serial) { diff --git a/moses/server/Server.h b/moses/server/Server.h index 4afbf5d91..802eaef3e 100644 --- a/moses/server/Server.h +++ b/moses/server/Server.h @@ -16,6 +16,7 @@ #include "CloseSession.h" #include "Session.h" #include "moses/parameters/ServerOptions.h" +#include <string> namespace MosesServer { @@ -28,9 +29,10 @@ namespace MosesServer xmlrpc_c::methodPtr const m_optimizer; xmlrpc_c::methodPtr const m_translator; xmlrpc_c::methodPtr const m_close_session; + std::string m_pidfile; public: Server(Moses::Parameter& params); - + ~Server(); int run(); void delete_session(uint64_t const session_id); diff --git a/moses/server/Session.h b/moses/server/Session.h index f56780839..27d5ca845 100644 --- a/moses/server/Session.h +++ b/moses/server/Session.h @@ -2,6 +2,7 @@ #pragma once #include "moses/Util.h" #include "moses/ContextScope.h" +#include "moses/parameters/AllOptions.h" #include <sys/time.h> #include <boost/unordered_map.hpp> @@ -19,9 +20,10 @@ namespace MosesServer{ boost::shared_ptr<Moses::ContextScope> const scope; // stores local info SPTR<std::map<std::string,float> > m_context_weights; - + Session(uint64_t const session_id) - : id(session_id), scope(new Moses::ContextScope) + : id(session_id) + , scope(new Moses::ContextScope) { last_access = start_time = time(NULL); } diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 0a7ce42e9..174fe39e6 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -57,14 +57,6 @@ Run() parse_request(params); // cerr << "SESSION ID" << ret->m_session_id << endl; - if (m_session_id) - { - Session const& S = m_translator->get_session(m_session_id); - m_scope = S.scope; - m_session_id = S.id; - // cerr << "SESSION ID" << m_session_id << endl; - } - else m_scope.reset(new Moses::ContextScope); // settings within the session scope param_t::const_iterator si = params.find("context-weights"); @@ -72,18 +64,11 @@ Run() Moses::StaticData const& SD = Moses::StaticData::Instance(); - //Make sure alternative paths are retained, if necessary - // if (m_withGraphInfo || m_nbestSize>0) - // why on earth is this a global variable? Is this even thread-safe???? UG - // (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true); - // std::stringstream out, graphInfo, transCollOpts; - if (SD.IsSyntax()) run_chart_decoder(); else run_phrase_decoder(); - // XVERBOSE(1,"Output: " << out.str() << endl); { boost::lock_guard<boost::mutex> lock(m_mutex); m_done = true; @@ -97,7 +82,8 @@ void TranslationRequest:: add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const { - if (!m_withAlignInfo) return; + // if (!m_withAlignInfo) return; + if (!options()->output.ReportSegmentation) return; Range const& trg = h.GetCurrTargetWordsRange(); Range const& src = h.GetCurrSourceWordsRange(); @@ -159,37 +145,23 @@ insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData) x["recombined"] = value_int(n.recombinationHypo->GetId()); x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos()); x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos()); - x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder())); + x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(options()->output.factor_order)); } searchGraphXml.push_back(value_struct(x)); } retData["sg"] = xmlrpc_c::value_array(searchGraphXml); } -// void -// TranslationRequest:: -// output_phrase(ostream& out, Phrase const& phrase) const -// { -// if (!m_options.output.ReportAllFactors) { -// for (size_t i = 0 ; i < phrase.GetSize(); ++i) -// out << *phrase.GetFactor(i, 0) << " "; -// } else out << phrase; -// } - void TranslationRequest:: outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData) { TrellisPathList nBestList; vector<xmlrpc_c::value> nBestXml; - manager.CalcNBest(m_options.nbest.nbest_size, nBestList, - m_options.nbest.only_distinct); - - StaticData const& SD = StaticData::Instance(); - manager.OutputNBest(cout, nBestList, - SD.GetOutputFactorOrder(), - m_source->GetTranslationId(), - options().output.ReportSegmentation); + + Moses::NBestOptions const& nbo = m_options->nbest; + manager.CalcNBest(nbo.nbest_size, nBestList, nbo.only_distinct); + manager.OutputNBest(cout, nBestList); BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) { vector<const Hypothesis *> const& E = path->GetEdges(); @@ -199,10 +171,9 @@ outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData) if (m_withScoreBreakdown) { // should the score breakdown be reported in a more structured manner? ostringstream buf; - bool with_labels = m_options.nbest.include_feature_labels; + bool with_labels = nbo.include_feature_labels; path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); - nBestXmlItem["scores"] = PackScores(*path->GetScoreBreakdown()); } @@ -218,30 +189,27 @@ TranslationRequest:: insertTranslationOptions(Moses::Manager& manager, std::map<std::string, xmlrpc_c::value>& retData) { - const TranslationOptionCollection* toptsColl - = manager.getSntTranslationOptions(); + std::vector<Moses::FactorType> const& ofactor_order = options()->output.factor_order; + + const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions(); vector<xmlrpc_c::value> toptsXml; size_t const stop = toptsColl->GetSource().GetSize(); TranslationOptionList const* tol; for (size_t s = 0 ; s < stop ; ++s) { - for (size_t e = s; - (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; - ++e) { + for (size_t e=s;(tol=toptsColl->GetTranslationOptionList(s,e))!=NULL;++e) { BOOST_FOREACH(TranslationOption const* topt, *tol) { std::map<std::string, xmlrpc_c::value> toptXml; TargetPhrase const& tp = topt->GetTargetPhrase(); - StaticData const& GLOBAL = StaticData::Instance(); - std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder()); + std::string tphrase = tp.GetStringRep(ofactor_order); toptXml["phrase"] = xmlrpc_c::value_string(tphrase); toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore()); toptXml["start"] = xmlrpc_c::value_int(s); toptXml["end"] = xmlrpc_c::value_int(e); vector<xmlrpc_c::value> scoresXml; const std::valarray<FValue> &scores - = topt->GetScoreBreakdown().getCoreFeatures(); + = topt->GetScoreBreakdown().getCoreFeatures(); for (size_t j = 0; j < scores.size(); ++j) scoresXml.push_back(xmlrpc_c::value_double(scores[j])); - toptXml["scores"] = xmlrpc_c::value_array(scoresXml); toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); } @@ -254,10 +222,9 @@ TranslationRequest:: TranslationRequest(xmlrpc_c::paramList const& paramList, boost::condition_variable& cond, boost::mutex& mut) : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) - // , m_nbestSize(0) , m_session_id(0) { - m_options = StaticData::Instance().options(); + } bool @@ -276,28 +243,43 @@ TranslationRequest:: parse_request(std::map<std::string, xmlrpc_c::value> const& params) { // parse XMLRPC request - // params_t const params = m_paramList.getStruct(0); m_paramList.verifyEnd(1); // ??? UG - m_options.update(params); + typedef std::map<std::string, xmlrpc_c::value> params_t; + params_t::const_iterator si; + + si = params.find("session-id"); + if (si != params.end()) + { + m_session_id = xmlrpc_c::value_int(si->second); + Session const& S = m_translator->get_session(m_session_id); + m_scope = S.scope; + m_session_id = S.id; + } + else + { + m_session_id = 0; + m_scope.reset(new Moses::ContextScope); + } + + boost::shared_ptr<Moses::AllOptions> opts(new Moses::AllOptions(*StaticData::Instance().options())); + opts->update(params); + + m_withGraphInfo = check(params, "sg"); + if (m_withGraphInfo || opts->nbest.nbest_size > 0) { + opts->output.SearchGraph = "true"; + opts->nbest.enabled = true; + } + + m_options = opts; // source text must be given, or we don't know what to translate - typedef std::map<std::string, xmlrpc_c::value> params_t; - params_t::const_iterator si = params.find("text"); + si = params.find("text"); if (si == params.end()) throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE); m_source_string = xmlrpc_c::value_string(si->second); XVERBOSE(1,"Input: " << m_source_string << endl); - - si = params.find("session-id"); - if (si != params.end()) - m_session_id = xmlrpc_c::value_int(si->second); - else - m_session_id = 0; - m_withAlignInfo = check(params, "align"); - m_withWordAlignInfo = check(params, "word-align"); - m_withGraphInfo = check(params, "sg"); m_withTopts = check(params, "topt"); m_withScoreBreakdown = check(params, "add-score-breakdown"); si = params.find("lambda"); @@ -314,16 +296,10 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params) string const model_name = xmlrpc_c::value_string(si->second); PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); - // Moses::PhraseDictionaryMultiModel* pdmm - // = FindPhraseDictionary(model_name); pdmm->SetTemporaryMultiModelWeightsVector(w); } } - // si = params.find("nbest"); - // if (si != params.end()) - // m_nbestSize = xmlrpc_c::value_int(si->second); - si = params.find("context"); if (si != params.end()) { @@ -341,7 +317,7 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params) // for (size_t i = 1; i < tmp.size(); i += 2) // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]); // } - m_source.reset(new Sentence(0,m_source_string,m_options)); + m_source.reset(new Sentence(m_options,0,m_source_string)); } // end of Translationtask::parse_request() @@ -349,9 +325,9 @@ void TranslationRequest:: run_chart_decoder() { - Moses::TreeInput tinput; + Moses::TreeInput tinput(m_options); istringstream buf(m_source_string + "\n"); - tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options); + tinput.Read(buf); Moses::ChartManager manager(this->self()); manager.Decode(); @@ -372,21 +348,21 @@ run_chart_decoder() void TranslationRequest:: -pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const& edges, string const& key, +pack_hypothesis(const Moses::Manager& manager, + vector<Hypothesis const* > const& edges, string const& key, map<string, xmlrpc_c::value> & dest) const { // target string ostringstream target; BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) { manager.OutputSurface(target, *e); - // , m_options.output.factor_order, - // m_options.output.ReportSegmentation, m_options.output.ReportAllFactors); } - XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl); -// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl); + XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) + << std::endl); dest[key] = xmlrpc_c::value_string(target.str()); - if (m_withAlignInfo) { + // if (m_withAlignInfo) { + if (options()->output.ReportSegmentation) { // phrase alignment, if requested vector<xmlrpc_c::value> p_aln; @@ -395,7 +371,8 @@ pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const& dest["align"] = xmlrpc_c::value_array(p_aln); } - if (m_withWordAlignInfo) { + // if (m_withWordAlignInfo) { + if (options()->output.PrintAlignmentInfo) { // word alignment, if requested vector<xmlrpc_c::value> w_aln; BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) @@ -421,28 +398,15 @@ void TranslationRequest:: run_phrase_decoder() { - if (m_withGraphInfo || m_options.nbest.nbest_size>0) { - m_options.output.SearchGraph = "true"; - m_options.nbest.enabled = true; - } - Manager manager(this->self()); - // if (m_bias.size()) manager.SetBias(&m_bias); - - manager.Decode(); - pack_hypothesis(manager, manager.GetBestHypothesis(), "text", m_retData); if (m_session_id) m_retData["session-id"] = xmlrpc_c::value_int(m_session_id); if (m_withGraphInfo) insertGraphInfo(manager,m_retData); if (m_withTopts) insertTranslationOptions(manager,m_retData); - if (m_options.nbest.nbest_size) outputNBest(manager, m_retData); - - // (const_cast<StaticData&>(Moses::StaticData::Instance())) - // .SetOutputSearchGraph(false); - // WTF? one more reason not to have this as global variable! --- UG + if (m_options->nbest.nbest_size) outputNBest(manager, m_retData); } } diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 7fb122714..2554e5544 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -38,8 +38,8 @@ TranslationRequest : public virtual Moses::TranslationTask Translator* m_translator; std::string m_source_string, m_target_string; - bool m_withAlignInfo; - bool m_withWordAlignInfo; + // bool m_withAlignInfo; + // bool m_withWordAlignInfo; bool m_withGraphInfo; bool m_withTopts; bool m_withScoreBreakdown; @@ -58,17 +58,16 @@ TranslationRequest : public virtual Moses::TranslationTask run_phrase_decoder(); void - pack_hypothesis(const Moses::Manager& manager, std::vector<Moses::Hypothesis const* > const& edges, + pack_hypothesis(const Moses::Manager& manager, + std::vector<Moses::Hypothesis const* > const& edges, std::string const& key, std::map<std::string, xmlrpc_c::value> & dest) const; void - pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, std::string const& key, + pack_hypothesis(const Moses::Manager& manager, Moses::Hypothesis const* h, + std::string const& key, std::map<std::string, xmlrpc_c::value> & dest) const; - // void - // output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; - void add_phrase_aln_info(Moses::Hypothesis const& h, std::vector<xmlrpc_c::value>& aInfo) const; diff --git a/phrase-extract/extract-ghkm/Alignment.cpp b/phrase-extract/Alignment.cpp index d12f9398b..cef7a4638 100644 --- a/phrase-extract/extract-ghkm/Alignment.cpp +++ b/phrase-extract/Alignment.cpp @@ -27,10 +27,6 @@ namespace MosesTraining { -namespace Syntax -{ -namespace GHKM -{ void ReadAlignment(const std::string &s, Alignment &a) { @@ -46,7 +42,7 @@ void ReadAlignment(const std::string &s, Alignment &a) } int src = std::atoi(s.substr(begin, end-begin).c_str()); if (end+1 == s.size()) { - throw Exception("Target index missing"); + throw Syntax::Exception("Target index missing"); } begin = end+1; @@ -71,6 +67,4 @@ void FlipAlignment(Alignment &a) } } -} // namespace GHKM -} // namespace Syntax } // namespace MosesTraining diff --git a/phrase-extract/extract-ghkm/Alignment.h b/phrase-extract/Alignment.h index da1279f8f..c25896e6d 100644 --- a/phrase-extract/extract-ghkm/Alignment.h +++ b/phrase-extract/Alignment.h @@ -25,10 +25,6 @@ namespace MosesTraining { -namespace Syntax -{ -namespace GHKM -{ typedef std::vector<std::pair<int, int> > Alignment; @@ -36,6 +32,4 @@ void ReadAlignment(const std::string &, Alignment &); void FlipAlignment(Alignment &); -} // namespace GHKM -} // namespace Syntax } // namespace MosesTraining diff --git a/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h index cb2f2261d..adbde43e8 100644 --- a/phrase-extract/ExtractedRule.h +++ b/phrase-extract/ExtractedRule.h @@ -26,6 +26,8 @@ #include <sstream> #include <map> +#include "PhraseOrientation.h" + namespace MosesTraining { @@ -37,8 +39,6 @@ public: std::string target; std::string alignment; std::string alignmentInv; - std::string orientation; - std::string orientationForward; std::string sourceContextLeft; std::string sourceContextRight; std::string targetContextLeft; @@ -51,14 +51,14 @@ public: int endS; float count; double pcfgScore; + PhraseOrientation::REO_CLASS l2rOrientation; + PhraseOrientation::REO_CLASS r2lOrientation; ExtractedRule(int sT, int eT, int sS, int eS) : source() , target() , alignment() , alignmentInv() - , orientation() - , orientationForward() , sourceContextLeft() , sourceContextRight() , targetContextLeft() @@ -70,8 +70,10 @@ public: , startS(sS) , endS(eS) , count(0) - , pcfgScore(0.0) { - } + , pcfgScore(0.0) + , l2rOrientation(PhraseOrientation::REO_CLASS_UNKNOWN) + , r2lOrientation(PhraseOrientation::REO_CLASS_UNKNOWN) + { } }; } diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/PhraseOrientation.cpp index f07e19a46..20297b4de 100644 --- a/phrase-extract/extract-ghkm/PhraseOrientation.cpp +++ b/phrase-extract/PhraseOrientation.cpp @@ -28,10 +28,6 @@ namespace MosesTraining { -namespace Syntax -{ -namespace GHKM -{ std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0); std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0); @@ -100,6 +96,18 @@ PhraseOrientation::PhraseOrientation(int sourceSize, Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS); } +PhraseOrientation::PhraseOrientation(int sourceSize, + int targetSize, + const std::vector<std::vector<int> > &alignedToT, + const std::vector<std::vector<int> > &alignedToS, + const std::vector<int> &alignedCountS) + : m_countF(sourceSize) + , m_countE(targetSize) + , m_alignedToT(alignedToT) +{ + Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS); +} + void PhraseOrientation::Init(int sourceSize, int targetSize, @@ -470,6 +478,4 @@ void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE } } -} // namespace GHKM -} // namespace Syntax } // namespace MosesTraining diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/PhraseOrientation.h index d956e2bc8..1cdfb65be 100644 --- a/phrase-extract/extract-ghkm/PhraseOrientation.h +++ b/phrase-extract/PhraseOrientation.h @@ -32,10 +32,6 @@ namespace MosesTraining { -namespace Syntax -{ -namespace GHKM -{ // The key of the map is the English index and the value is a set of the source ones typedef std::map <int, std::set<int> > HSentenceVertices; @@ -49,6 +45,7 @@ public: enum REO_CLASS {REO_CLASS_LEFT, REO_CLASS_RIGHT, REO_CLASS_DLEFT, REO_CLASS_DRIGHT, REO_CLASS_UNKNOWN}; enum REO_DIR {REO_DIR_L2R, REO_DIR_R2L, REO_DIR_BIDIR}; + PhraseOrientation() {}; PhraseOrientation(int sourceSize, int targetSize, @@ -59,6 +56,12 @@ public: const Moses::AlignmentInfo &alignTerm, const Moses::AlignmentInfo &alignNonTerm); + PhraseOrientation(int sourceSize, + int targetSize, + const std::vector<std::vector<int> > &alignedToT, + const std::vector<std::vector<int> > &alignedToS, + const std::vector<int> &alignedCountS); + REO_CLASS GetOrientationInfo(int startF, int endF, REO_DIR direction) const; REO_CLASS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const; const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=REO_DIR_BIDIR) const; @@ -104,8 +107,8 @@ private: return first < second; }; - const int m_countF; - const int m_countE; + int m_countF; + int m_countE; std::vector<std::vector<int> > m_alignedToT; @@ -121,6 +124,4 @@ private: static std::vector<float> m_r2lOrientationPriorCounts; }; -} // namespace GHKM -} // namespace Syntax } // namespace MosesTraining diff --git a/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h index b38258470..aab059cf9 100644 --- a/phrase-extract/RuleExtractionOptions.h +++ b/phrase-extract/RuleExtractionOptions.h @@ -54,6 +54,7 @@ public: bool conditionOnTargetLhs; bool boundaryRules; bool flexScoreFlag; + bool phraseOrientation; RuleExtractionOptions() : maxSpan(10) @@ -86,7 +87,8 @@ public: , unpairedExtractFormat(false) , conditionOnTargetLhs(false) , boundaryRules(false) - , flexScoreFlag(false) {} + , flexScoreFlag(false) + , phraseOrientation(false) {} }; } diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp index e6fff965d..62dbbbf0e 100644 --- a/phrase-extract/extract-rules-main.cpp +++ b/phrase-extract/extract-rules-main.cpp @@ -46,6 +46,7 @@ #include "XmlTree.h" #include "InputFileStream.h" #include "OutputFileStream.h" +#include "PhraseOrientation.h" using namespace std; using namespace MosesTraining; @@ -62,6 +63,7 @@ private: Moses::OutputFileStream& m_extractFileInv; Moses::OutputFileStream& m_extractFileContext; Moses::OutputFileStream& m_extractFileContextInv; + PhraseOrientation m_phraseOrientation; vector< ExtractedRule > m_extractedRules; @@ -109,6 +111,7 @@ public: void collectWordLabelCounts(SentenceAlignmentWithSyntax &sentence ); void writeGlueGrammar(const string &, RuleExtractionOptions &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection); void writeUnknownWordLabel(const string &); +void writePhraseOrientationPriors(const string &); double getPcfgScore(const SyntaxNode &); @@ -142,7 +145,8 @@ int main(int argc, char* argv[]) << " | --UnpairedExtractFormat" << " | --ConditionOnTargetLHS ]" << " | --BoundaryRules[" << options.boundaryRules << "]" - << " | --FlexibilityScore\n"; + << " | --FlexibilityScore" + << " | --PhraseOrientation\n"; exit(1); } @@ -267,6 +271,8 @@ int main(int argc, char* argv[]) options.conditionOnTargetLhs = true; } else if (strcmp(argv[i],"--FlexibilityScore") == 0) { options.flexScoreFlag = true; + } else if (strcmp(argv[i],"--PhraseOrientation") == 0) { + options.phraseOrientation = true; } else if (strcmp(argv[i],"-threads") == 0 || strcmp(argv[i],"--threads") == 0 || strcmp(argv[i],"--Threads") == 0) { @@ -377,6 +383,11 @@ int main(int argc, char* argv[]) if (options.unknownWordLabelFlag) writeUnknownWordLabel(fileNameUnknownWordLabel); + + if (options.phraseOrientation) { + std::string fileNamePhraseOrientationPriors = fileNameExtract + string(".phraseOrientationPriors"); + writePhraseOrientationPriors(fileNamePhraseOrientationPriors); + } } void ExtractTask::Run() @@ -392,6 +403,12 @@ void ExtractTask::extractRules() int countT = m_sentence.target.size(); int countS = m_sentence.source.size(); + // initialize phrase orientation scoring object (for lexicalized reordering model) + if (m_options.phraseOrientation) { + m_sentence.invertAlignment(); // fill m_sentence.alignedToS + m_phraseOrientation = PhraseOrientation(countS, countT, m_sentence.alignedToT, m_sentence.alignedToS, m_sentence.alignedCountS); + } + // phrase repository for creating hiero phrases RuleExist ruleExist(countT); @@ -990,6 +1007,10 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, int count } } + rule.alignment.erase(rule.alignment.size()-1); + if (!m_options.onlyDirectFlag) + rule.alignmentInv.erase(rule.alignmentInv.size()-1); + // context (words to left and right) if (m_options.flexScoreFlag) { rule.sourceContextLeft = startS == 0 ? "<s>" : m_sentence.source[startS-1]; @@ -998,9 +1019,14 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, int count rule.targetContextRight = endT+1 == m_sentence.target.size() ? "<s>" : m_sentence.target[endT+1]; } - rule.alignment.erase(rule.alignment.size()-1); - if (!m_options.onlyDirectFlag) - rule.alignmentInv.erase(rule.alignmentInv.size()-1); + // phrase orientation (lexicalized reordering model) + if (m_options.phraseOrientation) { + rule.l2rOrientation = m_phraseOrientation.GetOrientationInfo(startS,endS,PhraseOrientation::REO_DIR_L2R); + rule.r2lOrientation = m_phraseOrientation.GetOrientationInfo(startS,endS,PhraseOrientation::REO_DIR_R2L); + // std::cerr << "span " << startS << " " << endS << std::endl; + // std::cerr << "phraseOrientationL2R " << m_phraseOrientation.GetOrientationInfo(startS,endS,PhraseOrientation::REO_DIR_L2R) << std::endl; + // std::cerr << "phraseOrientationR2L " << m_phraseOrientation.GetOrientationInfo(startS,endS,PhraseOrientation::REO_DIR_R2L) << std::endl; + } addRuleToCollection( rule ); } @@ -1070,6 +1096,15 @@ void ExtractTask::writeRulesToFile() if (m_options.pcfgScore) { out << " ||| " << rule->pcfgScore; } + if (m_options.phraseOrientation) { + out << " {{Orientation "; + m_phraseOrientation.WriteOrientation(out,rule->l2rOrientation); + out << " "; + m_phraseOrientation.WriteOrientation(out,rule->r2lOrientation); + m_phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_L2R,rule->l2rOrientation,1); + m_phraseOrientation.IncrementPriorCount(PhraseOrientation::REO_DIR_R2L,rule->r2lOrientation,1); + out << "}}"; + } out << "\n"; if (!m_options.onlyDirectFlag) { @@ -1119,10 +1154,14 @@ void writeGlueGrammar( const string & fileName, RuleExtractionOptions &options, { ofstream grammarFile; grammarFile.open(fileName.c_str()); + std::string glueRulesPhraseProperty = ""; + if (options.phraseOrientation) { + glueRulesPhraseProperty.append(" ||| ||| {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}"); + } if (!options.targetSyntax) { - grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| 0-0 ||| 0" << endl - << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 1-1 ||| 0" << endl - << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl; + grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| 0-0 ||| 0" << glueRulesPhraseProperty << endl + << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 1-1 ||| 0" << glueRulesPhraseProperty << endl + << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << glueRulesPhraseProperty << endl; } else { // chose a top label that is not already a label string topLabel = "QQQQQQ"; @@ -1196,6 +1235,14 @@ void writeUnknownWordLabel(const string & fileName) outFile.close(); } +void writePhraseOrientationPriors(const string &fileName) +{ + ofstream outFile; + outFile.open(fileName.c_str()); + PhraseOrientation::WritePriorCounts(outFile); + outFile.close(); +} + double getPcfgScore(const SyntaxNode &node) { double score = 0.0f; diff --git a/regression-testing/run-single-test.perl b/regression-testing/run-single-test.perl index b01524ab9..a99e24a26 100755 --- a/regression-testing/run-single-test.perl +++ b/regression-testing/run-single-test.perl @@ -176,7 +176,11 @@ sub exec_moses_server { } elsif ($pid == 0) { setpgrp(0, 0); warn "Starting Moses server on port $serverport ...\n"; - ($o, $ec, $sig) = run_command("$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr "); + my $cmd = "$decoder --server --server-port $serverport -f $conf -verbose 2 --server-log $results/run.stderr.server 2> $results/run.stderr "; + open CMD, ">$results/cmd_line"; + print CMD "$cmd\n"; + close CMD; + ($o, $ec, $sig) = run_command($cmd); exit; # this should not be reached unless the server fails to start } diff --git a/run-regtests.sh b/run-regtests.sh index db4290a90..3d93741d5 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -2,21 +2,82 @@ # this script assumes that all 3rd-party dependencies are installed under ./opt # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake -set -e -o pipefail -x +set -e -o pipefail opt=$(pwd)/opt + +args=$(getopt -oj:aq -lwith-irstlm:,with-boost:,with-cmph:,with-regtest:,no-xmlrpc-c,with-xmlrpc-c:,full -- "$@") +eval set -- "$args" + +# default settings +noserver=false; +full=false; +j=$(getconf _NPROCESSORS_ONLN) +irstlm=$opt +boost=$opt +cmph=$opt +xmlrpc=--with-xmlrpc-c\=$opt +regtest=$(pwd)/regtest +unset q +unset a +# the regression test for the compactpt bug is currently know to fail, +# let's skip it for the time being +skipcompact=--regtest-skip-compactpt + +# overrides from command line +while true ; do + case "$1" in + -j ) j=$2; shift 2 ;; + -a ) a=-a; shift ;; + -q ) q=-q; shift ;; + --no-xmlrpc-c ) xmlrpc=$1; shift ;; + --with-xmlrpc-c ) + xmlrpc=--with-xmlrpc-c\=$2; shift 2 ;; + --with-irstlm ) irstlm=$2; shift 2 ;; + --with-boost ) boost=$2; shift 2 ;; + --with-cmph ) cmph=$2; shift 2 ;; + --with-regtest ) regtest=$2; shift 2 ;; + --full ) full=true; shift 2 ;; + -- ) shift; break ;; + * ) break ;; + esac +done + +if [ $? != 0 ] ; then exit $?; fi + git submodule init git submodule update regtest -if [ "$RECOMPILE" == "NO" ] ; then - RECOMPILE= + +# full test means +# -- compile from scratch without server, run regtests +# -- compile from scratch with server, run regtests +set -x +if [ "$full" == true ] ; then + ./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest -a $skipcompact $@ $q || exit $? + if ./regression-testing/run-single-test.perl --server --startuptest ; then + ./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest -a $skipcompact $@ $q + fi else - RECOMPILE="-a" + # when investigating failures, always run single-threaded + if [ "$q" == "-q" ] ; then j=1; fi + + if ./regression-testing/run-single-test.perl --server --startuptest ; then + ./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest $skipcompact $@ + else + ./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest $skipcompact $@ + fi fi -# test compilation without xmlrpc-c -./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=$(pwd)/regtest -a -q $@ || exit $? +# if [ "$RECOMPILE" == "NO" ] ; then +# RECOMPILE= +# else +# RECOMPILE="-a" +# fi -# test compilation with xmlrpc-c -if ./regression-testing/run-single-test.perl --server --startuptest ; then - ./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=$(pwd)/regtest $RECOMPILE -q $@ -fi +# # test compilation without xmlrpc-c +# # ./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=$(pwd)/regtest -a -q $@ || exit $? + +# # test compilation with xmlrpc-c +# if ./regression-testing/run-single-test.perl --server --startuptest ; then +# ./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=$(pwd)/regtest $RECOMPILE -q --regtest-skip-compactpt $@ +# fi diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index fe8a3c9ab..4b9b52e11 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2364,6 +2364,12 @@ sub define_training_extract_phrases { $cmd .= "-unknown-word-soft-matches $unknown_word_soft_matches "; } + if (&get("TRAINING:phrase-orientation")) { + $cmd .= "-phrase-orientation "; + my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model","")); + $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file "; + } + if (&get("TRAINING:use-ghkm")) { $cmd .= "-ghkm "; } @@ -2372,12 +2378,6 @@ sub define_training_extract_phrases { $cmd .= "-ghkm-tree-fragments "; } - if (&get("TRAINING:ghkm-phrase-orientation")) { - $cmd .= "-ghkm-phrase-orientation "; - my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model","")); - $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file "; - } - if (&get("TRAINING:ghkm-source-labels")) { $cmd .= "-ghkm-source-labels "; my $source_labels_file = &versionize(&long_file_name("source-labels","model","")); @@ -2427,16 +2427,16 @@ sub define_training_build_ttable { if (&get("TRAINING:hierarchical-rule-set")) { - if (&get("TRAINING:ghkm-tree-fragments")) { - $cmd .= "-ghkm-tree-fragments "; - } - - if (&get("TRAINING:ghkm-phrase-orientation")) { - $cmd .= "-ghkm-phrase-orientation "; + if (&get("TRAINING:phrase-orientation")) { + $cmd .= "-phrase-orientation "; my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model","")); $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file "; } + if (&get("TRAINING:ghkm-tree-fragments")) { + $cmd .= "-ghkm-tree-fragments "; + } + if (&get("TRAINING:ghkm-source-labels")) { $cmd .= "-ghkm-source-labels "; my $source_labels_file = &versionize(&long_file_name("source-labels","model","")); @@ -2640,6 +2640,10 @@ sub define_training_create_config { } } + if (&get("TRAINING:phrase-orientation")) { + $cmd .= "-phrase-orientation "; + } + if (&get("TRAINING:ghkm-source-labels")) { $cmd .= "-ghkm-source-labels "; my $source_labels_file = &versionize(&long_file_name("source-labels","model","")); diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 76f661712..b8a2c13b9 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -93,10 +93,10 @@ my($_EXTERNAL_BINDIR, $_GLUE_GRAMMAR_FILE, $_DONT_TUNE_GLUE_GRAMMAR, $_UNKNOWN_WORD_LABEL_FILE, + $_PHRASE_ORIENTATION, + $_PHRASE_ORIENTATION_PRIORS_FILE, $_GHKM, $_GHKM_TREE_FRAGMENTS, - $_GHKM_PHRASE_ORIENTATION, - $_PHRASE_ORIENTATION_PRIORS_FILE, $_GHKM_SOURCE_LABELS, $_GHKM_SOURCE_LABELS_FILE, $_GHKM_PARTS_OF_SPEECH, @@ -210,10 +210,10 @@ $_HELP = 1 'dont-tune-glue-grammar' => \$_DONT_TUNE_GLUE_GRAMMAR, 'unknown-word-label-file=s' => \$_UNKNOWN_WORD_LABEL_FILE, 'unknown-word-soft-matches-file=s' => \$_UNKNOWN_WORD_SOFT_MATCHES_FILE, # give dummy label to unknown word, and allow soft matches to all other labels (with cost determined by sparse features) + 'phrase-orientation' => \$_PHRASE_ORIENTATION, + 'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for Hiero and GHKM extraction only; phrase orientation for PBT has different implementation 'ghkm' => \$_GHKM, 'ghkm-tree-fragments' => \$_GHKM_TREE_FRAGMENTS, - 'ghkm-phrase-orientation' => \$_GHKM_PHRASE_ORIENTATION, - 'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation 'ghkm-source-labels' => \$_GHKM_SOURCE_LABELS, 'ghkm-source-labels-file=s' => \$_GHKM_SOURCE_LABELS_FILE, 'ghkm-parts-of-speech' => \$_GHKM_PARTS_OF_SPEECH, @@ -1561,11 +1561,11 @@ sub extract_phrase { $cmd .= " --PCFG" if $_PCFG; $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2; $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1; + $cmd .= " --PhraseOrientation" if $_PHRASE_ORIENTATION; + $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE); if (defined($_GHKM)) { $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS; - $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION; - $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE); $cmd .= " --SourceLabels" if $_GHKM_SOURCE_LABELS; $cmd .= " --PartsOfSpeech" if $_GHKM_PARTS_OF_SPEECH; $cmd .= " --PartsOfSpeechFactor" if $_GHKM_PARTS_OF_SPEECH_FACTOR; @@ -1701,6 +1701,7 @@ sub score_phrase_phrase_extract { my $LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /LogProb/); my $NEG_LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NegLogProb/); my $NO_LEX = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoLex/); + my $MIN_COUNT = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCount ([\d\.]+)/) ? $1 : undef; my $MIN_COUNT_HIERARCHICAL = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCountHierarchical ([\d\.]+)/) ? $1 : undef; my $SOURCE_LABELS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabels/); my $SOURCE_LABEL_COUNTS_LHS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelCountsLHS/); @@ -1748,13 +1749,14 @@ sub score_phrase_phrase_extract { $cmd .= " --SpanLength" if $SPAN_LENGTH && $inverse eq ""; $cmd .= " --UnalignedPenalty" if $UNALIGNED_COUNT; $cmd .= " --UnalignedFunctionWordPenalty ".($inverse ? $UNALIGNED_FW_F : $UNALIGNED_FW_E) if $UNALIGNED_FW_COUNT; + $cmd .= " --MinCount $MIN_COUNT" if $MIN_COUNT; $cmd .= " --MinCountHierarchical $MIN_COUNT_HIERARCHICAL" if $MIN_COUNT_HIERARCHICAL; $cmd .= " --PCFG" if $_PCFG; $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2; $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1; $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS; - $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION; - $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE); + $cmd .= " --PhraseOrientation" if $_PHRASE_ORIENTATION; + $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE); $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE); $cmd .= " --PartsOfSpeech $_GHKM_PARTS_OF_SPEECH_FILE" if $_GHKM_PARTS_OF_SPEECH && defined($_GHKM_PARTS_OF_SPEECH_FILE); $cmd .= " $DOMAIN" if $DOMAIN; @@ -2365,6 +2367,7 @@ sub create_ini { print INI "PhrasePenalty\n"; print INI "SoftMatchingFeature name=SM0 path=$_UNKNOWN_WORD_SOFT_MATCHES_FILE\n" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_SOFT_MATCHES_FILE); print INI "SoftSourceSyntacticConstraintsFeature sourceLabelSetFile=$_GHKM_SOURCE_LABELS_FILE\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE); + print INI "PhraseOrientationFeature\n" if $_PHRASE_ORIENTATION; print INI $feature_spec; print INI "\n# dense weights for feature functions\n"; @@ -2375,6 +2378,7 @@ sub create_ini { print INI "WordPenalty0= -1\n"; print INI "PhrasePenalty0= 0.2\n"; print INI "SoftSourceSyntacticConstraintsFeature0= -0.2 -0.2 -0.2 0.1 0.1 0.1\n" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE); + print INI "PhraseOrientationFeature0= 0.05 0.05 0.05 0.05 0.05 0.05\n" if $_PHRASE_ORIENTATION; print INI $weight_spec; close(INI); } diff --git a/vw/Classifier.h b/vw/Classifier.h index 31bd6ff82..39b3461ad 100644 --- a/vw/Classifier.h +++ b/vw/Classifier.h @@ -83,8 +83,8 @@ protected: // some of VW settings are hard-coded because they are always needed in our scenario // (e.g. quadratic source X target features) -const std::string VW_DEFAULT_OPTIONS = " --hash all --noconstant -q st -t --ldf_override s "; -const std::string VW_DEFAULT_PARSER_OPTIONS = " --quiet --hash all --noconstant -q st -t --csoaa_ldf s "; +const std::string VW_DEFAULT_OPTIONS = " --hash all --noconstant -q st -t --ldf_override sc "; +const std::string VW_DEFAULT_PARSER_OPTIONS = " --quiet --hash all --noconstant -q st -t --csoaa_ldf sc "; /** * Produce VW training file (does not use the VW library!) diff --git a/vw/VWPredictor.cpp b/vw/VWPredictor.cpp index 95158363c..01192a9c6 100644 --- a/vw/VWPredictor.cpp +++ b/vw/VWPredictor.cpp @@ -44,7 +44,7 @@ void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float valu // the first feature of a new example => create the source namespace for // label-independent features to live in m_isFirstSource = false; - m_ex->clear_features(); // removes all namespaces along with features + m_ex->finish(); m_ex->addns('s'); if (DEBUG) std::cerr << "VW :: Setting source namespace\n"; } |