From 13ec6060b0d7ef600f056c15dd249d190cc16b25 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 8 Jan 2015 11:49:53 +0000
Subject: move mira code to contrib

---
 contrib/mira/Decoder.cpp                    |  352 +++++
 contrib/mira/Decoder.h                      |  138 ++
 contrib/mira/Hildreth.cpp                   |  175 +++
 contrib/mira/Hildreth.h                     |   13 +
 contrib/mira/HildrethTest.cpp               |  793 ++++++++++++
 contrib/mira/HypothesisQueue.cpp            |   66 +
 contrib/mira/HypothesisQueue.h              |   69 +
 contrib/mira/Jamfile                        |   15 +
 contrib/mira/Main.cpp                       | 1847 +++++++++++++++++++++++++
 contrib/mira/Main.h                         |   58 +
 contrib/mira/MiraOptimiser.cpp              |  446 +++++++
 contrib/mira/MiraTest.cpp                   |   24 +
 contrib/mira/Optimiser.h                    |  153 +++
 contrib/mira/Perceptron.cpp                 |   53 +
 contrib/mira/expt.cfg                       |   34 +
 contrib/mira/mira.xcodeproj/project.pbxproj |  401 ++++++
 contrib/mira/training-expt.perl             |  994 ++++++++++++++
 17 files changed, 5631 insertions(+)
 create mode 100644 contrib/mira/Decoder.cpp
 create mode 100644 contrib/mira/Decoder.h
 create mode 100644 contrib/mira/Hildreth.cpp
 create mode 100644 contrib/mira/Hildreth.h
 create mode 100644 contrib/mira/HildrethTest.cpp
 create mode 100644 contrib/mira/HypothesisQueue.cpp
 create mode 100644 contrib/mira/HypothesisQueue.h
 create mode 100644 contrib/mira/Jamfile
 create mode 100644 contrib/mira/Main.cpp
 create mode 100644 contrib/mira/Main.h
 create mode 100644 contrib/mira/MiraOptimiser.cpp
 create mode 100644 contrib/mira/MiraTest.cpp
 create mode 100644 contrib/mira/Optimiser.h
 create mode 100644 contrib/mira/Perceptron.cpp
 create mode 100644 contrib/mira/expt.cfg
 create mode 100644 contrib/mira/mira.xcodeproj/project.pbxproj
 create mode 100755 contrib/mira/training-expt.perl

diff --git a/contrib/mira/Decoder.cpp b/contrib/mira/Decoder.cpp
new file mode 100644
index 000000000..c9bb4c983
--- /dev/null
+++ b/contrib/mira/Decoder.cpp
@@ -0,0 +1,352 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "Decoder.h"
+#include "moses/Manager.h"
+#include "moses/ChartManager.h"
+#include "moses/Sentence.h"
+#include "moses/InputType.h"
+#include "moses/Phrase.h"
+#include "moses/TrellisPathList.h"
+#include "moses/ChartKBestExtractor.h"
+
+using namespace std;
+using namespace Moses;
+
+
+namespace Mira
+{
+
+/**
+ * Allocates a char* and copies string into it.
+**/ +static char* strToChar(const string& s) +{ + char* c = new char[s.size()+1]; + strcpy(c,s.c_str()); + return c; +} + +MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector decoder_params) + : m_manager(NULL) +{ + static int BASE_ARGC = 6; + Parameter* params = new Parameter(); + char ** mosesargv = new char*[BASE_ARGC + argc]; + mosesargv[0] = strToChar("-f"); + mosesargv[1] = strToChar(inifile); + mosesargv[2] = strToChar("-v"); + stringstream dbgin; + dbgin << debuglevel; + mosesargv[3] = strToChar(dbgin.str()); + + mosesargv[4] = strToChar("-no-cache"); + mosesargv[5] = strToChar("true"); + /* + mosesargv[4] = strToChar("-use-persistent-cache"); + mosesargv[5] = strToChar("0"); + mosesargv[6] = strToChar("-persistent-cache-size"); + mosesargv[7] = strToChar("0"); + */ + + for (int i = 0; i < argc; ++i) { + char *cstr = &(decoder_params[i])[0]; + mosesargv[BASE_ARGC + i] = cstr; + } + + if (!params->LoadParam(BASE_ARGC + argc,mosesargv)) { + cerr << "Loading static data failed, exit." << endl; + exit(1); + } + StaticData::LoadDataStatic(params, "mira"); + for (int i = 0; i < BASE_ARGC; ++i) { + delete[] mosesargv[i]; + } + delete[] mosesargv; + + const std::vector &bleuFFs = BleuScoreFeature::GetColl(); + assert(bleuFFs.size() == 1); + m_bleuScoreFeature = bleuFFs[0]; +} + +void MosesDecoder::cleanup(bool chartDecoding) +{ + delete m_manager; + if (chartDecoding) + delete m_chartManager; + else + delete m_sentence; +} + +vector< vector > MosesDecoder::getNBest(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + bool avgRefLength, + size_t rank, + size_t epoch, + string filename) +{ + StaticData &staticData = StaticData::InstanceNonConst(); + bool chartDecoding = staticData.IsChart(); + initialize(staticData, source, sentenceid, bleuObjectiveWeight, bleuScoreWeight, avgRefLength, chartDecoding); + + // run the decoder + if (chartDecoding) { + return runChartDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight, + featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch); + } else { + SearchAlgorithm search = staticData.GetSearchAlgorithm(); + return runDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight, + featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch, + search, filename); + } +} + +vector< vector > MosesDecoder::runDecoder(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch, + SearchAlgorithm& search, + string filename) +{ + // run the decoder + m_manager = new Moses::Manager(*m_sentence); + m_manager->Decode(); + TrellisPathList nBestList; + m_manager->CalcNBest(nBestSize, nBestList, distinct); + + // optionally print nbest to file (to extract scores and features.. 
currently just for sentence bleu scoring) + /*if (filename != "") { + ofstream out(filename.c_str()); + if (!out) { + ostringstream msg; + msg << "Unable to open " << filename; + throw runtime_error(msg.str()); + } + // TODO: handle sentence id (for now always 0) + //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(), 0, false); + out.close(); + }*/ + + // read off the feature values and bleu scores for each sentence in the nbest list + Moses::TrellisPathList::const_iterator iter; + for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { + const Moses::TrellisPath &path = **iter; + featureValues.push_back(path.GetScoreBreakdown()); + float bleuScore, dynBleuScore, realBleuScore; + if (realBleu) realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetTargetPhrase()); + else dynBleuScore = getBleuScore(featureValues.back()); + bleuScore = realBleu ? realBleuScore : dynBleuScore; + bleuScores.push_back(bleuScore); + + //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl; + float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore); + modelScores.push_back(scoreWithoutBleu); + + if (iter != nBestList.begin()) + cerr << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetTargetPhrase() << "\", score: " + << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore(); + if (m_bleuScoreFeature->Enabled() && realBleu) + cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") "; + + // set bleu score to zero in the feature vector since we do not want to optimise its weight + setBleuScore(featureValues.back(), 0); + } + + // prepare translations to return + vector< vector > translations; + for (size_t i=0; i < numReturnedTranslations && i < nBestList.GetSize(); ++i) { + const TrellisPath &path = nBestList.at(i); + Phrase phrase = path.GetTargetPhrase(); + + vector translation; + for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { + const Word &word = phrase.GetWord(pos); + Word *newWord = new Word(word); + translation.push_back(newWord); + } + translations.push_back(translation); + } + + return translations; +} + +vector< vector > MosesDecoder::runChartDecoder(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch) +{ + // run the decoder + m_chartManager = new ChartManager(*m_sentence); + m_chartManager->Decode(); + ChartKBestExtractor::KBestVec nBestList; + m_chartManager->CalcNBest(nBestSize, nBestList, distinct); + + // read off the feature values and bleu scores for each sentence in the nbest list + for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin(); + p != nBestList.end(); ++p) { + const ChartKBestExtractor::Derivation &derivation = **p; + featureValues.push_back(*ChartKBestExtractor::GetOutputScoreBreakdown(derivation)); + float bleuScore, dynBleuScore, realBleuScore; + dynBleuScore = getBleuScore(featureValues.back()); + Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation); + realBleuScore = m_bleuScoreFeature->CalculateBleu(outputPhrase); + bleuScore = realBleu ? 
realBleuScore : dynBleuScore; + bleuScores.push_back(bleuScore); + + float scoreWithoutBleu = derivation.score - (bleuObjectiveWeight * bleuScoreWeight * bleuScore); + modelScores.push_back(scoreWithoutBleu); + + if (p != nBestList.begin()) + cerr << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << outputPhrase << "\", score: " + << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << derivation.score; + if (m_bleuScoreFeature->Enabled() && realBleu) + cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") "; + + // set bleu score to zero in the feature vector since we do not want to optimise its weight + setBleuScore(featureValues.back(), 0); + } + + // prepare translations to return + vector< vector > translations; + for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin(); + p != nBestList.end(); ++p) { + const ChartKBestExtractor::Derivation &derivation = **p; + Phrase phrase = ChartKBestExtractor::GetOutputPhrase(derivation); + + vector translation; + for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { + const Word &word = phrase.GetWord(pos); + Word *newWord = new Word(word); + translation.push_back(newWord); + } + translations.push_back(translation); + } + + return translations; +} + +void MosesDecoder::initialize(StaticData& staticData, const std::string& source, size_t sentenceid, + float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding) +{ + m_sentence = new Sentence(); + stringstream in(source + "\n"); + const std::vector &inputFactorOrder = staticData.GetInputFactorOrder(); + m_sentence->Read(in,inputFactorOrder); + + // set weight of BleuScoreFeature + //cerr << "Reload Bleu feature weight: " << bleuObjectiveWeight*bleuScoreWeight << " (" << bleuObjectiveWeight << "*" << bleuScoreWeight << ")" << endl; + //staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight); + + m_bleuScoreFeature->SetCurrSourceLength((*m_sentence).GetSize()); + if (chartDecoding) + m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()-2); + else + m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()); + + if (avgRefLength) + m_bleuScoreFeature->SetCurrAvgRefLength(sentenceid); + else + m_bleuScoreFeature->SetCurrShortestRefLength(sentenceid); + m_bleuScoreFeature->SetCurrReferenceNgrams(sentenceid); +} + +float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores) +{ + return scores.GetScoreForProducer(m_bleuScoreFeature); +} + +void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu) +{ + scores.Assign(m_bleuScoreFeature, bleu); +} + +ScoreComponentCollection MosesDecoder::getWeights() +{ + return StaticData::Instance().GetAllWeights(); +} + +void MosesDecoder::setWeights(const ScoreComponentCollection& weights) +{ + StaticData::InstanceNonConst().SetAllWeights(weights); +} + +void MosesDecoder::updateHistory(const vector& words) +{ + m_bleuScoreFeature->UpdateHistory(words); +} + +void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector& sourceLengths, vector& ref_ids, size_t rank, size_t epoch) +{ + m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch); +} + +void MosesDecoder::printBleuFeatureHistory(std::ostream& out) +{ + m_bleuScoreFeature->PrintHistory(out); +} + +size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) +{ + return m_bleuScoreFeature->GetClosestRefLength(ref_id, hypoLength); +} + +size_t 
MosesDecoder::getShortestReferenceIndex(size_t ref_id) +{ + return m_bleuScoreFeature->GetShortestRefIndex(ref_id); +} + +void MosesDecoder::setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, + bool scaleByInverseLength, bool scaleByAvgInverseLength, + float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu) +{ + m_bleuScoreFeature->SetBleuParameters(disable, sentenceBleu, scaleByInputLength, scaleByAvgInputLength, + scaleByInverseLength, scaleByAvgInverseLength, + scaleByX, historySmoothing, scheme, simpleHistoryBleu); +} +} + diff --git a/contrib/mira/Decoder.h b/contrib/mira/Decoder.h new file mode 100644 index 000000000..0cc1eb3ab --- /dev/null +++ b/contrib/mira/Decoder.h @@ -0,0 +1,138 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ +#ifndef _MIRA_DECODER_H_ +#define _MIRA_DECODER_H_ + +#include +#include +#include + + +#include "moses/Hypothesis.h" +#include "moses/Parameter.h" +#include "moses/SearchNormal.h" +#include "moses/Sentence.h" +#include "moses/StaticData.h" +#include "moses/FF/BleuScoreFeature.h" + +// +// Wrapper functions and objects for the decoder. +// + +namespace Mira +{ + +/** + * Wraps moses decoder. + **/ +class MosesDecoder +{ +public: + /** + * Initialise moses (including StaticData) using the given ini file and debuglevel, passing through any + * other command line arguments. 
+ **/ + MosesDecoder(const std::string& inifile, int debuglevel, int argc, std::vector decoder_params); + + //returns the best sentence + std::vector< std::vector > getNBest(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + bool avgRefLength, + size_t rank, + size_t epoch, + std::string filename); + std::vector< std::vector > runDecoder(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch, + Moses::SearchAlgorithm& seach, + std::string filename); + std::vector< std::vector > runChartDecoder(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch); + void initialize(Moses::StaticData& staticData, const std::string& source, size_t sentenceid, + float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding); + void updateHistory(const std::vector& words); + void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); + void printBleuFeatureHistory(std::ostream& out); + void printReferenceLength(const std::vector& ref_ids); + size_t getReferenceLength(size_t ref_id); + size_t getClosestReferenceLength(size_t ref_id, int hypoLength); + size_t getShortestReferenceIndex(size_t ref_id); + void setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, + bool scaleByInverseLength, bool scaleByAvgInverseLength, + float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu); + void setAvgInputLength (float l) { + m_bleuScoreFeature->SetAvgInputLength(l); + } + Moses::ScoreComponentCollection getWeights(); + void setWeights(const Moses::ScoreComponentCollection& weights); + void cleanup(bool chartDecoding); + + float getSourceLengthHistory() { + return m_bleuScoreFeature->GetSourceLengthHistory(); + } + float getTargetLengthHistory() { + return m_bleuScoreFeature->GetTargetLengthHistory(); + } + float getAverageInputLength() { + return m_bleuScoreFeature->GetAverageInputLength(); + } + +private: + float getBleuScore(const Moses::ScoreComponentCollection& scores); + void setBleuScore(Moses::ScoreComponentCollection& scores, float bleu); + Moses::Manager *m_manager; + Moses::ChartManager *m_chartManager; + Moses::Sentence *m_sentence; + Moses::BleuScoreFeature *m_bleuScoreFeature; +}; + + +} //namespace + +#endif diff --git a/contrib/mira/Hildreth.cpp b/contrib/mira/Hildreth.cpp new file mode 100644 index 000000000..03076e767 --- /dev/null +++ b/contrib/mira/Hildreth.cpp @@ -0,0 +1,175 @@ 
+#include "Hildreth.h"
+
+using namespace Moses;
+using namespace std;
+
+namespace Mira
+{
+
+vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b)
+{
+
+  size_t i;
+  int max_iter = 10000;
+  float eps = 0.00000001;
+  float zero = 0.000000000001;
+
+  vector<float> alpha ( b.size() );
+  vector<float> F ( b.size() );
+  vector<float> kkt ( b.size() );
+
+  float max_kkt = -1e100;
+
+  size_t K = b.size();
+
+  float A[K][K];
+  bool is_computed[K];
+  for ( i = 0; i < K; i++ ) {
+    A[i][i] = a[i].InnerProduct(a[i]);
+    is_computed[i] = false;
+  }
+
+  int max_kkt_i = -1;
+
+
+  for ( i = 0; i < b.size(); i++ ) {
+    F[i] = b[i];
+    kkt[i] = F[i];
+    if ( kkt[i] > max_kkt ) {
+      max_kkt = kkt[i];
+      max_kkt_i = i;
+    }
+  }
+
+  int iter = 0;
+  float diff_alpha;
+  float try_alpha;
+  float add_alpha;
+
+  while ( max_kkt >= eps && iter < max_iter ) {
+
+    diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
+    try_alpha = alpha[max_kkt_i] + diff_alpha;
+    add_alpha = 0.0;
+
+    if ( try_alpha < 0.0 )
+      add_alpha = -1.0 * alpha[max_kkt_i];
+    else
+      add_alpha = diff_alpha;
+
+    alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
+
+    if ( !is_computed[max_kkt_i] ) {
+      for ( i = 0; i < K; i++ ) {
+        A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
+        //A[i][max_kkt_i] = 0; // for version 1
+        is_computed[max_kkt_i] = true;
+      }
+    }
+
+    for ( i = 0; i < F.size(); i++ ) {
+      F[i] -= add_alpha * A[i][max_kkt_i];
+      kkt[i] = F[i];
+      if ( alpha[i] > zero )
+        kkt[i] = abs ( F[i] );
+    }
+    max_kkt = -1e100;
+    max_kkt_i = -1;
+    for ( i = 0; i < F.size(); i++ )
+      if ( kkt[i] > max_kkt ) {
+        max_kkt = kkt[i];
+        max_kkt_i = i;
+      }
+
+    iter++;
+  }
+
+  return alpha;
+}
+
+vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b, float C)
+{
+
+  size_t i;
+  int max_iter = 10000;
+  float eps = 0.00000001;
+  float zero = 0.000000000001;
+
+  vector<float> alpha ( b.size() );
+  vector<float> F ( b.size() );
+  vector<float> kkt ( b.size() );
+
+  float max_kkt = -1e100;
+
+  size_t K = b.size();
+
+  float A[K][K];
+  bool is_computed[K];
+  for ( i = 0; i < K; i++ ) {
+    A[i][i] = a[i].InnerProduct(a[i]);
+    is_computed[i] = false;
+  }
+
+  int max_kkt_i = -1;
+
+
+  for ( i = 0; i < b.size(); i++ ) {
+    F[i] = b[i];
+    kkt[i] = F[i];
+    if ( kkt[i] > max_kkt ) {
+      max_kkt = kkt[i];
+      max_kkt_i = i;
+    }
+  }
+
+  int iter = 0;
+  float diff_alpha;
+  float try_alpha;
+  float add_alpha;
+
+  while ( max_kkt >= eps && iter < max_iter ) {
+
+    diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
+    try_alpha = alpha[max_kkt_i] + diff_alpha;
+    add_alpha = 0.0;
+
+    if ( try_alpha < 0.0 )
+      add_alpha = -1.0 * alpha[max_kkt_i];
+    else if (try_alpha > C)
+      add_alpha = C - alpha[max_kkt_i];
+    else
+      add_alpha = diff_alpha;
+
+    alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
+
+    if ( !is_computed[max_kkt_i] ) {
+      for ( i = 0; i < K; i++ ) {
+        A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
+        //A[i][max_kkt_i] = 0; // for version 1
+        is_computed[max_kkt_i] = true;
+      }
+    }
+
+    for ( i = 0; i < F.size(); i++ ) {
+      F[i] -= add_alpha * A[i][max_kkt_i];
+      kkt[i] = F[i];
+      if (alpha[i] > C - zero)
+        kkt[i]=-kkt[i];
+      else if (alpha[i] > zero)
+        kkt[i] = abs(F[i]);
+
+    }
+    max_kkt = -1e100;
+    max_kkt_i = -1;
+    for ( i = 0; i < F.size(); i++ )
+      if ( kkt[i] > max_kkt ) {
+        max_kkt = kkt[i];
+        max_kkt_i = i;
+      }
+
+    iter++;
+  }
+
+  return alpha;
+}
+}

diff --git a/contrib/mira/Hildreth.h b/contrib/mira/Hildreth.h
new file mode 100644
index 000000000..373f2ac43
--- /dev/null
+++ b/contrib/mira/Hildreth.h
@@ -0,0 +1,13 @@
+#include "moses/FeatureVector.h"
+#include "moses/ScoreComponentCollection.h"
+
+namespace Mira
+{
+
+class Hildreth
+{
+public :
+  static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b );
+  static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b, float C);
+};
+}
diff --git a/contrib/mira/HildrethTest.cpp b/contrib/mira/HildrethTest.cpp
new file mode 100644
index 000000000..43e4403e4
--- /dev/null
+++ b/contrib/mira/HildrethTest.cpp
@@ -0,0 +1,793 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2010 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include +#include +#include + +#include + +#include "Hildreth.h" +#include "Optimiser.h" +#include "ScoreComponentCollection.h" + +using namespace std; +using namespace Moses; +using namespace Mira; + +namespace MosesTest +{ + +class MockSingleFeature : public StatelessFeatureFunction +{ +public: + MockSingleFeature(): StatelessFeatureFunction("MockSingle",1) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "sf"; + } +}; + +class MockMultiFeature : public StatelessFeatureFunction +{ +public: + MockMultiFeature(): StatelessFeatureFunction("MockMulti",5) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "mf"; + } +}; + +class MockSparseFeature : public StatelessFeatureFunction +{ +public: + MockSparseFeature(): StatelessFeatureFunction("MockSparse", ScoreProducer::unlimited) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "sf"; + } +}; + +struct MockProducers { + MockProducers() {} + + MockSingleFeature single; + MockMultiFeature multi; + MockSparseFeature sparse; +}; + + + +BOOST_AUTO_TEST_SUITE(hildreth_test) + +BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers) +{ + // Feasible example with 2 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 1, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + // feature values (second is oracle) + //float arr1[] = {0, -5, -27.0908, -1.83258, 0 }; + //float arr2[] = {0, -5, -29.158, -1.83258, 0 }; + //float arr3[] = {0, -5, -27.0908, -1.83258, 0 }; + + // feature value differences (to oracle) + ScoreComponentCollection s1, s2, s3; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.0672, 0, 0 }; + + float loss1 = 2.34085; + float loss2 = 0; + float loss3 = 2.34085; + + vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + + cerr << "feature value diff: " << featureValueDiffs[0] << endl; + cerr << "feature value diff: " << featureValueDiffs[1] << endl; + cerr << "feature value diff: " << featureValueDiffs[2] << endl << endl; + + float oldModelScoreDiff1 = featureValueDiffs[0].InnerProduct(weights); + float oldModelScoreDiff2 = featureValueDiffs[1].InnerProduct(weights); + float oldModelScoreDiff3 = featureValueDiffs[2].InnerProduct(weights); + + cerr << "model score diff: " << oldModelScoreDiff1 << ", loss: " << loss1 << endl; + cerr << "model score diff: " << oldModelScoreDiff2 << ", loss: " << loss2 << endl; + cerr << "model score diff: " << oldModelScoreDiff3 << ", loss: " << loss3 << endl << endl; + + lossMinusModelScoreDiff.push_back(loss1 - oldModelScoreDiff1); + lossMinusModelScoreDiff.push_back(loss2 - oldModelScoreDiff2); + lossMinusModelScoreDiff.push_back(loss3 - oldModelScoreDiff3); + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, 
lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + float newModelScoreDiff1 = featureValueDiffs[0].InnerProduct(weightsUpdate1); + float newModelScoreDiff2 = featureValueDiffs[1].InnerProduct(weightsUpdate1); + float newModelScoreDiff3 = featureValueDiffs[2].InnerProduct(weightsUpdate1); + + cerr << "new model score diff: " << newModelScoreDiff1 << ", loss: " << loss1 << endl; + cerr << "new model score diff: " << newModelScoreDiff2 << ", loss: " << loss2 << endl; + cerr << "new model score diff: " << newModelScoreDiff3 << ", loss: " << loss3 << endl; + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + float newModelScoreDiff4 = featureValueDiffs[0].InnerProduct(weightsUpdate2); + float newModelScoreDiff5 = featureValueDiffs[1].InnerProduct(weightsUpdate2); + float newModelScoreDiff6 = featureValueDiffs[2].InnerProduct(weightsUpdate2); + + cerr << "new model score diff: " << newModelScoreDiff4 << ", loss: " << loss1 << endl; + cerr << "new model score diff: " << newModelScoreDiff5 << ", loss: " << loss2 << endl; + cerr << "new model score diff: " << newModelScoreDiff6 << ", loss: " << loss3 << endl; +} + + +BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers) +{ + // Unfeasible example with 21 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 21; + + // feature value differences (to oracle) + // NOTE: these feature values are only approximations + ScoreComponentCollection s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, 
s18, s19, s20, s21; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr4[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr5[] = { 0, 0, 4.4283, 0, 0 }; + float arr6[] = { 0, 0, 3.84829, 1.38629, 0 }; + float arr7[] = { 0, 0, 6.83689, 0, 0 }; + float arr8[] = { 0, 0, 0, 0, 0 }; + float arr9[] = { 0, 0, -2.0672, 0, 0 }; + float arr10[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr11[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr12[] = { 0, 0, 4.4283, 0, 0 }; + float arr13[] = { 3, 0, 2.41089, 0, 0 }; + float arr14[] = { 3, 0, 2.32709, 0, 0 }; + float arr15[] = { 0, 0, -2.0672, 0, 0 }; + float arr16[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr17[] = { 0, 0, 4.4283, 0, 0 }; + float arr18[] = { 0, 0, 3.84829, 1.38629, 0 }; + float arr19[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr20[] = { 0, 0, 0, 0, 0 }; + float arr21[] = { 0, 0, 6.83689, 0, 0 }; + + vector losses; + losses.push_back(2.73485); + losses.push_back(0); + losses.push_back(3.64118); + losses.push_back(1.47347); + losses.push_back(3.64118); + losses.push_back(4.16278); + losses.push_back(3.13952); + losses.push_back(0); + losses.push_back(2.73485); + losses.push_back(1.47347); + losses.push_back(3.64118); + losses.push_back(3.64118); + losses.push_back(2.51662); + losses.push_back(2.73485); + losses.push_back(2.73485); + losses.push_back(3.64118); + losses.push_back(3.64118); + losses.push_back(4.16278); + losses.push_back(1.47347); + losses.push_back(0); + losses.push_back(3.13952); + + vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + vector vec4(arr4,arr4+5); + vector vec5(arr5,arr5+5); + vector vec6(arr6,arr6+5); + vector vec7(arr7,arr7+5); + vector vec8(arr8,arr8+5); + vector vec9(arr9,arr9+5); + vector vec10(arr10,arr10+5); + vector vec11(arr11,arr11+5); + vector vec12(arr12,arr12+5); + vector vec13(arr13,arr13+5); + vector vec14(arr14,arr14+5); + vector vec15(arr15,arr15+5); + vector vec16(arr16,arr16+5); + vector vec17(arr17,arr17+5); + vector vec18(arr18,arr18+5); + vector vec19(arr19,arr19+5); + vector vec20(arr20,arr20+5); + vector vec21(arr21,arr21+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + s4.PlusEquals(&multi,vec4); + s5.PlusEquals(&multi,vec5); + s6.PlusEquals(&multi,vec6); + s7.PlusEquals(&multi,vec7); + s8.PlusEquals(&multi,vec8); + s9.PlusEquals(&multi,vec9); + s10.PlusEquals(&multi,vec10); + s11.PlusEquals(&multi,vec11); + s12.PlusEquals(&multi,vec12); + s13.PlusEquals(&multi,vec13); + s14.PlusEquals(&multi,vec14); + s15.PlusEquals(&multi,vec15); + s16.PlusEquals(&multi,vec16); + s17.PlusEquals(&multi,vec17); + s18.PlusEquals(&multi,vec18); + s19.PlusEquals(&multi,vec19); + s20.PlusEquals(&multi,vec20); + s21.PlusEquals(&multi,vec21); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + featureValueDiffs.push_back(s4); + featureValueDiffs.push_back(s5); + featureValueDiffs.push_back(s6); + featureValueDiffs.push_back(s7); + featureValueDiffs.push_back(s8); + featureValueDiffs.push_back(s9); + featureValueDiffs.push_back(s10); + featureValueDiffs.push_back(s11); + featureValueDiffs.push_back(s12); + featureValueDiffs.push_back(s13); + featureValueDiffs.push_back(s14); + featureValueDiffs.push_back(s15); + featureValueDiffs.push_back(s16); + featureValueDiffs.push_back(s17); + featureValueDiffs.push_back(s18); + featureValueDiffs.push_back(s19); + 
featureValueDiffs.push_back(s20); + featureValueDiffs.push_back(s21); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + } + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + } + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << endl; + } +} + +BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers) +{ + // Feasible example with 8 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 8; + + // feature value differences (to oracle) + // NOTE: these feature values are only approximations + ScoreComponentCollection s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr4[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr5[] = { 0, 0, 4.4283, 0, 0 }; +// float arr6[] = { 0, 0, 3.84829, 1.38629, 0 }; +// float arr7[] = { 0, 0, 6.83689, 0, 0 }; + + float arr8[] = { 0, 0, 0, 0, 0 }; + float arr9[] = { 0, 0, -2.0672, 0, 0 }; +// float arr10[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr11[] = { 0, 0, -2.08436, 1.38629, 0 }; +// float arr12[] = { 0, 0, 4.4283, 0, 0 }; +// float arr13[] = { 3, 0, 2.41089, 0, 0 }; +// float arr14[] = { 3, 0, 2.32709, 0, 0 }; + + float arr15[] = { 0, 0, -2.0672, 0, 0 }; + float arr16[] = { 0, 0, -2.08436, 1.38629, 0 }; +// float arr17[] = { 0, 0, 4.4283, 0, 0 }; +// float arr18[] = { 0, 0, 3.84829, 1.38629, 0 }; +// float arr19[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr20[] = { 0, 0, 0, 0, 0 }; +// float arr21[] = { 0, 0, 6.83689, 0, 0 }; + + vector losses; + losses.push_back(2.73485); + losses.push_back(0); + losses.push_back(3.64118); + losses.push_back(1.47347); +// losses.push_back(3.64118); +// losses.push_back(4.16278); +// losses.push_back(3.13952); + losses.push_back(0); + losses.push_back(2.73485); +// losses.push_back(1.47347); +// losses.push_back(3.64118); +// losses.push_back(3.64118); +// losses.push_back(2.51662); +// losses.push_back(2.73485); + losses.push_back(2.73485); + losses.push_back(3.64118); +// losses.push_back(3.64118); +// losses.push_back(4.16278); +// losses.push_back(1.47347); +// losses.push_back(0); +// losses.push_back(3.13952); + + 
vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + vector vec4(arr4,arr4+5); +// vector vec5(arr5,arr5+5); +// vector vec6(arr6,arr6+5); +// vector vec7(arr7,arr7+5); + vector vec8(arr8,arr8+5); + vector vec9(arr9,arr9+5); +// vector vec10(arr10,arr10+5); +// vector vec11(arr11,arr11+5); +// vector vec12(arr12,arr12+5); +// vector vec13(arr13,arr13+5); +// vector vec14(arr14,arr14+5); + vector vec15(arr15,arr15+5); + vector vec16(arr16,arr16+5); +// vector vec17(arr17,arr17+5); +// vector vec18(arr18,arr18+5); +// vector vec19(arr19,arr19+5); +// vector vec20(arr20,arr20+5); +// vector vec21(arr21,arr21+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + s4.PlusEquals(&multi,vec4); +// s5.PlusEquals(&multi,vec5); +// s6.PlusEquals(&multi,vec6); +// s7.PlusEquals(&multi,vec7); + s8.PlusEquals(&multi,vec8); + s9.PlusEquals(&multi,vec9); +// s10.PlusEquals(&multi,vec10); +// s11.PlusEquals(&multi,vec11); +// s12.PlusEquals(&multi,vec12); +// s13.PlusEquals(&multi,vec13); +// s14.PlusEquals(&multi,vec14); + s15.PlusEquals(&multi,vec15); + s16.PlusEquals(&multi,vec16); +// s17.PlusEquals(&multi,vec17); +// s18.PlusEquals(&multi,vec18); +// s19.PlusEquals(&multi,vec19); +// s20.PlusEquals(&multi,vec20); +// s21.PlusEquals(&multi,vec21); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + featureValueDiffs.push_back(s4); +// featureValueDiffs.push_back(s5); +// featureValueDiffs.push_back(s6); +// featureValueDiffs.push_back(s7); + featureValueDiffs.push_back(s8); + featureValueDiffs.push_back(s9); +// featureValueDiffs.push_back(s10); +// featureValueDiffs.push_back(s11); +// featureValueDiffs.push_back(s12); +// featureValueDiffs.push_back(s13); +// featureValueDiffs.push_back(s14); + featureValueDiffs.push_back(s15); + featureValueDiffs.push_back(s16); +// featureValueDiffs.push_back(s17); +// featureValueDiffs.push_back(s18); +// featureValueDiffs.push_back(s19); +// featureValueDiffs.push_back(s20); +// featureValueDiffs.push_back(s21); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + } + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + } + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << endl; + } +} + +BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers) +{ + // Unfeasible example with 2 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 2; + + // feature value differences (to oracle) + // NOTE: these feature values are only 
approximations + ScoreComponentCollection s1, s17; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr17[] = { 0, 0, 4.4283, 0, 0 }; + vector losses; + losses.push_back(2.73485); + losses.push_back(3.64118); + + vector vec1(arr1,arr1+5); + vector vec17(arr17,arr17+5); + + s1.PlusEquals(&multi,vec1); + s17.PlusEquals(&multi,vec17); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s17); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + float sumOfOldError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + sumOfOldError += (losses[i] - oldModelScoreDiff[i]); + } + cerr << "sum of old error: " << sumOfOldError << endl; + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + vector< float> alphas3 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.1); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + float sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; + + cerr << "\n\nalphas with slack 0.1:" << endl; + for (size_t i = 0; i < alphas3.size(); ++i) { + cerr << "alpha " << i << ": " << alphas3[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs3(featureValueDiffs); + FVector totalUpdate3 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs3.size(); ++k) { + featureValueDiffs3[k].MultiplyEquals(alphas3[k]); + cerr << k << ": " << featureValueDiffs3[k].GetScoresVector() << endl; + FVector update = featureValueDiffs3[k].GetScoresVector(); + totalUpdate3 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate3 << endl << endl; + + ScoreComponentCollection weightsUpdate3(weights); + weightsUpdate3.PlusEquals(totalUpdate3); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate3 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate3)); + } + + sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; +} + +BOOST_AUTO_TEST_SUITE_END() + +} + diff --git a/contrib/mira/HypothesisQueue.cpp b/contrib/mira/HypothesisQueue.cpp new file mode 100644 index 000000000..8c8daa4da --- /dev/null +++ b/contrib/mira/HypothesisQueue.cpp @@ -0,0 +1,66 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include +#include "HypothesisQueue.h" + +using namespace std; + +namespace Moses +{ + +HypothesisQueue::~HypothesisQueue() +{ + m_queue.clear(); +} + +void HypothesisQueue::Push(BleuIndexPair hypo) +{ + //pair::iterator,bool> ret; + + if (m_capacity == 0 || m_queue.size() < m_capacity) { + m_queue.insert(hypo); + } else if (hypo.first > (*(m_queue.rbegin())).first) { + // Remove the worst-scoring item from the queue and insert hypo (only erase item if new item was successfully added ) + /*ret = m_queue.insert(hypo); + if ((*(ret.first)).second == 1) { + HypoQueueType::iterator p = m_queue.end(); + --p; + m_queue.erase(p); + }*/ + // with multisets we do not have to check whether the item was successfully added + m_queue.insert(hypo); + HypoQueueType::iterator p = m_queue.end(); + --p; + m_queue.erase(p); + } else { + // The hypo is unusable: the queue is full and hypo has a worse (or + // equal) score than the worst-scoring item already held. + } +} + +BleuIndexPair HypothesisQueue::Pop() +{ + HypoQueueType::iterator p = m_queue.begin(); + BleuIndexPair top = *p; + m_queue.erase(p); + return top; +} + +} // namespace Moses diff --git a/contrib/mira/HypothesisQueue.h b/contrib/mira/HypothesisQueue.h new file mode 100644 index 000000000..63cabbd0f --- /dev/null +++ b/contrib/mira/HypothesisQueue.h @@ -0,0 +1,69 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include <set>
+
+namespace Moses
+{
+
+// pair of Bleu score and index
+typedef std::pair<float, size_t> BleuIndexPair;
+
+// A bounded priority queue of BleuIndexPairs. The top item is
+// the best scoring hypothesis. The queue assumes ownership of pushed items and
+// relinquishes ownership when they are popped. Any remaining items at the
+// time of the queue's destruction are deleted.
+class HypothesisQueue
+{
+
+public:
+  // Create empty queue with fixed capacity of c. Capacity 0 means unbounded.
+  HypothesisQueue(size_t c) : m_capacity(c) {}
+  ~HypothesisQueue();
+
+  bool Empty() {
+    return m_queue.empty();
+  }
+
+  // Add the hypo to the queue or delete it if the queue is full and the
+  // score is no better than the queue's worst score.
+  void Push(BleuIndexPair hypo);
+
+  // Remove the best-scoring hypothesis from the queue and return it. The
+  // caller is responsible for deleting the object.
+  BleuIndexPair Pop();
+
+private:
+  struct HypothesisOrderer {
+    bool operator()(BleuIndexPair a,
+                    BleuIndexPair b) {
+      return (a.first > b.first);
+    }
+  };
+
+  typedef std::multiset<BleuIndexPair, HypothesisOrderer> HypoQueueType;
+  //typedef std::set<BleuIndexPair, HypothesisOrderer> HypoQueueType;
+
+  HypoQueueType m_queue;
+  const size_t m_capacity;
+};
+
+} // namespace Moses
diff --git a/contrib/mira/Jamfile b/contrib/mira/Jamfile
new file mode 100644
index 000000000..e43a993b5
--- /dev/null
+++ b/contrib/mira/Jamfile
@@ -0,0 +1,15 @@
+lib mira_lib :
+[ glob *.cpp : *Test.cpp Main.cpp ]
+../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
+
+exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ..//boost_filesystem ;
+
+alias programs : mira ;
+
+import testing ;
+
+unit-test mira_test : [ glob *Test.cpp ] mira_lib ..//boost_unit_test_framework ;
+
+explicit mira_test ;
+
+
diff --git a/contrib/mira/Main.cpp b/contrib/mira/Main.cpp
new file mode 100644
index 000000000..abf92b598
--- /dev/null
+++ b/contrib/mira/Main.cpp
@@ -0,0 +1,1847 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2010 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef MPI_ENABLE +#include +namespace mpi = boost::mpi; +#endif + +#include "Main.h" +#include "Optimiser.h" +#include "Hildreth.h" +#include "HypothesisQueue.h" +#include "moses/StaticData.h" +#include "moses/ScoreComponentCollection.h" +#include "moses/ThreadPool.h" +#include "mert/BleuScorer.h" +#include "moses/FeatureVector.h" + +#include "moses/FF/WordTranslationFeature.h" +#include "moses/FF/PhrasePairFeature.h" +#include "moses/FF/WordPenaltyProducer.h" +#include "moses/LM/Base.h" + +using namespace Mira; +using namespace std; +using namespace Moses; +namespace po = boost::program_options; + +int main(int argc, char** argv) +{ + size_t rank = 0; + size_t size = 1; +#ifdef MPI_ENABLE + mpi::environment env(argc,argv); + mpi::communicator world; + rank = world.rank(); + size = world.size(); +#endif + + bool help; + int verbosity; + string mosesConfigFile; + string inputFile; + vector referenceFiles; + vector mosesConfigFilesFolds, inputFilesFolds, referenceFilesFolds; + // string coreWeightFile, startWeightFile; + size_t epochs; + string learner; + bool shuffle; + size_t mixingFrequency; + size_t weightDumpFrequency; + string weightDumpStem; + bool scale_margin; + bool scale_update; + size_t n; + size_t batchSize; + bool distinctNbest; + bool accumulateWeights; + float historySmoothing; + bool scaleByInputLength, scaleByAvgInputLength; + bool scaleByInverseLength, scaleByAvgInverseLength; + float scaleByX; + float slack; + bool averageWeights; + bool weightConvergence; + float learning_rate; + float mira_learning_rate; + float perceptron_learning_rate; + string decoder_settings; + float min_weight_change; + bool normaliseWeights, normaliseMargin; + bool print_feature_values; + bool historyBleu ; + bool sentenceBleu; + bool perceptron_update; + bool hope_fear; + bool model_hope_fear; + size_t hope_n, fear_n; + size_t bleu_smoothing_scheme; + float min_oracle_bleu; + float minBleuRatio, maxBleuRatio; + bool boost; + bool decode_hope, decode_fear, decode_model; + string decode_filename; + bool batchEqualsShard; + bool sparseAverage, dumpMixedWeights, sparseNoAverage; + int featureCutoff; + bool pruneZeroWeights; + bool printFeatureCounts, printNbestWithFeatures; + bool avgRefLength; + bool print_weights, print_core_weights, debug_model, scale_lm, scale_wp; + float scale_lm_factor, scale_wp_factor; + bool kbest; + string moses_src; + float sigmoidParam; + float bleuWeight, bleuWeight_hope, bleuWeight_fear; + bool bleu_weight_lm; + float bleu_weight_lm_factor; + bool l1_regularize, l2_regularize, l1_reg_sparse, l2_reg_sparse; + float l1_lambda, l2_lambda; + bool most_violated, most_violated_reg, all_violated, max_bleu_diff; + bool feature_confidence, signed_counts; + float decay_core, decay_sparse, core_r0, sparse_r0; + float bleu_weight_fear_factor; + bool hildreth; + float add2lm; + + // compute real sentence Bleu scores on complete translations, disable Bleu feature + bool realBleu, disableBleuFeature; + bool rescaleSlack; + bool makePairs; + bool debug; + bool reg_on_every_mix; + size_t continue_epoch; + bool modelPlusBleu, simpleHistoryBleu; + po::options_description desc("Allowed options"); + 
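In the option table that follows, the template arguments to po::value have been stripped by whatever rendered this patch (e.g. po::value(&epochs) was presumably po::value<size_t>(&epochs), matching the declaration of epochs above). The following is a small standalone sketch of the same boost::program_options pattern with the types made explicit for a few representative options; the names and defaults are copied from the surrounding code, but the exact original specializations are inferred from the variable declarations and should be read as assumptions.

#include <cstddef>
#include <iostream>
#include <string>
#include <boost/program_options.hpp>

namespace po = boost::program_options;

int main(int argc, char **argv) {
  bool help;
  std::size_t epochs, batchSize;
  float slack;
  std::string learner, mosesConfigFile;

  po::options_description desc("Allowed options");
  // Each entry: (long-name[,short-name], typed value bound to a variable, help text)
  desc.add_options()
  ("help", po::value<bool>(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
  ("config,f", po::value<std::string>(&mosesConfigFile), "Moses ini-file")
  ("epochs,e", po::value<std::size_t>(&epochs)->default_value(10), "Number of epochs")
  ("batch-size,b", po::value<std::size_t>(&batchSize)->default_value(1), "Size of batch sent to the optimiser")
  ("learner,l", po::value<std::string>(&learner)->default_value("mira"), "Learning algorithm")
  ("slack", po::value<float>(&slack)->default_value(0.05f), "Use slack in optimiser");

  po::variables_map vm;
  po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
  po::notify(vm);   // copies parsed values into the bound variables

  if (help) {
    std::cout << desc << std::endl;
    return 0;
  }
  std::cout << "learner=" << learner << " epochs=" << epochs << std::endl;
  return 0;
}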
desc.add_options() + ("continue-epoch", po::value(&continue_epoch)->default_value(0), "Continue an interrupted experiment from this epoch on") + ("freq-reg", po::value(®_on_every_mix)->default_value(false), "Regularize after every weight mixing") + ("l1sparse", po::value(&l1_reg_sparse)->default_value(true), "L1-regularization for sparse weights only") + ("l2sparse", po::value(&l2_reg_sparse)->default_value(true), "L2-regularization for sparse weights only") + ("mv-reg", po::value(&most_violated_reg)->default_value(false), "Regularize most violated constraint") + ("most-violated", po::value(&most_violated)->default_value(false), "Add most violated constraint") + ("all-violated", po::value(&all_violated)->default_value(false), "Add all violated constraints") + ("feature-confidence", po::value(&feature_confidence)->default_value(false), "Confidence-weighted learning") + ("signed-counts", po::value(&signed_counts)->default_value(false), "Use signed feature counts for CWL") + ("dbg", po::value(&debug)->default_value(true), "More debug output") + ("make-pairs", po::value(&makePairs)->default_value(true), "Make pairs of hypotheses for 1slack") + ("debug", po::value(&debug)->default_value(true), "More debug output") + ("rescale-slack", po::value(&rescaleSlack)->default_value(false), "Rescale slack in 1-slack formulation") + ("add2lm", po::value(&add2lm)->default_value(0.0), "Add the specified amount to all LM weights") + ("hildreth", po::value(&hildreth)->default_value(false), "Prefer Hildreth over analytical update") + ("model-plus-bleu", po::value(&modelPlusBleu)->default_value(false), "Use the sum of model score and +/- bleu to select hope and fear translations") + ("simple-history-bleu", po::value(&simpleHistoryBleu)->default_value(false), "Simple history Bleu") + + ("bleu-weight", po::value(&bleuWeight)->default_value(1.0), "Bleu weight used in decoder objective") + ("bw-hope", po::value(&bleuWeight_hope)->default_value(-1.0), "Bleu weight used in decoder objective for hope") + ("bw-fear", po::value(&bleuWeight_fear)->default_value(-1.0), "Bleu weight used in decoder objective for fear") + + ("core-r0", po::value(&core_r0)->default_value(1.0), "Start learning rate for core features") + ("sparse-r0", po::value(&sparse_r0)->default_value(1.0), "Start learning rate for sparse features") + ("decay-core", po::value(&decay_core)->default_value(0.01), "Decay for core feature learning rate") + ("decay-sparse", po::value(&decay_sparse)->default_value(0.01), "Decay for sparse feature learning rate") + + ("tie-bw-to-lm", po::value(&bleu_weight_lm)->default_value(true), "Make bleu weight depend on lm weight") + ("bw-lm-factor", po::value(&bleu_weight_lm_factor)->default_value(2.0), "Make bleu weight depend on lm weight by this factor") + ("bw-factor-fear", po::value(&bleu_weight_fear_factor)->default_value(1.0), "Multiply fear weight by this factor") + ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs") + ("average-weights", po::value(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update") + ("avg-ref-length", po::value(&avgRefLength)->default_value(false), "Use average reference length instead of shortest for BLEU score feature") + ("batch-equals-shard", po::value(&batchEqualsShard)->default_value(false), "Batch size is equal to shard size (purely batch)") + ("batch-size,b", po::value(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments") + 
("bleu-smoothing-scheme", po::value(&bleu_smoothing_scheme)->default_value(1), "Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1)") + ("boost", po::value(&boost)->default_value(false), "Apply boosting factor to updates on misranked candidates") + ("config,f", po::value(&mosesConfigFile), "Moses ini-file") + ("configs-folds", po::value >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold") + ("debug-model", po::value(&debug_model)->default_value(false), "Get best model translation for debugging purposes") + ("decode-hope", po::value(&decode_hope)->default_value(false), "Decode dev input set according to hope objective") + ("decode-fear", po::value(&decode_fear)->default_value(false), "Decode dev input set according to fear objective") + ("decode-model", po::value(&decode_model)->default_value(false), "Decode dev input set according to normal objective") + ("decode-filename", po::value(&decode_filename), "Filename for Bleu objective translations") + ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs") + ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step") + ("dump-mixed-weights", po::value(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights") + ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs") + ("feature-cutoff", po::value(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features") + ("fear-n", po::value(&fear_n)->default_value(1), "Number of fear translations used") + ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") + ("history-bleu", po::value(&historyBleu)->default_value(false), "Use 1best translations to update the history") + ("history-smoothing", po::value(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing") + ("hope-fear", po::value(&hope_fear)->default_value(true), "Use only hope and fear translations for optimisation (not model)") + ("hope-n", po::value(&hope_n)->default_value(2), "Number of hope translations used") + ("input-file,i", po::value(&inputFile), "Input file containing tokenised source") + ("input-files-folds", po::value >(&inputFilesFolds), "Input files containing tokenised source, one for each fold") + ("learner,l", po::value(&learner)->default_value("mira"), "Learning algorithm") + ("l1-lambda", po::value(&l1_lambda)->default_value(0.0001), "Lambda for l1-regularization (w_i +/- lambda)") + ("l2-lambda", po::value(&l2_lambda)->default_value(0.01), "Lambda for l2-regularization (w_i * (1 - lambda))") + ("l1-reg", po::value(&l1_regularize)->default_value(false), "L1-regularization") + ("l2-reg", po::value(&l2_regularize)->default_value(false), "L2-regularization") + ("min-bleu-ratio", po::value(&minBleuRatio)->default_value(-1), "Set a minimum BLEU ratio between hope and fear") + ("max-bleu-ratio", po::value(&maxBleuRatio)->default_value(-1), "Set a maximum BLEU ratio between hope and fear") + ("max-bleu-diff", po::value(&max_bleu_diff)->default_value(true), "Select hope/fear with maximum Bleu difference") + ("min-oracle-bleu", po::value(&min_oracle_bleu)->default_value(0), "Set a minimum oracle BLEU score") + ("min-weight-change", po::value(&min_weight_change)->default_value(0.0001), "Set minimum weight change for stopping criterion") + ("mira-learning-rate", po::value(&mira_learning_rate)->default_value(1), 
"Learning rate for MIRA (fixed or flexible)") + ("mixing-frequency", po::value(&mixingFrequency)->default_value(10), "How often per epoch to mix weights, when using mpi") + ("model-hope-fear", po::value(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation") + ("moses-src", po::value(&moses_src)->default_value(""), "Moses source directory") + ("nbest,n", po::value(&n)->default_value(30), "Number of translations in n-best list") + ("normalise-weights", po::value(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder") + ("normalise-margin", po::value(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1") + ("perceptron-learning-rate", po::value(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate") + ("print-feature-values", po::value(&print_feature_values)->default_value(false), "Print out feature values") + ("print-feature-counts", po::value(&printFeatureCounts)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-nbest-with-features", po::value(&printNbestWithFeatures)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-weights", po::value(&print_weights)->default_value(false), "Print out current weights") + ("print-core-weights", po::value(&print_core_weights)->default_value(true), "Print out current core weights") + ("prune-zero-weights", po::value(&pruneZeroWeights)->default_value(false), "Prune zero-valued sparse feature weights") + ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training") + ("reference-files-folds", po::value >(&referenceFilesFolds), "Reference translation files for training, one for each fold") + ("kbest", po::value(&kbest)->default_value(true), "Select hope/fear pairs from a list of nbest translations") + + ("scale-by-inverse-length", po::value(&scaleByInverseLength)->default_value(false), "Scale BLEU by (history of) inverse input length") + ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale BLEU by (history of) input length") + ("scale-by-avg-input-length", po::value(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by average input length") + ("scale-by-avg-inverse-length", po::value(&scaleByAvgInverseLength)->default_value(false), "Scale BLEU by average inverse input length") + ("scale-by-x", po::value(&scaleByX)->default_value(0.1), "Scale the BLEU score by value x") + ("scale-lm", po::value(&scale_lm)->default_value(true), "Scale the language model feature") + ("scale-factor-lm", po::value(&scale_lm_factor)->default_value(0.5), "Scale the language model feature by this factor") + ("scale-wp", po::value(&scale_wp)->default_value(false), "Scale the word penalty feature") + ("scale-factor-wp", po::value(&scale_wp_factor)->default_value(2), "Scale the word penalty feature by this factor") + ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation") + ("sentence-level-bleu", po::value(&sentenceBleu)->default_value(true), "Use a sentences level Bleu scoring function") + ("shuffle", po::value(&shuffle)->default_value(false), "Shuffle input sentences before processing") + ("sigmoid-param", po::value(&sigmoidParam)->default_value(1), "y=sigmoidParam is the axis that this sigmoid approaches") + ("slack", po::value(&slack)->default_value(0.05), 
"Use slack in optimiser") + ("sparse-average", po::value(&sparseAverage)->default_value(false), "Average weights by the number of processes") + ("sparse-no-average", po::value(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum") + ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge") + ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") + ("weight-dump-frequency", po::value(&weightDumpFrequency)->default_value(2), "How often per epoch to dump weights (mpi)") + ("weight-dump-stem", po::value(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights"); + + po::options_description cmdline_options; + cmdline_options.add(desc); + po::variables_map vm; + po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm); + po::notify(vm); + + if (help) { + std::cout << "Usage: " + string(argv[0]) + + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl; + std::cout << desc << std::endl; + return 0; + } + + const StaticData &staticData = StaticData::Instance(); + + bool trainWithMultipleFolds = false; + if (mosesConfigFilesFolds.size() > 0 || inputFilesFolds.size() > 0 || referenceFilesFolds.size() > 0) { + if (rank == 0) + cerr << "Training with " << mosesConfigFilesFolds.size() << " folds" << endl; + trainWithMultipleFolds = true; + } + + if (dumpMixedWeights && (mixingFrequency != weightDumpFrequency)) { + cerr << "Set mixing frequency = weight dump frequency for dumping mixed weights!" << endl; + exit(1); + } + + if ((sparseAverage || sparseNoAverage) && averageWeights) { + cerr << "Parameters --sparse-average 1/--sparse-no-average 1 and --average-weights 1 are incompatible (not implemented)" << endl; + exit(1); + } + + if (trainWithMultipleFolds) { + if (!mosesConfigFilesFolds.size()) { + cerr << "Error: No moses ini files specified for training with folds" << endl; + exit(1); + } + + if (!inputFilesFolds.size()) { + cerr << "Error: No input files specified for training with folds" << endl; + exit(1); + } + + if (!referenceFilesFolds.size()) { + cerr << "Error: No reference files specified for training with folds" << endl; + exit(1); + } + } else { + if (mosesConfigFile.empty()) { + cerr << "Error: No moses ini file specified" << endl; + return 1; + } + + if (inputFile.empty()) { + cerr << "Error: No input file specified" << endl; + return 1; + } + + if (!referenceFiles.size()) { + cerr << "Error: No reference files specified" << endl; + return 1; + } + } + + // load input and references + vector inputSentences; + size_t inputSize = trainWithMultipleFolds? inputFilesFolds.size(): 0; + size_t refSize = trainWithMultipleFolds? 
referenceFilesFolds.size(): referenceFiles.size(); + vector > inputSentencesFolds(inputSize); + vector > referenceSentences(refSize); + + // number of cores for each fold + size_t coresPerFold = 0, myFold = 0; + if (trainWithMultipleFolds) { + if (mosesConfigFilesFolds.size() > size) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + coresPerFold = size/mosesConfigFilesFolds.size(); + if (size % coresPerFold > 0) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + + if (rank == 0) + cerr << "Number of cores per fold: " << coresPerFold << endl; + myFold = rank/coresPerFold; + cerr << "Rank " << rank << ", my fold: " << myFold << endl; + } + + // NOTE: we do not actually need the references here, because we are reading them in from StaticData + if (trainWithMultipleFolds) { + if (!loadSentences(inputFilesFolds[myFold], inputSentencesFolds[myFold])) { + cerr << "Error: Failed to load input sentences from " << inputFilesFolds[myFold] << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading inputs from " << inputFilesFolds[myFold] << endl); + + if (!loadSentences(referenceFilesFolds[myFold], referenceSentences[myFold])) { + cerr << "Error: Failed to load reference sentences from " << referenceFilesFolds[myFold] << endl; + exit(1); + } + if (referenceSentences[myFold].size() != inputSentencesFolds[myFold].size()) { + cerr << "Error: Input file length (" << inputSentencesFolds[myFold].size() << ") != (" + << referenceSentences[myFold].size() << ") reference file length (rank " << rank << ")" << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading references from " << referenceFilesFolds[myFold] << endl); + } else { + if (!loadSentences(inputFile, inputSentences)) { + cerr << "Error: Failed to load input sentences from " << inputFile << endl; + return 1; + } + + for (size_t i = 0; i < referenceFiles.size(); ++i) { + if (!loadSentences(referenceFiles[i], referenceSentences[i])) { + cerr << "Error: Failed to load reference sentences from " + << referenceFiles[i] << endl; + return 1; + } + if (referenceSentences[i].size() != inputSentences.size()) { + cerr << "Error: Input file length (" << inputSentences.size() << ") != (" + << referenceSentences[i].size() << ") length of reference file " << i + << endl; + return 1; + } + } + } + + if (scaleByAvgInputLength || scaleByInverseLength || scaleByAvgInverseLength) + scaleByInputLength = false; + + if (historyBleu || simpleHistoryBleu) { + sentenceBleu = false; + cerr << "Using history Bleu. " << endl; + } + + if (kbest) { + realBleu = true; + disableBleuFeature = true; + cerr << "Use kbest lists and real Bleu scores, disable Bleu feature.." << endl; + } + + // initialise Moses + // add references to initialize Bleu feature + boost::trim(decoder_settings); + decoder_settings += " -mira -n-best-list - " + boost::lexical_cast(n) + " distinct"; + + vector decoder_params; + boost::split(decoder_params, decoder_settings, boost::is_any_of("\t ")); + + // bleu feature + decoder_params.push_back("-feature-add"); + + decoder_settings = "BleuScoreFeature tuneable=false references="; + if (trainWithMultipleFolds) { + decoder_settings += referenceFilesFolds[myFold]; + } else { + decoder_settings += referenceFiles[0]; + for (size_t i=1; i < referenceFiles.size(); ++i) { + decoder_settings += ","; + decoder_settings += referenceFiles[i]; + } + } + decoder_params.push_back(decoder_settings); + + string configFile = trainWithMultipleFolds? 
mosesConfigFilesFolds[myFold] : mosesConfigFile; + VERBOSE(1, "Rank " << rank << " reading config file from " << configFile << endl); + MosesDecoder* decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params); + decoder->setBleuParameters(disableBleuFeature, sentenceBleu, scaleByInputLength, scaleByAvgInputLength, + scaleByInverseLength, scaleByAvgInverseLength, + scaleByX, historySmoothing, bleu_smoothing_scheme, simpleHistoryBleu); + bool chartDecoding = staticData.IsChart(); + + // Optionally shuffle the sentences + vector order; + if (trainWithMultipleFolds) { + for (size_t i = 0; i < inputSentencesFolds[myFold].size(); ++i) { + order.push_back(i); + } + } else { + if (rank == 0) { + for (size_t i = 0; i < inputSentences.size(); ++i) { + order.push_back(i); + } + } + } + + // initialise optimizer + Optimiser* optimiser = NULL; + if (learner == "mira") { + if (rank == 0) { + cerr << "Optimising using Mira" << endl; + cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl; + if (normaliseMargin) + cerr << "sigmoid parameter: " << sigmoidParam << endl; + } + optimiser = new MiraOptimiser(slack, scale_margin, scale_update, boost, normaliseMargin, sigmoidParam); + learning_rate = mira_learning_rate; + perceptron_update = false; + } else if (learner == "perceptron") { + if (rank == 0) { + cerr << "Optimising using Perceptron" << endl; + } + optimiser = new Perceptron(); + learning_rate = perceptron_learning_rate; + perceptron_update = true; + model_hope_fear = false; // mira only + hope_fear = false; // mira only + n = 1; + hope_n = 1; + fear_n = 1; + } else { + cerr << "Error: Unknown optimiser: " << learner << endl; + return 1; + } + + // resolve parameter dependencies + if (batchSize > 1 && perceptron_update) { + batchSize = 1; + cerr << "Info: Setting batch size to 1 for perceptron update" << endl; + } + + if (hope_n == 0) + hope_n = n; + if (fear_n == 0) + fear_n = n; + + if (model_hope_fear || kbest) + hope_fear = false; // is true by default + if (learner == "mira" && !(hope_fear || model_hope_fear || kbest)) { + cerr << "Error: Need to select one of parameters --hope-fear/--model-hope-fear/--kbest for mira update." 
<< endl; + return 1; + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // Create shards according to the number of processes used + vector shard; + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size() / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << " Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + + // get reference to feature functions + // const vector &featureFunctions = FeatureFunction::GetFeatureFunctions(); + ScoreComponentCollection initialWeights = decoder->getWeights(); + + if (add2lm != 0) { + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = initialWeights.GetScoreForProducer(lm) + add2lm; + initialWeights.Assign(lm, lmWeight); + cerr << "Rank " << rank << ", add " << add2lm << " to lm weight." 
<< endl; + } + } + } + + if (normaliseWeights) { + initialWeights.L1Normalise(); + cerr << "Rank " << rank << ", normalised initial weights: " << initialWeights << endl; + } + + decoder->setWeights(initialWeights); + + // set bleu weight to twice the size of the language model weight(s) + if (bleu_weight_lm) { + float lmSum = 0; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + lmSum += abs(initialWeights.GetScoreForProducer(lm)); + } + } + + bleuWeight = lmSum * bleu_weight_lm_factor; + if (!kbest) cerr << "Set bleu weight to lm weight * " << bleu_weight_lm_factor << endl; + } + + // bleu weights can be set separately for hope and fear; otherwise they are both set to 'lm weight * bleu_weight_lm_factor' + if (bleuWeight_hope == -1) { + bleuWeight_hope = bleuWeight; + } + if (bleuWeight_fear == -1) { + bleuWeight_fear = bleuWeight; + } + bleuWeight_fear *= bleu_weight_fear_factor; + if (!kbest) { + cerr << "Bleu weight: " << bleuWeight << endl; + cerr << "Bleu weight fear: " << bleuWeight_fear << endl; + } + + if (decode_hope || decode_fear || decode_model) { + size_t decode = 1; + if (decode_fear) decode = 2; + if (decode_model) decode = 3; + decodeHopeOrFear(rank, size, decode, decode_filename, inputSentences, decoder, n, bleuWeight); + } + + //Main loop: + ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average + ScoreComponentCollection cumulativeWeightsBinary; + size_t numberOfUpdates = 0; + size_t numberOfUpdatesThisEpoch = 0; + + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + float avgInputLength = 0; + float sumOfInputs = 0; + size_t numberOfInputs = 0; + + ScoreComponentCollection mixedWeights; + ScoreComponentCollection mixedWeightsPrevious; + ScoreComponentCollection mixedWeightsBeforePrevious; + ScoreComponentCollection mixedAverageWeights; + ScoreComponentCollection mixedAverageWeightsPrevious; + ScoreComponentCollection mixedAverageWeightsBeforePrevious; + + bool stop = false; +// int sumStillViolatedConstraints; + float epsilon = 0.0001; + + // Variables for feature confidence + ScoreComponentCollection confidenceCounts, mixedConfidenceCounts, featureLearningRates; + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); //initialise core learning rates + cerr << "Initial learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + + for (size_t epoch = continue_epoch; epoch < epochs && !stop; ++epoch) { + if (shuffle) { + if (trainWithMultipleFolds || rank == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", shuffling input sentences.." 
<< endl; + RandomIndex rindex; + random_shuffle(order.begin(), order.end(), rindex); + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // redo shards + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size()/size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + } + + // sum of violated constraints in an epoch + // sumStillViolatedConstraints = 0; + + numberOfUpdatesThisEpoch = 0; + // Sum up weights over one epoch, final average uses weights from last epoch + if (!accumulateWeights) { + cumulativeWeights.ZeroAll(); + cumulativeWeightsBinary.ZeroAll(); + } + + // number of weight dumps this epoch + size_t weightMixingThisEpoch = 0; + size_t weightEpochDump = 0; + + size_t shardPosition = 0; + vector::const_iterator sid = shard.begin(); + while (sid != shard.end()) { + // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues) + vector > featureValues; + vector > bleuScores; + vector > modelScores; + + // variables for hope-fear/perceptron setting + vector > featureValuesHope; + vector > featureValuesFear; + vector > bleuScoresHope; + vector > bleuScoresFear; + vector > modelScoresHope; + vector > modelScoresFear; + + // get moses weights + ScoreComponentCollection mosesWeights = decoder->getWeights(); + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl); + + if (historyBleu || simpleHistoryBleu) { + decoder->printBleuFeatureHistory(cerr); + } + + // BATCHING: produce nbest lists for all input sentences in batch + vector oracleBleuScores; + vector oracleModelScores; + vector > oneBests; + vector oracleFeatureValues; + vector inputLengths; + vector ref_ids; + size_t actualBatchSize = 0; + + size_t examples_in_batch = 0; + bool skip_example = false; + for (size_t batchPosition = 0; batchPosition < batchSize && sid + != shard.end(); ++batchPosition) { + string input; + if (trainWithMultipleFolds) + input = inputSentencesFolds[myFold][*sid]; + else + input = inputSentences[*sid]; + + Moses::Sentence *sentence = new Sentence(); + stringstream in(input + "\n"); + const vector inputFactorOrder = staticData.GetInputFactorOrder(); + sentence->Read(in,inputFactorOrder); + cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \""; + sentence->Print(cerr); + cerr << "\"" << " (batch pos " << batchPosition << ")" << endl; + size_t current_input_length = (*sentence).GetSize(); + + if (epoch == 
0 && (scaleByAvgInputLength || scaleByAvgInverseLength)) { + sumOfInputs += current_input_length; + ++numberOfInputs; + avgInputLength = sumOfInputs/numberOfInputs; + decoder->setAvgInputLength(avgInputLength); + cerr << "Rank " << rank << ", epoch 0, average input length: " << avgInputLength << endl; + } + + vector newFeatureValues; + vector newScores; + if (model_hope_fear) { + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + if (hope_fear || perceptron_update) { + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + if (historyBleu || simpleHistoryBleu || debug_model) { + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + } + if (kbest) { + // for decoding + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + + // for storing selected examples + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + } + + size_t ref_length; + float avg_ref_length; + + if (print_weights) + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: " << mosesWeights << endl; + if (print_core_weights) { + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: "; + mosesWeights.PrintCoreFeatures(); + cerr << endl; + } + + // check LM weight + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = mosesWeights.GetScoreForProducer(lm); + cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight: " << lmWeight << endl; + if (lmWeight <= 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: language model weight should never be <= 0." << endl; + mosesWeights.Assign(lm, 0.1); + cerr << "Rank " << rank << ", epoch " << epoch << ", assign lm weights of 0.1" << endl; + } + } + } + + // select inference scheme + cerr << "Rank " << rank << ", epoch " << epoch << ", real Bleu? 
" << realBleu << endl; + if (hope_fear || perceptron_update) { + // HOPE + cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << + "best hope translations" << endl; + vector< vector > outputHope = decoder->getNBest(input, *sid, hope_n, 1.0, bleuWeight_hope, + featureValuesHope[batchPosition], bleuScoresHope[batchPosition], modelScoresHope[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector oracle = outputHope[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + avg_ref_length = ref_length; + float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + // count sparse features occurring in hope translation + featureValuesHope[batchPosition][0].IncrementSparseHopeFeatures(); + + vector bestModel; + if (debug_model || historyBleu || simpleHistoryBleu) { + // MODEL (for updating the history only, using dummy vectors) + cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (debug or history)" << endl; + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + bestModel = outputModel[0]; + decoder->cleanup(chartDecoding); + cerr << endl; + ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + } + + // FEAR + //float fear_length_ratio = 0; + float bleuRatioHopeFear = 0; + //int fearSize = 0; + cerr << "Rank " << rank << ", epoch " << epoch << ", " << fear_n << "best fear translations" << endl; + vector< vector > outputFear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuWeight_fear, + featureValuesFear[batchPosition], bleuScoresFear[batchPosition], modelScoresFear[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector fear = outputFear[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + avg_ref_length += ref_length; + avg_ref_length /= 2; + //fear_length_ratio = (float)fear.size()/ref_length; + //fearSize = (int)fear.size(); + cerr << endl; + for (size_t i = 0; i < fear.size(); ++i) + delete fear[i]; + + // count sparse features occurring in fear translation + featureValuesFear[batchPosition][0].IncrementSparseFearFeatures(); + + // Bleu-related example selection + bool skip = false; + bleuRatioHopeFear = bleuScoresHope[batchPosition][0] / bleuScoresFear[batchPosition][0]; + if (minBleuRatio != -1 && bleuRatioHopeFear < minBleuRatio) + skip = true; + if(maxBleuRatio != -1 && bleuRatioHopeFear > maxBleuRatio) + skip = true; + + // sanity check + if (historyBleu || simpleHistoryBleu) { + if (bleuScores[batchPosition][0] > bleuScoresHope[batchPosition][0] && + modelScores[batchPosition][0] > modelScoresHope[batchPosition][0]) { + if (abs(bleuScores[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon && + abs(modelScores[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: MODEL translation better than HOPE translation." 
<< endl; + skip = true; + } + } + if (bleuScoresFear[batchPosition][0] > bleuScores[batchPosition][0] && + modelScoresFear[batchPosition][0] > modelScores[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScores[batchPosition][0]) > epsilon && + abs(modelScoresFear[batchPosition][0] - modelScores[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than MODEL translation." << endl; + skip = true; + } + } + } + if (bleuScoresFear[batchPosition][0] > bleuScoresHope[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon) { + // check if it's an error or a warning + skip = true; + if (modelScoresFear[batchPosition][0] > modelScoresHope[batchPosition][0] && abs(modelScoresFear[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <getNBest(input, *sid, n, 1.0, bleuWeight_hope, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + //vector oracle = outputHope[0]; + // needed for history + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + //float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]); + oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]); + oracleModelScores.push_back(modelScores[batchPosition][oraclePos]); + + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + // FEAR + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl; + decoder->getNBest(input, *sid, n, -1.0, bleuWeight_fear, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + //float fear_length_ratio = (float)fear.size()/ref_length; + + examples_in_batch++; + } + if (kbest) { + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, 
featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + examples_in_batch++; + + HypothesisQueue queueHope(hope_n); + HypothesisQueue queueFear(fear_n); + cerr << endl; + if (most_violated || all_violated) { + float bleuHope = -1000; + float bleuFear = 1000; + int indexHope = -1; + int indexFear = -1; + + vector bleuHopeList; + vector bleuFearList; + vector indexHopeList; + vector indexFearList; + + if (most_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with most violated constraint" << endl; + else if (all_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with violated constraints"; + else + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with hope"; + + // find best hope, then find fear that violates our constraint most + for (size_t i=0; i modelScores[batchPosition][indexHope]) { + if (abs(modelScores[batchPosition][i] - modelScores[batchPosition][indexHope]) > epsilon) { + // better model score + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + } else if (bleuScores[batchPosition][i] > bleuHope) { // better than current best + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + + float currentViolation = 0; + for (size_t i=0; i epsilon) && (modelDiff < bleuDiff)) { + float diff = bleuDiff - modelDiff; + if (diff > epsilon) { + if (all_violated) { + cerr << ".. 
adding pair"; + bleuHopeList.push_back(bleuHope); + bleuFearList.push_back(bleuScores[batchPosition][i]); + indexHopeList.push_back(indexHope); + indexFearList.push_back(i); + } else if (most_violated && diff > currentViolation) { + currentViolation = diff; + bleuFear = bleuScores[batchPosition][i]; + indexFear = i; + cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << currentViolation << " (" << modelDiff << " >= " << bleuDiff << ")" << endl; + } + } + } + } + + if (most_violated) { + if (currentViolation > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", adding pair with violation " << currentViolation << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << bleuHope << " (" << indexHope << "), fear: " << bleuFear << " (" << indexFear << ")" << endl; + bleuScoresHope[batchPosition].push_back(bleuHope); + bleuScoresFear[batchPosition].push_back(bleuFear); + featureValuesHope[batchPosition].push_back(featureValues[batchPosition][indexHope]); + featureValuesFear[batchPosition].push_back(featureValues[batchPosition][indexFear]); + float modelScoreHope = modelScores[batchPosition][indexHope]; + float modelScoreFear = modelScores[batchPosition][indexFear]; + if (most_violated_reg) { + // reduce model score difference by factor ~0.5 + float reg = currentViolation/4; + modelScoreHope += abs(reg); + modelScoreFear -= abs(reg); + float newViolation = (bleuHope - bleuFear) - (modelScoreHope - modelScoreFear); + cerr << "Rank " << rank << ", epoch " << epoch << ", regularized violation: " << newViolation << endl; + } + modelScoresHope[batchPosition].push_back(modelScoreHope); + modelScoresFear[batchPosition].push_back(modelScoreFear); + + featureValues[batchPosition][indexHope].IncrementSparseHopeFeatures(); + featureValues[batchPosition][indexFear].IncrementSparseFearFeatures(); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no violated constraint found." 
<< endl; + skip_example = 1; + } + } else cerr << endl; + } + if (max_bleu_diff) { + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with max Bleu diff from list: " << bleuScores[batchPosition].size() << endl; + for (size_t i=0; i hopeList, fearList; + for (size_t i=0; i > losses(actualBatchSize); + if (model_hope_fear) { + // Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis) + for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) { + for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) { + losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]); + } + } + } + + // set weight for bleu feature to 0 before optimizing + vector::const_iterator iter; + const vector &featureFunctions2 = FeatureFunction::GetFeatureFunctions(); + for (iter = featureFunctions2.begin(); iter != featureFunctions2.end(); ++iter) { + if ((*iter)->GetScoreProducerDescription() == "BleuScoreFeature") { + mosesWeights.Assign(*iter, 0); + break; + } + } + + // scale LM feature (to avoid rapid changes) + if (scale_lm) { + cerr << "scale lm" << endl; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + // scale down score + if (model_hope_fear) { + scaleFeatureScore(lm, scale_lm_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(lm, scale_lm_factor, featureValuesHope, rank, epoch); + scaleFeatureScore(lm, scale_lm_factor, featureValuesFear, rank, epoch); + } + } + } + } + + // scale WP + if (scale_wp) { + // scale up weight + WordPenaltyProducer &wp = WordPenaltyProducer::InstanceNonConst(); + + // scale down score + if (model_hope_fear) { + scaleFeatureScore(&wp, scale_wp_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(&wp, scale_wp_factor, featureValuesHope, rank, epoch); + scaleFeatureScore(&wp, scale_wp_factor, featureValuesFear, rank, epoch); + } + } + + // print out the feature values + if (print_feature_values) { + cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl; + if (model_hope_fear) printFeatureValues(featureValues); + else { + cerr << "hope: " << endl; + printFeatureValues(featureValuesHope); + cerr << "fear: " << endl; + printFeatureValues(featureValuesFear); + } + } + + // apply learning rates to feature vectors before optimization + if (feature_confidence) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply feature learning rates with decays " << decay_core << "/" << decay_sparse << ": " << featureLearningRates << endl; + if (model_hope_fear) { + applyPerFeatureLearningRates(featureValues, featureLearningRates, sparse_r0); + } else { + applyPerFeatureLearningRates(featureValuesHope, featureLearningRates, sparse_r0); + applyPerFeatureLearningRates(featureValuesFear, featureLearningRates, sparse_r0); + } + } else { + // apply fixed learning rates + cerr << "Rank " << rank << ", epoch " << epoch << ", apply fixed learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + if (core_r0 != 1.0 || sparse_r0 != 1.0) { + if (model_hope_fear) { + applyLearningRates(featureValues, core_r0, sparse_r0); + } else { + applyLearningRates(featureValuesHope, core_r0, sparse_r0); + applyLearningRates(featureValuesFear, core_r0, sparse_r0); + } + } + } + + // Run optimiser on batch: + VERBOSE(1, "\nRank " << rank << ", epoch " << 
epoch << ", run optimiser:" << endl); + size_t update_status = 1; + ScoreComponentCollection weightUpdate; + if (perceptron_update) { + vector > dummy1; + update_status = optimiser->updateWeightsHopeFear( weightUpdate, featureValuesHope, + featureValuesFear, dummy1, dummy1, dummy1, dummy1, learning_rate, rank, epoch); + } else if (hope_fear) { + if (bleuScoresHope[0][0] >= min_oracle_bleu) { + if (hope_n == 1 && fear_n ==1 && batchSize == 1 && !hildreth) { + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(weightUpdate, + featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], + bleuScoresFear[0][0], modelScoresHope[0][0], modelScoresFear[0][0], learning_rate, rank, epoch); + } else + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } else + update_status = 1; + } else if (kbest) { + if (batchSize == 1 && featureValuesHope[0].size() == 1 && !hildreth) { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically( + weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0], + bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0], + modelScoresFear[0][0], learning_rate, rank, epoch); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } + } else { + // model_hope_fear + update_status = ((MiraOptimiser*) optimiser)->updateWeights(weightUpdate, + featureValues, losses, bleuScores, modelScores, oracleFeatureValues, + oracleBleuScores, oracleModelScores, learning_rate, rank, epoch); + } + + // sumStillViolatedConstraints += update_status; + + if (update_status == 0) { // if weights were updated + // apply weight update + if (debug) + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << weightUpdate << endl; + + if (feature_confidence) { + // update confidence counts based on weight update + confidenceCounts.UpdateConfidenceCounts(weightUpdate, signed_counts); + + // update feature learning rates + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); + } + + // apply weight update to Moses weights + mosesWeights.PlusEquals(weightUpdate); + + if (normaliseWeights) + mosesWeights.L1Normalise(); + + cumulativeWeights.PlusEquals(mosesWeights); + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + cumulativeWeightsBinary.PlusEquals(binary); + } + + ++numberOfUpdates; + ++numberOfUpdatesThisEpoch; + if (averageWeights) { + ScoreComponentCollection averageWeights(cumulativeWeights); + if (accumulateWeights) { + averageWeights.DivideEquals(numberOfUpdates); + } else { + averageWeights.DivideEquals(numberOfUpdatesThisEpoch); + } + + mosesWeights = averageWeights; + } + + // set new Moses weights + decoder->setWeights(mosesWeights); + //cerr << "Rank " << rank << ", epoch " << epoch << ", new 
weights: " << mosesWeights << endl; + } + + // update history (for approximate document Bleu) + if (historyBleu || simpleHistoryBleu) { + for (size_t i = 0; i < oneBests.size(); ++i) + cerr << "Rank " << rank << ", epoch " << epoch << ", update history with 1best length: " << oneBests[i].size() << " "; + decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch); + deleteTranslations(oneBests); + } + } // END TRANSLATE AND UPDATE BATCH + + // size of all shards except for the last one + size_t generalShardSize; + if (trainWithMultipleFolds) + generalShardSize = order.size()/coresPerFold; + else + generalShardSize = order.size()/size; + + size_t mixing_base = mixingFrequency == 0 ? 0 : generalShardSize / mixingFrequency; + size_t dumping_base = weightDumpFrequency == 0 ? 0 : generalShardSize / weightDumpFrequency; + bool mix = evaluateModulo(shardPosition, mixing_base, actualBatchSize); + + // mix weights? + if (mix) { +#ifdef MPI_ENABLE + cerr << "Rank " << rank << ", epoch " << epoch << ", mixing weights.. " << endl; + // collect all weights in mixedWeights and divide by number of processes + mpi::reduce(world, mosesWeights, mixedWeights, SCCPlus(), 0); + + // mix confidence counts + //mpi::reduce(world, confidenceCounts, mixedConfidenceCounts, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } + if (rank == 0) { + // divide by number of processes + if (sparseNoAverage) + mixedWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedWeights.DivideEquals(totalBinary); + else + mixedWeights.DivideEquals(size); + + // divide confidence counts + //mixedConfidenceCounts.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedWeights.L1Normalise(); + } + + ++weightMixingThisEpoch; + + if (pruneZeroWeights) { + size_t pruned = mixedWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from mixedWeights." << endl; + + pruned = cumulativeWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from cumulativeWeights." << endl; + } + + if (featureCutoff != -1 && weightMixingThisEpoch == mixingFrequency) { + size_t pruned = mixedWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from mixedWeights." << endl; + + pruned = cumulativeWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from cumulativeWeights." << endl; + } + + if (weightMixingThisEpoch == mixingFrequency || reg_on_every_mix) { + if (l1_regularize) { + size_t pruned; + if (l1_reg_sparse) + pruned = mixedWeights.SparseL1Regularize(l1_lambda); + else + pruned = mixedWeights.L1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + } + if (l2_regularize) { + if (l2_reg_sparse) + mixedWeights.SparseL2Regularize(l2_lambda); + else + mixedWeights.L2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. 
on mixedWeights with lambda=" << l2_lambda << endl; + } + } + } + + // broadcast average weights from process 0 + mpi::broadcast(world, mixedWeights, 0); + decoder->setWeights(mixedWeights); + mosesWeights = mixedWeights; + + // broadcast summed confidence counts + //mpi::broadcast(world, mixedConfidenceCounts, 0); + //confidenceCounts = mixedConfidenceCounts; +#endif +#ifndef MPI_ENABLE + //cerr << "\nRank " << rank << ", no mixing, weights: " << mosesWeights << endl; + mixedWeights = mosesWeights; +#endif + } // end mixing + + // Dump weights? + if (trainWithMultipleFolds || weightEpochDump == weightDumpFrequency) { + // dump mixed weights at end of every epoch to enable continuing a crashed experiment + // (for jackknife every time the weights are mixed) + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_mixed_0" << epoch; + else + filename << weightDumpStem << "_mixed_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + mixedWeights.Save(filename.str()); + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + } + if (dumpMixedWeights) { + if (mix && rank == 0 && !weightDumpStem.empty()) { + // dump mixed weights instead of average weights + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_0" << epoch; + else + filename << weightDumpStem << "_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedWeights.Save(filename.str()); + ++weightEpochDump; + } + } else { + if (evaluateModulo(shardPosition, dumping_base, actualBatchSize)) { + cerr << "Rank " << rank << ", epoch " << epoch << ", dump weights.. 
(pos: " << shardPosition << ", base: " << dumping_base << ")" << endl; + ScoreComponentCollection tmpAverageWeights(cumulativeWeights); + bool proceed = false; + if (accumulateWeights) { + if (numberOfUpdates > 0) { + tmpAverageWeights.DivideEquals(numberOfUpdates); + proceed = true; + } + } else { + if (numberOfUpdatesThisEpoch > 0) { + if (sparseNoAverage) // average only core weights + tmpAverageWeights.CoreDivideEquals(numberOfUpdatesThisEpoch); + else if (sparseAverage) + tmpAverageWeights.DivideEquals(cumulativeWeightsBinary); + else + tmpAverageWeights.DivideEquals(numberOfUpdatesThisEpoch); + proceed = true; + } + } + + if (proceed) { +#ifdef MPI_ENABLE + // average across processes + mpi::reduce(world, tmpAverageWeights, mixedAverageWeights, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } +#endif +#ifndef MPI_ENABLE + mixedAverageWeights = tmpAverageWeights; + //FIXME: What do to for non-mpi version + ScoreComponentCollection totalBinary; +#endif + if (rank == 0 && !weightDumpStem.empty()) { + // divide by number of processes + if (sparseNoAverage) + mixedAverageWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedAverageWeights.DivideEquals(totalBinary); + else + mixedAverageWeights.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedAverageWeights.L1Normalise(); + } + + // dump final average weights + ostringstream filename; + if (epoch < 10) { + filename << weightDumpStem << "_0" << epoch; + } else { + filename << weightDumpStem << "_" << epoch; + } + + if (weightDumpFrequency > 1) { + filename << "_" << weightEpochDump; + } + + /*if (accumulateWeights) { + cerr << "\nMixed average weights (cumulative) during epoch " << epoch << ": " << mixedAverageWeights << endl; + } else { + cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl; + }*/ + + cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + ++weightEpochDump; + + if (weightEpochDump == weightDumpFrequency) { + if (l1_regularize) { + size_t pruned = mixedAverageWeights.SparseL1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedAverageWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + + } + if (l2_regularize) { + mixedAverageWeights.SparseL2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. 
on mixedAverageWeights with lambda=" << l2_lambda << endl; + } + + if (l1_regularize || l2_regularize) { + filename << "_reg"; + cerr << "Dumping regularized mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + } + } + + if (weightEpochDump == weightDumpFrequency && printFeatureCounts) { + // print out all features with counts + stringstream s1, s2; + s1 << "sparse_feature_hope_counts" << "_" << epoch; + s2 << "sparse_feature_fear_counts" << "_" << epoch; + ofstream sparseFeatureCountsHope(s1.str().c_str()); + ofstream sparseFeatureCountsFear(s2.str().c_str()); + + mixedAverageWeights.PrintSparseHopeFeatureCounts(sparseFeatureCountsHope); + mixedAverageWeights.PrintSparseFearFeatureCounts(sparseFeatureCountsFear); + sparseFeatureCountsHope.close(); + sparseFeatureCountsFear.close(); + } + } + } + }// end dumping + } // end if dump + } // end of shard loop, end of this epoch + cerr << "Rank " << rank << ", epoch " << epoch << ", end of epoch.." << endl; + + if (historyBleu || simpleHistoryBleu) { + cerr << "Bleu feature history after epoch " << epoch << endl; + decoder->printBleuFeatureHistory(cerr); + } + // cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl; + + // Check whether there were any weight updates during this epoch + size_t sumUpdates; + size_t *sendbuf_uint, *recvbuf_uint; + sendbuf_uint = (size_t *) malloc(sizeof(size_t)); + recvbuf_uint = (size_t *) malloc(sizeof(size_t)); +#ifdef MPI_ENABLE + sendbuf_uint[0] = numberOfUpdatesThisEpoch; + recvbuf_uint[0] = 0; + MPI_Reduce(sendbuf_uint, recvbuf_uint, 1, MPI_UNSIGNED, MPI_SUM, 0, world); + sumUpdates = recvbuf_uint[0]; +#endif +#ifndef MPI_ENABLE + sumUpdates = numberOfUpdatesThisEpoch; +#endif + if (rank == 0 && sumUpdates == 0) { + cerr << "\nNo weight updates during this epoch.. stopping." << endl; + stop = true; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } + + if (!stop) { + // Test if weights have converged + if (weightConvergence) { + bool reached = true; + if (rank == 0 && (epoch >= 2)) { + ScoreComponentCollection firstDiff, secondDiff; + if (dumpMixedWeights) { + firstDiff = mixedWeights; + firstDiff.MinusEquals(mixedWeightsPrevious); + secondDiff = mixedWeights; + secondDiff.MinusEquals(mixedWeightsBeforePrevious); + } else { + firstDiff = mixedAverageWeights; + firstDiff.MinusEquals(mixedAverageWeightsPrevious); + secondDiff = mixedAverageWeights; + secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious); + } + VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << " (max: " << firstDiff.GetLInfNorm() << ")" << endl); + VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl); + + // check whether stopping criterion has been reached + // (both difference vectors must have all weight changes smaller than min_weight_change) + if (firstDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (secondDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (reached) { + // stop MIRA + stop = true; + cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." 
<< endl; + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "stopping"; + dummy.Save(endfilename.str()); + } + } + + mixedWeightsBeforePrevious = mixedWeightsPrevious; + mixedWeightsPrevious = mixedWeights; + mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious; + mixedAverageWeightsPrevious = mixedAverageWeights; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } //end if (weightConvergence) + } + } // end of epoch loop + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + if (rank == 0) { + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "finished"; + dummy.Save(endfilename.str()); + } + + delete decoder; + exit(0); +} + +bool loadSentences(const string& filename, vector& sentences) +{ + ifstream in(filename.c_str()); + if (!in) + return false; + string line; + while (getline(in, line)) + sentences.push_back(line); + return true; +} + +bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size) +{ + if (mix_or_dump_base == 0) return 0; + if (actual_batch_size > 1) { + bool mix_or_dump = false; + size_t numberSubtracts = actual_batch_size; + do { + if (shard_position % mix_or_dump_base == 0) { + mix_or_dump = true; + break; + } + --shard_position; + --numberSubtracts; + } while (numberSubtracts > 0); + return mix_or_dump; + } else { + return ((shard_position % mix_or_dump_base) == 0); + } +} + +void printFeatureValues(vector > &featureValues) +{ + for (size_t i = 0; i < featureValues.size(); ++i) { + for (size_t j = 0; j < featureValues[i].size(); ++j) { + cerr << featureValues[i][j] << endl; + } + } + cerr << endl; +} + +void deleteTranslations(vector > &translations) +{ + for (size_t i = 0; i < translations.size(); ++i) { + for (size_t j = 0; j < translations[i].size(); ++j) { + delete translations[i][j]; + } + } +} + +void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename, vector &inputSentences, MosesDecoder* decoder, size_t n, float bleuWeight) +{ + if (decode == 1) + cerr << "Rank " << rank << ", decoding dev input set according to hope objective.. " << endl; + else if (decode == 2) + cerr << "Rank " << rank << ", decoding dev input set according to fear objective.. " << endl; + else + cerr << "Rank " << rank << ", decoding dev input set according to normal objective.. " << endl; + + // Create shards according to the number of processes used + vector order; + for (size_t i = 0; i < inputSentences.size(); ++i) + order.push_back(i); + + vector shard; + float shardSize = (float) (order.size()) / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = inputSentences.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank " << rank << ", shard start: " << shardStart << " Shard end: " << shardEnd << endl); + VERBOSE(1, "Rank " << rank << ", shard size: " << shardSize << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + + // open files for writing + stringstream fname; + fname << filename << ".rank" << rank; + filename = fname.str(); + ostringstream filename_nbest; + filename_nbest << filename << "." 
<< n << "best"; + ofstream out(filename.c_str()); + ofstream nbest_out((filename_nbest.str()).c_str()); + if (!out) { + ostringstream msg; + msg << "Unable to open " << fname.str(); + throw runtime_error(msg.str()); + } + if (!nbest_out) { + ostringstream msg; + msg << "Unable to open " << filename_nbest; + throw runtime_error(msg.str()); + } + + for (size_t i = 0; i < shard.size(); ++i) { + size_t sid = shard[i]; + string& input = inputSentences[sid]; + + vector > dummyFeatureValues; + vector > dummyBleuScores; + vector > dummyModelScores; + + vector newFeatureValues; + vector newScores; + dummyFeatureValues.push_back(newFeatureValues); + dummyBleuScores.push_back(newScores); + dummyModelScores.push_back(newScores); + + float factor = 0.0; + if (decode == 1) factor = 1.0; + if (decode == 2) factor = -1.0; + cerr << "Rank " << rank << ", translating sentence " << sid << endl; + bool realBleu = false; + vector< vector > nbestOutput = decoder->getNBest(input, sid, n, factor, bleuWeight, dummyFeatureValues[0], + dummyBleuScores[0], dummyModelScores[0], n, realBleu, true, false, rank, 0, ""); + cerr << endl; + decoder->cleanup(StaticData::Instance().IsChart()); + + for (size_t i = 0; i < nbestOutput.size(); ++i) { + vector output = nbestOutput[i]; + stringstream translation; + for (size_t k = 0; k < output.size(); ++k) { + Word* w = const_cast(output[k]); + translation << w->GetString(0); + translation << " "; + } + + if (i == 0) + out << translation.str() << endl; + nbest_out << sid << " ||| " << translation.str() << " ||| " << dummyFeatureValues[0][i] << + " ||| " << dummyModelScores[0][i] << " ||| sBleu=" << dummyBleuScores[0][i] << endl; + } + } + + out.close(); + nbest_out.close(); + cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl; + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + delete decoder; + exit(0); +} + +void applyLearningRates(vector > &featureValues, float core_r0, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, ScoreComponentCollection featureLearningRates, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + float featureScore; + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + for (size_t i=0; i featureScores = featureValues[i][j].GetScoresForProducer(sp); + for (size_t k=0; k + +#include "moses/ScoreComponentCollection.h" +#include "moses/Word.h" +#include "moses/FF/FeatureFunction.h" +#include "Decoder.h" + +typedef std::map > ProducerWeightMap; +typedef std::pair > ProducerWeightPair; + +template bool from_string(T& t, const std::string& s, std::ios_base& (*f)(std::ios_base&)) +{ + std::istringstream iss(s); + return !(iss >> f >> t).fail(); +} + +struct RandomIndex { + ptrdiff_t operator()(ptrdiff_t max) { + srand(time(0)); // Initialize random number generator with current time. 
+ return static_cast (rand() % max); + } +}; + +//void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId); +bool loadSentences(const std::string& filename, std::vector& sentences); +bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size); +void printFeatureValues(std::vector > &featureValues); +void ignoreCoreFeatures(std::vector > &featureValues, ProducerWeightMap &coreWeightMap); +void takeLogs(std::vector > &featureValues, size_t base); +void deleteTranslations(std::vector > &translations); +void decodeHopeOrFear(size_t rank, size_t size, size_t decode, std::string decode_filename, std::vector &inputSentences, Mira::MosesDecoder* decoder, size_t n, float bleuWeight); +void applyLearningRates(std::vector > &featureValues, float core_r0, float sparse_r0); +void applyPerFeatureLearningRates(std::vector > &featureValues, Moses::ScoreComponentCollection featureLearningRates, float sparse_r0); +void scaleFeatureScore(const Moses::FeatureFunction *sp, float scaling_factor, std::vector > &featureValues, size_t rank, size_t epoch); +void scaleFeatureScores(const Moses::FeatureFunction *sp, float scaling_factor, std::vector > &featureValues, size_t rank, size_t epoch); + +#endif /* MAIN_H_ */ diff --git a/contrib/mira/MiraOptimiser.cpp b/contrib/mira/MiraOptimiser.cpp new file mode 100644 index 000000000..d4854a1c4 --- /dev/null +++ b/contrib/mira/MiraOptimiser.cpp @@ -0,0 +1,446 @@ +#include +#include "Optimiser.h" +#include "Hildreth.h" +#include "moses/StaticData.h" + +using namespace Moses; +using namespace std; + +namespace Mira +{ + +size_t MiraOptimiser::updateWeights( + ScoreComponentCollection& weightUpdate, + const vector >& featureValues, + const vector >& losses, + const vector >& bleuScores, + const vector >& modelScores, + const vector& oracleFeatureValues, + const vector oracleBleuScores, + const vector oracleModelScores, + float learning_rate, + size_t rank, + size_t epoch) +{ + + // vector of feature values differences for all created constraints + vector featureValueDiffs; + vector lossMinusModelScoreDiffs; + vector all_losses; + + // most violated constraint in batch + ScoreComponentCollection max_batch_featureValueDiff; + + // Make constraints for new hypothesis translations + float epsilon = 0.0001; + int violatedConstraintsBefore = 0; + float oldDistanceFromOptimum = 0; + // iterate over input sentences (1 (online) or more (batch)) + for (size_t i = 0; i < featureValues.size(); ++i) { + //size_t sentenceId = sentenceIds[i]; + // iterate over hypothesis translations for one input sentence + for (size_t j = 0; j < featureValues[i].size(); ++j) { + ScoreComponentCollection featureValueDiff = oracleFeatureValues[i]; + featureValueDiff.MinusEquals(featureValues[i][j]); + + // cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + continue; + } + + float loss = losses[i][j]; + + // check if constraint is violated + bool violated = false; +// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = oracleModelScores[i] - modelScores[i][j]; + float diff = 0; + + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << 
endl; + if (diff > epsilon) + violated = true; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + diff = 0; + if (loss > modelScoreDiff) { + diff = loss - modelScoreDiff; + } + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (m_scale_margin) { + diff *= oracleBleuScores[i]; + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << oracleBleuScores[i] << endl; + } + + featureValueDiffs.push_back(featureValueDiff); + lossMinusModelScoreDiffs.push_back(diff); + all_losses.push_back(loss); + if (violated) { + ++violatedConstraintsBefore; + oldDistanceFromOptimum += diff; + } + } + } + + // run optimisation: compute alphas for all given constraints + vector alphas; + ScoreComponentCollection summedUpdate; + if (violatedConstraintsBefore > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; + if (m_slack != 0) { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); + } else { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs); + } + + // Update the weight vector according to the alphas and the feature value differences + // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) + for (size_t k = 0; k < featureValueDiffs.size(); ++k) { + float alpha = alphas[k]; + cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl; + ScoreComponentCollection update(featureValueDiffs[k]); + update.MultiplyEquals(alpha); + + // sum updates + summedUpdate.PlusEquals(update); + } + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl; +// return 0; + return 1; + } + + // apply learning rate + if (learning_rate != 1) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl; + summedUpdate.MultiplyEquals(learning_rate); + } + + // scale update by BLEU of oracle (for batch size 1 only) + if (oracleBleuScores.size() == 1) { + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << oracleBleuScores[0] << endl; + summedUpdate.MultiplyEquals(oracleBleuScores[0]); + } + } + + // cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl; + weightUpdate.PlusEquals(summedUpdate); + + // Sanity check: are there still violated constraints after optimisation? 
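+  /* Sketch of the update performed above (paraphrased from the calls in this
+     function, not copied from Hildreth.cpp): every (oracle, hypothesis j) pair
+     whose features differ contributes one constraint
+         w . (h(oracle) - h(hyp_j)) >= loss_j ,
+     Hildreth::optimise returns one alpha per constraint (bounded by m_slack when
+     a slack value is given), and the weight update is the alpha-weighted sum of
+     the feature-value differences. With a single constraint this reduces to the
+     Crammer & Singer (2006) closed form used in updateWeightsAnalytically below:
+         alpha = min(m_slack, (loss - modelScoreDiff) / ||h(oracle) - h(hyp)||^2),
+     applied only when the constraint is violated by more than epsilon. */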
+ /* int violatedConstraintsAfter = 0; + float newDistanceFromOptimum = 0; + for (size_t i = 0; i < featureValueDiffs.size(); ++i) { + float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights); + float loss = all_losses[i]; + float diff = loss - modelScoreDiff; + if (diff > epsilon) { + ++violatedConstraintsAfter; + newDistanceFromOptimum += diff; + } + } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);*/ +// return violatedConstraintsAfter; + return 0; +} + +size_t MiraOptimiser::updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector< std::vector >& featureValuesHope, + const std::vector< std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition) +{ + + // vector of feature values differences for all created constraints + vector featureValueDiffs; + vector lossMinusModelScoreDiffs; + vector modelScoreDiffs; + vector all_losses; + + // most violated constraint in batch + ScoreComponentCollection max_batch_featureValueDiff; + + // Make constraints for new hypothesis translations + float epsilon = 0.0001; + int violatedConstraintsBefore = 0; + float oldDistanceFromOptimum = 0; + + // iterate over input sentences (1 (online) or more (batch)) + for (size_t i = 0; i < featureValuesHope.size(); ++i) { + if (updatePosition != -1) { + if (int(i) < updatePosition) + continue; + else if (int(i) > updatePosition) + break; + } + + // Pick all pairs[j,j] of hope and fear translations for one input sentence + for (size_t j = 0; j < featureValuesHope[i].size(); ++j) { + ScoreComponentCollection featureValueDiff = featureValuesHope[i][j]; + featureValueDiff.MinusEquals(featureValuesFear[i][j]); + //cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + continue; + } + + float loss = bleuScoresHope[i][j] - bleuScoresFear[i][j]; + + // check if constraint is violated + bool violated = false; + //float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j]; + float diff = 0; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + + if (diff > epsilon) + violated = true; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + diff = 0; + if (loss > modelScoreDiff) { + diff = loss - modelScoreDiff; + } + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (m_scale_margin) { + diff *= 
bleuScoresHope[i][j]; + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl; + } + + featureValueDiffs.push_back(featureValueDiff); + lossMinusModelScoreDiffs.push_back(diff); + modelScoreDiffs.push_back(modelScoreDiff); + all_losses.push_back(loss); + if (violated) { + ++violatedConstraintsBefore; + oldDistanceFromOptimum += diff; + } + } + } + + // run optimisation: compute alphas for all given constraints + vector alphas; + ScoreComponentCollection summedUpdate; + if (violatedConstraintsBefore > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; + if (m_slack != 0) { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); + } else { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs); + } + + // Update the weight vector according to the alphas and the feature value differences + // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) + for (size_t k = 0; k < featureValueDiffs.size(); ++k) { + float alpha = alphas[k]; + cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl; + if (alpha != 0) { + // apply boosting factor + if (m_boost && modelScoreDiffs[k] <= 0) { + // factor between 1.5 and 3 (for Bleu scores between 5 and 20, the factor is within the boundaries) + float factor = std::min(1.5f, (float) log2(bleuScoresHope[0][0])); // TODO: make independent of number of oracles!! + factor = min(3.0f, factor); + alpha = alpha * factor; + cerr << "Rank " << rank << ", epoch " << epoch << ", apply boosting factor " << factor << " to update." << endl; + } + + ScoreComponentCollection update(featureValueDiffs[k]); + update.MultiplyEquals(alpha); + + // sum updates + summedUpdate.PlusEquals(update); + } + } + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl; + // return 0; + return 1; + } + + // apply learning rate + if (learning_rate != 1) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl; + summedUpdate.MultiplyEquals(learning_rate); + } + + // scale update by BLEU of oracle (for batch size 1 only) + if (featureValuesHope.size() == 1) { + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << bleuScoresHope[0][0] << endl; + summedUpdate.MultiplyEquals(bleuScoresHope[0][0]); + } + } + + //cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl; + weightUpdate.PlusEquals(summedUpdate); + + // Sanity check: are there still violated constraints after optimisation? 
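+  /* updateWeightsHopeFear mirrors updateWeights above, with the oracle/hypothesis
+     roles played by the j-th hope and fear translations of each input sentence:
+     loss_j = BLEU(hope_j) - BLEU(fear_j) and the constraint is
+         w . (h(hope_j) - h(fear_j)) >= loss_j .
+     Note that the boosting step above takes min(1.5, log2(bleuHope)) followed by
+     min(3.0, ...), so as written the factor never exceeds 1.5 even though the
+     accompanying comment describes a range of 1.5 to 3. */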
+ /* int violatedConstraintsAfter = 0; + float newDistanceFromOptimum = 0; + for (size_t i = 0; i < featureValueDiffs.size(); ++i) { + float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights); + float loss = all_losses[i]; + float diff = loss - modelScoreDiff; + if (diff > epsilon) { + ++violatedConstraintsAfter; + newDistanceFromOptimum += diff; + } + } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);*/ +// return violatedConstraintsAfter; + return 0; +} + +size_t MiraOptimiser::updateWeightsAnalytically( + ScoreComponentCollection& weightUpdate, + ScoreComponentCollection& featureValuesHope, + ScoreComponentCollection& featureValuesFear, + float bleuScoreHope, + float bleuScoreFear, + float modelScoreHope, + float modelScoreFear, + float learning_rate, + size_t rank, + size_t epoch) +{ + + float epsilon = 0.0001; + float oldDistanceFromOptimum = 0; + bool constraintViolatedBefore = false; + +// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl; +// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl; + ScoreComponentCollection featureValueDiff = featureValuesHope; + featureValueDiff.MinusEquals(featureValuesFear); + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + return 1; + } + +// cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; +// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = modelScoreHope - modelScoreFear; + float loss = bleuScoreHope - bleuScoreFear; + float diff = 0; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (diff > epsilon) { + // squash it between 0 and 1 + //diff = tanh(diff); + //diff = (2/(1 + pow(2,-diff))) - 1; + /* if (m_normaliseMargin) { + diff = (2/(1 + exp(-diff))) - 1; + cerr << "Rank " << rank << ", epoch " << epoch << ", new margin: " << diff << endl; + }*/ + + // constraint violated + oldDistanceFromOptimum += diff; + constraintViolatedBefore = true; + + // compute alpha for given constraint: (loss - model score diff) / || feature value diff ||^2 + // featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff) + // from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2} + float squaredNorm = featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm(); + + float alpha = diff / squaredNorm; + cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << 
alpha << endl; + if (m_slack > 0 ) { + if (alpha > m_slack) { + alpha = m_slack; + } else if (alpha < m_slack*(-1)) { + alpha = m_slack*(-1); + } + } + + // apply learning rate + if (learning_rate != 1) + alpha = alpha * learning_rate; + + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling update with oracle bleu score " << bleuScoreHope << endl; + alpha *= bleuScoreHope; + } + + cerr << "Rank " << rank << ", epoch " << epoch << ", clipped/scaled alpha: " << alpha << endl; + + // apply boosting factor + if (m_boost && modelScoreDiff <= 0) { + // factor between 1.5 and 3 (for Bleu scores between 5 and 20, the factor is within the boundaries) + float factor = min(1.5f, (float) log2(bleuScoreHope)); + factor = min(3.0f, factor); + alpha = alpha * factor; + cerr << "Rank " << rank << ", epoch " << epoch << ", boosted alpha: " << alpha << endl; + } + + featureValueDiff.MultiplyEquals(alpha); + weightUpdate.PlusEquals(featureValueDiff); +// cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << weightUpdate << endl; + } + + if (!constraintViolatedBefore) { + // constraint satisfied, nothing to do + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl; + return 1; + } + + // sanity check: constraint still violated after optimisation? + /* ScoreComponentCollection newWeights(currWeights); + newWeights.PlusEquals(weightUpdate); + bool constraintViolatedAfter = false; + float newDistanceFromOptimum = 0; + featureValueDiff = featureValuesHope; + featureValueDiff.MinusEquals(featureValuesFear); + modelScoreDiff = featureValueDiff.InnerProduct(newWeights); + diff = loss - modelScoreDiff; + // approximate comparison between floats! + if (diff > epsilon) { + constraintViolatedAfter = true; + newDistanceFromOptimum += (loss - modelScoreDiff); + } + + float hopeScore = featureValuesHope.InnerProduct(newWeights); + float fearScore = featureValuesFear.InnerProduct(newWeights); + cerr << "New hope score: " << hopeScore << endl; + cerr << "New fear score: " << fearScore << endl; + + VERBOSE(0, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl); + VERBOSE(0, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl); + */ + return 0; +} + +} + diff --git a/contrib/mira/MiraTest.cpp b/contrib/mira/MiraTest.cpp new file mode 100644 index 000000000..774b324f8 --- /dev/null +++ b/contrib/mira/MiraTest.cpp @@ -0,0 +1,24 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + + +//Supplies the main for the mira test module +#define BOOST_TEST_MODULE mira +#include + diff --git a/contrib/mira/Optimiser.h b/contrib/mira/Optimiser.h new file mode 100644 index 000000000..6bae23055 --- /dev/null +++ b/contrib/mira/Optimiser.h @@ -0,0 +1,153 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ +#ifndef _MIRA_OPTIMISER_H_ +#define _MIRA_OPTIMISER_H_ + +#include + +#include "moses/ScoreComponentCollection.h" + + +namespace Mira +{ + +class Optimiser +{ +public: + Optimiser() {} + + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1) = 0; +}; + +class Perceptron : public Optimiser +{ +public: + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1); +}; + +class MiraOptimiser : public Optimiser +{ +public: + MiraOptimiser() : + Optimiser() { } + + MiraOptimiser(float slack) : + Optimiser(), + m_slack(slack), + m_scale_margin(false), + m_scale_update(false), + m_boost(false), + m_normaliseMargin(false), + m_sigmoidParam(1.0) { } + + MiraOptimiser(float slack, bool scale_margin, bool scale_update, + bool boost, bool normaliseMargin, float sigmoidParam) : + Optimiser(), + m_slack(slack), + m_scale_margin(scale_margin), + m_scale_update(scale_update), + m_boost(boost), + m_normaliseMargin(normaliseMargin), + m_sigmoidParam(sigmoidParam) { } + + size_t updateWeights( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValues, + const std::vector >& losses, + const std::vector >& bleuScores, + const std::vector >& modelScores, + const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues, + const std::vector< float> oracleBleuScores, + const std::vector< float> 
oracleModelScores, + float learning_rate, + size_t rank, + size_t epoch); + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1); + size_t updateWeightsAnalytically( + Moses::ScoreComponentCollection& weightUpdate, + Moses::ScoreComponentCollection& featureValuesHope, + Moses::ScoreComponentCollection& featureValuesFear, + float bleuScoreHope, + float bleuScoreFear, + float modelScoreHope, + float modelScoreFear, + float learning_rate, + size_t rank, + size_t epoch); + + void setSlack(float slack) { + m_slack = slack; + } + +private: + // regularise Hildreth updates + float m_slack; + + + // scale margin with BLEU score + bool m_scale_margin; + + // scale update with oracle BLEU score + bool m_scale_update; + + // boosting of updates on misranked candidates + bool m_boost; + + // squash margin between 0 and 1 (or depending on m_sigmoidParam) + bool m_normaliseMargin; + + // y=sigmoidParam is the axis that this sigmoid approaches + float m_sigmoidParam ; +}; +} + +#endif diff --git a/contrib/mira/Perceptron.cpp b/contrib/mira/Perceptron.cpp new file mode 100644 index 000000000..af61c28a9 --- /dev/null +++ b/contrib/mira/Perceptron.cpp @@ -0,0 +1,53 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "Optimiser.h" + +using namespace Moses; +using namespace std; + +namespace Mira +{ + +size_t Perceptron::updateWeightsHopeFear( + ScoreComponentCollection& weightUpdate, + const vector< vector >& featureValuesHope, + const vector< vector >& featureValuesFear, + const vector< vector >& dummy1, + const vector< vector >& dummy2, + const vector< vector >& dummy3, + const vector< vector >& dummy4, + float perceptron_learning_rate, + size_t rank, + size_t epoch, + int updatePosition) +{ + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear[0][0] << endl; + ScoreComponentCollection featureValueDiff = featureValuesHope[0][0]; + featureValueDiff.MinusEquals(featureValuesFear[0][0]); + cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; + featureValueDiff.MultiplyEquals(perceptron_learning_rate); + weightUpdate.PlusEquals(featureValueDiff); + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl; + return 0; +} + +} + diff --git a/contrib/mira/expt.cfg b/contrib/mira/expt.cfg new file mode 100644 index 000000000..416eb1d3f --- /dev/null +++ b/contrib/mira/expt.cfg @@ -0,0 +1,34 @@ +[general] +name=expt1 +moses-home=/path/to/moses/dir/ +cwd=/path/to/current/dir/ +working-dir=${cwd}/experiment +data-dir=${cwd}/data +decoder-settings=-mp -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 + +[train] +trainer=${moses-home}/mira/mira +input-file=${data-dir}/tune.input +reference-files=${data-dir}/tune.reference +moses-ini-file=${data-dir}/moses.ini +hours=48 +jobs=8 +slots=8 +epochs=10 +learner=mira +mixing-frequency=5 +weight-dump-frequency=1 +extra-args=--sentence-level-bleu 1 --hope-n 1 --fear-n 1 + +[devtest] +moses=${moses-home}/moses-cmd/src/moses +bleu=${moses-home}/scripts/generic/multi-bleu.perl +input-file=${data-dir}/devtest.input +reference-file=${data-dir}/devtest.reference +moses-ini-file=${data-dir}/moses.test.ini +hours=12 +extra-args= +skip-dev=1 +skip-devtest=0 +skip-submit=0 + diff --git a/contrib/mira/mira.xcodeproj/project.pbxproj b/contrib/mira/mira.xcodeproj/project.pbxproj new file mode 100644 index 000000000..67662f4e0 --- /dev/null +++ b/contrib/mira/mira.xcodeproj/project.pbxproj @@ -0,0 +1,401 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 1E141A311243527800123194 /* Perceptron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E141A2F1243527800123194 /* Perceptron.cpp */; }; + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */; }; + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC6391242602F0059001A /* Decoder.cpp */; }; + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC63B1242602F0059001A /* Main.cpp */; }; + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D1124268310059001A /* libmoses-chart.a */; }; + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6CB124268270059001A /* libmoses.a */; }; + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D9124268440059001A /* libOnDiskPt.a */; }; + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* mira.1 */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 1E9DC6CA124268270059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = moses; + }; + 1E9DC6D0124268310059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = "moses-chart"; + }; + 1E9DC6D8124268440059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = OnDiskPt; + }; + 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses */; + remoteInfo = moses; + }; + 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses-chart */; + remoteInfo = "moses-chart"; + }; + 1EF4E85012440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* OnDiskPt */; + remoteInfo = OnDiskPt; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 1E141A2F1243527800123194 /* Perceptron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Perceptron.cpp; sourceTree = ""; }; + 
1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MiraOptimiser.cpp; sourceTree = ""; }; + 1E9DC6391242602F0059001A /* Decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Decoder.cpp; sourceTree = ""; }; + 1E9DC63A1242602F0059001A /* Decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Decoder.h; sourceTree = ""; }; + 1E9DC63B1242602F0059001A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Main.cpp; sourceTree = ""; }; + 1E9DC63E124260370059001A /* Optimiser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Optimiser.h; sourceTree = ""; }; + 1E9DC6C6124268270059001A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = "moses-chart.xcodeproj"; path = "../moses-chart/moses-chart.xcodeproj"; sourceTree = SOURCE_ROOT; }; + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC76712426FC60059001A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Main.h; sourceTree = ""; }; + 8DD76F6C0486A84900D96B5E /* mira */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mira; sourceTree = BUILT_PRODUCTS_DIR; }; + C6859E8B029090EE04C91782 /* mira.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mira.1; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */, + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */, + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* mira */ = { + isa = PBXGroup; + children = ( + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */, + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */, + 1E9DC6C6124268270059001A /* moses.xcodeproj */, + 08FB7795FE84155DC02AAC07 /* Source */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = mira; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */, + 1E141A2F1243527800123194 /* Perceptron.cpp */, + 1E9DC63E124260370059001A /* Optimiser.h */, + 1E9DC6391242602F0059001A /* Decoder.cpp */, + 1E9DC63A1242602F0059001A /* Decoder.h */, + 1E9DC63B1242602F0059001A /* Main.cpp */, + 1E9DC76712426FC60059001A /* Main.h */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* mira */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6C7124268270059001A /* Products */ = { + 
isa = PBXGroup; + children = ( + 1E9DC6CB124268270059001A /* libmoses.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6CD124268310059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D1124268310059001A /* libmoses-chart.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6D5124268440059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D9124268440059001A /* libOnDiskPt.a */, + ); + name = Products; + sourceTree = ""; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + C6859E8B029090EE04C91782 /* mira.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* mira */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + 1EF4E84D12440612006233A0 /* PBXTargetDependency */, + 1EF4E84F12440612006233A0 /* PBXTargetDependency */, + 1EF4E85112440612006233A0 /* PBXTargetDependency */, + ); + name = mira; + productInstallPath = "$(HOME)/bin"; + productName = mira; + productReference = 8DD76F6C0486A84900D96B5E /* mira */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* mira */; + projectDirPath = ""; + projectReferences = ( + { + ProductGroup = 1E9DC6CD124268310059001A /* Products */; + ProjectRef = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + }, + { + ProductGroup = 1E9DC6C7124268270059001A /* Products */; + ProjectRef = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + }, + { + ProductGroup = 1E9DC6D5124268440059001A /* Products */; + ProjectRef = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + }, + ); + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* mira */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXReferenceProxy section */ + 1E9DC6CB124268270059001A /* libmoses.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libmoses.a; + remoteRef = 1E9DC6CA124268270059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 1E9DC6D1124268310059001A /* libmoses-chart.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = "libmoses-chart.a"; + remoteRef = 1E9DC6D0124268310059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 1E9DC6D9124268440059001A /* libOnDiskPt.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libOnDiskPt.a; + remoteRef = 1E9DC6D8124268440059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; +/* End PBXReferenceProxy section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */, + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */, + 1E141A311243527800123194 /* Perceptron.cpp in Sources */, + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources 
*/, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 1EF4E84D12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = moses; + targetProxy = 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E84F12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = "moses-chart"; + targetProxy = 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E85112440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = OnDiskPt; + targetProxy = 1EF4E85012440612006233A0 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} 
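(A short worked example of the ${...} interpolation used in expt.cfg above, based on
the substitution loop at the top of training-expt.perl below: a reference is resolved
by trying the key as written, then the current section, then [general]. So
working-dir=${cwd}/experiment picks up cwd from [general], and
input-file=${data-dir}/tune.input resolves data-dir, itself defined as ${cwd}/data,
through the same [general] fallback.)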
diff --git a/contrib/mira/training-expt.perl b/contrib/mira/training-expt.perl new file mode 100755 index 000000000..097ee7220 --- /dev/null +++ b/contrib/mira/training-expt.perl @@ -0,0 +1,994 @@ +#!/usr/bin/env perl + +use strict; +#eddie specific +use lib "/exports/informatics/inf_iccs_smt/perl/lib/perl5/site_perl"; +use Config::Simple; +use File::Basename; +use Getopt::Long "GetOptions"; + +my ($config_file,$execute,$continue); +die("training-expt.perl -config config-file [-exec]") + unless &GetOptions('config=s' => \$config_file, + 'cont=s' => \$continue, + 'exec' => \$execute); + +my $config = new Config::Simple($config_file) || + die "Error: unable to read config file \"$config_file\""; + +#substitution +foreach my $key ($config->param) { + my $value = $config->param($key); + while ($value =~ m/(.*?)\$\{(.*?)\}(.*)/) { + my $sub = $config->param($2); + if (! $sub) { + #try in this scope + my $scope = (split /\./, $key)[0]; + $sub = $config->param($scope . "." . $2); + } + if (! $sub) { + #then general + $sub = $config->param("general." . $2); + } + $value = $1 . $sub . $3; + } + print STDERR "$key => "; print STDERR $value; print STDERR "\n"; + $config->param($key,$value); +} + +#check if we're using sge +my $have_sge = 0; +if (`which qsub 2>/dev/null`) { + print "Using sge for job control.\n"; + $have_sge = 1; +} else { + print "No sge detected.\n"; +} + +#required global parameters +my $name = ¶m_required("general.name"); + +#optional globals +my $queue = ¶m("general.queue", "inf_iccs_smt"); +my $mpienv = ¶m("general.mpienv", "openmpi_smp8_mark2"); +my $vmem = ¶m("general.vmem", "6"); + +#wait for bleu files to appear in experiment folder if running as part of experiment.perl +my $wait_for_bleu = ¶m("general.wait-for-bleu", 0); + +#job control +my $jackknife = ¶m("general.jackknife", 0); +my $working_dir = ¶m("general.working-dir"); +my $general_decoder_settings = ¶m("general.decoder-settings", ""); +system("mkdir -p $working_dir") == 0 or die "Error: unable to create directory \"$working_dir\""; +my $train_script = "$name-train"; +my $job_name = "$name-t"; +my $hours = ¶m("train.hours",48); + +# check if we are tuning a meta feature +my $tuneMetaFeature = ¶m("general.tune-meta-feature", 0); +print STDERR "Tuning meta feature.. 
\n" if $tuneMetaFeature; + +# Check if a weight file with start weights was given +my $start_weight_file = ¶m("start.weightfile"); + +#required training parameters +my $singleRef = 1; +my ($moses_ini_file, $input_file, $reference_files); +my (@moses_ini_files_folds, @input_files_folds, @reference_files_folds); +if ($jackknife) { + my $array_ref = ¶m_required("train.moses-ini-files-folds"); + @moses_ini_files_folds= @$array_ref; + foreach my $ini (@moses_ini_files_folds) { + &check_exists ("moses ini file", $ini); + } + $array_ref = ¶m_required("train.input-files-folds"); + @input_files_folds = @$array_ref; + foreach my $in (@input_files_folds) { + &check_exists ("train input file", $in); + } + $array_ref = ¶m_required("train.reference-files-folds"); + @reference_files_folds = @$array_ref; + foreach my $ref (@reference_files_folds) { + &check_exists ("train reference file", $ref); + } +} +else { + $moses_ini_file = ¶m_required("train.moses-ini-file"); + &check_exists ("moses ini file", $moses_ini_file); + $input_file = ¶m_required("train.input-file"); + &check_exists ("train input file", $input_file); + $reference_files = ¶m_required("train.reference-files"); + if (&check_exists_noThrow ("ref files", $reference_files) != 0) { + for my $ref (glob $reference_files . "*") { + &check_exists ("ref files", $ref); + } + $singleRef = 0; + } +} + +# check if we want to continue an interrupted experiment +my $continue_expt = ¶m("general.continue-expt", 0); # number of experiment to continue +my $continue_epoch = 0; +if ($continue_expt > 0) { + die "ERROR: Continuing an experiment is not defined for tuning meta features.. \n\n" if ($tuneMetaFeature); + $continue_epoch = ¶m_required("general.continue-epoch", 0); + my $continue_weights = ¶m_required("general.continue-weights", 0); + open(CONT_WEIGHTS, $continue_weights); + my ($wp_weight, @pm_weights, $lm_weight, $lm2_weight, $d_weight, @lr_weights, %extra_weights); + my $num_core_weights = 0; + my $num_extra_weights = 0; + while() { + chomp; + my ($name,$value) = split; + next if ($name =~ /^!Unknown/); + next if ($name =~ /^BleuScore/); + next if ($name eq "DEFAULT_"); + if ($name eq "WordPenalty") { + $wp_weight = $value; + $num_core_weights += 1; + } elsif ($name =~ /^PhraseModel/) { + push @pm_weights,$value; + $num_core_weights += 1; + } elsif ($name =~ /^LM\:2/) { + $lm2_weight = $value; + $num_core_weights += 1; + } + elsif ($name =~ /^LM/) { + $lm_weight = $value; + $num_core_weights += 1; + } elsif ($name eq "Distortion") { + $d_weight = $value; + $num_core_weights += 1; + } elsif ($name =~ /^LexicalReordering/) { + push @lr_weights,$value; + $num_core_weights += 1; + } else { + $extra_weights{$name} = $value; + $num_extra_weights += 1; + } + } + close CONT_WEIGHTS; + print STDERR "num core weights to continue: $num_core_weights\n"; + print STDERR "num extra weights to continue: $num_extra_weights\n"; + + # write sparse weights to separate file + my $sparse_weights = $working_dir."/sparseWeights.expt".$continue_expt; + if ($num_extra_weights > 0) { + open(SPARSE, ">$sparse_weights"); + foreach my $name (sort keys %extra_weights) { + next if ($name eq "core"); + next if ($name eq "DEFAULT_"); + + # write only non-zero feature weights to file + if ($extra_weights{$name}) { + print SPARSE "$name $extra_weights{$name}\n"; + } + } + close SPARSE; + } + + # write new ini files with these weights + if ($jackknife) { + my @new_ini_files; + for (my $i=0; $i<=$#moses_ini_files_folds; $i++) { + my $ini_continue = 
$moses_ini_files_folds[$i].".continue".$continue_expt; + open(OLDINI, $moses_ini_files_folds[$i]); + open(NEWINI, ">$ini_continue"); + while() { + if (/weight-l/) { + print NEWINI "[weight-l]\n"; + print NEWINI $lm_weight; + print NEWINI "\n"; + + if (defined $lm2_weight) { + readline(OLDINI); + print NEWINI $lm2_weight; + print NEWINI "\n"; + } + + readline(OLDINI); + } elsif (/weight-t/) { + print NEWINI "[weight-t]\n"; + foreach my $pm_weight (@pm_weights) { + print NEWINI $pm_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-d/) { + print NEWINI "[weight-d]\n"; + print NEWINI $d_weight; + print NEWINI "\n"; + readline(OLDINI); + foreach my $lr_weight (@lr_weights) { + print NEWINI $lr_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-w/) { + print NEWINI "[weight-w]\n"; + print NEWINI $wp_weight; + print NEWINI "\n"; + readline(OLDINI); + } else { + print NEWINI; + } + } + if ($num_extra_weights > 0) { + print NEWINI "\n[weight-file]\n$sparse_weights\n"; + } + close OLDINI; + close NEWINI; + + print STDERR "new ini file: ".$ini_continue."\n"; + $moses_ini_files_folds[$i] = $ini_continue; + } + } + else { + my $ini_continue = $moses_ini_file.".continue".$continue_expt; + open(OLDINI, $moses_ini_file); + open(NEWINI, ">$ini_continue"); + while() { + if (/weight-l/) { + print NEWINI "[weight-l]\n"; + print NEWINI $lm_weight; + print NEWINI "\n"; + + if (defined $lm2_weight) { + readline(OLDINI); + print NEWINI $lm2_weight; + print NEWINI "\n"; + } + + readline(OLDINI); + } elsif (/weight-t/) { + print NEWINI "[weight-t]\n"; + foreach my $pm_weight (@pm_weights) { + print NEWINI $pm_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-d/) { + print NEWINI "[weight-d]\n"; + print NEWINI $d_weight; + print NEWINI "\n"; + readline(OLDINI); + foreach my $lr_weight (@lr_weights) { + print NEWINI $lr_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-w/) { + print NEWINI "[weight-w]\n"; + print NEWINI $wp_weight; + print NEWINI "\n"; + readline(OLDINI); + } + else { + print NEWINI; + } + } + if ($num_extra_weights > 0) { + print NEWINI "\n[weight-file]\n$sparse_weights\n"; + } + close OLDINI; + close NEWINI; + print STDERR "new ini file: ".$ini_continue."\n"; + $moses_ini_file = $ini_continue; + } +} + +my $trainer_exe = ¶m_required("train.trainer"); +&check_exists("Training executable", $trainer_exe); +#my $weights_file = ¶m_required("train.weights-file"); +#&check_exists("weights file ", $weights_file); + +#optional training parameters +my $epochs = ¶m("train.epochs"); +my $learner = ¶m("train.learner", "mira"); +my $batch = ¶m("train.batch", 1); # don't print this param twice (when printing training file) +my $extra_args = ¶m("train.extra-args"); +my $by_node = ¶m("train.by-node"); +my $slots = ¶m("train.slots",10); +my $jobs = ¶m("train.jobs",10); +my $mixing_frequency = ¶m("train.mixing-frequency", 1); # don't print this param twice +my $weight_dump_frequency = ¶m("train.weight-dump-frequency", 1); # don't print this param twice +my $burn_in = ¶m("train.burn-in"); +my $burn_in_input_file = ¶m("train.burn-in-input-file"); +my $burn_in_reference_files = ¶m("train.burn-in-reference-files"); +my $skipTrain = ¶m("train.skip"); +my $train_decoder_settings = ¶m("train.decoder-settings", ""); +if (!$train_decoder_settings) { + $train_decoder_settings = $general_decoder_settings; +} + +#devtest configuration +my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses); +my $test_exe = 
¶m("devtest.moses"); +&check_exists("test executable", $test_exe); +$bleu_script = ¶m_required("devtest.bleu"); +&check_exists("multi-bleu script", $bleu_script); +$devtest_input_file = ¶m_required("devtest.input-file"); +&check_exists ("devtest input file", $devtest_input_file); +$devtest_reference_files = ¶m_required("devtest.reference-file"); +if (&check_exists_noThrow ("devtest ref file", $devtest_reference_files) != 0) { + for my $ref (glob $devtest_reference_files . "*") { + &check_exists ("devtest ref file", $ref); + } +} +$devtest_ini_file = ¶m_required("devtest.moses-ini-file"); +&check_exists ("devtest ini file", $devtest_ini_file); + + +my $weight_file_stem = "$name-weights"; +my $extra_memory_devtest = ¶m("devtest.extra-memory",0); +my $skip_devtest = ¶m("devtest.skip-devtest",0); +my $skip_dev = ¶m("devtest.skip-dev",0); +my $skip_submit_test = ¶m("devtest.skip-submit",0); +my $devtest_decoder_settings = ¶m("devtest.decoder-settings", ""); +if (!$devtest_decoder_settings) { + $devtest_decoder_settings = $general_decoder_settings; +} + + +# check that number of jobs, dump frequency and number of input sentences are compatible +# shard size = number of input sentences / number of jobs, ensure shard size >= dump frequency +if ($jackknife) { + # TODO.. +} +else { + my $result = `wc -l $input_file`; + my @result = split(/\s/, $result); + my $inputSize = $result[0]; + my $shardSize = $inputSize / $jobs; + if ($mixing_frequency) { + if ($shardSize < $mixing_frequency) { + $mixing_frequency = int($shardSize); + if ($mixing_frequency == 0) { + $mixing_frequency = 1; + } + + print STDERR "Warning: mixing frequency must not be larger than shard size, setting mixing frequency to $mixing_frequency\n"; + } + } + + if ($weight_dump_frequency != 0) { + if ($shardSize < $weight_dump_frequency) { + $weight_dump_frequency = int($shardSize); + if ($weight_dump_frequency == 0) { + $weight_dump_frequency = 1; + } + + print STDERR "Warning: weight dump frequency must not be larger than shard size, setting weight dump frequency to $weight_dump_frequency\n"; + } + } + + if ($mixing_frequency != 0) { + if ($mixing_frequency > ($shardSize/$batch)) { + $mixing_frequency = int($shardSize/$batch); + if ($mixing_frequency == 0) { + $mixing_frequency = 1; + } + + print STDERR "Warning: mixing frequency must not be larger than (shard size/batch size), setting mixing frequency to $mixing_frequency\n"; + } + } + + if ($weight_dump_frequency != 0) { + if ($weight_dump_frequency > ($shardSize/$batch)) { + $weight_dump_frequency = int($shardSize/$batch); + if ($weight_dump_frequency == 0) { + $weight_dump_frequency = 1; + } + + print STDERR "Warning: weight dump frequency must not be larger than (shard size/batch size), setting weight dump frequency to $weight_dump_frequency\n"; + } + } +} + +#file names +my $train_script_file = $working_dir . "/" . $train_script . ".sh"; +my $train_out = $train_script . ".out"; +my $train_err = $train_script . ".err"; +my $train_job_id = 0; + +my @refs; +if (ref($reference_files) eq 'ARRAY') { + @refs = @$reference_files; +} elsif ($singleRef){ + $refs[0] = $reference_files; +} else { + @refs = glob $reference_files . 
"*" +} +my $arr_refs = \@refs; + +if (!$skipTrain) { + #write the script + open TRAIN, ">$train_script_file" or die "Unable to open \"$train_script_file\" for writing"; + + &header(*TRAIN,$job_name,$working_dir,$slots,$jobs,$hours,$vmem,$train_out,$train_err); + if ($jobs == 1) { + print TRAIN "$trainer_exe "; + } + else { + if ($by_node) { + print TRAIN "mpirun -np $jobs --bynode $trainer_exe \\\n"; + } + else { + print TRAIN "mpirun -np \$NSLOTS $trainer_exe \\\n"; + } + } + + if ($jackknife) { + foreach my $ini (@moses_ini_files_folds) { + print TRAIN "--configs-folds $ini "; + } + print TRAIN "\\\n"; + foreach my $in (@input_files_folds) { + print TRAIN "--input-files-folds $in "; + } + print TRAIN "\\\n"; + for my $ref (@reference_files_folds) { + print TRAIN "--reference-files-folds $ref "; + } + print TRAIN "\\\n"; + } + else { + print TRAIN "-f $moses_ini_file \\\n"; + print TRAIN "-i $input_file \\\n"; + for my $ref (@refs) { + print TRAIN "-r $ref "; + } + print TRAIN "\\\n"; + } + if ($continue_epoch > 0) { + print TRAIN "--continue-epoch $continue_epoch \\\n"; + } + if ($burn_in) { + print TRAIN "--burn-in 1 \\\n"; + print TRAIN "--burn-in-input-file $burn_in_input_file \\\n"; + my @burnin_refs; + if (ref($burn_in_reference_files) eq 'ARRAY') { + @burnin_refs = @$burn_in_reference_files; + } else { + @burnin_refs = glob $burn_in_reference_files . "*"; # TODO: + } + for my $burnin_ref (@burnin_refs) { + &check_exists("burn-in ref file", $burnin_ref); + print TRAIN "--burn-in-reference-files $burnin_ref "; + } + print TRAIN "\\\n"; + } +#if ($weights_file) { +# print TRAIN "-w $weights_file \\\n"; +#} + if (defined $start_weight_file) { + print TRAIN "--start-weights $start_weight_file \\\n"; + } + print TRAIN "-l $learner \\\n"; + print TRAIN "--weight-dump-stem $weight_file_stem \\\n"; + print TRAIN "--mixing-frequency $mixing_frequency \\\n" if ($extra_args !~ /--mixing-frequency /); + print TRAIN "--weight-dump-frequency $weight_dump_frequency \\\n" if ($extra_args !~ /--weight-dump-frequency /); + print TRAIN "--epochs $epochs \\\n" if $epochs; + print TRAIN "--batch-size $batch \\\n" if ($extra_args !~ /--batch-size / && $extra_args !~ /-b /); + print TRAIN $extra_args." \\\n"; + print TRAIN "--decoder-settings \"$train_decoder_settings\" \\\n"; + if ($jobs == 1) { + print TRAIN "echo \"mira finished.\"\n"; + } + else { + print TRAIN "echo \"mpirun finished.\"\n"; + } + close TRAIN; + + if (! $execute) { + print STDERR "Written train file: $train_script_file\n"; + exit 0; + } + + #submit the training job + if ($have_sge) { + $train_job_id = &submit_job_sge($train_script_file); + + } else { + $train_job_id = &submit_job_no_sge($train_script_file, $train_out,$train_err); + } + + die "Failed to submit training job" unless $train_job_id; +} + +#wait for the next weights file to appear, or the training job to end +my $train_iteration = -1; +if ($continue_epoch > 0) { + $train_iteration += ($continue_epoch*$weight_dump_frequency); + print STDERR "Start from training iteration ".$train_iteration." instead of -1.\n"; +} +else { + print STDERR "Start from training iteration ".$train_iteration."\n"; +} + +while(1) { + my($epoch, $epoch_slice); + $train_iteration += 1; # starts at 0 + my $new_weight_file = "$working_dir/$weight_file_stem" . 
"_"; + if ($weight_dump_frequency == 0) { + print STDERR "No weights, no testing..\n"; + exit(0); + } + + #my $epoch = 1 + int $train_iteration / $weight_dump_frequency; + $epoch = int $train_iteration / $weight_dump_frequency; + $epoch_slice = $train_iteration % $weight_dump_frequency; + if ($weight_dump_frequency == 1) { + if ($train_iteration < 10) { + $new_weight_file .= "0".$train_iteration; + } + else { + $new_weight_file .= $train_iteration; + } + } else { + if ($epoch < 10) { + $new_weight_file .= "0".$epoch."_".$epoch_slice; + } + else { + $new_weight_file .= $epoch."_".$epoch_slice; + } + } + + print STDERR "Current epoch: ".$epoch."\n"; + my $expected_num_files = $epoch*$weight_dump_frequency; + if ($wait_for_bleu) { + print STDERR "Expected number of BLEU files: $expected_num_files \n"; + } + if (-e "$working_dir/stopping") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because stopping criterion was reached.\n"; + exit 0; + } + elsif (-e "$working_dir/finished") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because maximum number of epochs was reached.\n"; + exit 0; + } + else { + print STDERR "Waiting for $new_weight_file\n"; + if (!$skipTrain) { + while ((! -e $new_weight_file) && &check_running($train_job_id)) { + sleep 10; + } + } + if (! -e $new_weight_file ) { + if (-e "$working_dir/stopping") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because stopping criterion was reached.\n"; + exit 0; + } + elsif (-e "$working_dir/finished") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because maximum number of epochs was reached.\n"; + exit 0; + } + else { + # training finished with error + print STDERR "Error: training was aborted at " . scalar(localtime()) . "\n"; + exit 1; + } + } + } + + #new weight file written. create test script and submit + my $suffix = ""; + print STDERR "weight file exists? ".(-e $new_weight_file)."\n"; + if (!$skip_devtest) { + createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test); + + my $regularized_weight_file = $new_weight_file."_reg"; + if (-e $regularized_weight_file) { + print STDERR "Submitting test script for regularized weights. 
\n"; + $epoch_slice .= "_reg"; + createTestScriptAndSubmit($epoch, $epoch_slice, $regularized_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test); + } + } + if (!$skip_dev) { + createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test); + } +} + +sub wait_for_bleu() { + my $expected_num_files = $_[0]; + my $working_dir = $_[1]; + print STDERR "Waiting for $expected_num_files bleu files..\n"; + print STDERR "Path: $working_dir/*.bleu \n"; + my @bleu_files = glob("$working_dir/*.bleu"); + while (scalar(@bleu_files) < $expected_num_files) { + sleep 30; + @bleu_files = glob("$working_dir/*.bleu"); + print STDERR "currently have ".(scalar(@bleu_files))."\n"; + } + print STDERR "$expected_num_files BLEU files completed, continue.\n"; +} + +sub createTestScriptAndSubmit { + my $epoch = $_[0]; + my $epoch_slice = $_[1]; + my $new_weight_file = $_[2]; + my $suffix = $_[3]; + my $testtype = $_[4]; + my $old_ini_file = $_[5]; + my $input_file = $_[6]; + my $reference_file = $_[7]; + my $skip_submit = $_[8]; + + #file names + my $output_file; + my $output_error_file; + my $bleu_file; + my $file_id = ""; + if ($weight_dump_frequency == 1) { + if ($train_iteration < 10) { + $output_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".bleu"; + $file_id = "0".$train_iteration.$suffix; + } + else { + $output_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".bleu"; + $file_id = $train_iteration.$suffix; + } + } + else { + if ($epoch < 10) { + $output_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu"; + $file_id = "0".$epoch."_".$epoch_slice.$suffix; + } + else { + $output_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu"; + $file_id = $epoch."_".$epoch_slice.$suffix; + } + } + + my $job_name = $name."_".$testtype."_".$file_id; + + my $test_script = "$name-$testtype"; + my $test_script_file = "$working_dir/$test_script.$file_id.sh"; + my $test_out = "$test_script.$file_id.out"; + my $test_err = "$test_script.$file_id.err"; + + if (! (open TEST, ">$test_script_file" )) { + die "Unable to create test script $test_script_file\n"; + } + + my $hours = ¶m("test.hours",12); + my $extra_args = ¶m("test.extra-args"); + + # Splice the weights into the moses ini file. + my ($default_weight,$wordpenalty_weight,@phrasemodel_weights,$lm_weight,$lm2_weight,$distortion_weight,@lexicalreordering_weights); + + if (! 
(open WEIGHTS, "$new_weight_file")) {
+    die "Unable to open weights file $new_weight_file\n";
+  }
+
+  my $readCoreWeights = 0;
+  my $readExtraWeights = 0;
+  my %extra_weights;
+  my $abs_weights = 0;
+  my $metaFeature_wt_weight = 0;
+  my $metaFeature_pp_weight = 0;
+  while(<WEIGHTS>) {
+    chomp;
+    my ($name,$value) = split;
+    next if ($name =~ /^!Unknown/);
+    next if ($name =~ /^BleuScore/);
+    if ($name eq "DEFAULT_") {
+      $default_weight = $value;
+    } else {
+      if ($name eq "WordPenalty") {
+        $wordpenalty_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^PhraseModel/) {
+        push @phrasemodel_weights,$value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LM\:2/) {
+        $lm2_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LM/) {
+        $lm_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name eq "Distortion") {
+        $distortion_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LexicalReordering/) {
+        push @lexicalreordering_weights,$value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^MetaFeature_wt/) {
+        $metaFeature_wt_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^MetaFeature_pp/) {
+        $metaFeature_pp_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } else {
+        $extra_weights{$name} = $value;
+        $readExtraWeights += 1;
+      }
+    }
+  }
+  close WEIGHTS;
+
+  print STDERR "Number of core weights read: ".$readCoreWeights."\n";
+  print STDERR "Number of extra weights read: ".$readExtraWeights."\n";
+
+  # Create new ini file (changing format: expt1-devtest.00_2.ini instead of expt1-devtest.3.ini)
+  # my $new_ini_file = $working_dir."/".$test_script.".".$train_iteration.$suffix.".ini";
+  my $new_ini_file = "$working_dir/$test_script.$file_id.ini";
+  if (! (open NEWINI, ">$new_ini_file" )) {
+    die "Unable to create ini file $new_ini_file\n";
+  }
+  if (! (open OLDINI, "$old_ini_file" )) {
+    die "Unable to read ini file $old_ini_file\n";
+  }
+
+  # write normalized weights to ini file
+  while(<OLDINI>) {
+    if (/weight-l/) {
+      print NEWINI "[weight-l]\n";
+      print NEWINI ($lm_weight/$abs_weights);
+      print NEWINI "\n";
+
+      if (defined $lm2_weight) {
+        readline(OLDINI);
+        print NEWINI ($lm2_weight/$abs_weights);
+        print NEWINI "\n";
+      }
+
+      readline(OLDINI);
+    } elsif (/weight-t/) {
+      print NEWINI "[weight-t]\n";
+      foreach my $phrasemodel_weight (@phrasemodel_weights) {
+        print NEWINI ($phrasemodel_weight/$abs_weights);
+        print NEWINI "\n";
+        readline(OLDINI);
+      }
+    } elsif (/weight-d/) {
+      print NEWINI "[weight-d]\n";
+      print NEWINI ($distortion_weight/$abs_weights);
+      print NEWINI "\n";
+      readline(OLDINI);
+      foreach my $lexicalreordering_weight (@lexicalreordering_weights) {
+        print NEWINI ($lexicalreordering_weight/$abs_weights);
+        print NEWINI "\n";
+        readline(OLDINI);
+      }
+    } elsif (/weight-wt/) {
+      print NEWINI "[weight-wt]\n";
+      print NEWINI $metaFeature_wt_weight/$abs_weights;
+      print NEWINI "\n";
+      readline(OLDINI);
+    } elsif (/weight-pp/) {
+      print NEWINI "[weight-pp]\n";
+      print NEWINI $metaFeature_pp_weight/$abs_weights;
+      print NEWINI "\n";
+      readline(OLDINI);
+    } elsif (/weight-w/) {
+      print NEWINI "[weight-w]\n";
+      print NEWINI ($wordpenalty_weight/$abs_weights);
+      print NEWINI "\n";
+      readline(OLDINI);
+    } else {
+      print NEWINI;
+    }
+  }
+  close OLDINI;
+
+  my $writtenExtraWeights = 0;
+
+  # if there are any non-core weights, write them to a weights file (normalized)
+  my $extra_weight_file = undef;
+  if (%extra_weights && !$tuneMetaFeature) {
+    $extra_weight_file = "$new_weight_file.sparse.scaled";
+    if (! (open EXTRAWEIGHT,">$extra_weight_file")) {
+      print "Warning: unable to create extra weights file $extra_weight_file";
+      next;
+    }
+#    my $core_weight = 1;
+#    if ($have_core) {
+#      $default_weight = $extra_weights{"DEFAULT_"};
+#      $core_weight = $extra_weights{"core"};
+#    }
+    foreach my $name (sort keys %extra_weights) {
+      next if ($name eq "core");
+      next if ($name eq "DEFAULT_");
+      my $value = $extra_weights{$name}/$abs_weights;
+
+      # write only non-zero feature weights to file
+      if ($value) {
+#        $value /= $core_weight;
+        print EXTRAWEIGHT "$name $value\n";
+        $writtenExtraWeights += 1;
+      }
+    }
+  }
+
+  # add specification of sparse weight file to ini
+  if (!$tuneMetaFeature) {
+    print NEWINI "\n[weight-file] \n";
+    print NEWINI "$extra_weight_file \n";
+    close NEWINI;
+  }
+
+  print TEST "#!/bin/sh\n";
+  print TEST "#\$ -N $job_name\n";
+  print TEST "#\$ -wd $working_dir\n";
+  print TEST "#\$ -l h_rt=$hours:00:00\n";
+  print TEST "#\$ -o $test_out\n";
+  print TEST "#\$ -e $test_err\n";
+  print TEST "\n";
+  if ($have_sge) {
+    # some eddie specific stuff
+    print TEST ". /etc/profile.d/modules.sh\n";
+    print TEST "module load openmpi/ethernet/gcc/latest\n";
+    print TEST "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
+  }
+  print TEST "$test_exe $devtest_decoder_settings -i $input_file -f $new_ini_file ";
+# now written to ini file
+#  if ($extra_weight_file) {
+#    print TEST "-weight-file $extra_weight_file ";
+#  }
+  print TEST $extra_args;
+  print TEST " 1> $output_file 2> $output_error_file\n";
+  print TEST "echo \"Decoding of ".$testtype." set finished.\"\n";
+  print TEST "$bleu_script $reference_file < $output_file > $bleu_file\n";
+  print TEST "echo \"Computed BLEU score of ".$testtype." set.\"\n";
+  close TEST;
+
+  #launch testing
+  if(!$skip_submit) {
+    if ($have_sge) {
+      if ($extra_memory_devtest) {
+        print STDERR "Extra memory for test job: $extra_memory_devtest \n";
+        &submit_job_sge_extra_memory($test_script_file,$extra_memory_devtest);
+      } else {
+        &submit_job_sge($test_script_file);
+      }
+    } else {
+      &submit_job_no_sge($test_script_file, $test_out,$test_err);
+    }
+  }
+}
+
+sub param {
+  my ($key,$default) = @_;
+  my $value = $config->param($key);
+  $value = $default if !$value;
+  # Empty arguments get interpreted as arrays
+  $value = 0 if (ref($value) eq 'ARRAY' && scalar(@$value) == 0);
+  return $value;
+}
+
+sub param_required {
+  my ($key) = @_;
+  my $value = &param($key);
+  die "Error: required parameter \"$key\" was missing" if (!defined($value));
+  #$value = join $value if (ref($value) eq 'ARRAY');
+  return $value;
+}
+
+sub header {
+  my ($OUT,$name,$working_dir,$slots,$jobs,$hours,$vmem,$out,$err) = @_;
+  print $OUT "#!/bin/sh\n";
+  if ($have_sge) {
+    print $OUT "#\$ -N $name\n";
+    print $OUT "#\$ -wd $working_dir\n";
+    if ($jobs != 1) {
+      print $OUT "#\$ -pe $mpienv $slots\n";
+    }
+    print $OUT "#\$ -l h_rt=$hours:00:00\n";
+    print $OUT "#\$ -l h_vmem=$vmem" . "G" . "\n";
+    print $OUT "#\$ -o $out\n";
+    print $OUT "#\$ -e $err\n";
+  } else {
+    print $OUT "\nNSLOTS=$jobs\n";
+  }
+  print $OUT "\n";
+  if ($have_sge) {
+    # some eddie specific stuff
+    print $OUT ". /etc/profile.d/modules.sh\n";
+    print $OUT "module load openmpi/ethernet/gcc/latest\n";
+    print $OUT "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
+  }
+}
+
+sub check_exists {
+  my ($name,$filename) = @_;
+  die "Error: unable to read $name: \"$filename\"" if ! -r $filename;
+}
+
+sub check_exists_noThrow {
+  my ($name,$filename) = @_;
+  return 1 if ! -r $filename;
+  return 0;
+}
+
+#
+# Used to submit train/test jobs. Returns the job id, or 0 on failure.
+#
+
+sub submit_job_sge {
+  my($script_file) = @_;
+  my $qsub_result = `qsub -P $queue $script_file`;
+  print STDERR "SUBMIT CMD: qsub -P $queue $script_file\n";
+  if ($qsub_result !~ /Your job (\d+)/) {
+    print STDERR "Failed to qsub job: $qsub_result\n";
+    return 0;
+  }
+  my $job_name = basename($script_file);
+  print STDERR "Submitted job: $job_name id: $1 " .
+    scalar(localtime()) . "\n";
+  return $1;
+}
+
+sub submit_job_sge_extra_memory {
+  my($script_file,$extra_memory) = @_;
+  my $qsub_result = `qsub -pe $extra_memory -P $queue $script_file`;
+  print STDERR "SUBMIT CMD: qsub -pe $extra_memory -P $queue $script_file \n";
+  if ($qsub_result !~ /Your job (\d+)/) {
+    print STDERR "Failed to qsub job: $qsub_result\n";
+    return 0;
+  }
+  my $job_name = basename($script_file);
+  print STDERR "Submitted job: $job_name id: $1 " .
+    scalar(localtime()) . "\n";
+  return $1;
+}
+
+#
+# As above, but without sge. Returns the pid.
+#
+
+sub submit_job_no_sge {
+  my($script_file,$out,$err) = @_;
+  my $pid = undef;
+  if ($pid = fork) {
+    my $job_name = basename($script_file);
+    print STDERR "Launched : $job_name pid: $pid " . scalar(localtime()) . "\n";
+    return $pid;
+  } elsif (defined $pid) {
+    print STDERR "Executing script $script_file, writing to $out and $err.\n";
+    `cd $working_dir; sh $script_file 1>$out 2> $err`;
+    exit;
+  } else {
+    # Fork failed
+    return 0;
+  }
+}
+
+sub check_running {
+  my ($job_id) = @_;
+  if ($have_sge) {
+    return `qstat | grep $job_id`;
+  } else {
+    return `ps -e | grep $job_id | grep -v defunct`;
+  }
+}
+
+