From 13ec6060b0d7ef600f056c15dd249d190cc16b25 Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Thu, 8 Jan 2015 11:49:53 +0000
Subject: move mira code to contrib

---
 contrib/mira/Decoder.cpp                    |  352 +++++
 contrib/mira/Decoder.h                      |  138 ++
 contrib/mira/Hildreth.cpp                   |  175 +++
 contrib/mira/Hildreth.h                     |   13 +
 contrib/mira/HildrethTest.cpp               |  793 ++++++++++++
 contrib/mira/HypothesisQueue.cpp            |   66 +
 contrib/mira/HypothesisQueue.h              |   69 +
 contrib/mira/Jamfile                        |   15 +
 contrib/mira/Main.cpp                       | 1847 +++++++++++++++++++++++++
 contrib/mira/Main.h                         |   58 +
 contrib/mira/MiraOptimiser.cpp              |  446 +++++++
 contrib/mira/MiraTest.cpp                   |   24 +
 contrib/mira/Optimiser.h                    |  153 +++
 contrib/mira/Perceptron.cpp                 |   53 +
 contrib/mira/expt.cfg                       |   34 +
 contrib/mira/mira.xcodeproj/project.pbxproj |  401 ++++++
 contrib/mira/training-expt.perl             |  994 ++++++++++++++
 17 files changed, 5631 insertions(+)
 create mode 100644 contrib/mira/Decoder.cpp
 create mode 100644 contrib/mira/Decoder.h
 create mode 100644 contrib/mira/Hildreth.cpp
 create mode 100644 contrib/mira/Hildreth.h
 create mode 100644 contrib/mira/HildrethTest.cpp
 create mode 100644 contrib/mira/HypothesisQueue.cpp
 create mode 100644 contrib/mira/HypothesisQueue.h
 create mode 100644 contrib/mira/Jamfile
 create mode 100644 contrib/mira/Main.cpp
 create mode 100644 contrib/mira/Main.h
 create mode 100644 contrib/mira/MiraOptimiser.cpp
 create mode 100644 contrib/mira/MiraTest.cpp
 create mode 100644 contrib/mira/Optimiser.h
 create mode 100644 contrib/mira/Perceptron.cpp
 create mode 100644 contrib/mira/expt.cfg
 create mode 100644 contrib/mira/mira.xcodeproj/project.pbxproj
 create mode 100755 contrib/mira/training-expt.perl

diff --git a/contrib/mira/Decoder.cpp b/contrib/mira/Decoder.cpp
new file mode 100644
index 000000000..c9bb4c983
--- /dev/null
+++ b/contrib/mira/Decoder.cpp
@@ -0,0 +1,352 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "Decoder.h"
+#include "moses/Manager.h"
+#include "moses/ChartManager.h"
+#include "moses/Sentence.h"
+#include "moses/InputType.h"
+#include "moses/Phrase.h"
+#include "moses/TrellisPathList.h"
+#include "moses/ChartKBestExtractor.h"
+
+using namespace std;
+using namespace Moses;
+
+
+namespace Mira
+{
+
+/**
+ * Allocates a char* and copies string into it.
+**/ +static char* strToChar(const string& s) +{ + char* c = new char[s.size()+1]; + strcpy(c,s.c_str()); + return c; +} + +MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector decoder_params) + : m_manager(NULL) +{ + static int BASE_ARGC = 6; + Parameter* params = new Parameter(); + char ** mosesargv = new char*[BASE_ARGC + argc]; + mosesargv[0] = strToChar("-f"); + mosesargv[1] = strToChar(inifile); + mosesargv[2] = strToChar("-v"); + stringstream dbgin; + dbgin << debuglevel; + mosesargv[3] = strToChar(dbgin.str()); + + mosesargv[4] = strToChar("-no-cache"); + mosesargv[5] = strToChar("true"); + /* + mosesargv[4] = strToChar("-use-persistent-cache"); + mosesargv[5] = strToChar("0"); + mosesargv[6] = strToChar("-persistent-cache-size"); + mosesargv[7] = strToChar("0"); + */ + + for (int i = 0; i < argc; ++i) { + char *cstr = &(decoder_params[i])[0]; + mosesargv[BASE_ARGC + i] = cstr; + } + + if (!params->LoadParam(BASE_ARGC + argc,mosesargv)) { + cerr << "Loading static data failed, exit." << endl; + exit(1); + } + StaticData::LoadDataStatic(params, "mira"); + for (int i = 0; i < BASE_ARGC; ++i) { + delete[] mosesargv[i]; + } + delete[] mosesargv; + + const std::vector &bleuFFs = BleuScoreFeature::GetColl(); + assert(bleuFFs.size() == 1); + m_bleuScoreFeature = bleuFFs[0]; +} + +void MosesDecoder::cleanup(bool chartDecoding) +{ + delete m_manager; + if (chartDecoding) + delete m_chartManager; + else + delete m_sentence; +} + +vector< vector > MosesDecoder::getNBest(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + bool avgRefLength, + size_t rank, + size_t epoch, + string filename) +{ + StaticData &staticData = StaticData::InstanceNonConst(); + bool chartDecoding = staticData.IsChart(); + initialize(staticData, source, sentenceid, bleuObjectiveWeight, bleuScoreWeight, avgRefLength, chartDecoding); + + // run the decoder + if (chartDecoding) { + return runChartDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight, + featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch); + } else { + SearchAlgorithm search = staticData.GetSearchAlgorithm(); + return runDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight, + featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch, + search, filename); + } +} + +vector< vector > MosesDecoder::runDecoder(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch, + SearchAlgorithm& search, + string filename) +{ + // run the decoder + m_manager = new Moses::Manager(*m_sentence); + m_manager->Decode(); + TrellisPathList nBestList; + m_manager->CalcNBest(nBestSize, nBestList, distinct); + + // optionally print nbest to file (to extract scores and features.. 
currently just for sentence bleu scoring) + /*if (filename != "") { + ofstream out(filename.c_str()); + if (!out) { + ostringstream msg; + msg << "Unable to open " << filename; + throw runtime_error(msg.str()); + } + // TODO: handle sentence id (for now always 0) + //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(), 0, false); + out.close(); + }*/ + + // read off the feature values and bleu scores for each sentence in the nbest list + Moses::TrellisPathList::const_iterator iter; + for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { + const Moses::TrellisPath &path = **iter; + featureValues.push_back(path.GetScoreBreakdown()); + float bleuScore, dynBleuScore, realBleuScore; + if (realBleu) realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetTargetPhrase()); + else dynBleuScore = getBleuScore(featureValues.back()); + bleuScore = realBleu ? realBleuScore : dynBleuScore; + bleuScores.push_back(bleuScore); + + //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl; + float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore); + modelScores.push_back(scoreWithoutBleu); + + if (iter != nBestList.begin()) + cerr << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetTargetPhrase() << "\", score: " + << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore(); + if (m_bleuScoreFeature->Enabled() && realBleu) + cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") "; + + // set bleu score to zero in the feature vector since we do not want to optimise its weight + setBleuScore(featureValues.back(), 0); + } + + // prepare translations to return + vector< vector > translations; + for (size_t i=0; i < numReturnedTranslations && i < nBestList.GetSize(); ++i) { + const TrellisPath &path = nBestList.at(i); + Phrase phrase = path.GetTargetPhrase(); + + vector translation; + for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { + const Word &word = phrase.GetWord(pos); + Word *newWord = new Word(word); + translation.push_back(newWord); + } + translations.push_back(translation); + } + + return translations; +} + +vector< vector > MosesDecoder::runChartDecoder(const std::string& source, + size_t sentenceid, + size_t nBestSize, + float bleuObjectiveWeight, + float bleuScoreWeight, + vector< ScoreComponentCollection>& featureValues, + vector< float>& bleuScores, + vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch) +{ + // run the decoder + m_chartManager = new ChartManager(*m_sentence); + m_chartManager->Decode(); + ChartKBestExtractor::KBestVec nBestList; + m_chartManager->CalcNBest(nBestSize, nBestList, distinct); + + // read off the feature values and bleu scores for each sentence in the nbest list + for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin(); + p != nBestList.end(); ++p) { + const ChartKBestExtractor::Derivation &derivation = **p; + featureValues.push_back(*ChartKBestExtractor::GetOutputScoreBreakdown(derivation)); + float bleuScore, dynBleuScore, realBleuScore; + dynBleuScore = getBleuScore(featureValues.back()); + Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation); + realBleuScore = m_bleuScoreFeature->CalculateBleu(outputPhrase); + bleuScore = realBleu ? 
realBleuScore : dynBleuScore; + bleuScores.push_back(bleuScore); + + float scoreWithoutBleu = derivation.score - (bleuObjectiveWeight * bleuScoreWeight * bleuScore); + modelScores.push_back(scoreWithoutBleu); + + if (p != nBestList.begin()) + cerr << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << outputPhrase << "\", score: " + << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << derivation.score; + if (m_bleuScoreFeature->Enabled() && realBleu) + cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") "; + + // set bleu score to zero in the feature vector since we do not want to optimise its weight + setBleuScore(featureValues.back(), 0); + } + + // prepare translations to return + vector< vector > translations; + for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin(); + p != nBestList.end(); ++p) { + const ChartKBestExtractor::Derivation &derivation = **p; + Phrase phrase = ChartKBestExtractor::GetOutputPhrase(derivation); + + vector translation; + for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { + const Word &word = phrase.GetWord(pos); + Word *newWord = new Word(word); + translation.push_back(newWord); + } + translations.push_back(translation); + } + + return translations; +} + +void MosesDecoder::initialize(StaticData& staticData, const std::string& source, size_t sentenceid, + float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding) +{ + m_sentence = new Sentence(); + stringstream in(source + "\n"); + const std::vector &inputFactorOrder = staticData.GetInputFactorOrder(); + m_sentence->Read(in,inputFactorOrder); + + // set weight of BleuScoreFeature + //cerr << "Reload Bleu feature weight: " << bleuObjectiveWeight*bleuScoreWeight << " (" << bleuObjectiveWeight << "*" << bleuScoreWeight << ")" << endl; + //staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight); + + m_bleuScoreFeature->SetCurrSourceLength((*m_sentence).GetSize()); + if (chartDecoding) + m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()-2); + else + m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()); + + if (avgRefLength) + m_bleuScoreFeature->SetCurrAvgRefLength(sentenceid); + else + m_bleuScoreFeature->SetCurrShortestRefLength(sentenceid); + m_bleuScoreFeature->SetCurrReferenceNgrams(sentenceid); +} + +float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores) +{ + return scores.GetScoreForProducer(m_bleuScoreFeature); +} + +void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu) +{ + scores.Assign(m_bleuScoreFeature, bleu); +} + +ScoreComponentCollection MosesDecoder::getWeights() +{ + return StaticData::Instance().GetAllWeights(); +} + +void MosesDecoder::setWeights(const ScoreComponentCollection& weights) +{ + StaticData::InstanceNonConst().SetAllWeights(weights); +} + +void MosesDecoder::updateHistory(const vector& words) +{ + m_bleuScoreFeature->UpdateHistory(words); +} + +void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector& sourceLengths, vector& ref_ids, size_t rank, size_t epoch) +{ + m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch); +} + +void MosesDecoder::printBleuFeatureHistory(std::ostream& out) +{ + m_bleuScoreFeature->PrintHistory(out); +} + +size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) +{ + return m_bleuScoreFeature->GetClosestRefLength(ref_id, hypoLength); +} + +size_t 
MosesDecoder::getShortestReferenceIndex(size_t ref_id) +{ + return m_bleuScoreFeature->GetShortestRefIndex(ref_id); +} + +void MosesDecoder::setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, + bool scaleByInverseLength, bool scaleByAvgInverseLength, + float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu) +{ + m_bleuScoreFeature->SetBleuParameters(disable, sentenceBleu, scaleByInputLength, scaleByAvgInputLength, + scaleByInverseLength, scaleByAvgInverseLength, + scaleByX, historySmoothing, scheme, simpleHistoryBleu); +} +} + diff --git a/contrib/mira/Decoder.h b/contrib/mira/Decoder.h new file mode 100644 index 000000000..0cc1eb3ab --- /dev/null +++ b/contrib/mira/Decoder.h @@ -0,0 +1,138 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ +#ifndef _MIRA_DECODER_H_ +#define _MIRA_DECODER_H_ + +#include +#include +#include + + +#include "moses/Hypothesis.h" +#include "moses/Parameter.h" +#include "moses/SearchNormal.h" +#include "moses/Sentence.h" +#include "moses/StaticData.h" +#include "moses/FF/BleuScoreFeature.h" + +// +// Wrapper functions and objects for the decoder. +// + +namespace Mira +{ + +/** + * Wraps moses decoder. + **/ +class MosesDecoder +{ +public: + /** + * Initialise moses (including StaticData) using the given ini file and debuglevel, passing through any + * other command line arguments. 
+ **/ + MosesDecoder(const std::string& inifile, int debuglevel, int argc, std::vector decoder_params); + + //returns the best sentence + std::vector< std::vector > getNBest(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + bool avgRefLength, + size_t rank, + size_t epoch, + std::string filename); + std::vector< std::vector > runDecoder(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch, + Moses::SearchAlgorithm& seach, + std::string filename); + std::vector< std::vector > runChartDecoder(const std::string& source, + size_t sentenceid, + size_t nbestSize, + float bleuObjectiveweight, //weight of bleu in objective + float bleuScoreWeight, //weight of bleu in score + std::vector< Moses::ScoreComponentCollection>& featureValues, + std::vector< float>& bleuScores, + std::vector< float>& modelScores, + size_t numReturnedTranslations, + bool realBleu, + bool distinct, + size_t rank, + size_t epoch); + void initialize(Moses::StaticData& staticData, const std::string& source, size_t sentenceid, + float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding); + void updateHistory(const std::vector& words); + void updateHistory(const std::vector< std::vector< const Moses::Word*> >& words, std::vector& sourceLengths, std::vector& ref_ids, size_t rank, size_t epoch); + void printBleuFeatureHistory(std::ostream& out); + void printReferenceLength(const std::vector& ref_ids); + size_t getReferenceLength(size_t ref_id); + size_t getClosestReferenceLength(size_t ref_id, int hypoLength); + size_t getShortestReferenceIndex(size_t ref_id); + void setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, + bool scaleByInverseLength, bool scaleByAvgInverseLength, + float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu); + void setAvgInputLength (float l) { + m_bleuScoreFeature->SetAvgInputLength(l); + } + Moses::ScoreComponentCollection getWeights(); + void setWeights(const Moses::ScoreComponentCollection& weights); + void cleanup(bool chartDecoding); + + float getSourceLengthHistory() { + return m_bleuScoreFeature->GetSourceLengthHistory(); + } + float getTargetLengthHistory() { + return m_bleuScoreFeature->GetTargetLengthHistory(); + } + float getAverageInputLength() { + return m_bleuScoreFeature->GetAverageInputLength(); + } + +private: + float getBleuScore(const Moses::ScoreComponentCollection& scores); + void setBleuScore(Moses::ScoreComponentCollection& scores, float bleu); + Moses::Manager *m_manager; + Moses::ChartManager *m_chartManager; + Moses::Sentence *m_sentence; + Moses::BleuScoreFeature *m_bleuScoreFeature; +}; + + +} //namespace + +#endif diff --git a/contrib/mira/Hildreth.cpp b/contrib/mira/Hildreth.cpp new file mode 100644 index 000000000..03076e767 --- /dev/null +++ b/contrib/mira/Hildreth.cpp @@ -0,0 +1,175 @@ 
+#include "Hildreth.h"
+
+using namespace Moses;
+using namespace std;
+
+namespace Mira
+{
+
+vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b)
+{
+
+  size_t i;
+  int max_iter = 10000;
+  float eps = 0.00000001;
+  float zero = 0.000000000001;
+
+  vector<float> alpha ( b.size() );
+  vector<float> F ( b.size() );
+  vector<float> kkt ( b.size() );
+
+  float max_kkt = -1e100;
+
+  size_t K = b.size();
+
+  float A[K][K];
+  bool is_computed[K];
+  for ( i = 0; i < K; i++ ) {
+    A[i][i] = a[i].InnerProduct(a[i]);
+    is_computed[i] = false;
+  }
+
+  int max_kkt_i = -1;
+
+
+  for ( i = 0; i < b.size(); i++ ) {
+    F[i] = b[i];
+    kkt[i] = F[i];
+    if ( kkt[i] > max_kkt ) {
+      max_kkt = kkt[i];
+      max_kkt_i = i;
+    }
+  }
+
+  int iter = 0;
+  float diff_alpha;
+  float try_alpha;
+  float add_alpha;
+
+  while ( max_kkt >= eps && iter < max_iter ) {
+
+    diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
+    try_alpha = alpha[max_kkt_i] + diff_alpha;
+    add_alpha = 0.0;
+
+    if ( try_alpha < 0.0 )
+      add_alpha = -1.0 * alpha[max_kkt_i];
+    else
+      add_alpha = diff_alpha;
+
+    alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
+
+    if ( !is_computed[max_kkt_i] ) {
+      for ( i = 0; i < K; i++ ) {
+        A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
+        //A[i][max_kkt_i] = 0; // for version 1
+        is_computed[max_kkt_i] = true;
+      }
+    }
+
+    for ( i = 0; i < F.size(); i++ ) {
+      F[i] -= add_alpha * A[i][max_kkt_i];
+      kkt[i] = F[i];
+      if ( alpha[i] > zero )
+        kkt[i] = abs ( F[i] );
+    }
+    max_kkt = -1e100;
+    max_kkt_i = -1;
+    for ( i = 0; i < F.size(); i++ )
+      if ( kkt[i] > max_kkt ) {
+        max_kkt = kkt[i];
+        max_kkt_i = i;
+      }
+
+    iter++;
+  }
+
+  return alpha;
+}
+
+vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b, float C)
+{
+
+  size_t i;
+  int max_iter = 10000;
+  float eps = 0.00000001;
+  float zero = 0.000000000001;
+
+  vector<float> alpha ( b.size() );
+  vector<float> F ( b.size() );
+  vector<float> kkt ( b.size() );
+
+  float max_kkt = -1e100;
+
+  size_t K = b.size();
+
+  float A[K][K];
+  bool is_computed[K];
+  for ( i = 0; i < K; i++ ) {
+    A[i][i] = a[i].InnerProduct(a[i]);
+    is_computed[i] = false;
+  }
+
+  int max_kkt_i = -1;
+
+
+  for ( i = 0; i < b.size(); i++ ) {
+    F[i] = b[i];
+    kkt[i] = F[i];
+    if ( kkt[i] > max_kkt ) {
+      max_kkt = kkt[i];
+      max_kkt_i = i;
+    }
+  }
+
+  int iter = 0;
+  float diff_alpha;
+  float try_alpha;
+  float add_alpha;
+
+  while ( max_kkt >= eps && iter < max_iter ) {
+
+    diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
+    try_alpha = alpha[max_kkt_i] + diff_alpha;
+    add_alpha = 0.0;
+
+    if ( try_alpha < 0.0 )
+      add_alpha = -1.0 * alpha[max_kkt_i];
+    else if (try_alpha > C)
+      add_alpha = C - alpha[max_kkt_i];
+    else
+      add_alpha = diff_alpha;
+
+    alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
+
+    if ( !is_computed[max_kkt_i] ) {
+      for ( i = 0; i < K; i++ ) {
+        A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
+        //A[i][max_kkt_i] = 0; // for version 1
+        is_computed[max_kkt_i] = true;
+      }
+    }
+
+    for ( i = 0; i < F.size(); i++ ) {
+      F[i] -= add_alpha * A[i][max_kkt_i];
+      kkt[i] = F[i];
+      if (alpha[i] > C - zero)
+        kkt[i]=-kkt[i];
+      else if (alpha[i] > zero)
+        kkt[i] = abs(F[i]);
+
+    }
+    max_kkt = -1e100;
+    max_kkt_i = -1;
+    for ( i = 0; i < F.size(); i++ )
+      if ( kkt[i] > max_kkt ) {
+        max_kkt = kkt[i];
+        max_kkt_i = i;
+      }
+
+    iter++;
+  }
+
+  return alpha;
+}
+}

diff --git a/contrib/mira/Hildreth.h b/contrib/mira/Hildreth.h
new file mode 100644
index 000000000..373f2ac43
--- /dev/null
+++ b/contrib/mira/Hildreth.h
@@ -0,0 +1,13 @@
+#include "moses/FeatureVector.h"
+#include "moses/ScoreComponentCollection.h"
+
+namespace Mira
+{
+
+class Hildreth
+{
+public :
+  static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b );
+  static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b, float C);
+};
+}
diff --git a/contrib/mira/HildrethTest.cpp b/contrib/mira/HildrethTest.cpp
new file mode 100644
index 000000000..43e4403e4
--- /dev/null
+++ b/contrib/mira/HildrethTest.cpp
@@ -0,0 +1,793 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2010 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include +#include +#include + +#include + +#include "Hildreth.h" +#include "Optimiser.h" +#include "ScoreComponentCollection.h" + +using namespace std; +using namespace Moses; +using namespace Mira; + +namespace MosesTest +{ + +class MockSingleFeature : public StatelessFeatureFunction +{ +public: + MockSingleFeature(): StatelessFeatureFunction("MockSingle",1) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "sf"; + } +}; + +class MockMultiFeature : public StatelessFeatureFunction +{ +public: + MockMultiFeature(): StatelessFeatureFunction("MockMulti",5) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "mf"; + } +}; + +class MockSparseFeature : public StatelessFeatureFunction +{ +public: + MockSparseFeature(): StatelessFeatureFunction("MockSparse", ScoreProducer::unlimited) {} + std::string GetScoreProducerWeightShortName(unsigned) const { + return "sf"; + } +}; + +struct MockProducers { + MockProducers() {} + + MockSingleFeature single; + MockMultiFeature multi; + MockSparseFeature sparse; +}; + + + +BOOST_AUTO_TEST_SUITE(hildreth_test) + +BOOST_FIXTURE_TEST_CASE(test_hildreth_1, MockProducers) +{ + // Feasible example with 2 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 1, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + // feature values (second is oracle) + //float arr1[] = {0, -5, -27.0908, -1.83258, 0 }; + //float arr2[] = {0, -5, -29.158, -1.83258, 0 }; + //float arr3[] = {0, -5, -27.0908, -1.83258, 0 }; + + // feature value differences (to oracle) + ScoreComponentCollection s1, s2, s3; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.0672, 0, 0 }; + + float loss1 = 2.34085; + float loss2 = 0; + float loss3 = 2.34085; + + vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + + cerr << "feature value diff: " << featureValueDiffs[0] << endl; + cerr << "feature value diff: " << featureValueDiffs[1] << endl; + cerr << "feature value diff: " << featureValueDiffs[2] << endl << endl; + + float oldModelScoreDiff1 = featureValueDiffs[0].InnerProduct(weights); + float oldModelScoreDiff2 = featureValueDiffs[1].InnerProduct(weights); + float oldModelScoreDiff3 = featureValueDiffs[2].InnerProduct(weights); + + cerr << "model score diff: " << oldModelScoreDiff1 << ", loss: " << loss1 << endl; + cerr << "model score diff: " << oldModelScoreDiff2 << ", loss: " << loss2 << endl; + cerr << "model score diff: " << oldModelScoreDiff3 << ", loss: " << loss3 << endl << endl; + + lossMinusModelScoreDiff.push_back(loss1 - oldModelScoreDiff1); + lossMinusModelScoreDiff.push_back(loss2 - oldModelScoreDiff2); + lossMinusModelScoreDiff.push_back(loss3 - oldModelScoreDiff3); + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, 
lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + float newModelScoreDiff1 = featureValueDiffs[0].InnerProduct(weightsUpdate1); + float newModelScoreDiff2 = featureValueDiffs[1].InnerProduct(weightsUpdate1); + float newModelScoreDiff3 = featureValueDiffs[2].InnerProduct(weightsUpdate1); + + cerr << "new model score diff: " << newModelScoreDiff1 << ", loss: " << loss1 << endl; + cerr << "new model score diff: " << newModelScoreDiff2 << ", loss: " << loss2 << endl; + cerr << "new model score diff: " << newModelScoreDiff3 << ", loss: " << loss3 << endl; + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + float newModelScoreDiff4 = featureValueDiffs[0].InnerProduct(weightsUpdate2); + float newModelScoreDiff5 = featureValueDiffs[1].InnerProduct(weightsUpdate2); + float newModelScoreDiff6 = featureValueDiffs[2].InnerProduct(weightsUpdate2); + + cerr << "new model score diff: " << newModelScoreDiff4 << ", loss: " << loss1 << endl; + cerr << "new model score diff: " << newModelScoreDiff5 << ", loss: " << loss2 << endl; + cerr << "new model score diff: " << newModelScoreDiff6 << ", loss: " << loss3 << endl; +} + + +BOOST_FIXTURE_TEST_CASE(test_hildreth_3, MockProducers) +{ + // Unfeasible example with 21 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 21; + + // feature value differences (to oracle) + // NOTE: these feature values are only approximations + ScoreComponentCollection s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, 
s18, s19, s20, s21; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr4[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr5[] = { 0, 0, 4.4283, 0, 0 }; + float arr6[] = { 0, 0, 3.84829, 1.38629, 0 }; + float arr7[] = { 0, 0, 6.83689, 0, 0 }; + float arr8[] = { 0, 0, 0, 0, 0 }; + float arr9[] = { 0, 0, -2.0672, 0, 0 }; + float arr10[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr11[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr12[] = { 0, 0, 4.4283, 0, 0 }; + float arr13[] = { 3, 0, 2.41089, 0, 0 }; + float arr14[] = { 3, 0, 2.32709, 0, 0 }; + float arr15[] = { 0, 0, -2.0672, 0, 0 }; + float arr16[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr17[] = { 0, 0, 4.4283, 0, 0 }; + float arr18[] = { 0, 0, 3.84829, 1.38629, 0 }; + float arr19[] = { 0, 0, -0.0171661, 1.38629, 0 }; + float arr20[] = { 0, 0, 0, 0, 0 }; + float arr21[] = { 0, 0, 6.83689, 0, 0 }; + + vector losses; + losses.push_back(2.73485); + losses.push_back(0); + losses.push_back(3.64118); + losses.push_back(1.47347); + losses.push_back(3.64118); + losses.push_back(4.16278); + losses.push_back(3.13952); + losses.push_back(0); + losses.push_back(2.73485); + losses.push_back(1.47347); + losses.push_back(3.64118); + losses.push_back(3.64118); + losses.push_back(2.51662); + losses.push_back(2.73485); + losses.push_back(2.73485); + losses.push_back(3.64118); + losses.push_back(3.64118); + losses.push_back(4.16278); + losses.push_back(1.47347); + losses.push_back(0); + losses.push_back(3.13952); + + vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + vector vec4(arr4,arr4+5); + vector vec5(arr5,arr5+5); + vector vec6(arr6,arr6+5); + vector vec7(arr7,arr7+5); + vector vec8(arr8,arr8+5); + vector vec9(arr9,arr9+5); + vector vec10(arr10,arr10+5); + vector vec11(arr11,arr11+5); + vector vec12(arr12,arr12+5); + vector vec13(arr13,arr13+5); + vector vec14(arr14,arr14+5); + vector vec15(arr15,arr15+5); + vector vec16(arr16,arr16+5); + vector vec17(arr17,arr17+5); + vector vec18(arr18,arr18+5); + vector vec19(arr19,arr19+5); + vector vec20(arr20,arr20+5); + vector vec21(arr21,arr21+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + s4.PlusEquals(&multi,vec4); + s5.PlusEquals(&multi,vec5); + s6.PlusEquals(&multi,vec6); + s7.PlusEquals(&multi,vec7); + s8.PlusEquals(&multi,vec8); + s9.PlusEquals(&multi,vec9); + s10.PlusEquals(&multi,vec10); + s11.PlusEquals(&multi,vec11); + s12.PlusEquals(&multi,vec12); + s13.PlusEquals(&multi,vec13); + s14.PlusEquals(&multi,vec14); + s15.PlusEquals(&multi,vec15); + s16.PlusEquals(&multi,vec16); + s17.PlusEquals(&multi,vec17); + s18.PlusEquals(&multi,vec18); + s19.PlusEquals(&multi,vec19); + s20.PlusEquals(&multi,vec20); + s21.PlusEquals(&multi,vec21); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + featureValueDiffs.push_back(s4); + featureValueDiffs.push_back(s5); + featureValueDiffs.push_back(s6); + featureValueDiffs.push_back(s7); + featureValueDiffs.push_back(s8); + featureValueDiffs.push_back(s9); + featureValueDiffs.push_back(s10); + featureValueDiffs.push_back(s11); + featureValueDiffs.push_back(s12); + featureValueDiffs.push_back(s13); + featureValueDiffs.push_back(s14); + featureValueDiffs.push_back(s15); + featureValueDiffs.push_back(s16); + featureValueDiffs.push_back(s17); + featureValueDiffs.push_back(s18); + featureValueDiffs.push_back(s19); + 
featureValueDiffs.push_back(s20); + featureValueDiffs.push_back(s21); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + } + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + } + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << endl; + } +} + +BOOST_FIXTURE_TEST_CASE(test_hildreth_4, MockProducers) +{ + // Feasible example with 8 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 8; + + // feature value differences (to oracle) + // NOTE: these feature values are only approximations + ScoreComponentCollection s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr2[] = { 0, 0, 0, 0, 0 }; + float arr3[] = { 0, 0, -2.08436, 1.38629, 0 }; + float arr4[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr5[] = { 0, 0, 4.4283, 0, 0 }; +// float arr6[] = { 0, 0, 3.84829, 1.38629, 0 }; +// float arr7[] = { 0, 0, 6.83689, 0, 0 }; + + float arr8[] = { 0, 0, 0, 0, 0 }; + float arr9[] = { 0, 0, -2.0672, 0, 0 }; +// float arr10[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr11[] = { 0, 0, -2.08436, 1.38629, 0 }; +// float arr12[] = { 0, 0, 4.4283, 0, 0 }; +// float arr13[] = { 3, 0, 2.41089, 0, 0 }; +// float arr14[] = { 3, 0, 2.32709, 0, 0 }; + + float arr15[] = { 0, 0, -2.0672, 0, 0 }; + float arr16[] = { 0, 0, -2.08436, 1.38629, 0 }; +// float arr17[] = { 0, 0, 4.4283, 0, 0 }; +// float arr18[] = { 0, 0, 3.84829, 1.38629, 0 }; +// float arr19[] = { 0, 0, -0.0171661, 1.38629, 0 }; +// float arr20[] = { 0, 0, 0, 0, 0 }; +// float arr21[] = { 0, 0, 6.83689, 0, 0 }; + + vector losses; + losses.push_back(2.73485); + losses.push_back(0); + losses.push_back(3.64118); + losses.push_back(1.47347); +// losses.push_back(3.64118); +// losses.push_back(4.16278); +// losses.push_back(3.13952); + losses.push_back(0); + losses.push_back(2.73485); +// losses.push_back(1.47347); +// losses.push_back(3.64118); +// losses.push_back(3.64118); +// losses.push_back(2.51662); +// losses.push_back(2.73485); + losses.push_back(2.73485); + losses.push_back(3.64118); +// losses.push_back(3.64118); +// losses.push_back(4.16278); +// losses.push_back(1.47347); +// losses.push_back(0); +// losses.push_back(3.13952); + + 
vector vec1(arr1,arr1+5); + vector vec2(arr2,arr2+5); + vector vec3(arr3,arr3+5); + vector vec4(arr4,arr4+5); +// vector vec5(arr5,arr5+5); +// vector vec6(arr6,arr6+5); +// vector vec7(arr7,arr7+5); + vector vec8(arr8,arr8+5); + vector vec9(arr9,arr9+5); +// vector vec10(arr10,arr10+5); +// vector vec11(arr11,arr11+5); +// vector vec12(arr12,arr12+5); +// vector vec13(arr13,arr13+5); +// vector vec14(arr14,arr14+5); + vector vec15(arr15,arr15+5); + vector vec16(arr16,arr16+5); +// vector vec17(arr17,arr17+5); +// vector vec18(arr18,arr18+5); +// vector vec19(arr19,arr19+5); +// vector vec20(arr20,arr20+5); +// vector vec21(arr21,arr21+5); + + s1.PlusEquals(&multi,vec1); + s2.PlusEquals(&multi,vec2); + s3.PlusEquals(&multi,vec3); + s4.PlusEquals(&multi,vec4); +// s5.PlusEquals(&multi,vec5); +// s6.PlusEquals(&multi,vec6); +// s7.PlusEquals(&multi,vec7); + s8.PlusEquals(&multi,vec8); + s9.PlusEquals(&multi,vec9); +// s10.PlusEquals(&multi,vec10); +// s11.PlusEquals(&multi,vec11); +// s12.PlusEquals(&multi,vec12); +// s13.PlusEquals(&multi,vec13); +// s14.PlusEquals(&multi,vec14); + s15.PlusEquals(&multi,vec15); + s16.PlusEquals(&multi,vec16); +// s17.PlusEquals(&multi,vec17); +// s18.PlusEquals(&multi,vec18); +// s19.PlusEquals(&multi,vec19); +// s20.PlusEquals(&multi,vec20); +// s21.PlusEquals(&multi,vec21); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s2); + featureValueDiffs.push_back(s3); + featureValueDiffs.push_back(s4); +// featureValueDiffs.push_back(s5); +// featureValueDiffs.push_back(s6); +// featureValueDiffs.push_back(s7); + featureValueDiffs.push_back(s8); + featureValueDiffs.push_back(s9); +// featureValueDiffs.push_back(s10); +// featureValueDiffs.push_back(s11); +// featureValueDiffs.push_back(s12); +// featureValueDiffs.push_back(s13); +// featureValueDiffs.push_back(s14); + featureValueDiffs.push_back(s15); + featureValueDiffs.push_back(s16); +// featureValueDiffs.push_back(s17); +// featureValueDiffs.push_back(s18); +// featureValueDiffs.push_back(s19); +// featureValueDiffs.push_back(s20); +// featureValueDiffs.push_back(s21); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + } + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + } + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << endl; + } +} + +BOOST_FIXTURE_TEST_CASE(test_hildreth_5, MockProducers) +{ + // Unfeasible example with 2 constraints + cerr << "\n>>>>>Hildreth test, without slack and with 0.01 slack" << endl << endl; + vector< ScoreComponentCollection> featureValueDiffs; + vector< float> lossMinusModelScoreDiff; + + // initial weights + float w[] = { 1, 1, 0.638672, 1, 0 }; + vector vec(w,w+5); + ScoreComponentCollection weights; + weights.PlusEquals(&multi, vec); + + int numberOfConstraints = 2; + + // feature value differences (to oracle) + // NOTE: these feature values are only 
approximations + ScoreComponentCollection s1, s17; + float arr1[] = { 0, 0, -2.0672, 0, 0 }; + float arr17[] = { 0, 0, 4.4283, 0, 0 }; + vector losses; + losses.push_back(2.73485); + losses.push_back(3.64118); + + vector vec1(arr1,arr1+5); + vector vec17(arr17,arr17+5); + + s1.PlusEquals(&multi,vec1); + s17.PlusEquals(&multi,vec17); + + featureValueDiffs.push_back(s1); + featureValueDiffs.push_back(s17); + + vector oldModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + oldModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weights)); + } + + float sumOfOldError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "old model score diff: " << oldModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (oldModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + sumOfOldError += (losses[i] - oldModelScoreDiff[i]); + } + cerr << "sum of old error: " << sumOfOldError << endl; + + for (int i = 0; i < numberOfConstraints; ++i) { + lossMinusModelScoreDiff.push_back(losses[i] - oldModelScoreDiff[i]); + } + + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "A: " << featureValueDiffs[i] << ", b: " << lossMinusModelScoreDiff[i] << endl; + } + + vector< float> alphas1 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff); + vector< float> alphas2 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.01); + vector< float> alphas3 = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiff, 0.1); + + cerr << "\nalphas without slack:" << endl; + for (size_t i = 0; i < alphas1.size(); ++i) { + cerr << "alpha " << i << ": " << alphas1[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs1(featureValueDiffs); + FVector totalUpdate1 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs1.size(); ++k) { + featureValueDiffs1[k].MultiplyEquals(alphas1[k]); + cerr << k << ": " << featureValueDiffs1[k].GetScoresVector() << endl; + FVector update = featureValueDiffs1[k].GetScoresVector(); + totalUpdate1 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate1 << endl << endl; + + ScoreComponentCollection weightsUpdate1(weights); + weightsUpdate1.PlusEquals(totalUpdate1); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate1 << endl << endl; + + vector newModelScoreDiff; + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate1)); + } + + float sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; + + cerr << "\n\nalphas with slack 0.01:" << endl; + for (size_t i = 0; i < alphas2.size(); ++i) { + cerr << "alpha " << i << ": " << alphas2[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs2(featureValueDiffs); + FVector totalUpdate2 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs2.size(); ++k) { + featureValueDiffs2[k].MultiplyEquals(alphas2[k]); + cerr << k << ": " << featureValueDiffs2[k].GetScoresVector() << endl; + FVector update = featureValueDiffs2[k].GetScoresVector(); + totalUpdate2 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate2 << endl << endl; + + ScoreComponentCollection weightsUpdate2(weights); + weightsUpdate2.PlusEquals(totalUpdate2); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate2 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate2)); + } + + sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; + + cerr << "\n\nalphas with slack 0.1:" << endl; + for (size_t i = 0; i < alphas3.size(); ++i) { + cerr << "alpha " << i << ": " << alphas3[i] << endl; + } + cerr << endl; + + cerr << "partial updates:" << endl; + vector< ScoreComponentCollection> featureValueDiffs3(featureValueDiffs); + FVector totalUpdate3 = ScoreComponentCollection::CreateFVector(); + for (size_t k = 0; k < featureValueDiffs3.size(); ++k) { + featureValueDiffs3[k].MultiplyEquals(alphas3[k]); + cerr << k << ": " << featureValueDiffs3[k].GetScoresVector() << endl; + FVector update = featureValueDiffs3[k].GetScoresVector(); + totalUpdate3 += update; + } + cerr << endl; + cerr << "total update: " << totalUpdate3 << endl << endl; + + ScoreComponentCollection weightsUpdate3(weights); + weightsUpdate3.PlusEquals(totalUpdate3); + cerr << "old weights: " << weights << endl; + cerr << "new weights: " << weightsUpdate3 << endl << endl; + + newModelScoreDiff.clear(); + for (int i = 0; i < numberOfConstraints; ++i) { + newModelScoreDiff.push_back(featureValueDiffs[i].InnerProduct(weightsUpdate3)); + } + + sumOfNewError = 0; + for (int i = 0; i < numberOfConstraints; ++i) { + cerr << "new model score diff: " << newModelScoreDiff[i] << ", loss: " << losses[i] << "\t" << (newModelScoreDiff[i] >= losses[i] ? 
1 : 0) << endl; + sumOfNewError += (losses[i] - newModelScoreDiff[i]); + } + cerr << "sum of new error: " << sumOfNewError << endl; +} + +BOOST_AUTO_TEST_SUITE_END() + +} + diff --git a/contrib/mira/HypothesisQueue.cpp b/contrib/mira/HypothesisQueue.cpp new file mode 100644 index 000000000..8c8daa4da --- /dev/null +++ b/contrib/mira/HypothesisQueue.cpp @@ -0,0 +1,66 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include +#include "HypothesisQueue.h" + +using namespace std; + +namespace Moses +{ + +HypothesisQueue::~HypothesisQueue() +{ + m_queue.clear(); +} + +void HypothesisQueue::Push(BleuIndexPair hypo) +{ + //pair::iterator,bool> ret; + + if (m_capacity == 0 || m_queue.size() < m_capacity) { + m_queue.insert(hypo); + } else if (hypo.first > (*(m_queue.rbegin())).first) { + // Remove the worst-scoring item from the queue and insert hypo (only erase item if new item was successfully added ) + /*ret = m_queue.insert(hypo); + if ((*(ret.first)).second == 1) { + HypoQueueType::iterator p = m_queue.end(); + --p; + m_queue.erase(p); + }*/ + // with multisets we do not have to check whether the item was successfully added + m_queue.insert(hypo); + HypoQueueType::iterator p = m_queue.end(); + --p; + m_queue.erase(p); + } else { + // The hypo is unusable: the queue is full and hypo has a worse (or + // equal) score than the worst-scoring item already held. + } +} + +BleuIndexPair HypothesisQueue::Pop() +{ + HypoQueueType::iterator p = m_queue.begin(); + BleuIndexPair top = *p; + m_queue.erase(p); + return top; +} + +} // namespace Moses diff --git a/contrib/mira/HypothesisQueue.h b/contrib/mira/HypothesisQueue.h new file mode 100644 index 000000000..63cabbd0f --- /dev/null +++ b/contrib/mira/HypothesisQueue.h @@ -0,0 +1,69 @@ +/*********************************************************************** + Moses - statistical machine translation system + Copyright (C) 2006-2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#pragma once
+
+#include <set>
+
+namespace Moses
+{
+
+// pair of Bleu score and index
+typedef std::pair<float, size_t> BleuIndexPair;
+
+// A bounded priority queue of BleuIndexPairs. The top item is
+// the best scoring hypothesis. The queue assumes ownership of pushed items and
+// relinquishes ownership when they are popped. Any remaining items at the
+// time of the queue's destruction are deleted.
+class HypothesisQueue
+{
+
+public:
+  // Create empty queue with fixed capacity of c. Capacity 0 means unbounded.
+  HypothesisQueue(size_t c) : m_capacity(c) {}
+  ~HypothesisQueue();
+
+  bool Empty() {
+    return m_queue.empty();
+  }
+
+  // Add the hypo to the queue or delete it if the queue is full and the
+  // score is no better than the queue's worst score.
+  void Push(BleuIndexPair hypo);
+
+  // Remove the best-scoring hypothesis from the queue and return it. The
+  // caller is responsible for deleting the object.
+  BleuIndexPair Pop();
+
+private:
+  struct HypothesisOrderer {
+    bool operator()(BleuIndexPair a,
+                    BleuIndexPair b) {
+      return (a.first > b.first);
+    }
+  };
+
+  typedef std::multiset<BleuIndexPair, HypothesisOrderer> HypoQueueType;
+  //typedef std::set<BleuIndexPair, HypothesisOrderer> HypoQueueType;
+
+  HypoQueueType m_queue;
+  const size_t m_capacity;
+};
+
+} // namespace Moses
diff --git a/contrib/mira/Jamfile b/contrib/mira/Jamfile
new file mode 100644
index 000000000..e43a993b5
--- /dev/null
+++ b/contrib/mira/Jamfile
@@ -0,0 +1,15 @@
+lib mira_lib :
+[ glob *.cpp : *Test.cpp Main.cpp ]
+../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ;
+
+exe mira : Main.cpp mira_lib ../mert//mert_lib ../moses//moses ../OnDiskPt//OnDiskPt ..//boost_program_options ..//boost_filesystem ;
+
+alias programs : mira ;
+
+import testing ;
+
+unit-test mira_test : [ glob *Test.cpp ] mira_lib ..//boost_unit_test_framework ;
+
+explicit mira_test ;
+
+
diff --git a/contrib/mira/Main.cpp b/contrib/mira/Main.cpp
new file mode 100644
index 000000000..abf92b598
--- /dev/null
+++ b/contrib/mira/Main.cpp
@@ -0,0 +1,1847 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2010 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef MPI_ENABLE +#include +namespace mpi = boost::mpi; +#endif + +#include "Main.h" +#include "Optimiser.h" +#include "Hildreth.h" +#include "HypothesisQueue.h" +#include "moses/StaticData.h" +#include "moses/ScoreComponentCollection.h" +#include "moses/ThreadPool.h" +#include "mert/BleuScorer.h" +#include "moses/FeatureVector.h" + +#include "moses/FF/WordTranslationFeature.h" +#include "moses/FF/PhrasePairFeature.h" +#include "moses/FF/WordPenaltyProducer.h" +#include "moses/LM/Base.h" + +using namespace Mira; +using namespace std; +using namespace Moses; +namespace po = boost::program_options; + +int main(int argc, char** argv) +{ + size_t rank = 0; + size_t size = 1; +#ifdef MPI_ENABLE + mpi::environment env(argc,argv); + mpi::communicator world; + rank = world.rank(); + size = world.size(); +#endif + + bool help; + int verbosity; + string mosesConfigFile; + string inputFile; + vector referenceFiles; + vector mosesConfigFilesFolds, inputFilesFolds, referenceFilesFolds; + // string coreWeightFile, startWeightFile; + size_t epochs; + string learner; + bool shuffle; + size_t mixingFrequency; + size_t weightDumpFrequency; + string weightDumpStem; + bool scale_margin; + bool scale_update; + size_t n; + size_t batchSize; + bool distinctNbest; + bool accumulateWeights; + float historySmoothing; + bool scaleByInputLength, scaleByAvgInputLength; + bool scaleByInverseLength, scaleByAvgInverseLength; + float scaleByX; + float slack; + bool averageWeights; + bool weightConvergence; + float learning_rate; + float mira_learning_rate; + float perceptron_learning_rate; + string decoder_settings; + float min_weight_change; + bool normaliseWeights, normaliseMargin; + bool print_feature_values; + bool historyBleu ; + bool sentenceBleu; + bool perceptron_update; + bool hope_fear; + bool model_hope_fear; + size_t hope_n, fear_n; + size_t bleu_smoothing_scheme; + float min_oracle_bleu; + float minBleuRatio, maxBleuRatio; + bool boost; + bool decode_hope, decode_fear, decode_model; + string decode_filename; + bool batchEqualsShard; + bool sparseAverage, dumpMixedWeights, sparseNoAverage; + int featureCutoff; + bool pruneZeroWeights; + bool printFeatureCounts, printNbestWithFeatures; + bool avgRefLength; + bool print_weights, print_core_weights, debug_model, scale_lm, scale_wp; + float scale_lm_factor, scale_wp_factor; + bool kbest; + string moses_src; + float sigmoidParam; + float bleuWeight, bleuWeight_hope, bleuWeight_fear; + bool bleu_weight_lm; + float bleu_weight_lm_factor; + bool l1_regularize, l2_regularize, l1_reg_sparse, l2_reg_sparse; + float l1_lambda, l2_lambda; + bool most_violated, most_violated_reg, all_violated, max_bleu_diff; + bool feature_confidence, signed_counts; + float decay_core, decay_sparse, core_r0, sparse_r0; + float bleu_weight_fear_factor; + bool hildreth; + float add2lm; + + // compute real sentence Bleu scores on complete translations, disable Bleu feature + bool realBleu, disableBleuFeature; + bool rescaleSlack; + bool makePairs; + bool debug; + bool reg_on_every_mix; + size_t continue_epoch; + bool modelPlusBleu, simpleHistoryBleu; + po::options_description desc("Allowed options"); + 
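In the option table that follows, the template arguments to po::value have been stripped by whatever rendered this patch (e.g. po::value(&epochs) was presumably po::value<size_t>(&epochs), matching the declaration of epochs above). The following is a small standalone sketch of the same boost::program_options pattern with the types made explicit for a few representative options; the names and defaults are copied from the surrounding code, but the exact original specializations are inferred from the variable declarations and should be read as assumptions.

#include <cstddef>
#include <iostream>
#include <string>
#include <boost/program_options.hpp>

namespace po = boost::program_options;

int main(int argc, char **argv) {
  bool help;
  std::size_t epochs, batchSize;
  float slack;
  std::string learner, mosesConfigFile;

  po::options_description desc("Allowed options");
  // Each entry: (long-name[,short-name], typed value bound to a variable, help text)
  desc.add_options()
  ("help", po::value<bool>(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
  ("config,f", po::value<std::string>(&mosesConfigFile), "Moses ini-file")
  ("epochs,e", po::value<std::size_t>(&epochs)->default_value(10), "Number of epochs")
  ("batch-size,b", po::value<std::size_t>(&batchSize)->default_value(1), "Size of batch sent to the optimiser")
  ("learner,l", po::value<std::string>(&learner)->default_value("mira"), "Learning algorithm")
  ("slack", po::value<float>(&slack)->default_value(0.05f), "Use slack in optimiser");

  po::variables_map vm;
  po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
  po::notify(vm);   // copies parsed values into the bound variables

  if (help) {
    std::cout << desc << std::endl;
    return 0;
  }
  std::cout << "learner=" << learner << " epochs=" << epochs << std::endl;
  return 0;
}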
desc.add_options() + ("continue-epoch", po::value(&continue_epoch)->default_value(0), "Continue an interrupted experiment from this epoch on") + ("freq-reg", po::value(®_on_every_mix)->default_value(false), "Regularize after every weight mixing") + ("l1sparse", po::value(&l1_reg_sparse)->default_value(true), "L1-regularization for sparse weights only") + ("l2sparse", po::value(&l2_reg_sparse)->default_value(true), "L2-regularization for sparse weights only") + ("mv-reg", po::value(&most_violated_reg)->default_value(false), "Regularize most violated constraint") + ("most-violated", po::value(&most_violated)->default_value(false), "Add most violated constraint") + ("all-violated", po::value(&all_violated)->default_value(false), "Add all violated constraints") + ("feature-confidence", po::value(&feature_confidence)->default_value(false), "Confidence-weighted learning") + ("signed-counts", po::value(&signed_counts)->default_value(false), "Use signed feature counts for CWL") + ("dbg", po::value(&debug)->default_value(true), "More debug output") + ("make-pairs", po::value(&makePairs)->default_value(true), "Make pairs of hypotheses for 1slack") + ("debug", po::value(&debug)->default_value(true), "More debug output") + ("rescale-slack", po::value(&rescaleSlack)->default_value(false), "Rescale slack in 1-slack formulation") + ("add2lm", po::value(&add2lm)->default_value(0.0), "Add the specified amount to all LM weights") + ("hildreth", po::value(&hildreth)->default_value(false), "Prefer Hildreth over analytical update") + ("model-plus-bleu", po::value(&modelPlusBleu)->default_value(false), "Use the sum of model score and +/- bleu to select hope and fear translations") + ("simple-history-bleu", po::value(&simpleHistoryBleu)->default_value(false), "Simple history Bleu") + + ("bleu-weight", po::value(&bleuWeight)->default_value(1.0), "Bleu weight used in decoder objective") + ("bw-hope", po::value(&bleuWeight_hope)->default_value(-1.0), "Bleu weight used in decoder objective for hope") + ("bw-fear", po::value(&bleuWeight_fear)->default_value(-1.0), "Bleu weight used in decoder objective for fear") + + ("core-r0", po::value(&core_r0)->default_value(1.0), "Start learning rate for core features") + ("sparse-r0", po::value(&sparse_r0)->default_value(1.0), "Start learning rate for sparse features") + ("decay-core", po::value(&decay_core)->default_value(0.01), "Decay for core feature learning rate") + ("decay-sparse", po::value(&decay_sparse)->default_value(0.01), "Decay for sparse feature learning rate") + + ("tie-bw-to-lm", po::value(&bleu_weight_lm)->default_value(true), "Make bleu weight depend on lm weight") + ("bw-lm-factor", po::value(&bleu_weight_lm_factor)->default_value(2.0), "Make bleu weight depend on lm weight by this factor") + ("bw-factor-fear", po::value(&bleu_weight_fear_factor)->default_value(1.0), "Multiply fear weight by this factor") + ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs") + ("average-weights", po::value(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update") + ("avg-ref-length", po::value(&avgRefLength)->default_value(false), "Use average reference length instead of shortest for BLEU score feature") + ("batch-equals-shard", po::value(&batchEqualsShard)->default_value(false), "Batch size is equal to shard size (purely batch)") + ("batch-size,b", po::value(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments") + 
("bleu-smoothing-scheme", po::value(&bleu_smoothing_scheme)->default_value(1), "Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1)") + ("boost", po::value(&boost)->default_value(false), "Apply boosting factor to updates on misranked candidates") + ("config,f", po::value(&mosesConfigFile), "Moses ini-file") + ("configs-folds", po::value >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold") + ("debug-model", po::value(&debug_model)->default_value(false), "Get best model translation for debugging purposes") + ("decode-hope", po::value(&decode_hope)->default_value(false), "Decode dev input set according to hope objective") + ("decode-fear", po::value(&decode_fear)->default_value(false), "Decode dev input set according to fear objective") + ("decode-model", po::value(&decode_model)->default_value(false), "Decode dev input set according to normal objective") + ("decode-filename", po::value(&decode_filename), "Filename for Bleu objective translations") + ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs") + ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step") + ("dump-mixed-weights", po::value(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights") + ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs") + ("feature-cutoff", po::value(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features") + ("fear-n", po::value(&fear_n)->default_value(1), "Number of fear translations used") + ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") + ("history-bleu", po::value(&historyBleu)->default_value(false), "Use 1best translations to update the history") + ("history-smoothing", po::value(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing") + ("hope-fear", po::value(&hope_fear)->default_value(true), "Use only hope and fear translations for optimisation (not model)") + ("hope-n", po::value(&hope_n)->default_value(2), "Number of hope translations used") + ("input-file,i", po::value(&inputFile), "Input file containing tokenised source") + ("input-files-folds", po::value >(&inputFilesFolds), "Input files containing tokenised source, one for each fold") + ("learner,l", po::value(&learner)->default_value("mira"), "Learning algorithm") + ("l1-lambda", po::value(&l1_lambda)->default_value(0.0001), "Lambda for l1-regularization (w_i +/- lambda)") + ("l2-lambda", po::value(&l2_lambda)->default_value(0.01), "Lambda for l2-regularization (w_i * (1 - lambda))") + ("l1-reg", po::value(&l1_regularize)->default_value(false), "L1-regularization") + ("l2-reg", po::value(&l2_regularize)->default_value(false), "L2-regularization") + ("min-bleu-ratio", po::value(&minBleuRatio)->default_value(-1), "Set a minimum BLEU ratio between hope and fear") + ("max-bleu-ratio", po::value(&maxBleuRatio)->default_value(-1), "Set a maximum BLEU ratio between hope and fear") + ("max-bleu-diff", po::value(&max_bleu_diff)->default_value(true), "Select hope/fear with maximum Bleu difference") + ("min-oracle-bleu", po::value(&min_oracle_bleu)->default_value(0), "Set a minimum oracle BLEU score") + ("min-weight-change", po::value(&min_weight_change)->default_value(0.0001), "Set minimum weight change for stopping criterion") + ("mira-learning-rate", po::value(&mira_learning_rate)->default_value(1), 
"Learning rate for MIRA (fixed or flexible)") + ("mixing-frequency", po::value(&mixingFrequency)->default_value(10), "How often per epoch to mix weights, when using mpi") + ("model-hope-fear", po::value(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation") + ("moses-src", po::value(&moses_src)->default_value(""), "Moses source directory") + ("nbest,n", po::value(&n)->default_value(30), "Number of translations in n-best list") + ("normalise-weights", po::value(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder") + ("normalise-margin", po::value(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1") + ("perceptron-learning-rate", po::value(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate") + ("print-feature-values", po::value(&print_feature_values)->default_value(false), "Print out feature values") + ("print-feature-counts", po::value(&printFeatureCounts)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-nbest-with-features", po::value(&printNbestWithFeatures)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-weights", po::value(&print_weights)->default_value(false), "Print out current weights") + ("print-core-weights", po::value(&print_core_weights)->default_value(true), "Print out current core weights") + ("prune-zero-weights", po::value(&pruneZeroWeights)->default_value(false), "Prune zero-valued sparse feature weights") + ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training") + ("reference-files-folds", po::value >(&referenceFilesFolds), "Reference translation files for training, one for each fold") + ("kbest", po::value(&kbest)->default_value(true), "Select hope/fear pairs from a list of nbest translations") + + ("scale-by-inverse-length", po::value(&scaleByInverseLength)->default_value(false), "Scale BLEU by (history of) inverse input length") + ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale BLEU by (history of) input length") + ("scale-by-avg-input-length", po::value(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by average input length") + ("scale-by-avg-inverse-length", po::value(&scaleByAvgInverseLength)->default_value(false), "Scale BLEU by average inverse input length") + ("scale-by-x", po::value(&scaleByX)->default_value(0.1), "Scale the BLEU score by value x") + ("scale-lm", po::value(&scale_lm)->default_value(true), "Scale the language model feature") + ("scale-factor-lm", po::value(&scale_lm_factor)->default_value(0.5), "Scale the language model feature by this factor") + ("scale-wp", po::value(&scale_wp)->default_value(false), "Scale the word penalty feature") + ("scale-factor-wp", po::value(&scale_wp_factor)->default_value(2), "Scale the word penalty feature by this factor") + ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation") + ("sentence-level-bleu", po::value(&sentenceBleu)->default_value(true), "Use a sentences level Bleu scoring function") + ("shuffle", po::value(&shuffle)->default_value(false), "Shuffle input sentences before processing") + ("sigmoid-param", po::value(&sigmoidParam)->default_value(1), "y=sigmoidParam is the axis that this sigmoid approaches") + ("slack", po::value(&slack)->default_value(0.05), 
"Use slack in optimiser") + ("sparse-average", po::value(&sparseAverage)->default_value(false), "Average weights by the number of processes") + ("sparse-no-average", po::value(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum") + ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge") + ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") + ("weight-dump-frequency", po::value(&weightDumpFrequency)->default_value(2), "How often per epoch to dump weights (mpi)") + ("weight-dump-stem", po::value(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights"); + + po::options_description cmdline_options; + cmdline_options.add(desc); + po::variables_map vm; + po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm); + po::notify(vm); + + if (help) { + std::cout << "Usage: " + string(argv[0]) + + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl; + std::cout << desc << std::endl; + return 0; + } + + const StaticData &staticData = StaticData::Instance(); + + bool trainWithMultipleFolds = false; + if (mosesConfigFilesFolds.size() > 0 || inputFilesFolds.size() > 0 || referenceFilesFolds.size() > 0) { + if (rank == 0) + cerr << "Training with " << mosesConfigFilesFolds.size() << " folds" << endl; + trainWithMultipleFolds = true; + } + + if (dumpMixedWeights && (mixingFrequency != weightDumpFrequency)) { + cerr << "Set mixing frequency = weight dump frequency for dumping mixed weights!" << endl; + exit(1); + } + + if ((sparseAverage || sparseNoAverage) && averageWeights) { + cerr << "Parameters --sparse-average 1/--sparse-no-average 1 and --average-weights 1 are incompatible (not implemented)" << endl; + exit(1); + } + + if (trainWithMultipleFolds) { + if (!mosesConfigFilesFolds.size()) { + cerr << "Error: No moses ini files specified for training with folds" << endl; + exit(1); + } + + if (!inputFilesFolds.size()) { + cerr << "Error: No input files specified for training with folds" << endl; + exit(1); + } + + if (!referenceFilesFolds.size()) { + cerr << "Error: No reference files specified for training with folds" << endl; + exit(1); + } + } else { + if (mosesConfigFile.empty()) { + cerr << "Error: No moses ini file specified" << endl; + return 1; + } + + if (inputFile.empty()) { + cerr << "Error: No input file specified" << endl; + return 1; + } + + if (!referenceFiles.size()) { + cerr << "Error: No reference files specified" << endl; + return 1; + } + } + + // load input and references + vector inputSentences; + size_t inputSize = trainWithMultipleFolds? inputFilesFolds.size(): 0; + size_t refSize = trainWithMultipleFolds? 
referenceFilesFolds.size(): referenceFiles.size(); + vector > inputSentencesFolds(inputSize); + vector > referenceSentences(refSize); + + // number of cores for each fold + size_t coresPerFold = 0, myFold = 0; + if (trainWithMultipleFolds) { + if (mosesConfigFilesFolds.size() > size) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + coresPerFold = size/mosesConfigFilesFolds.size(); + if (size % coresPerFold > 0) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + + if (rank == 0) + cerr << "Number of cores per fold: " << coresPerFold << endl; + myFold = rank/coresPerFold; + cerr << "Rank " << rank << ", my fold: " << myFold << endl; + } + + // NOTE: we do not actually need the references here, because we are reading them in from StaticData + if (trainWithMultipleFolds) { + if (!loadSentences(inputFilesFolds[myFold], inputSentencesFolds[myFold])) { + cerr << "Error: Failed to load input sentences from " << inputFilesFolds[myFold] << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading inputs from " << inputFilesFolds[myFold] << endl); + + if (!loadSentences(referenceFilesFolds[myFold], referenceSentences[myFold])) { + cerr << "Error: Failed to load reference sentences from " << referenceFilesFolds[myFold] << endl; + exit(1); + } + if (referenceSentences[myFold].size() != inputSentencesFolds[myFold].size()) { + cerr << "Error: Input file length (" << inputSentencesFolds[myFold].size() << ") != (" + << referenceSentences[myFold].size() << ") reference file length (rank " << rank << ")" << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading references from " << referenceFilesFolds[myFold] << endl); + } else { + if (!loadSentences(inputFile, inputSentences)) { + cerr << "Error: Failed to load input sentences from " << inputFile << endl; + return 1; + } + + for (size_t i = 0; i < referenceFiles.size(); ++i) { + if (!loadSentences(referenceFiles[i], referenceSentences[i])) { + cerr << "Error: Failed to load reference sentences from " + << referenceFiles[i] << endl; + return 1; + } + if (referenceSentences[i].size() != inputSentences.size()) { + cerr << "Error: Input file length (" << inputSentences.size() << ") != (" + << referenceSentences[i].size() << ") length of reference file " << i + << endl; + return 1; + } + } + } + + if (scaleByAvgInputLength || scaleByInverseLength || scaleByAvgInverseLength) + scaleByInputLength = false; + + if (historyBleu || simpleHistoryBleu) { + sentenceBleu = false; + cerr << "Using history Bleu. " << endl; + } + + if (kbest) { + realBleu = true; + disableBleuFeature = true; + cerr << "Use kbest lists and real Bleu scores, disable Bleu feature.." << endl; + } + + // initialise Moses + // add references to initialize Bleu feature + boost::trim(decoder_settings); + decoder_settings += " -mira -n-best-list - " + boost::lexical_cast(n) + " distinct"; + + vector decoder_params; + boost::split(decoder_params, decoder_settings, boost::is_any_of("\t ")); + + // bleu feature + decoder_params.push_back("-feature-add"); + + decoder_settings = "BleuScoreFeature tuneable=false references="; + if (trainWithMultipleFolds) { + decoder_settings += referenceFilesFolds[myFold]; + } else { + decoder_settings += referenceFiles[0]; + for (size_t i=1; i < referenceFiles.size(); ++i) { + decoder_settings += ","; + decoder_settings += referenceFiles[i]; + } + } + decoder_params.push_back(decoder_settings); + + string configFile = trainWithMultipleFolds? 
mosesConfigFilesFolds[myFold] : mosesConfigFile; + VERBOSE(1, "Rank " << rank << " reading config file from " << configFile << endl); + MosesDecoder* decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params); + decoder->setBleuParameters(disableBleuFeature, sentenceBleu, scaleByInputLength, scaleByAvgInputLength, + scaleByInverseLength, scaleByAvgInverseLength, + scaleByX, historySmoothing, bleu_smoothing_scheme, simpleHistoryBleu); + bool chartDecoding = staticData.IsChart(); + + // Optionally shuffle the sentences + vector order; + if (trainWithMultipleFolds) { + for (size_t i = 0; i < inputSentencesFolds[myFold].size(); ++i) { + order.push_back(i); + } + } else { + if (rank == 0) { + for (size_t i = 0; i < inputSentences.size(); ++i) { + order.push_back(i); + } + } + } + + // initialise optimizer + Optimiser* optimiser = NULL; + if (learner == "mira") { + if (rank == 0) { + cerr << "Optimising using Mira" << endl; + cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl; + if (normaliseMargin) + cerr << "sigmoid parameter: " << sigmoidParam << endl; + } + optimiser = new MiraOptimiser(slack, scale_margin, scale_update, boost, normaliseMargin, sigmoidParam); + learning_rate = mira_learning_rate; + perceptron_update = false; + } else if (learner == "perceptron") { + if (rank == 0) { + cerr << "Optimising using Perceptron" << endl; + } + optimiser = new Perceptron(); + learning_rate = perceptron_learning_rate; + perceptron_update = true; + model_hope_fear = false; // mira only + hope_fear = false; // mira only + n = 1; + hope_n = 1; + fear_n = 1; + } else { + cerr << "Error: Unknown optimiser: " << learner << endl; + return 1; + } + + // resolve parameter dependencies + if (batchSize > 1 && perceptron_update) { + batchSize = 1; + cerr << "Info: Setting batch size to 1 for perceptron update" << endl; + } + + if (hope_n == 0) + hope_n = n; + if (fear_n == 0) + fear_n = n; + + if (model_hope_fear || kbest) + hope_fear = false; // is true by default + if (learner == "mira" && !(hope_fear || model_hope_fear || kbest)) { + cerr << "Error: Need to select one of parameters --hope-fear/--model-hope-fear/--kbest for mira update." 
<< endl; + return 1; + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // Create shards according to the number of processes used + vector shard; + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size() / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << " Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + + // get reference to feature functions + // const vector &featureFunctions = FeatureFunction::GetFeatureFunctions(); + ScoreComponentCollection initialWeights = decoder->getWeights(); + + if (add2lm != 0) { + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = initialWeights.GetScoreForProducer(lm) + add2lm; + initialWeights.Assign(lm, lmWeight); + cerr << "Rank " << rank << ", add " << add2lm << " to lm weight." 
<< endl; + } + } + } + + if (normaliseWeights) { + initialWeights.L1Normalise(); + cerr << "Rank " << rank << ", normalised initial weights: " << initialWeights << endl; + } + + decoder->setWeights(initialWeights); + + // set bleu weight to twice the size of the language model weight(s) + if (bleu_weight_lm) { + float lmSum = 0; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + lmSum += abs(initialWeights.GetScoreForProducer(lm)); + } + } + + bleuWeight = lmSum * bleu_weight_lm_factor; + if (!kbest) cerr << "Set bleu weight to lm weight * " << bleu_weight_lm_factor << endl; + } + + // bleu weights can be set separately for hope and fear; otherwise they are both set to 'lm weight * bleu_weight_lm_factor' + if (bleuWeight_hope == -1) { + bleuWeight_hope = bleuWeight; + } + if (bleuWeight_fear == -1) { + bleuWeight_fear = bleuWeight; + } + bleuWeight_fear *= bleu_weight_fear_factor; + if (!kbest) { + cerr << "Bleu weight: " << bleuWeight << endl; + cerr << "Bleu weight fear: " << bleuWeight_fear << endl; + } + + if (decode_hope || decode_fear || decode_model) { + size_t decode = 1; + if (decode_fear) decode = 2; + if (decode_model) decode = 3; + decodeHopeOrFear(rank, size, decode, decode_filename, inputSentences, decoder, n, bleuWeight); + } + + //Main loop: + ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average + ScoreComponentCollection cumulativeWeightsBinary; + size_t numberOfUpdates = 0; + size_t numberOfUpdatesThisEpoch = 0; + + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + float avgInputLength = 0; + float sumOfInputs = 0; + size_t numberOfInputs = 0; + + ScoreComponentCollection mixedWeights; + ScoreComponentCollection mixedWeightsPrevious; + ScoreComponentCollection mixedWeightsBeforePrevious; + ScoreComponentCollection mixedAverageWeights; + ScoreComponentCollection mixedAverageWeightsPrevious; + ScoreComponentCollection mixedAverageWeightsBeforePrevious; + + bool stop = false; +// int sumStillViolatedConstraints; + float epsilon = 0.0001; + + // Variables for feature confidence + ScoreComponentCollection confidenceCounts, mixedConfidenceCounts, featureLearningRates; + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); //initialise core learning rates + cerr << "Initial learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + + for (size_t epoch = continue_epoch; epoch < epochs && !stop; ++epoch) { + if (shuffle) { + if (trainWithMultipleFolds || rank == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", shuffling input sentences.." 
<< endl; + RandomIndex rindex; + random_shuffle(order.begin(), order.end(), rindex); + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // redo shards + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size()/size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + } + + // sum of violated constraints in an epoch + // sumStillViolatedConstraints = 0; + + numberOfUpdatesThisEpoch = 0; + // Sum up weights over one epoch, final average uses weights from last epoch + if (!accumulateWeights) { + cumulativeWeights.ZeroAll(); + cumulativeWeightsBinary.ZeroAll(); + } + + // number of weight dumps this epoch + size_t weightMixingThisEpoch = 0; + size_t weightEpochDump = 0; + + size_t shardPosition = 0; + vector::const_iterator sid = shard.begin(); + while (sid != shard.end()) { + // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues) + vector > featureValues; + vector > bleuScores; + vector > modelScores; + + // variables for hope-fear/perceptron setting + vector > featureValuesHope; + vector > featureValuesFear; + vector > bleuScoresHope; + vector > bleuScoresFear; + vector > modelScoresHope; + vector > modelScoresFear; + + // get moses weights + ScoreComponentCollection mosesWeights = decoder->getWeights(); + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl); + + if (historyBleu || simpleHistoryBleu) { + decoder->printBleuFeatureHistory(cerr); + } + + // BATCHING: produce nbest lists for all input sentences in batch + vector oracleBleuScores; + vector oracleModelScores; + vector > oneBests; + vector oracleFeatureValues; + vector inputLengths; + vector ref_ids; + size_t actualBatchSize = 0; + + size_t examples_in_batch = 0; + bool skip_example = false; + for (size_t batchPosition = 0; batchPosition < batchSize && sid + != shard.end(); ++batchPosition) { + string input; + if (trainWithMultipleFolds) + input = inputSentencesFolds[myFold][*sid]; + else + input = inputSentences[*sid]; + + Moses::Sentence *sentence = new Sentence(); + stringstream in(input + "\n"); + const vector inputFactorOrder = staticData.GetInputFactorOrder(); + sentence->Read(in,inputFactorOrder); + cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \""; + sentence->Print(cerr); + cerr << "\"" << " (batch pos " << batchPosition << ")" << endl; + size_t current_input_length = (*sentence).GetSize(); + + if (epoch == 
0 && (scaleByAvgInputLength || scaleByAvgInverseLength)) { + sumOfInputs += current_input_length; + ++numberOfInputs; + avgInputLength = sumOfInputs/numberOfInputs; + decoder->setAvgInputLength(avgInputLength); + cerr << "Rank " << rank << ", epoch 0, average input length: " << avgInputLength << endl; + } + + vector newFeatureValues; + vector newScores; + if (model_hope_fear) { + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + if (hope_fear || perceptron_update) { + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + if (historyBleu || simpleHistoryBleu || debug_model) { + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + } + if (kbest) { + // for decoding + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + + // for storing selected examples + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + } + + size_t ref_length; + float avg_ref_length; + + if (print_weights) + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: " << mosesWeights << endl; + if (print_core_weights) { + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: "; + mosesWeights.PrintCoreFeatures(); + cerr << endl; + } + + // check LM weight + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = mosesWeights.GetScoreForProducer(lm); + cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight: " << lmWeight << endl; + if (lmWeight <= 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: language model weight should never be <= 0." << endl; + mosesWeights.Assign(lm, 0.1); + cerr << "Rank " << rank << ", epoch " << epoch << ", assign lm weights of 0.1" << endl; + } + } + } + + // select inference scheme + cerr << "Rank " << rank << ", epoch " << epoch << ", real Bleu? 
" << realBleu << endl; + if (hope_fear || perceptron_update) { + // HOPE + cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << + "best hope translations" << endl; + vector< vector > outputHope = decoder->getNBest(input, *sid, hope_n, 1.0, bleuWeight_hope, + featureValuesHope[batchPosition], bleuScoresHope[batchPosition], modelScoresHope[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector oracle = outputHope[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + avg_ref_length = ref_length; + float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + // count sparse features occurring in hope translation + featureValuesHope[batchPosition][0].IncrementSparseHopeFeatures(); + + vector bestModel; + if (debug_model || historyBleu || simpleHistoryBleu) { + // MODEL (for updating the history only, using dummy vectors) + cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (debug or history)" << endl; + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + bestModel = outputModel[0]; + decoder->cleanup(chartDecoding); + cerr << endl; + ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + } + + // FEAR + //float fear_length_ratio = 0; + float bleuRatioHopeFear = 0; + //int fearSize = 0; + cerr << "Rank " << rank << ", epoch " << epoch << ", " << fear_n << "best fear translations" << endl; + vector< vector > outputFear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuWeight_fear, + featureValuesFear[batchPosition], bleuScoresFear[batchPosition], modelScoresFear[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector fear = outputFear[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + avg_ref_length += ref_length; + avg_ref_length /= 2; + //fear_length_ratio = (float)fear.size()/ref_length; + //fearSize = (int)fear.size(); + cerr << endl; + for (size_t i = 0; i < fear.size(); ++i) + delete fear[i]; + + // count sparse features occurring in fear translation + featureValuesFear[batchPosition][0].IncrementSparseFearFeatures(); + + // Bleu-related example selection + bool skip = false; + bleuRatioHopeFear = bleuScoresHope[batchPosition][0] / bleuScoresFear[batchPosition][0]; + if (minBleuRatio != -1 && bleuRatioHopeFear < minBleuRatio) + skip = true; + if(maxBleuRatio != -1 && bleuRatioHopeFear > maxBleuRatio) + skip = true; + + // sanity check + if (historyBleu || simpleHistoryBleu) { + if (bleuScores[batchPosition][0] > bleuScoresHope[batchPosition][0] && + modelScores[batchPosition][0] > modelScoresHope[batchPosition][0]) { + if (abs(bleuScores[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon && + abs(modelScores[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: MODEL translation better than HOPE translation." 
<< endl; + skip = true; + } + } + if (bleuScoresFear[batchPosition][0] > bleuScores[batchPosition][0] && + modelScoresFear[batchPosition][0] > modelScores[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScores[batchPosition][0]) > epsilon && + abs(modelScoresFear[batchPosition][0] - modelScores[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than MODEL translation." << endl; + skip = true; + } + } + } + if (bleuScoresFear[batchPosition][0] > bleuScoresHope[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon) { + // check if it's an error or a warning + skip = true; + if (modelScoresFear[batchPosition][0] > modelScoresHope[batchPosition][0] && abs(modelScoresFear[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <getNBest(input, *sid, n, 1.0, bleuWeight_hope, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + //vector oracle = outputHope[0]; + // needed for history + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + //float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]); + oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]); + oracleModelScores.push_back(modelScores[batchPosition][oraclePos]); + + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + // FEAR + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl; + decoder->getNBest(input, *sid, n, -1.0, bleuWeight_fear, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + //float fear_length_ratio = (float)fear.size()/ref_length; + + examples_in_batch++; + } + if (kbest) { + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, 
featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + examples_in_batch++; + + HypothesisQueue queueHope(hope_n); + HypothesisQueue queueFear(fear_n); + cerr << endl; + if (most_violated || all_violated) { + float bleuHope = -1000; + float bleuFear = 1000; + int indexHope = -1; + int indexFear = -1; + + vector bleuHopeList; + vector bleuFearList; + vector indexHopeList; + vector indexFearList; + + if (most_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with most violated constraint" << endl; + else if (all_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with violated constraints"; + else + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with hope"; + + // find best hope, then find fear that violates our constraint most + for (size_t i=0; i modelScores[batchPosition][indexHope]) { + if (abs(modelScores[batchPosition][i] - modelScores[batchPosition][indexHope]) > epsilon) { + // better model score + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + } else if (bleuScores[batchPosition][i] > bleuHope) { // better than current best + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + + float currentViolation = 0; + for (size_t i=0; i epsilon) && (modelDiff < bleuDiff)) { + float diff = bleuDiff - modelDiff; + if (diff > epsilon) { + if (all_violated) { + cerr << ".. 
adding pair"; + bleuHopeList.push_back(bleuHope); + bleuFearList.push_back(bleuScores[batchPosition][i]); + indexHopeList.push_back(indexHope); + indexFearList.push_back(i); + } else if (most_violated && diff > currentViolation) { + currentViolation = diff; + bleuFear = bleuScores[batchPosition][i]; + indexFear = i; + cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << currentViolation << " (" << modelDiff << " >= " << bleuDiff << ")" << endl; + } + } + } + } + + if (most_violated) { + if (currentViolation > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", adding pair with violation " << currentViolation << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << bleuHope << " (" << indexHope << "), fear: " << bleuFear << " (" << indexFear << ")" << endl; + bleuScoresHope[batchPosition].push_back(bleuHope); + bleuScoresFear[batchPosition].push_back(bleuFear); + featureValuesHope[batchPosition].push_back(featureValues[batchPosition][indexHope]); + featureValuesFear[batchPosition].push_back(featureValues[batchPosition][indexFear]); + float modelScoreHope = modelScores[batchPosition][indexHope]; + float modelScoreFear = modelScores[batchPosition][indexFear]; + if (most_violated_reg) { + // reduce model score difference by factor ~0.5 + float reg = currentViolation/4; + modelScoreHope += abs(reg); + modelScoreFear -= abs(reg); + float newViolation = (bleuHope - bleuFear) - (modelScoreHope - modelScoreFear); + cerr << "Rank " << rank << ", epoch " << epoch << ", regularized violation: " << newViolation << endl; + } + modelScoresHope[batchPosition].push_back(modelScoreHope); + modelScoresFear[batchPosition].push_back(modelScoreFear); + + featureValues[batchPosition][indexHope].IncrementSparseHopeFeatures(); + featureValues[batchPosition][indexFear].IncrementSparseFearFeatures(); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no violated constraint found." 
<< endl; + skip_example = 1; + } + } else cerr << endl; + } + if (max_bleu_diff) { + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with max Bleu diff from list: " << bleuScores[batchPosition].size() << endl; + for (size_t i=0; i hopeList, fearList; + for (size_t i=0; i > losses(actualBatchSize); + if (model_hope_fear) { + // Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis) + for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) { + for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) { + losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]); + } + } + } + + // set weight for bleu feature to 0 before optimizing + vector::const_iterator iter; + const vector &featureFunctions2 = FeatureFunction::GetFeatureFunctions(); + for (iter = featureFunctions2.begin(); iter != featureFunctions2.end(); ++iter) { + if ((*iter)->GetScoreProducerDescription() == "BleuScoreFeature") { + mosesWeights.Assign(*iter, 0); + break; + } + } + + // scale LM feature (to avoid rapid changes) + if (scale_lm) { + cerr << "scale lm" << endl; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + // scale down score + if (model_hope_fear) { + scaleFeatureScore(lm, scale_lm_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(lm, scale_lm_factor, featureValuesHope, rank, epoch); + scaleFeatureScore(lm, scale_lm_factor, featureValuesFear, rank, epoch); + } + } + } + } + + // scale WP + if (scale_wp) { + // scale up weight + WordPenaltyProducer &wp = WordPenaltyProducer::InstanceNonConst(); + + // scale down score + if (model_hope_fear) { + scaleFeatureScore(&wp, scale_wp_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(&wp, scale_wp_factor, featureValuesHope, rank, epoch); + scaleFeatureScore(&wp, scale_wp_factor, featureValuesFear, rank, epoch); + } + } + + // print out the feature values + if (print_feature_values) { + cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl; + if (model_hope_fear) printFeatureValues(featureValues); + else { + cerr << "hope: " << endl; + printFeatureValues(featureValuesHope); + cerr << "fear: " << endl; + printFeatureValues(featureValuesFear); + } + } + + // apply learning rates to feature vectors before optimization + if (feature_confidence) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply feature learning rates with decays " << decay_core << "/" << decay_sparse << ": " << featureLearningRates << endl; + if (model_hope_fear) { + applyPerFeatureLearningRates(featureValues, featureLearningRates, sparse_r0); + } else { + applyPerFeatureLearningRates(featureValuesHope, featureLearningRates, sparse_r0); + applyPerFeatureLearningRates(featureValuesFear, featureLearningRates, sparse_r0); + } + } else { + // apply fixed learning rates + cerr << "Rank " << rank << ", epoch " << epoch << ", apply fixed learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + if (core_r0 != 1.0 || sparse_r0 != 1.0) { + if (model_hope_fear) { + applyLearningRates(featureValues, core_r0, sparse_r0); + } else { + applyLearningRates(featureValuesHope, core_r0, sparse_r0); + applyLearningRates(featureValuesFear, core_r0, sparse_r0); + } + } + } + + // Run optimiser on batch: + VERBOSE(1, "\nRank " << rank << ", epoch " << 
epoch << ", run optimiser:" << endl); + size_t update_status = 1; + ScoreComponentCollection weightUpdate; + if (perceptron_update) { + vector > dummy1; + update_status = optimiser->updateWeightsHopeFear( weightUpdate, featureValuesHope, + featureValuesFear, dummy1, dummy1, dummy1, dummy1, learning_rate, rank, epoch); + } else if (hope_fear) { + if (bleuScoresHope[0][0] >= min_oracle_bleu) { + if (hope_n == 1 && fear_n ==1 && batchSize == 1 && !hildreth) { + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(weightUpdate, + featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], + bleuScoresFear[0][0], modelScoresHope[0][0], modelScoresFear[0][0], learning_rate, rank, epoch); + } else + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } else + update_status = 1; + } else if (kbest) { + if (batchSize == 1 && featureValuesHope[0].size() == 1 && !hildreth) { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically( + weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0], + bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0], + modelScoresFear[0][0], learning_rate, rank, epoch); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } + } else { + // model_hope_fear + update_status = ((MiraOptimiser*) optimiser)->updateWeights(weightUpdate, + featureValues, losses, bleuScores, modelScores, oracleFeatureValues, + oracleBleuScores, oracleModelScores, learning_rate, rank, epoch); + } + + // sumStillViolatedConstraints += update_status; + + if (update_status == 0) { // if weights were updated + // apply weight update + if (debug) + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << weightUpdate << endl; + + if (feature_confidence) { + // update confidence counts based on weight update + confidenceCounts.UpdateConfidenceCounts(weightUpdate, signed_counts); + + // update feature learning rates + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); + } + + // apply weight update to Moses weights + mosesWeights.PlusEquals(weightUpdate); + + if (normaliseWeights) + mosesWeights.L1Normalise(); + + cumulativeWeights.PlusEquals(mosesWeights); + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + cumulativeWeightsBinary.PlusEquals(binary); + } + + ++numberOfUpdates; + ++numberOfUpdatesThisEpoch; + if (averageWeights) { + ScoreComponentCollection averageWeights(cumulativeWeights); + if (accumulateWeights) { + averageWeights.DivideEquals(numberOfUpdates); + } else { + averageWeights.DivideEquals(numberOfUpdatesThisEpoch); + } + + mosesWeights = averageWeights; + } + + // set new Moses weights + decoder->setWeights(mosesWeights); + //cerr << "Rank " << rank << ", epoch " << epoch << ", new 
weights: " << mosesWeights << endl; + } + + // update history (for approximate document Bleu) + if (historyBleu || simpleHistoryBleu) { + for (size_t i = 0; i < oneBests.size(); ++i) + cerr << "Rank " << rank << ", epoch " << epoch << ", update history with 1best length: " << oneBests[i].size() << " "; + decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch); + deleteTranslations(oneBests); + } + } // END TRANSLATE AND UPDATE BATCH + + // size of all shards except for the last one + size_t generalShardSize; + if (trainWithMultipleFolds) + generalShardSize = order.size()/coresPerFold; + else + generalShardSize = order.size()/size; + + size_t mixing_base = mixingFrequency == 0 ? 0 : generalShardSize / mixingFrequency; + size_t dumping_base = weightDumpFrequency == 0 ? 0 : generalShardSize / weightDumpFrequency; + bool mix = evaluateModulo(shardPosition, mixing_base, actualBatchSize); + + // mix weights? + if (mix) { +#ifdef MPI_ENABLE + cerr << "Rank " << rank << ", epoch " << epoch << ", mixing weights.. " << endl; + // collect all weights in mixedWeights and divide by number of processes + mpi::reduce(world, mosesWeights, mixedWeights, SCCPlus(), 0); + + // mix confidence counts + //mpi::reduce(world, confidenceCounts, mixedConfidenceCounts, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } + if (rank == 0) { + // divide by number of processes + if (sparseNoAverage) + mixedWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedWeights.DivideEquals(totalBinary); + else + mixedWeights.DivideEquals(size); + + // divide confidence counts + //mixedConfidenceCounts.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedWeights.L1Normalise(); + } + + ++weightMixingThisEpoch; + + if (pruneZeroWeights) { + size_t pruned = mixedWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from mixedWeights." << endl; + + pruned = cumulativeWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from cumulativeWeights." << endl; + } + + if (featureCutoff != -1 && weightMixingThisEpoch == mixingFrequency) { + size_t pruned = mixedWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from mixedWeights." << endl; + + pruned = cumulativeWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from cumulativeWeights." << endl; + } + + if (weightMixingThisEpoch == mixingFrequency || reg_on_every_mix) { + if (l1_regularize) { + size_t pruned; + if (l1_reg_sparse) + pruned = mixedWeights.SparseL1Regularize(l1_lambda); + else + pruned = mixedWeights.L1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + } + if (l2_regularize) { + if (l2_reg_sparse) + mixedWeights.SparseL2Regularize(l2_lambda); + else + mixedWeights.L2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. 
on mixedWeights with lambda=" << l2_lambda << endl; + } + } + } + + // broadcast average weights from process 0 + mpi::broadcast(world, mixedWeights, 0); + decoder->setWeights(mixedWeights); + mosesWeights = mixedWeights; + + // broadcast summed confidence counts + //mpi::broadcast(world, mixedConfidenceCounts, 0); + //confidenceCounts = mixedConfidenceCounts; +#endif +#ifndef MPI_ENABLE + //cerr << "\nRank " << rank << ", no mixing, weights: " << mosesWeights << endl; + mixedWeights = mosesWeights; +#endif + } // end mixing + + // Dump weights? + if (trainWithMultipleFolds || weightEpochDump == weightDumpFrequency) { + // dump mixed weights at end of every epoch to enable continuing a crashed experiment + // (for jackknife every time the weights are mixed) + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_mixed_0" << epoch; + else + filename << weightDumpStem << "_mixed_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + mixedWeights.Save(filename.str()); + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + } + if (dumpMixedWeights) { + if (mix && rank == 0 && !weightDumpStem.empty()) { + // dump mixed weights instead of average weights + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_0" << epoch; + else + filename << weightDumpStem << "_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedWeights.Save(filename.str()); + ++weightEpochDump; + } + } else { + if (evaluateModulo(shardPosition, dumping_base, actualBatchSize)) { + cerr << "Rank " << rank << ", epoch " << epoch << ", dump weights.. 
(pos: " << shardPosition << ", base: " << dumping_base << ")" << endl; + ScoreComponentCollection tmpAverageWeights(cumulativeWeights); + bool proceed = false; + if (accumulateWeights) { + if (numberOfUpdates > 0) { + tmpAverageWeights.DivideEquals(numberOfUpdates); + proceed = true; + } + } else { + if (numberOfUpdatesThisEpoch > 0) { + if (sparseNoAverage) // average only core weights + tmpAverageWeights.CoreDivideEquals(numberOfUpdatesThisEpoch); + else if (sparseAverage) + tmpAverageWeights.DivideEquals(cumulativeWeightsBinary); + else + tmpAverageWeights.DivideEquals(numberOfUpdatesThisEpoch); + proceed = true; + } + } + + if (proceed) { +#ifdef MPI_ENABLE + // average across processes + mpi::reduce(world, tmpAverageWeights, mixedAverageWeights, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } +#endif +#ifndef MPI_ENABLE + mixedAverageWeights = tmpAverageWeights; + //FIXME: What do to for non-mpi version + ScoreComponentCollection totalBinary; +#endif + if (rank == 0 && !weightDumpStem.empty()) { + // divide by number of processes + if (sparseNoAverage) + mixedAverageWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedAverageWeights.DivideEquals(totalBinary); + else + mixedAverageWeights.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedAverageWeights.L1Normalise(); + } + + // dump final average weights + ostringstream filename; + if (epoch < 10) { + filename << weightDumpStem << "_0" << epoch; + } else { + filename << weightDumpStem << "_" << epoch; + } + + if (weightDumpFrequency > 1) { + filename << "_" << weightEpochDump; + } + + /*if (accumulateWeights) { + cerr << "\nMixed average weights (cumulative) during epoch " << epoch << ": " << mixedAverageWeights << endl; + } else { + cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl; + }*/ + + cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + ++weightEpochDump; + + if (weightEpochDump == weightDumpFrequency) { + if (l1_regularize) { + size_t pruned = mixedAverageWeights.SparseL1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedAverageWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + + } + if (l2_regularize) { + mixedAverageWeights.SparseL2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. 
on mixedAverageWeights with lambda=" << l2_lambda << endl; + } + + if (l1_regularize || l2_regularize) { + filename << "_reg"; + cerr << "Dumping regularized mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + } + } + + if (weightEpochDump == weightDumpFrequency && printFeatureCounts) { + // print out all features with counts + stringstream s1, s2; + s1 << "sparse_feature_hope_counts" << "_" << epoch; + s2 << "sparse_feature_fear_counts" << "_" << epoch; + ofstream sparseFeatureCountsHope(s1.str().c_str()); + ofstream sparseFeatureCountsFear(s2.str().c_str()); + + mixedAverageWeights.PrintSparseHopeFeatureCounts(sparseFeatureCountsHope); + mixedAverageWeights.PrintSparseFearFeatureCounts(sparseFeatureCountsFear); + sparseFeatureCountsHope.close(); + sparseFeatureCountsFear.close(); + } + } + } + }// end dumping + } // end if dump + } // end of shard loop, end of this epoch + cerr << "Rank " << rank << ", epoch " << epoch << ", end of epoch.." << endl; + + if (historyBleu || simpleHistoryBleu) { + cerr << "Bleu feature history after epoch " << epoch << endl; + decoder->printBleuFeatureHistory(cerr); + } + // cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl; + + // Check whether there were any weight updates during this epoch + size_t sumUpdates; + size_t *sendbuf_uint, *recvbuf_uint; + sendbuf_uint = (size_t *) malloc(sizeof(size_t)); + recvbuf_uint = (size_t *) malloc(sizeof(size_t)); +#ifdef MPI_ENABLE + sendbuf_uint[0] = numberOfUpdatesThisEpoch; + recvbuf_uint[0] = 0; + MPI_Reduce(sendbuf_uint, recvbuf_uint, 1, MPI_UNSIGNED, MPI_SUM, 0, world); + sumUpdates = recvbuf_uint[0]; +#endif +#ifndef MPI_ENABLE + sumUpdates = numberOfUpdatesThisEpoch; +#endif + if (rank == 0 && sumUpdates == 0) { + cerr << "\nNo weight updates during this epoch.. stopping." << endl; + stop = true; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } + + if (!stop) { + // Test if weights have converged + if (weightConvergence) { + bool reached = true; + if (rank == 0 && (epoch >= 2)) { + ScoreComponentCollection firstDiff, secondDiff; + if (dumpMixedWeights) { + firstDiff = mixedWeights; + firstDiff.MinusEquals(mixedWeightsPrevious); + secondDiff = mixedWeights; + secondDiff.MinusEquals(mixedWeightsBeforePrevious); + } else { + firstDiff = mixedAverageWeights; + firstDiff.MinusEquals(mixedAverageWeightsPrevious); + secondDiff = mixedAverageWeights; + secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious); + } + VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << " (max: " << firstDiff.GetLInfNorm() << ")" << endl); + VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl); + + // check whether stopping criterion has been reached + // (both difference vectors must have all weight changes smaller than min_weight_change) + if (firstDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (secondDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (reached) { + // stop MIRA + stop = true; + cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." 
<< endl; + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "stopping"; + dummy.Save(endfilename.str()); + } + } + + mixedWeightsBeforePrevious = mixedWeightsPrevious; + mixedWeightsPrevious = mixedWeights; + mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious; + mixedAverageWeightsPrevious = mixedAverageWeights; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } //end if (weightConvergence) + } + } // end of epoch loop + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + if (rank == 0) { + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "finished"; + dummy.Save(endfilename.str()); + } + + delete decoder; + exit(0); +} + +bool loadSentences(const string& filename, vector& sentences) +{ + ifstream in(filename.c_str()); + if (!in) + return false; + string line; + while (getline(in, line)) + sentences.push_back(line); + return true; +} + +bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size) +{ + if (mix_or_dump_base == 0) return 0; + if (actual_batch_size > 1) { + bool mix_or_dump = false; + size_t numberSubtracts = actual_batch_size; + do { + if (shard_position % mix_or_dump_base == 0) { + mix_or_dump = true; + break; + } + --shard_position; + --numberSubtracts; + } while (numberSubtracts > 0); + return mix_or_dump; + } else { + return ((shard_position % mix_or_dump_base) == 0); + } +} + +void printFeatureValues(vector > &featureValues) +{ + for (size_t i = 0; i < featureValues.size(); ++i) { + for (size_t j = 0; j < featureValues[i].size(); ++j) { + cerr << featureValues[i][j] << endl; + } + } + cerr << endl; +} + +void deleteTranslations(vector > &translations) +{ + for (size_t i = 0; i < translations.size(); ++i) { + for (size_t j = 0; j < translations[i].size(); ++j) { + delete translations[i][j]; + } + } +} + +void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename, vector &inputSentences, MosesDecoder* decoder, size_t n, float bleuWeight) +{ + if (decode == 1) + cerr << "Rank " << rank << ", decoding dev input set according to hope objective.. " << endl; + else if (decode == 2) + cerr << "Rank " << rank << ", decoding dev input set according to fear objective.. " << endl; + else + cerr << "Rank " << rank << ", decoding dev input set according to normal objective.. " << endl; + + // Create shards according to the number of processes used + vector order; + for (size_t i = 0; i < inputSentences.size(); ++i) + order.push_back(i); + + vector shard; + float shardSize = (float) (order.size()) / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = inputSentences.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank " << rank << ", shard start: " << shardStart << " Shard end: " << shardEnd << endl); + VERBOSE(1, "Rank " << rank << ", shard size: " << shardSize << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + + // open files for writing + stringstream fname; + fname << filename << ".rank" << rank; + filename = fname.str(); + ostringstream filename_nbest; + filename_nbest << filename << "." 
<< n << "best"; + ofstream out(filename.c_str()); + ofstream nbest_out((filename_nbest.str()).c_str()); + if (!out) { + ostringstream msg; + msg << "Unable to open " << fname.str(); + throw runtime_error(msg.str()); + } + if (!nbest_out) { + ostringstream msg; + msg << "Unable to open " << filename_nbest; + throw runtime_error(msg.str()); + } + + for (size_t i = 0; i < shard.size(); ++i) { + size_t sid = shard[i]; + string& input = inputSentences[sid]; + + vector > dummyFeatureValues; + vector > dummyBleuScores; + vector > dummyModelScores; + + vector newFeatureValues; + vector newScores; + dummyFeatureValues.push_back(newFeatureValues); + dummyBleuScores.push_back(newScores); + dummyModelScores.push_back(newScores); + + float factor = 0.0; + if (decode == 1) factor = 1.0; + if (decode == 2) factor = -1.0; + cerr << "Rank " << rank << ", translating sentence " << sid << endl; + bool realBleu = false; + vector< vector > nbestOutput = decoder->getNBest(input, sid, n, factor, bleuWeight, dummyFeatureValues[0], + dummyBleuScores[0], dummyModelScores[0], n, realBleu, true, false, rank, 0, ""); + cerr << endl; + decoder->cleanup(StaticData::Instance().IsChart()); + + for (size_t i = 0; i < nbestOutput.size(); ++i) { + vector output = nbestOutput[i]; + stringstream translation; + for (size_t k = 0; k < output.size(); ++k) { + Word* w = const_cast(output[k]); + translation << w->GetString(0); + translation << " "; + } + + if (i == 0) + out << translation.str() << endl; + nbest_out << sid << " ||| " << translation.str() << " ||| " << dummyFeatureValues[0][i] << + " ||| " << dummyModelScores[0][i] << " ||| sBleu=" << dummyBleuScores[0][i] << endl; + } + } + + out.close(); + nbest_out.close(); + cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl; + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + delete decoder; + exit(0); +} + +void applyLearningRates(vector > &featureValues, float core_r0, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, ScoreComponentCollection featureLearningRates, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + float featureScore; + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + for (size_t i=0; i featureScores = featureValues[i][j].GetScoresForProducer(sp); + for (size_t k=0; k + +#include "moses/ScoreComponentCollection.h" +#include "moses/Word.h" +#include "moses/FF/FeatureFunction.h" +#include "Decoder.h" + +typedef std::map > ProducerWeightMap; +typedef std::pair > ProducerWeightPair; + +template bool from_string(T& t, const std::string& s, std::ios_base& (*f)(std::ios_base&)) +{ + std::istringstream iss(s); + return !(iss >> f >> t).fail(); +} + +struct RandomIndex { + ptrdiff_t operator()(ptrdiff_t max) { + srand(time(0)); // Initialize random number generator with current time. 
+ return static_cast (rand() % max); + } +}; + +//void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId); +bool loadSentences(const std::string& filename, std::vector& sentences); +bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size); +void printFeatureValues(std::vector > &featureValues); +void ignoreCoreFeatures(std::vector > &featureValues, ProducerWeightMap &coreWeightMap); +void takeLogs(std::vector > &featureValues, size_t base); +void deleteTranslations(std::vector > &translations); +void decodeHopeOrFear(size_t rank, size_t size, size_t decode, std::string decode_filename, std::vector &inputSentences, Mira::MosesDecoder* decoder, size_t n, float bleuWeight); +void applyLearningRates(std::vector > &featureValues, float core_r0, float sparse_r0); +void applyPerFeatureLearningRates(std::vector > &featureValues, Moses::ScoreComponentCollection featureLearningRates, float sparse_r0); +void scaleFeatureScore(const Moses::FeatureFunction *sp, float scaling_factor, std::vector > &featureValues, size_t rank, size_t epoch); +void scaleFeatureScores(const Moses::FeatureFunction *sp, float scaling_factor, std::vector > &featureValues, size_t rank, size_t epoch); + +#endif /* MAIN_H_ */ diff --git a/contrib/mira/MiraOptimiser.cpp b/contrib/mira/MiraOptimiser.cpp new file mode 100644 index 000000000..d4854a1c4 --- /dev/null +++ b/contrib/mira/MiraOptimiser.cpp @@ -0,0 +1,446 @@ +#include +#include "Optimiser.h" +#include "Hildreth.h" +#include "moses/StaticData.h" + +using namespace Moses; +using namespace std; + +namespace Mira +{ + +size_t MiraOptimiser::updateWeights( + ScoreComponentCollection& weightUpdate, + const vector >& featureValues, + const vector >& losses, + const vector >& bleuScores, + const vector >& modelScores, + const vector& oracleFeatureValues, + const vector oracleBleuScores, + const vector oracleModelScores, + float learning_rate, + size_t rank, + size_t epoch) +{ + + // vector of feature values differences for all created constraints + vector featureValueDiffs; + vector lossMinusModelScoreDiffs; + vector all_losses; + + // most violated constraint in batch + ScoreComponentCollection max_batch_featureValueDiff; + + // Make constraints for new hypothesis translations + float epsilon = 0.0001; + int violatedConstraintsBefore = 0; + float oldDistanceFromOptimum = 0; + // iterate over input sentences (1 (online) or more (batch)) + for (size_t i = 0; i < featureValues.size(); ++i) { + //size_t sentenceId = sentenceIds[i]; + // iterate over hypothesis translations for one input sentence + for (size_t j = 0; j < featureValues[i].size(); ++j) { + ScoreComponentCollection featureValueDiff = oracleFeatureValues[i]; + featureValueDiff.MinusEquals(featureValues[i][j]); + + // cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + continue; + } + + float loss = losses[i][j]; + + // check if constraint is violated + bool violated = false; +// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = oracleModelScores[i] - modelScores[i][j]; + float diff = 0; + + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << 
endl; + if (diff > epsilon) + violated = true; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + diff = 0; + if (loss > modelScoreDiff) { + diff = loss - modelScoreDiff; + } + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (m_scale_margin) { + diff *= oracleBleuScores[i]; + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << oracleBleuScores[i] << endl; + } + + featureValueDiffs.push_back(featureValueDiff); + lossMinusModelScoreDiffs.push_back(diff); + all_losses.push_back(loss); + if (violated) { + ++violatedConstraintsBefore; + oldDistanceFromOptimum += diff; + } + } + } + + // run optimisation: compute alphas for all given constraints + vector alphas; + ScoreComponentCollection summedUpdate; + if (violatedConstraintsBefore > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; + if (m_slack != 0) { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); + } else { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs); + } + + // Update the weight vector according to the alphas and the feature value differences + // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) + for (size_t k = 0; k < featureValueDiffs.size(); ++k) { + float alpha = alphas[k]; + cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl; + ScoreComponentCollection update(featureValueDiffs[k]); + update.MultiplyEquals(alpha); + + // sum updates + summedUpdate.PlusEquals(update); + } + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl; +// return 0; + return 1; + } + + // apply learning rate + if (learning_rate != 1) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl; + summedUpdate.MultiplyEquals(learning_rate); + } + + // scale update by BLEU of oracle (for batch size 1 only) + if (oracleBleuScores.size() == 1) { + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << oracleBleuScores[0] << endl; + summedUpdate.MultiplyEquals(oracleBleuScores[0]); + } + } + + // cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl; + weightUpdate.PlusEquals(summedUpdate); + + // Sanity check: are there still violated constraints after optimisation? 
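+  /* Sketch of the update performed above (paraphrased from the calls in this
+     function, not copied from Hildreth.cpp): every (oracle, hypothesis j) pair
+     whose features differ contributes one constraint
+         w . (h(oracle) - h(hyp_j)) >= loss_j ,
+     Hildreth::optimise returns one alpha per constraint (bounded by m_slack when
+     a slack value is given), and the weight update is the alpha-weighted sum of
+     the feature-value differences. With a single constraint this reduces to the
+     Crammer & Singer (2006) closed form used in updateWeightsAnalytically below:
+         alpha = min(m_slack, (loss - modelScoreDiff) / ||h(oracle) - h(hyp)||^2),
+     applied only when the constraint is violated by more than epsilon. */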
+ /* int violatedConstraintsAfter = 0; + float newDistanceFromOptimum = 0; + for (size_t i = 0; i < featureValueDiffs.size(); ++i) { + float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights); + float loss = all_losses[i]; + float diff = loss - modelScoreDiff; + if (diff > epsilon) { + ++violatedConstraintsAfter; + newDistanceFromOptimum += diff; + } + } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);*/ +// return violatedConstraintsAfter; + return 0; +} + +size_t MiraOptimiser::updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector< std::vector >& featureValuesHope, + const std::vector< std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition) +{ + + // vector of feature values differences for all created constraints + vector featureValueDiffs; + vector lossMinusModelScoreDiffs; + vector modelScoreDiffs; + vector all_losses; + + // most violated constraint in batch + ScoreComponentCollection max_batch_featureValueDiff; + + // Make constraints for new hypothesis translations + float epsilon = 0.0001; + int violatedConstraintsBefore = 0; + float oldDistanceFromOptimum = 0; + + // iterate over input sentences (1 (online) or more (batch)) + for (size_t i = 0; i < featureValuesHope.size(); ++i) { + if (updatePosition != -1) { + if (int(i) < updatePosition) + continue; + else if (int(i) > updatePosition) + break; + } + + // Pick all pairs[j,j] of hope and fear translations for one input sentence + for (size_t j = 0; j < featureValuesHope[i].size(); ++j) { + ScoreComponentCollection featureValueDiff = featureValuesHope[i][j]; + featureValueDiff.MinusEquals(featureValuesFear[i][j]); + //cerr << "Rank " << rank << ", epoch " << epoch << ", feature value diff: " << featureValueDiff << endl; + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + continue; + } + + float loss = bleuScoresHope[i][j] - bleuScoresFear[i][j]; + + // check if constraint is violated + bool violated = false; + //float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j]; + float diff = 0; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + + if (diff > epsilon) + violated = true; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + diff = 0; + if (loss > modelScoreDiff) { + diff = loss - modelScoreDiff; + } + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (m_scale_margin) { + diff *= 
bleuScoresHope[i][j]; + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl; + } + + featureValueDiffs.push_back(featureValueDiff); + lossMinusModelScoreDiffs.push_back(diff); + modelScoreDiffs.push_back(modelScoreDiff); + all_losses.push_back(loss); + if (violated) { + ++violatedConstraintsBefore; + oldDistanceFromOptimum += diff; + } + } + } + + // run optimisation: compute alphas for all given constraints + vector alphas; + ScoreComponentCollection summedUpdate; + if (violatedConstraintsBefore > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << + featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl; + if (m_slack != 0) { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack); + } else { + alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs); + } + + // Update the weight vector according to the alphas and the feature value differences + // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis)) + for (size_t k = 0; k < featureValueDiffs.size(); ++k) { + float alpha = alphas[k]; + cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl; + if (alpha != 0) { + // apply boosting factor + if (m_boost && modelScoreDiffs[k] <= 0) { + // factor between 1.5 and 3 (for Bleu scores between 5 and 20, the factor is within the boundaries) + float factor = std::min(1.5f, (float) log2(bleuScoresHope[0][0])); // TODO: make independent of number of oracles!! + factor = min(3.0f, factor); + alpha = alpha * factor; + cerr << "Rank " << rank << ", epoch " << epoch << ", apply boosting factor " << factor << " to update." << endl; + } + + ScoreComponentCollection update(featureValueDiffs[k]); + update.MultiplyEquals(alpha); + + // sum updates + summedUpdate.PlusEquals(update); + } + } + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl; + // return 0; + return 1; + } + + // apply learning rate + if (learning_rate != 1) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl; + summedUpdate.MultiplyEquals(learning_rate); + } + + // scale update by BLEU of oracle (for batch size 1 only) + if (featureValuesHope.size() == 1) { + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << bleuScoresHope[0][0] << endl; + summedUpdate.MultiplyEquals(bleuScoresHope[0][0]); + } + } + + //cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl; + weightUpdate.PlusEquals(summedUpdate); + + // Sanity check: are there still violated constraints after optimisation? 
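+  /* updateWeightsHopeFear mirrors updateWeights above, with the oracle/hypothesis
+     roles played by the j-th hope and fear translations of each input sentence:
+     loss_j = BLEU(hope_j) - BLEU(fear_j) and the constraint is
+         w . (h(hope_j) - h(fear_j)) >= loss_j .
+     Note that the boosting step above takes min(1.5, log2(bleuHope)) followed by
+     min(3.0, ...), so as written the factor never exceeds 1.5 even though the
+     accompanying comment describes a range of 1.5 to 3. */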
+ /* int violatedConstraintsAfter = 0; + float newDistanceFromOptimum = 0; + for (size_t i = 0; i < featureValueDiffs.size(); ++i) { + float modelScoreDiff = featureValueDiffs[i].InnerProduct(currWeights); + float loss = all_losses[i]; + float diff = loss - modelScoreDiff; + if (diff > epsilon) { + ++violatedConstraintsAfter; + newDistanceFromOptimum += diff; + } + } + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", violated constraint before: " << violatedConstraintsBefore << ", after: " << violatedConstraintsAfter << ", change: " << violatedConstraintsBefore - violatedConstraintsAfter << endl); + VERBOSE(1, "Rank " << rank << ", epoch " << epoch << ", error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl);*/ +// return violatedConstraintsAfter; + return 0; +} + +size_t MiraOptimiser::updateWeightsAnalytically( + ScoreComponentCollection& weightUpdate, + ScoreComponentCollection& featureValuesHope, + ScoreComponentCollection& featureValuesFear, + float bleuScoreHope, + float bleuScoreFear, + float modelScoreHope, + float modelScoreFear, + float learning_rate, + size_t rank, + size_t epoch) +{ + + float epsilon = 0.0001; + float oldDistanceFromOptimum = 0; + bool constraintViolatedBefore = false; + +// cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope << endl; +// cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear << endl; + ScoreComponentCollection featureValueDiff = featureValuesHope; + featureValueDiff.MinusEquals(featureValuesFear); + if (featureValueDiff.GetL1Norm() == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl; + return 1; + } + +// cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; +// float modelScoreDiff = featureValueDiff.InnerProduct(currWeights); + float modelScoreDiff = modelScoreHope - modelScoreFear; + float loss = bleuScoreHope - bleuScoreFear; + float diff = 0; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + + if (m_normaliseMargin) { + modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam; + loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam; + if (loss > modelScoreDiff) + diff = loss - modelScoreDiff; + cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl; + } + + if (diff > epsilon) { + // squash it between 0 and 1 + //diff = tanh(diff); + //diff = (2/(1 + pow(2,-diff))) - 1; + /* if (m_normaliseMargin) { + diff = (2/(1 + exp(-diff))) - 1; + cerr << "Rank " << rank << ", epoch " << epoch << ", new margin: " << diff << endl; + }*/ + + // constraint violated + oldDistanceFromOptimum += diff; + constraintViolatedBefore = true; + + // compute alpha for given constraint: (loss - model score diff) / || feature value diff ||^2 + // featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff) + // from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2} + float squaredNorm = featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm(); + + float alpha = diff / squaredNorm; + cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << 
alpha << endl; + if (m_slack > 0 ) { + if (alpha > m_slack) { + alpha = m_slack; + } else if (alpha < m_slack*(-1)) { + alpha = m_slack*(-1); + } + } + + // apply learning rate + if (learning_rate != 1) + alpha = alpha * learning_rate; + + if (m_scale_update) { + cerr << "Rank " << rank << ", epoch " << epoch << ", scaling update with oracle bleu score " << bleuScoreHope << endl; + alpha *= bleuScoreHope; + } + + cerr << "Rank " << rank << ", epoch " << epoch << ", clipped/scaled alpha: " << alpha << endl; + + // apply boosting factor + if (m_boost && modelScoreDiff <= 0) { + // factor between 1.5 and 3 (for Bleu scores between 5 and 20, the factor is within the boundaries) + float factor = min(1.5f, (float) log2(bleuScoreHope)); + factor = min(3.0f, factor); + alpha = alpha * factor; + cerr << "Rank " << rank << ", epoch " << epoch << ", boosted alpha: " << alpha << endl; + } + + featureValueDiff.MultiplyEquals(alpha); + weightUpdate.PlusEquals(featureValueDiff); +// cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << weightUpdate << endl; + } + + if (!constraintViolatedBefore) { + // constraint satisfied, nothing to do + cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl; + return 1; + } + + // sanity check: constraint still violated after optimisation? + /* ScoreComponentCollection newWeights(currWeights); + newWeights.PlusEquals(weightUpdate); + bool constraintViolatedAfter = false; + float newDistanceFromOptimum = 0; + featureValueDiff = featureValuesHope; + featureValueDiff.MinusEquals(featureValuesFear); + modelScoreDiff = featureValueDiff.InnerProduct(newWeights); + diff = loss - modelScoreDiff; + // approximate comparison between floats! + if (diff > epsilon) { + constraintViolatedAfter = true; + newDistanceFromOptimum += (loss - modelScoreDiff); + } + + float hopeScore = featureValuesHope.InnerProduct(newWeights); + float fearScore = featureValuesFear.InnerProduct(newWeights); + cerr << "New hope score: " << hopeScore << endl; + cerr << "New fear score: " << fearScore << endl; + + VERBOSE(0, "Rank " << rank << ", epoch " << epoch << ", check, constraint violated before? " << constraintViolatedBefore << ", after? " << constraintViolatedAfter << endl); + VERBOSE(0, "Rank " << rank << ", epoch " << epoch << ", check, error before: " << oldDistanceFromOptimum << ", after: " << newDistanceFromOptimum << ", change: " << oldDistanceFromOptimum - newDistanceFromOptimum << endl); + */ + return 0; +} + +} + diff --git a/contrib/mira/MiraTest.cpp b/contrib/mira/MiraTest.cpp new file mode 100644 index 000000000..774b324f8 --- /dev/null +++ b/contrib/mira/MiraTest.cpp @@ -0,0 +1,24 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + + +//Supplies the main for the mira test module +#define BOOST_TEST_MODULE mira +#include + diff --git a/contrib/mira/Optimiser.h b/contrib/mira/Optimiser.h new file mode 100644 index 000000000..6bae23055 --- /dev/null +++ b/contrib/mira/Optimiser.h @@ -0,0 +1,153 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ +#ifndef _MIRA_OPTIMISER_H_ +#define _MIRA_OPTIMISER_H_ + +#include + +#include "moses/ScoreComponentCollection.h" + + +namespace Mira +{ + +class Optimiser +{ +public: + Optimiser() {} + + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1) = 0; +}; + +class Perceptron : public Optimiser +{ +public: + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1); +}; + +class MiraOptimiser : public Optimiser +{ +public: + MiraOptimiser() : + Optimiser() { } + + MiraOptimiser(float slack) : + Optimiser(), + m_slack(slack), + m_scale_margin(false), + m_scale_update(false), + m_boost(false), + m_normaliseMargin(false), + m_sigmoidParam(1.0) { } + + MiraOptimiser(float slack, bool scale_margin, bool scale_update, + bool boost, bool normaliseMargin, float sigmoidParam) : + Optimiser(), + m_slack(slack), + m_scale_margin(scale_margin), + m_scale_update(scale_update), + m_boost(boost), + m_normaliseMargin(normaliseMargin), + m_sigmoidParam(sigmoidParam) { } + + size_t updateWeights( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValues, + const std::vector >& losses, + const std::vector >& bleuScores, + const std::vector >& modelScores, + const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues, + const std::vector< float> oracleBleuScores, + const std::vector< float> 
oracleModelScores, + float learning_rate, + size_t rank, + size_t epoch); + virtual size_t updateWeightsHopeFear( + Moses::ScoreComponentCollection& weightUpdate, + const std::vector >& featureValuesHope, + const std::vector >& featureValuesFear, + const std::vector >& bleuScoresHope, + const std::vector >& bleuScoresFear, + const std::vector >& modelScoresHope, + const std::vector >& modelScoresFear, + float learning_rate, + size_t rank, + size_t epoch, + int updatePosition = -1); + size_t updateWeightsAnalytically( + Moses::ScoreComponentCollection& weightUpdate, + Moses::ScoreComponentCollection& featureValuesHope, + Moses::ScoreComponentCollection& featureValuesFear, + float bleuScoreHope, + float bleuScoreFear, + float modelScoreHope, + float modelScoreFear, + float learning_rate, + size_t rank, + size_t epoch); + + void setSlack(float slack) { + m_slack = slack; + } + +private: + // regularise Hildreth updates + float m_slack; + + + // scale margin with BLEU score + bool m_scale_margin; + + // scale update with oracle BLEU score + bool m_scale_update; + + // boosting of updates on misranked candidates + bool m_boost; + + // squash margin between 0 and 1 (or depending on m_sigmoidParam) + bool m_normaliseMargin; + + // y=sigmoidParam is the axis that this sigmoid approaches + float m_sigmoidParam ; +}; +} + +#endif diff --git a/contrib/mira/Perceptron.cpp b/contrib/mira/Perceptron.cpp new file mode 100644 index 000000000..af61c28a9 --- /dev/null +++ b/contrib/mira/Perceptron.cpp @@ -0,0 +1,53 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2010 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "Optimiser.h" + +using namespace Moses; +using namespace std; + +namespace Mira +{ + +size_t Perceptron::updateWeightsHopeFear( + ScoreComponentCollection& weightUpdate, + const vector< vector >& featureValuesHope, + const vector< vector >& featureValuesFear, + const vector< vector >& dummy1, + const vector< vector >& dummy2, + const vector< vector >& dummy3, + const vector< vector >& dummy4, + float perceptron_learning_rate, + size_t rank, + size_t epoch, + int updatePosition) +{ + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear[0][0] << endl; + ScoreComponentCollection featureValueDiff = featureValuesHope[0][0]; + featureValueDiff.MinusEquals(featureValuesFear[0][0]); + cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; + featureValueDiff.MultiplyEquals(perceptron_learning_rate); + weightUpdate.PlusEquals(featureValueDiff); + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl; + return 0; +} + +} + diff --git a/contrib/mira/expt.cfg b/contrib/mira/expt.cfg new file mode 100644 index 000000000..416eb1d3f --- /dev/null +++ b/contrib/mira/expt.cfg @@ -0,0 +1,34 @@ +[general] +name=expt1 +moses-home=/path/to/moses/dir/ +cwd=/path/to/current/dir/ +working-dir=${cwd}/experiment +data-dir=${cwd}/data +decoder-settings=-mp -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 + +[train] +trainer=${moses-home}/mira/mira +input-file=${data-dir}/tune.input +reference-files=${data-dir}/tune.reference +moses-ini-file=${data-dir}/moses.ini +hours=48 +jobs=8 +slots=8 +epochs=10 +learner=mira +mixing-frequency=5 +weight-dump-frequency=1 +extra-args=--sentence-level-bleu 1 --hope-n 1 --fear-n 1 + +[devtest] +moses=${moses-home}/moses-cmd/src/moses +bleu=${moses-home}/scripts/generic/multi-bleu.perl +input-file=${data-dir}/devtest.input +reference-file=${data-dir}/devtest.reference +moses-ini-file=${data-dir}/moses.test.ini +hours=12 +extra-args= +skip-dev=1 +skip-devtest=0 +skip-submit=0 + diff --git a/contrib/mira/mira.xcodeproj/project.pbxproj b/contrib/mira/mira.xcodeproj/project.pbxproj new file mode 100644 index 000000000..67662f4e0 --- /dev/null +++ b/contrib/mira/mira.xcodeproj/project.pbxproj @@ -0,0 +1,401 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 1E141A311243527800123194 /* Perceptron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E141A2F1243527800123194 /* Perceptron.cpp */; }; + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */; }; + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC6391242602F0059001A /* Decoder.cpp */; }; + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC63B1242602F0059001A /* Main.cpp */; }; + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D1124268310059001A /* libmoses-chart.a */; }; + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6CB124268270059001A /* libmoses.a */; }; + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D9124268440059001A /* libOnDiskPt.a */; }; + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* mira.1 */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 1E9DC6CA124268270059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = moses; + }; + 1E9DC6D0124268310059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = "moses-chart"; + }; + 1E9DC6D8124268440059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = OnDiskPt; + }; + 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses */; + remoteInfo = moses; + }; + 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses-chart */; + remoteInfo = "moses-chart"; + }; + 1EF4E85012440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* OnDiskPt */; + remoteInfo = OnDiskPt; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 1E141A2F1243527800123194 /* Perceptron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Perceptron.cpp; sourceTree = ""; }; + 
1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MiraOptimiser.cpp; sourceTree = ""; }; + 1E9DC6391242602F0059001A /* Decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Decoder.cpp; sourceTree = ""; }; + 1E9DC63A1242602F0059001A /* Decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Decoder.h; sourceTree = ""; }; + 1E9DC63B1242602F0059001A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Main.cpp; sourceTree = ""; }; + 1E9DC63E124260370059001A /* Optimiser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Optimiser.h; sourceTree = ""; }; + 1E9DC6C6124268270059001A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = "moses-chart.xcodeproj"; path = "../moses-chart/moses-chart.xcodeproj"; sourceTree = SOURCE_ROOT; }; + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC76712426FC60059001A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Main.h; sourceTree = ""; }; + 8DD76F6C0486A84900D96B5E /* mira */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mira; sourceTree = BUILT_PRODUCTS_DIR; }; + C6859E8B029090EE04C91782 /* mira.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mira.1; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */, + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */, + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* mira */ = { + isa = PBXGroup; + children = ( + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */, + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */, + 1E9DC6C6124268270059001A /* moses.xcodeproj */, + 08FB7795FE84155DC02AAC07 /* Source */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = mira; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */, + 1E141A2F1243527800123194 /* Perceptron.cpp */, + 1E9DC63E124260370059001A /* Optimiser.h */, + 1E9DC6391242602F0059001A /* Decoder.cpp */, + 1E9DC63A1242602F0059001A /* Decoder.h */, + 1E9DC63B1242602F0059001A /* Main.cpp */, + 1E9DC76712426FC60059001A /* Main.h */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* mira */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6C7124268270059001A /* Products */ = { + 
isa = PBXGroup; + children = ( + 1E9DC6CB124268270059001A /* libmoses.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6CD124268310059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D1124268310059001A /* libmoses-chart.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6D5124268440059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D9124268440059001A /* libOnDiskPt.a */, + ); + name = Products; + sourceTree = ""; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + C6859E8B029090EE04C91782 /* mira.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* mira */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + 1EF4E84D12440612006233A0 /* PBXTargetDependency */, + 1EF4E84F12440612006233A0 /* PBXTargetDependency */, + 1EF4E85112440612006233A0 /* PBXTargetDependency */, + ); + name = mira; + productInstallPath = "$(HOME)/bin"; + productName = mira; + productReference = 8DD76F6C0486A84900D96B5E /* mira */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* mira */; + projectDirPath = ""; + projectReferences = ( + { + ProductGroup = 1E9DC6CD124268310059001A /* Products */; + ProjectRef = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + }, + { + ProductGroup = 1E9DC6C7124268270059001A /* Products */; + ProjectRef = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + }, + { + ProductGroup = 1E9DC6D5124268440059001A /* Products */; + ProjectRef = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + }, + ); + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* mira */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXReferenceProxy section */ + 1E9DC6CB124268270059001A /* libmoses.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libmoses.a; + remoteRef = 1E9DC6CA124268270059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 1E9DC6D1124268310059001A /* libmoses-chart.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = "libmoses-chart.a"; + remoteRef = 1E9DC6D0124268310059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 1E9DC6D9124268440059001A /* libOnDiskPt.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libOnDiskPt.a; + remoteRef = 1E9DC6D8124268440059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; +/* End PBXReferenceProxy section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */, + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */, + 1E141A311243527800123194 /* Perceptron.cpp in Sources */, + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources 
*/, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 1EF4E84D12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = moses; + targetProxy = 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E84F12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = "moses-chart"; + targetProxy = 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E85112440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = OnDiskPt; + targetProxy = 1EF4E85012440612006233A0 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} 
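(A short worked example of the ${...} interpolation used in expt.cfg above, based on
the substitution loop at the top of training-expt.perl below: a reference is resolved
by trying the key as written, then the current section, then [general]. So
working-dir=${cwd}/experiment picks up cwd from [general], and
input-file=${data-dir}/tune.input resolves data-dir, itself defined as ${cwd}/data,
through the same [general] fallback.)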
diff --git a/contrib/mira/training-expt.perl b/contrib/mira/training-expt.perl new file mode 100755 index 000000000..097ee7220 --- /dev/null +++ b/contrib/mira/training-expt.perl @@ -0,0 +1,994 @@ +#!/usr/bin/env perl + +use strict; +#eddie specific +use lib "/exports/informatics/inf_iccs_smt/perl/lib/perl5/site_perl"; +use Config::Simple; +use File::Basename; +use Getopt::Long "GetOptions"; + +my ($config_file,$execute,$continue); +die("training-expt.perl -config config-file [-exec]") + unless &GetOptions('config=s' => \$config_file, + 'cont=s' => \$continue, + 'exec' => \$execute); + +my $config = new Config::Simple($config_file) || + die "Error: unable to read config file \"$config_file\""; + +#substitution +foreach my $key ($config->param) { + my $value = $config->param($key); + while ($value =~ m/(.*?)\$\{(.*?)\}(.*)/) { + my $sub = $config->param($2); + if (! $sub) { + #try in this scope + my $scope = (split /\./, $key)[0]; + $sub = $config->param($scope . "." . $2); + } + if (! $sub) { + #then general + $sub = $config->param("general." . $2); + } + $value = $1 . $sub . $3; + } + print STDERR "$key => "; print STDERR $value; print STDERR "\n"; + $config->param($key,$value); +} + +#check if we're using sge +my $have_sge = 0; +if (`which qsub 2>/dev/null`) { + print "Using sge for job control.\n"; + $have_sge = 1; +} else { + print "No sge detected.\n"; +} + +#required global parameters +my $name = ¶m_required("general.name"); + +#optional globals +my $queue = ¶m("general.queue", "inf_iccs_smt"); +my $mpienv = ¶m("general.mpienv", "openmpi_smp8_mark2"); +my $vmem = ¶m("general.vmem", "6"); + +#wait for bleu files to appear in experiment folder if running as part of experiment.perl +my $wait_for_bleu = ¶m("general.wait-for-bleu", 0); + +#job control +my $jackknife = ¶m("general.jackknife", 0); +my $working_dir = ¶m("general.working-dir"); +my $general_decoder_settings = ¶m("general.decoder-settings", ""); +system("mkdir -p $working_dir") == 0 or die "Error: unable to create directory \"$working_dir\""; +my $train_script = "$name-train"; +my $job_name = "$name-t"; +my $hours = ¶m("train.hours",48); + +# check if we are tuning a meta feature +my $tuneMetaFeature = ¶m("general.tune-meta-feature", 0); +print STDERR "Tuning meta feature.. 
\n" if $tuneMetaFeature; + +# Check if a weight file with start weights was given +my $start_weight_file = ¶m("start.weightfile"); + +#required training parameters +my $singleRef = 1; +my ($moses_ini_file, $input_file, $reference_files); +my (@moses_ini_files_folds, @input_files_folds, @reference_files_folds); +if ($jackknife) { + my $array_ref = ¶m_required("train.moses-ini-files-folds"); + @moses_ini_files_folds= @$array_ref; + foreach my $ini (@moses_ini_files_folds) { + &check_exists ("moses ini file", $ini); + } + $array_ref = ¶m_required("train.input-files-folds"); + @input_files_folds = @$array_ref; + foreach my $in (@input_files_folds) { + &check_exists ("train input file", $in); + } + $array_ref = ¶m_required("train.reference-files-folds"); + @reference_files_folds = @$array_ref; + foreach my $ref (@reference_files_folds) { + &check_exists ("train reference file", $ref); + } +} +else { + $moses_ini_file = ¶m_required("train.moses-ini-file"); + &check_exists ("moses ini file", $moses_ini_file); + $input_file = ¶m_required("train.input-file"); + &check_exists ("train input file", $input_file); + $reference_files = ¶m_required("train.reference-files"); + if (&check_exists_noThrow ("ref files", $reference_files) != 0) { + for my $ref (glob $reference_files . "*") { + &check_exists ("ref files", $ref); + } + $singleRef = 0; + } +} + +# check if we want to continue an interrupted experiment +my $continue_expt = ¶m("general.continue-expt", 0); # number of experiment to continue +my $continue_epoch = 0; +if ($continue_expt > 0) { + die "ERROR: Continuing an experiment is not defined for tuning meta features.. \n\n" if ($tuneMetaFeature); + $continue_epoch = ¶m_required("general.continue-epoch", 0); + my $continue_weights = ¶m_required("general.continue-weights", 0); + open(CONT_WEIGHTS, $continue_weights); + my ($wp_weight, @pm_weights, $lm_weight, $lm2_weight, $d_weight, @lr_weights, %extra_weights); + my $num_core_weights = 0; + my $num_extra_weights = 0; + while() { + chomp; + my ($name,$value) = split; + next if ($name =~ /^!Unknown/); + next if ($name =~ /^BleuScore/); + next if ($name eq "DEFAULT_"); + if ($name eq "WordPenalty") { + $wp_weight = $value; + $num_core_weights += 1; + } elsif ($name =~ /^PhraseModel/) { + push @pm_weights,$value; + $num_core_weights += 1; + } elsif ($name =~ /^LM\:2/) { + $lm2_weight = $value; + $num_core_weights += 1; + } + elsif ($name =~ /^LM/) { + $lm_weight = $value; + $num_core_weights += 1; + } elsif ($name eq "Distortion") { + $d_weight = $value; + $num_core_weights += 1; + } elsif ($name =~ /^LexicalReordering/) { + push @lr_weights,$value; + $num_core_weights += 1; + } else { + $extra_weights{$name} = $value; + $num_extra_weights += 1; + } + } + close CONT_WEIGHTS; + print STDERR "num core weights to continue: $num_core_weights\n"; + print STDERR "num extra weights to continue: $num_extra_weights\n"; + + # write sparse weights to separate file + my $sparse_weights = $working_dir."/sparseWeights.expt".$continue_expt; + if ($num_extra_weights > 0) { + open(SPARSE, ">$sparse_weights"); + foreach my $name (sort keys %extra_weights) { + next if ($name eq "core"); + next if ($name eq "DEFAULT_"); + + # write only non-zero feature weights to file + if ($extra_weights{$name}) { + print SPARSE "$name $extra_weights{$name}\n"; + } + } + close SPARSE; + } + + # write new ini files with these weights + if ($jackknife) { + my @new_ini_files; + for (my $i=0; $i<=$#moses_ini_files_folds; $i++) { + my $ini_continue = 
$moses_ini_files_folds[$i].".continue".$continue_expt; + open(OLDINI, $moses_ini_files_folds[$i]); + open(NEWINI, ">$ini_continue"); + while() { + if (/weight-l/) { + print NEWINI "[weight-l]\n"; + print NEWINI $lm_weight; + print NEWINI "\n"; + + if (defined $lm2_weight) { + readline(OLDINI); + print NEWINI $lm2_weight; + print NEWINI "\n"; + } + + readline(OLDINI); + } elsif (/weight-t/) { + print NEWINI "[weight-t]\n"; + foreach my $pm_weight (@pm_weights) { + print NEWINI $pm_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-d/) { + print NEWINI "[weight-d]\n"; + print NEWINI $d_weight; + print NEWINI "\n"; + readline(OLDINI); + foreach my $lr_weight (@lr_weights) { + print NEWINI $lr_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-w/) { + print NEWINI "[weight-w]\n"; + print NEWINI $wp_weight; + print NEWINI "\n"; + readline(OLDINI); + } else { + print NEWINI; + } + } + if ($num_extra_weights > 0) { + print NEWINI "\n[weight-file]\n$sparse_weights\n"; + } + close OLDINI; + close NEWINI; + + print STDERR "new ini file: ".$ini_continue."\n"; + $moses_ini_files_folds[$i] = $ini_continue; + } + } + else { + my $ini_continue = $moses_ini_file.".continue".$continue_expt; + open(OLDINI, $moses_ini_file); + open(NEWINI, ">$ini_continue"); + while() { + if (/weight-l/) { + print NEWINI "[weight-l]\n"; + print NEWINI $lm_weight; + print NEWINI "\n"; + + if (defined $lm2_weight) { + readline(OLDINI); + print NEWINI $lm2_weight; + print NEWINI "\n"; + } + + readline(OLDINI); + } elsif (/weight-t/) { + print NEWINI "[weight-t]\n"; + foreach my $pm_weight (@pm_weights) { + print NEWINI $pm_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-d/) { + print NEWINI "[weight-d]\n"; + print NEWINI $d_weight; + print NEWINI "\n"; + readline(OLDINI); + foreach my $lr_weight (@lr_weights) { + print NEWINI $lr_weight; + print NEWINI "\n"; + readline(OLDINI); + } + } elsif (/weight-w/) { + print NEWINI "[weight-w]\n"; + print NEWINI $wp_weight; + print NEWINI "\n"; + readline(OLDINI); + } + else { + print NEWINI; + } + } + if ($num_extra_weights > 0) { + print NEWINI "\n[weight-file]\n$sparse_weights\n"; + } + close OLDINI; + close NEWINI; + print STDERR "new ini file: ".$ini_continue."\n"; + $moses_ini_file = $ini_continue; + } +} + +my $trainer_exe = ¶m_required("train.trainer"); +&check_exists("Training executable", $trainer_exe); +#my $weights_file = ¶m_required("train.weights-file"); +#&check_exists("weights file ", $weights_file); + +#optional training parameters +my $epochs = ¶m("train.epochs"); +my $learner = ¶m("train.learner", "mira"); +my $batch = ¶m("train.batch", 1); # don't print this param twice (when printing training file) +my $extra_args = ¶m("train.extra-args"); +my $by_node = ¶m("train.by-node"); +my $slots = ¶m("train.slots",10); +my $jobs = ¶m("train.jobs",10); +my $mixing_frequency = ¶m("train.mixing-frequency", 1); # don't print this param twice +my $weight_dump_frequency = ¶m("train.weight-dump-frequency", 1); # don't print this param twice +my $burn_in = ¶m("train.burn-in"); +my $burn_in_input_file = ¶m("train.burn-in-input-file"); +my $burn_in_reference_files = ¶m("train.burn-in-reference-files"); +my $skipTrain = ¶m("train.skip"); +my $train_decoder_settings = ¶m("train.decoder-settings", ""); +if (!$train_decoder_settings) { + $train_decoder_settings = $general_decoder_settings; +} + +#devtest configuration +my ($devtest_input_file, $devtest_reference_files,$devtest_ini_file,$bleu_script,$use_moses); +my $test_exe = 
¶m("devtest.moses"); +&check_exists("test executable", $test_exe); +$bleu_script = ¶m_required("devtest.bleu"); +&check_exists("multi-bleu script", $bleu_script); +$devtest_input_file = ¶m_required("devtest.input-file"); +&check_exists ("devtest input file", $devtest_input_file); +$devtest_reference_files = ¶m_required("devtest.reference-file"); +if (&check_exists_noThrow ("devtest ref file", $devtest_reference_files) != 0) { + for my $ref (glob $devtest_reference_files . "*") { + &check_exists ("devtest ref file", $ref); + } +} +$devtest_ini_file = ¶m_required("devtest.moses-ini-file"); +&check_exists ("devtest ini file", $devtest_ini_file); + + +my $weight_file_stem = "$name-weights"; +my $extra_memory_devtest = ¶m("devtest.extra-memory",0); +my $skip_devtest = ¶m("devtest.skip-devtest",0); +my $skip_dev = ¶m("devtest.skip-dev",0); +my $skip_submit_test = ¶m("devtest.skip-submit",0); +my $devtest_decoder_settings = ¶m("devtest.decoder-settings", ""); +if (!$devtest_decoder_settings) { + $devtest_decoder_settings = $general_decoder_settings; +} + + +# check that number of jobs, dump frequency and number of input sentences are compatible +# shard size = number of input sentences / number of jobs, ensure shard size >= dump frequency +if ($jackknife) { + # TODO.. +} +else { + my $result = `wc -l $input_file`; + my @result = split(/\s/, $result); + my $inputSize = $result[0]; + my $shardSize = $inputSize / $jobs; + if ($mixing_frequency) { + if ($shardSize < $mixing_frequency) { + $mixing_frequency = int($shardSize); + if ($mixing_frequency == 0) { + $mixing_frequency = 1; + } + + print STDERR "Warning: mixing frequency must not be larger than shard size, setting mixing frequency to $mixing_frequency\n"; + } + } + + if ($weight_dump_frequency != 0) { + if ($shardSize < $weight_dump_frequency) { + $weight_dump_frequency = int($shardSize); + if ($weight_dump_frequency == 0) { + $weight_dump_frequency = 1; + } + + print STDERR "Warning: weight dump frequency must not be larger than shard size, setting weight dump frequency to $weight_dump_frequency\n"; + } + } + + if ($mixing_frequency != 0) { + if ($mixing_frequency > ($shardSize/$batch)) { + $mixing_frequency = int($shardSize/$batch); + if ($mixing_frequency == 0) { + $mixing_frequency = 1; + } + + print STDERR "Warning: mixing frequency must not be larger than (shard size/batch size), setting mixing frequency to $mixing_frequency\n"; + } + } + + if ($weight_dump_frequency != 0) { + if ($weight_dump_frequency > ($shardSize/$batch)) { + $weight_dump_frequency = int($shardSize/$batch); + if ($weight_dump_frequency == 0) { + $weight_dump_frequency = 1; + } + + print STDERR "Warning: weight dump frequency must not be larger than (shard size/batch size), setting weight dump frequency to $weight_dump_frequency\n"; + } + } +} + +#file names +my $train_script_file = $working_dir . "/" . $train_script . ".sh"; +my $train_out = $train_script . ".out"; +my $train_err = $train_script . ".err"; +my $train_job_id = 0; + +my @refs; +if (ref($reference_files) eq 'ARRAY') { + @refs = @$reference_files; +} elsif ($singleRef){ + $refs[0] = $reference_files; +} else { + @refs = glob $reference_files . 
"*" +} +my $arr_refs = \@refs; + +if (!$skipTrain) { + #write the script + open TRAIN, ">$train_script_file" or die "Unable to open \"$train_script_file\" for writing"; + + &header(*TRAIN,$job_name,$working_dir,$slots,$jobs,$hours,$vmem,$train_out,$train_err); + if ($jobs == 1) { + print TRAIN "$trainer_exe "; + } + else { + if ($by_node) { + print TRAIN "mpirun -np $jobs --bynode $trainer_exe \\\n"; + } + else { + print TRAIN "mpirun -np \$NSLOTS $trainer_exe \\\n"; + } + } + + if ($jackknife) { + foreach my $ini (@moses_ini_files_folds) { + print TRAIN "--configs-folds $ini "; + } + print TRAIN "\\\n"; + foreach my $in (@input_files_folds) { + print TRAIN "--input-files-folds $in "; + } + print TRAIN "\\\n"; + for my $ref (@reference_files_folds) { + print TRAIN "--reference-files-folds $ref "; + } + print TRAIN "\\\n"; + } + else { + print TRAIN "-f $moses_ini_file \\\n"; + print TRAIN "-i $input_file \\\n"; + for my $ref (@refs) { + print TRAIN "-r $ref "; + } + print TRAIN "\\\n"; + } + if ($continue_epoch > 0) { + print TRAIN "--continue-epoch $continue_epoch \\\n"; + } + if ($burn_in) { + print TRAIN "--burn-in 1 \\\n"; + print TRAIN "--burn-in-input-file $burn_in_input_file \\\n"; + my @burnin_refs; + if (ref($burn_in_reference_files) eq 'ARRAY') { + @burnin_refs = @$burn_in_reference_files; + } else { + @burnin_refs = glob $burn_in_reference_files . "*"; # TODO: + } + for my $burnin_ref (@burnin_refs) { + &check_exists("burn-in ref file", $burnin_ref); + print TRAIN "--burn-in-reference-files $burnin_ref "; + } + print TRAIN "\\\n"; + } +#if ($weights_file) { +# print TRAIN "-w $weights_file \\\n"; +#} + if (defined $start_weight_file) { + print TRAIN "--start-weights $start_weight_file \\\n"; + } + print TRAIN "-l $learner \\\n"; + print TRAIN "--weight-dump-stem $weight_file_stem \\\n"; + print TRAIN "--mixing-frequency $mixing_frequency \\\n" if ($extra_args !~ /--mixing-frequency /); + print TRAIN "--weight-dump-frequency $weight_dump_frequency \\\n" if ($extra_args !~ /--weight-dump-frequency /); + print TRAIN "--epochs $epochs \\\n" if $epochs; + print TRAIN "--batch-size $batch \\\n" if ($extra_args !~ /--batch-size / && $extra_args !~ /-b /); + print TRAIN $extra_args." \\\n"; + print TRAIN "--decoder-settings \"$train_decoder_settings\" \\\n"; + if ($jobs == 1) { + print TRAIN "echo \"mira finished.\"\n"; + } + else { + print TRAIN "echo \"mpirun finished.\"\n"; + } + close TRAIN; + + if (! $execute) { + print STDERR "Written train file: $train_script_file\n"; + exit 0; + } + + #submit the training job + if ($have_sge) { + $train_job_id = &submit_job_sge($train_script_file); + + } else { + $train_job_id = &submit_job_no_sge($train_script_file, $train_out,$train_err); + } + + die "Failed to submit training job" unless $train_job_id; +} + +#wait for the next weights file to appear, or the training job to end +my $train_iteration = -1; +if ($continue_epoch > 0) { + $train_iteration += ($continue_epoch*$weight_dump_frequency); + print STDERR "Start from training iteration ".$train_iteration." instead of -1.\n"; +} +else { + print STDERR "Start from training iteration ".$train_iteration."\n"; +} + +while(1) { + my($epoch, $epoch_slice); + $train_iteration += 1; # starts at 0 + my $new_weight_file = "$working_dir/$weight_file_stem" . 
"_"; + if ($weight_dump_frequency == 0) { + print STDERR "No weights, no testing..\n"; + exit(0); + } + + #my $epoch = 1 + int $train_iteration / $weight_dump_frequency; + $epoch = int $train_iteration / $weight_dump_frequency; + $epoch_slice = $train_iteration % $weight_dump_frequency; + if ($weight_dump_frequency == 1) { + if ($train_iteration < 10) { + $new_weight_file .= "0".$train_iteration; + } + else { + $new_weight_file .= $train_iteration; + } + } else { + if ($epoch < 10) { + $new_weight_file .= "0".$epoch."_".$epoch_slice; + } + else { + $new_weight_file .= $epoch."_".$epoch_slice; + } + } + + print STDERR "Current epoch: ".$epoch."\n"; + my $expected_num_files = $epoch*$weight_dump_frequency; + if ($wait_for_bleu) { + print STDERR "Expected number of BLEU files: $expected_num_files \n"; + } + if (-e "$working_dir/stopping") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because stopping criterion was reached.\n"; + exit 0; + } + elsif (-e "$working_dir/finished") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because maximum number of epochs was reached.\n"; + exit 0; + } + else { + print STDERR "Waiting for $new_weight_file\n"; + if (!$skipTrain) { + while ((! -e $new_weight_file) && &check_running($train_job_id)) { + sleep 10; + } + } + if (! -e $new_weight_file ) { + if (-e "$working_dir/stopping") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because stopping criterion was reached.\n"; + exit 0; + } + elsif (-e "$working_dir/finished") { + wait_for_bleu($expected_num_files, $working_dir) if ($wait_for_bleu); + print STDERR "Training finished at " . scalar(localtime()) . " because maximum number of epochs was reached.\n"; + exit 0; + } + else { + # training finished with error + print STDERR "Error: training was aborted at " . scalar(localtime()) . "\n"; + exit 1; + } + } + } + + #new weight file written. create test script and submit + my $suffix = ""; + print STDERR "weight file exists? ".(-e $new_weight_file)."\n"; + if (!$skip_devtest) { + createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test); + + my $regularized_weight_file = $new_weight_file."_reg"; + if (-e $regularized_weight_file) { + print STDERR "Submitting test script for regularized weights. 
\n"; + $epoch_slice .= "_reg"; + createTestScriptAndSubmit($epoch, $epoch_slice, $regularized_weight_file, $suffix, "devtest", $devtest_ini_file, $devtest_input_file, $devtest_reference_files, $skip_submit_test); + } + } + if (!$skip_dev) { + createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $reference_files, $skip_submit_test); + } +} + +sub wait_for_bleu() { + my $expected_num_files = $_[0]; + my $working_dir = $_[1]; + print STDERR "Waiting for $expected_num_files bleu files..\n"; + print STDERR "Path: $working_dir/*.bleu \n"; + my @bleu_files = glob("$working_dir/*.bleu"); + while (scalar(@bleu_files) < $expected_num_files) { + sleep 30; + @bleu_files = glob("$working_dir/*.bleu"); + print STDERR "currently have ".(scalar(@bleu_files))."\n"; + } + print STDERR "$expected_num_files BLEU files completed, continue.\n"; +} + +sub createTestScriptAndSubmit { + my $epoch = $_[0]; + my $epoch_slice = $_[1]; + my $new_weight_file = $_[2]; + my $suffix = $_[3]; + my $testtype = $_[4]; + my $old_ini_file = $_[5]; + my $input_file = $_[6]; + my $reference_file = $_[7]; + my $skip_submit = $_[8]; + + #file names + my $output_file; + my $output_error_file; + my $bleu_file; + my $file_id = ""; + if ($weight_dump_frequency == 1) { + if ($train_iteration < 10) { + $output_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".bleu"; + $file_id = "0".$train_iteration.$suffix; + } + else { + $output_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".bleu"; + $file_id = $train_iteration.$suffix; + } + } + else { + if ($epoch < 10) { + $output_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu"; + $file_id = "0".$epoch."_".$epoch_slice.$suffix; + } + else { + $output_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".out"; + $output_error_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".err"; + $bleu_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu"; + $file_id = $epoch."_".$epoch_slice.$suffix; + } + } + + my $job_name = $name."_".$testtype."_".$file_id; + + my $test_script = "$name-$testtype"; + my $test_script_file = "$working_dir/$test_script.$file_id.sh"; + my $test_out = "$test_script.$file_id.out"; + my $test_err = "$test_script.$file_id.err"; + + if (! (open TEST, ">$test_script_file" )) { + die "Unable to create test script $test_script_file\n"; + } + + my $hours = ¶m("test.hours",12); + my $extra_args = ¶m("test.extra-args"); + + # Splice the weights into the moses ini file. + my ($default_weight,$wordpenalty_weight,@phrasemodel_weights,$lm_weight,$lm2_weight,$distortion_weight,@lexicalreordering_weights); + + if (! 
(open WEIGHTS, "$new_weight_file")) {
+    die "Unable to open weights file $new_weight_file\n";
+  }
+
+  my $readCoreWeights = 0;
+  my $readExtraWeights = 0;
+  my %extra_weights;
+  my $abs_weights = 0;
+  my $metaFeature_wt_weight = 0;
+  my $metaFeature_pp_weight = 0;
+  while(<WEIGHTS>) {
+    chomp;
+    my ($name,$value) = split;
+    next if ($name =~ /^!Unknown/);
+    next if ($name =~ /^BleuScore/);
+    if ($name eq "DEFAULT_") {
+      $default_weight = $value;
+    } else {
+      if ($name eq "WordPenalty") {
+        $wordpenalty_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^PhraseModel/) {
+        push @phrasemodel_weights,$value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LM\:2/) {
+        $lm2_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LM/) {
+        $lm_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name eq "Distortion") {
+        $distortion_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^LexicalReordering/) {
+        push @lexicalreordering_weights,$value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^MetaFeature_wt/) {
+        $metaFeature_wt_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } elsif ($name =~ /^MetaFeature_pp/) {
+        $metaFeature_pp_weight = $value;
+        $abs_weights += abs($value);
+        $readCoreWeights += 1;
+      } else {
+        $extra_weights{$name} = $value;
+        $readExtraWeights += 1;
+      }
+    }
+  }
+  close WEIGHTS;
+
+  print STDERR "Number of core weights read: ".$readCoreWeights."\n";
+  print STDERR "Number of extra weights read: ".$readExtraWeights."\n";
+
+  # Create new ini file (changing format: expt1-devtest.00_2.ini instead of expt1-devtest.3.ini)
+  # my $new_ini_file = $working_dir."/".$test_script.".".$train_iteration.$suffix.".ini";
+  my $new_ini_file = "$working_dir/$test_script.$file_id.ini";
+  if (! (open NEWINI, ">$new_ini_file" )) {
+    die "Unable to create ini file $new_ini_file\n";
+  }
+  if (! (open OLDINI, "$old_ini_file" )) {
+    die "Unable to read ini file $old_ini_file\n";
+  }
+
+  # write normalized weights to ini file
+  while(<OLDINI>) {
+    if (/weight-l/) {
+      print NEWINI "[weight-l]\n";
+      print NEWINI ($lm_weight/$abs_weights);
+      print NEWINI "\n";
+
+      if (defined $lm2_weight) {
+        readline(OLDINI);
+        print NEWINI ($lm2_weight/$abs_weights);
+        print NEWINI "\n";
+      }
+
+      readline(OLDINI);
+    } elsif (/weight-t/) {
+      print NEWINI "[weight-t]\n";
+      foreach my $phrasemodel_weight (@phrasemodel_weights) {
+        print NEWINI ($phrasemodel_weight/$abs_weights);
+        print NEWINI "\n";
+        readline(OLDINI);
+      }
+    } elsif (/weight-d/) {
+      print NEWINI "[weight-d]\n";
+      print NEWINI ($distortion_weight/$abs_weights);
+      print NEWINI "\n";
+      readline(OLDINI);
+      foreach my $lexicalreordering_weight (@lexicalreordering_weights) {
+        print NEWINI ($lexicalreordering_weight/$abs_weights);
+        print NEWINI "\n";
+        readline(OLDINI);
+      }
+    } elsif (/weight-wt/) {
+      print NEWINI "[weight-wt]\n";
+      print NEWINI $metaFeature_wt_weight/$abs_weights;
+      print NEWINI "\n";
+      readline(OLDINI);
+    } elsif (/weight-pp/) {
+      print NEWINI "[weight-pp]\n";
+      print NEWINI $metaFeature_pp_weight/$abs_weights;
+      print NEWINI "\n";
+      readline(OLDINI);
+    } elsif (/weight-w/) {
+      print NEWINI "[weight-w]\n";
+      print NEWINI ($wordpenalty_weight/$abs_weights);
+      print NEWINI "\n";
+      readline(OLDINI);
+    } else {
+      print NEWINI;
+    }
+  }
+  close OLDINI;
+
+  my $writtenExtraWeights = 0;
+
+  # if there are any non-core weights, write them to a weights file (normalized)
+  my $extra_weight_file = undef;
+  if (%extra_weights && !$tuneMetaFeature) {
+    $extra_weight_file = "$new_weight_file.sparse.scaled";
+    if (! (open EXTRAWEIGHT,">$extra_weight_file")) {
+      print "Warning: unable to create extra weights file $extra_weight_file";
+      next;
+    }
+#    my $core_weight = 1;
+#    if ($have_core) {
+#      $default_weight = $extra_weights{"DEFAULT_"};
+#      $core_weight = $extra_weights{"core"};
+#    }
+    foreach my $name (sort keys %extra_weights) {
+      next if ($name eq "core");
+      next if ($name eq "DEFAULT_");
+      my $value = $extra_weights{$name}/$abs_weights;
+
+      # write only non-zero feature weights to file
+      if ($value) {
+#        $value /= $core_weight;
+        print EXTRAWEIGHT "$name $value\n";
+        $writtenExtraWeights += 1;
+      }
+    }
+  }
+
+  # add specification of sparse weight file to ini
+  if (!$tuneMetaFeature) {
+    print NEWINI "\n[weight-file] \n";
+    print NEWINI "$extra_weight_file \n";
+    close NEWINI;
+  }
+
+  print TEST "#!/bin/sh\n";
+  print TEST "#\$ -N $job_name\n";
+  print TEST "#\$ -wd $working_dir\n";
+  print TEST "#\$ -l h_rt=$hours:00:00\n";
+  print TEST "#\$ -o $test_out\n";
+  print TEST "#\$ -e $test_err\n";
+  print TEST "\n";
+  if ($have_sge) {
+    # some eddie specific stuff
+    print TEST ". /etc/profile.d/modules.sh\n";
+    print TEST "module load openmpi/ethernet/gcc/latest\n";
+    print TEST "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
+  }
+  print TEST "$test_exe $devtest_decoder_settings -i $input_file -f $new_ini_file ";
+# now written to ini file
+#  if ($extra_weight_file) {
+#    print TEST "-weight-file $extra_weight_file ";
+#  }
+  print TEST $extra_args;
+  print TEST " 1> $output_file 2> $output_error_file\n";
+  print TEST "echo \"Decoding of ".$testtype." set finished.\"\n";
+  print TEST "$bleu_script $reference_file < $output_file > $bleu_file\n";
+  print TEST "echo \"Computed BLEU score of ".$testtype." set.\"\n";
+  close TEST;
+
+  #launch testing
+  if(!$skip_submit) {
+    if ($have_sge) {
+      if ($extra_memory_devtest) {
+        print STDERR "Extra memory for test job: $extra_memory_devtest \n";
+        &submit_job_sge_extra_memory($test_script_file,$extra_memory_devtest);
+      } else {
+        &submit_job_sge($test_script_file);
+      }
+    } else {
+      &submit_job_no_sge($test_script_file, $test_out,$test_err);
+    }
+  }
+}
+
+sub param {
+  my ($key,$default) = @_;
+  my $value = $config->param($key);
+  $value = $default if !$value;
+  # Empty arguments get interpreted as arrays
+  $value = 0 if (ref($value) eq 'ARRAY' && scalar(@$value) == 0);
+  return $value;
+}
+
+sub param_required {
+  my ($key) = @_;
+  my $value = &param($key);
+  die "Error: required parameter \"$key\" was missing" if (!defined($value));
+  #$value = join $value if (ref($value) eq 'ARRAY');
+  return $value;
+}
+
+sub header {
+  my ($OUT,$name,$working_dir,$slots,$jobs,$hours,$vmem,$out,$err) = @_;
+  print $OUT "#!/bin/sh\n";
+  if ($have_sge) {
+    print $OUT "#\$ -N $name\n";
+    print $OUT "#\$ -wd $working_dir\n";
+    if ($jobs != 1) {
+      print $OUT "#\$ -pe $mpienv $slots\n";
+    }
+    print $OUT "#\$ -l h_rt=$hours:00:00\n";
+    print $OUT "#\$ -l h_vmem=$vmem" . "G" . "\n";
+    print $OUT "#\$ -o $out\n";
+    print $OUT "#\$ -e $err\n";
+  } else {
+    print $OUT "\nNSLOTS=$jobs\n";
+  }
+  print $OUT "\n";
+  if ($have_sge) {
+    # some eddie specific stuff
+    print $OUT ". /etc/profile.d/modules.sh\n";
+    print $OUT "module load openmpi/ethernet/gcc/latest\n";
+    print $OUT "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
+  }
+}
+
+sub check_exists {
+  my ($name,$filename) = @_;
+  die "Error: unable to read $name: \"$filename\"" if ! -r $filename;
+}
+
+sub check_exists_noThrow {
+  my ($name,$filename) = @_;
+  return 1 if ! -r $filename;
+  return 0;
+}
+
+#
+# Used to submit train/test jobs. Returns the job id, or 0 on failure.
+#
+
+sub submit_job_sge {
+  my($script_file) = @_;
+  my $qsub_result = `qsub -P $queue $script_file`;
+  print STDERR "SUBMIT CMD: qsub -P $queue $script_file\n";
+  if ($qsub_result !~ /Your job (\d+)/) {
+    print STDERR "Failed to qsub job: $qsub_result\n";
+    return 0;
+  }
+  my $job_name = basename($script_file);
+  print STDERR "Submitted job: $job_name id: $1 " .
+    scalar(localtime()) . "\n";
+  return $1;
+}
+
+sub submit_job_sge_extra_memory {
+  my($script_file,$extra_memory) = @_;
+  my $qsub_result = `qsub -pe $extra_memory -P $queue $script_file`;
+  print STDERR "SUBMIT CMD: qsub -pe $extra_memory -P $queue $script_file \n";
+  if ($qsub_result !~ /Your job (\d+)/) {
+    print STDERR "Failed to qsub job: $qsub_result\n";
+    return 0;
+  }
+  my $job_name = basename($script_file);
+  print STDERR "Submitted job: $job_name id: $1 " .
+    scalar(localtime()) . "\n";
+  return $1;
+}
+
+#
+# As above, but without sge. Returns the pid.
+#
+
+sub submit_job_no_sge {
+  my($script_file,$out,$err) = @_;
+  my $pid = undef;
+  if ($pid = fork) {
+    my $job_name = basename($script_file);
+    print STDERR "Launched : $job_name pid: $pid " . scalar(localtime()) . "\n";
+    return $pid;
+  } elsif (defined $pid) {
+    print STDERR "Executing script $script_file, writing to $out and $err.\n";
+    `cd $working_dir; sh $script_file 1>$out 2> $err`;
+    exit;
+  } else {
+    # Fork failed
+    return 0;
+  }
+}
+
+sub check_running {
+  my ($job_id) = @_;
+  if ($have_sge) {
+    return `qstat | grep $job_id`;
+  } else {
+    return `ps -e | grep $job_id | grep -v defunct`;
+  }
+}
+
+