18 files changed, 2154 insertions, 111 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 467855d9b..f6ada2aa8 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -266,12 +266,12 @@ float smoothedSentenceBleu
 float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
 {
   // Sum sent and background
-  std::vector<float> stats;
   UTIL_THROW_IF(sent.size()!=bg.size(), util::Exception, "Error");
   UTIL_THROW_IF(sent.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
+  std::vector<float> stats(sent.size());
 
   for(size_t i=0; i<sent.size(); i++)
-    stats.push_back(sent[i]+bg[i]);
+    stats[i] = sent[i]+bg[i];
 
   // Calculate BLEU
   float logbleu = 0.0;
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 8be567574..affa37fbf 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -13,7 +13,7 @@
 namespace MosesTuning
 {
 
-const int kBleuNgramOrder = 4;
+const size_t kBleuNgramOrder = 4;
 
 class NgramCounts;
 class Reference;
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5a12be70a..a0c6a6ebc 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -14,6 +14,8 @@
 
 #include <boost/functional/hash.hpp>
 
+#include "util/murmur_hash.hh"
+
 #include "Util.h"
 
 using namespace std;
@@ -59,6 +61,11 @@ void SparseVector::set(const string& name, FeatureStatsType value)
   m_fvector[id] = value;
 }
 
+void SparseVector::set(size_t id, FeatureStatsType value) {
+  assert(m_id_to_name.size() > id);
+  m_fvector[id] = value;
+}
+
 void SparseVector::write(ostream& out, const string& sep) const
 {
   for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
@@ -91,6 +98,16 @@ void SparseVector::load(const string& file)
   }
 }
 
+SparseVector& SparseVector::operator+=(const SparseVector& rhs)
+{
+
+  for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+       i != rhs.m_fvector.end(); ++i) {
+    m_fvector[i->first] =  get(i->first) + (i->second);
+  }
+  return *this;
+}
+
 SparseVector& SparseVector::operator-=(const SparseVector& rhs)
 {
 
@@ -162,12 +179,18 @@ bool operator==(SparseVector const& item1, SparseVector const& item2)
   return item1.m_fvector==item2.m_fvector;
 }
 
+
 std::size_t hash_value(SparseVector const& item)
 {
-  boost::hash<SparseVector::fvector_t> hasher;
-  return hasher(item.m_fvector);
+  size_t seed = 0;
+  for (SparseVector::fvector_t::const_iterator i = item.m_fvector.begin(); i != item.m_fvector.end(); ++i) {
+    seed = util::MurmurHashNative(&(i->first), sizeof(i->first), seed);
+    seed = util::MurmurHashNative(&(i->second), sizeof(i->second), seed);
+  }
+  return seed;
 }
 
+
 FeatureStats::FeatureStats()
   : m_available_size(kAvailableSize), m_entries(0),
     m_array(new FeatureStatsType[m_available_size]) {}
@@ -181,8 +204,7 @@ FeatureStats::FeatureStats(const size_t size)
 
 FeatureStats::~FeatureStats()
 {
-  delete [] m_array;
-  m_array = NULL;
+   delete [] m_array;
 }
 
 void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index a882e7358..2ccbac50c 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -14,6 +14,11 @@
 #include <map>
 #include <string>
 #include <vector>
+
+#include <boost/unordered_map.hpp>
+
+#include <util/string_piece.hh>
+
 #include "Types.h"
 
 namespace MosesTuning
@@ -31,6 +36,7 @@ public:
   FeatureStatsType get(const std::string& name) const;
   FeatureStatsType get(std::size_t id) const;
   void set(const std::string& name, FeatureStatsType value);
+  void set(size_t id, FeatureStatsType value);
   void clear();
   void load(const std::string& file);
   std::size_t size() const {
@@ -40,6 +46,7 @@ public:
   void write(std::ostream& out, const std::string& sep = " ") const;
 
   SparseVector& operator-=(const SparseVector& rhs);
+  SparseVector& operator+=(const SparseVector& rhs);
   FeatureStatsType inner_product(const SparseVector& rhs) const;
 
   // Added by cherryc
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
new file mode 100644
index 000000000..c88b58e4c
--- /dev/null
+++ b/mert/ForestRescore.cpp
@@ -0,0 +1,422 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <cmath>
+#include <limits>
+#include <list>
+
+#include <boost/unordered_set.hpp>
+
+#include "util/file_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "BleuScorer.h"
+#include "ForestRescore.h"
+
+using namespace std;
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+  out << "[";
+  for (size_t i = 0; i < wordVec.size(); ++i) {
+    out << wordVec[i]->first;
+    if (i+1< wordVec.size()) out << " ";
+  }
+  out << "]";
+  return out;
+}
+
+
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+  for (size_t i = 0; i < files.size(); ++i) {
+    util::FilePiece fh(files[i].c_str());
+    size_t sentenceId = 0;
+    while(true) {
+      StringPiece line;
+      try {
+        line = fh.ReadLine();
+      } catch (util::EndOfFileException &e) {
+        break;
+      }
+     AddLine(sentenceId, line, vocab);
+     ++sentenceId;
+    }
+  }
+
+}
+
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+  //cerr << line << endl;
+  NgramCounter ngramCounts;
+  list<WordVec> openNgrams;
+  size_t length = 0;
+  //tokenize & count
+  for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
+    const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
+    ++length;
+    openNgrams.push_front(WordVec());
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(nextTok);
+      ++ngramCounts[*k]; 
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
+  }
+
+  //merge into overall ngram map
+  for (NgramCounter::const_iterator ni = ngramCounts.begin();
+    ni != ngramCounts.end(); ++ni) {
+    size_t count = ni->second;
+    //cerr << *ni << " " << count <<  endl;
+    if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
+    NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
+    if (totalsIter == ngramCounts_[sentenceId].end()) {
+      ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
+    } else {
+      ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
+      ngramCounts_[sentenceId][ni->first].second += count; //no clip
+    }
+  }
+  //length
+  if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
+  //TODO - length strategy - this is MIN
+  if (!lengths_[sentenceId]) {
+    lengths_[sentenceId] = length;
+  } else {
+    lengths_[sentenceId] = min(length,lengths_[sentenceId]);
+  }
+  //cerr << endl;
+
+}
+  
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const  {
+  const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
+  NgramMap::const_iterator ngi = ngramCounts.find(ngram);
+  if (ngi == ngramCounts.end()) return 0;
+  return clip ? ngi->second.first : ngi->second.second;
+}
+
+VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
+
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+  for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+    //cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) <<  endl;
+    size_t order = ngi->first.size();
+    size_t count = ngi->second;
+    bleuStats[(order-1)*2 + 1] += count;
+    bleuStats[(order-1) * 2] += min(count, references_.NgramMatches(sentenceId_,ngi->first,false));
+  }
+}
+
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+  size_t targetLength = 0;
+  for (size_t i = 0; i < edge.Words().size(); ++i) {
+    const Vocab::Entry* word = edge.Words()[i];
+    if (word) ++targetLength;
+  }
+  for (size_t i = 0; i < edge.Children().size(); ++i) {
+    const VertexState& state = vertexStates_[edge.Children()[i]];
+    targetLength += state.targetLength;
+  }
+  return targetLength;
+}
+
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+  NgramCounter ngramCounts;
+  size_t childId = 0;
+  size_t wordId = 0;
+  size_t contextId = 0; //position within left or right context
+  const VertexState* vertexState = NULL;
+  bool inLeftContext = false;
+  bool inRightContext = false;
+  list<WordVec> openNgrams;
+  const Vocab::Entry* currentWord = NULL;
+  while (wordId < edge.Words().size()) { 
+    currentWord = edge.Words()[wordId];
+    if (currentWord != NULL) {
+      ++wordId;
+    } else {
+      if (!inLeftContext && !inRightContext) {
+        //entering a vertex
+        assert(!vertexState);
+        vertexState = &(vertexStates_[edge.Children()[childId]]);
+        ++childId;
+        if (vertexState->leftContext.size()) {
+          inLeftContext = true;
+          contextId = 0;
+          currentWord = vertexState->leftContext[contextId];
+        } else {
+          //empty context
+          vertexState = NULL;
+          ++wordId;
+          continue;
+        }
+      } else {
+        //already in a vertex
+        ++contextId;
+        if (inLeftContext && contextId < vertexState->leftContext.size()) {
+          //still in left context
+          currentWord = vertexState->leftContext[contextId];
+        } else if (inLeftContext) {
+          //at end of left context
+          if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
+            //full size context, jump to right state
+            openNgrams.clear();
+            inLeftContext = false;
+            inRightContext = true;
+            contextId = 0;
+            currentWord = vertexState->rightContext[contextId];
+          } else {
+            //short context, just ignore right context
+            inLeftContext = false;
+            vertexState = NULL;
+            ++wordId;
+            continue;
+          }
+        } else {
+          //in right context
+          if (contextId < vertexState->rightContext.size()) {
+            currentWord = vertexState->rightContext[contextId];
+          } else {
+            //leaving vertex
+            inRightContext = false;
+            vertexState = NULL;
+            ++wordId;
+            continue;
+          }
+        }
+      }
+    }
+    assert(currentWord);
+    if (graph_.IsBoundary(currentWord)) continue;
+    openNgrams.push_front(WordVec());
+    openNgrams.front().reserve(kBleuNgramOrder);
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(currentWord);
+      //Only insert ngrams that cross boundaries
+      if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
+  }
+  
+  //Collect matches
+  //This edge
+  //cerr << "edge ngrams" << endl;
+  UpdateMatches(ngramCounts, bleuStats);
+
+  //Child vertexes
+  for (size_t i = 0; i < edge.Children().size(); ++i) {
+    //cerr << "vertex ngrams " << edge.Children()[i] << endl;
+    for (size_t j = 0; j < bleuStats.size(); ++j) {
+      bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
+    }
+  }
+  
+
+  FeatureStatsType sourceLength = head.SourceCovered();
+  size_t referenceLength = references_.Length(sentenceId_);
+  FeatureStatsType effectiveReferenceLength = 
+    sourceLength / totalSourceLength_ * referenceLength;
+
+  bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
+  //backgroundBleu_[backgroundBleu_.size()-1] = 
+  //  backgroundRefLength_ * sourceLength / totalSourceLength_;
+  FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
+
+  return bleu;
+}
+
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+  //TODO: Maybe more efficient to absorb into the Score() method
+  VertexState& vertexState = vertexStates_[vertexId];
+  //cerr << "Updating state for " << vertexId << endl;
+  
+  //leftContext
+  int wi = 0;
+  const VertexState* childState = NULL;
+  int contexti = 0; //index within child context
+  int childi = 0;
+  while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
+    if ((size_t)wi >= winnerEdge.Words().size()) break;
+    const Vocab::Entry* word = winnerEdge.Words()[wi];
+    if (word != NULL) {
+      vertexState.leftContext.push_back(word);
+      ++wi;
+    } else {
+      if (childState == NULL) {
+        //start of child state
+        childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
+        contexti = 0;
+      } 
+      if ((size_t)contexti < childState->leftContext.size()) {
+        vertexState.leftContext.push_back(childState->leftContext[contexti++]); 
+      } else {
+        //end of child context
+        childState = NULL;
+        ++wi;
+      }
+    }
+  }
+
+  //rightContext
+  wi = winnerEdge.Words().size() - 1;
+  childState = NULL;
+  childi = winnerEdge.Children().size() - 1;
+  while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
+    if (wi < 0) break;
+    const Vocab::Entry* word = winnerEdge.Words()[wi];
+    if (word != NULL) {
+      vertexState.rightContext.push_back(word);
+      --wi;
+    } else {
+      if (childState == NULL) {
+        //start (ie rhs) of child state
+        childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
+        contexti = childState->rightContext.size()-1;
+      }
+      if (contexti >= 0) {
+        vertexState.rightContext.push_back(childState->rightContext[contexti--]);
+      } else {
+        //end (ie lhs) of child context
+        childState = NULL;
+        --wi;
+      }
+    }
+  }
+  reverse(vertexState.rightContext.begin(), vertexState.rightContext.end());
+
+  //length + counts
+  vertexState.targetLength = GetTargetLength(winnerEdge);
+  vertexState.bleuStats = bleuStats;
+}
+
+
+typedef pair<const Edge*,FeatureStatsType> BackPointer;
+
+
+/**
+ * Recurse through back pointers
+ **/
+static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
+     HgHypothesis* bestHypo) {
+  //cerr << "Expanding " << vertexId << endl;
+  //UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
+  if (!bps[vertexId].first) return;
+  const Edge* prevEdge = bps[vertexId].first;
+  bestHypo->featureVector += *(prevEdge->Features().get());
+  size_t childId = 0;
+  for (size_t i = 0; i < prevEdge->Words().size(); ++i) {
+    if (prevEdge->Words()[i] != NULL) {
+      bestHypo->text.push_back(prevEdge->Words()[i]);
+    } else {
+      size_t childVertexId = prevEdge->Children()[childId++];
+      HgHypothesis childHypo;
+      GetBestHypothesis(childVertexId,graph,bps,&childHypo);
+      bestHypo->text.insert(bestHypo->text.end(), childHypo.text.begin(), childHypo.text.end());
+      bestHypo->featureVector += childHypo.featureVector;
+    }
+  }
+}
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu,  HgHypothesis* bestHypo) 
+{
+  BackPointer init(NULL,kMinScore);
+  vector<BackPointer> backPointers(graph.VertexSize(),init);
+  HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
+  vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
+  for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
+    //cerr << "vertex id " << vi <<  endl;
+    FeatureStatsType winnerScore = kMinScore;
+    const Vertex& vertex = graph.GetVertex(vi);
+    const vector<const Edge*>& incoming = vertex.GetIncoming();
+    if (!incoming.size()) {
+      //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
+      //If no incoming edges, vertex is a dead end
+      backPointers[vi].first = NULL;
+      backPointers[vi].second = kMinScore/2;  
+    } else {
+      //cerr << "\nVertex: " << vi << endl;
+      for (size_t ei = 0; ei < incoming.size(); ++ei) {
+        //cerr << "edge id " << ei << endl;
+        FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+        for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+          size_t childId = incoming[ei]->Children()[i];
+          UTIL_THROW_IF(backPointers[childId].second == kMinScore,
+            HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+          incomingScore += backPointers[childId].second;
+        }
+        vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
+       // cerr << "Score: " << incomingScore << " Bleu: ";
+       // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+        FeatureStatsType totalScore = incomingScore;
+        if (bleuWeight) { 
+          FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
+          UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
+          totalScore += bleuWeight * bleuScore;
+        //  cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+          //cerr << "is " << incomingScore << " bs " << bleuScore << endl;
+        }
+        if (totalScore >= winnerScore) {
+          //We only store the feature score (not the bleu score) with the vertex,
+          //since the bleu score is always cumulative, ie from counts for the whole span.
+          winnerScore = totalScore;
+          backPointers[vi].first = incoming[ei];
+          backPointers[vi].second = incomingScore;
+          winnerStats = bleuStats;
+        }
+      }
+      //update with winner
+      //if (bleuWeight) {
+      //TODO: Not sure if we need this when computing max-model solution
+      bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+
+    }
+  }
+
+  //expand back pointers
+  GetBestHypothesis(graph.VertexSize()-1, graph, backPointers, bestHypo);
+
+  //bleu stats and fv
+
+  //Need the actual (clipped) stats
+  //TODO: This repeats code in bleu scorer - factor out
+  bestHypo->bleuStats.resize(kBleuNgramOrder*2+1);
+  NgramCounter counts;
+  list<WordVec> openNgrams;
+  for (size_t i = 0; i < bestHypo->text.size(); ++i) {
+    const Vocab::Entry* entry = bestHypo->text[i];
+    if (graph.IsBoundary(entry)) continue;
+    openNgrams.push_front(WordVec());
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(entry);
+      ++counts[*k];
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
+  }
+  for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+    size_t order = ngi->first.size();
+    size_t count = ngi->second;
+    bestHypo->bleuStats[(order-1)*2 + 1] += count;
+    bestHypo->bleuStats[(order-1) * 2] += min(count, references.NgramMatches(sentenceId,ngi->first,true));
+  }
+  bestHypo->bleuStats[kBleuNgramOrder*2] = references.Length(sentenceId);
+}
+
+
+};
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
new file mode 100644
index 000000000..900275b74
--- /dev/null
+++ b/mert/ForestRescore.h
@@ -0,0 +1,120 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#ifndef MERT_FOREST_RESCORE_H
+#define MERT_FOREST_RESCORE_H
+
+#include <valarray>
+#include <vector>
+
+#include <boost/unordered_set.hpp>
+
+#include "BleuScorer.h"
+#include "Hypergraph.h"
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec);
+
+struct NgramHash : public std::unary_function<const WordVec&, std::size_t> {
+  std::size_t operator()(const WordVec& ngram) const {
+    return util::MurmurHashNative(&(ngram[0]), ngram.size() * sizeof(WordVec::value_type));
+  }
+};
+
+struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&, bool> {
+  bool operator()(const WordVec& first, const WordVec& second) const {
+    if (first.size() != second.size()) return false;
+    return memcmp(&(first[0]), &(second[0]), first.size() * sizeof(WordVec::value_type)) == 0;
+  }
+};
+
+typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter;
+
+
+class ReferenceSet {
+
+
+public:
+  
+  void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
+
+  void Load(const std::vector<std::string>& files, Vocab& vocab);
+
+  size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const;
+
+  size_t Length(size_t sentenceId) const {return lengths_[sentenceId];}
+
+private:
+  //ngrams to (clipped,unclipped) counts
+  typedef boost::unordered_map<WordVec, std::pair<std::size_t,std::size_t>, NgramHash,NgramEquals> NgramMap;
+  std::vector<NgramMap> ngramCounts_;
+  std::vector<size_t> lengths_;
+
+};
+
+struct VertexState {
+  VertexState();
+
+  std::vector<FeatureStatsType> bleuStats;
+  WordVec leftContext;
+  WordVec rightContext;
+  size_t targetLength;
+};
+
+/**
+  * Used to score an rule (ie edge) when we are applying it.
+**/
+class HgBleuScorer {
+  public:
+    HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+    references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
+      backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) {
+      vertexStates_.resize(graph.VertexSize());
+      totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered();
+    }
+
+    FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+
+    void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats);
+
+
+  private:
+    const ReferenceSet& references_;
+    std::vector<VertexState> vertexStates_;
+    size_t sentenceId_;
+    size_t totalSourceLength_;
+    const Graph& graph_;
+    std::vector<FeatureStatsType> backgroundBleu_;
+    FeatureStatsType backgroundRefLength_;
+
+    void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+    size_t GetTargetLength(const Edge& edge) const;
+};
+
+struct HgHypothesis {
+  SparseVector featureVector;
+  WordVec text;
+  std::vector<FeatureStatsType> bleuStats;
+};
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo);
+
+};
+
+#endif
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
new file mode 100644
index 000000000..86975d3a5
--- /dev/null
+++ b/mert/ForestRescoreTest.cpp
@@ -0,0 +1,246 @@
+#include <iostream>
+
+#include "ForestRescore.h"
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+  for (size_t i = 0; i < 9; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,5);
+
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+  e0->AddFeature(f1, 2.0);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1, 1.0);
+  e1->AddFeature(f2, 3.0);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2, 2.5);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1, -1);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+  e3->AddFeature(f2, 0.5);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+
+  ReferenceSet references;
+  references.AddLine(0, "a b c k e f o", vocab);
+  HgHypothesis modelHypo;
+  vector<FeatureStatsType> bg(kBleuNgramOrder*2+1);
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+  Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+  BOOST_CHECK_CLOSE(2.0,modelHypo.featureVector.get(f1), 0.0001);
+  BOOST_CHECK_CLOSE(6.0,modelHypo.featureVector.get(f2), 0.0001);
+
+  BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[4]->first, modelHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[5]->first, modelHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[6]->first, modelHypo.text[5]->first);
+  BOOST_CHECK_EQUAL(words[7]->first, modelHypo.text[6]->first);
+  BOOST_CHECK_EQUAL(words[8]->first, modelHypo.text[7]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[8]->first);
+}
+
+
+
+BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+  for (size_t i = 0; i < 13; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,8);
+
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1,1);
+  e1->AddFeature(f2,1);
+  Edge* e5 = graph.NewEdge();
+  e5->AddWord(NULL);
+  e5->AddChild(0);
+  e5->AddWord(words[9]);
+  e5->AddWord(words[10]);
+  e5->AddFeature(f1,2);
+  e5->AddFeature(f2,-2);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+  v1->AddEdge(e5);
+  v1->SetSourceCovered(1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2,3);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+  v2->SetSourceCovered(3);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1,1);
+  Edge* e6 = graph.NewEdge();
+  e6->AddWord(NULL);
+  e6->AddChild(2);
+  e6->AddWord(words[9]);
+  e6->AddWord(words[12]);
+  e6->AddFeature(f2,1);
+  Edge* e7 = graph.NewEdge();
+  e7->AddWord(NULL);
+  e7->AddChild(1);
+  e7->AddWord(words[11]);
+  e7->AddWord(words[12]);
+  e7->AddFeature(f1,2);
+  e7->AddFeature(f2,3);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+  v3->AddEdge(e6);
+  v3->AddEdge(e7);
+  v3->SetSourceCovered(5);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+  v4->SetSourceCovered(6);
+
+  /*Paths     || foo || bar || s(2,1)
+   ab cd hk   || 1   ||  5  || 7
+   hi cd hk   || 2   ||  2  || 6
+   ab jk      || 3   ||  4  || 10
+   hi jk      || 4   ||  1  || 9
+   ab cd efg  || 2   ||  4  || 8
+   hi cd efg  || 3   ||  1  || 7
+  */
+
+  ReferenceSet references;
+  references.AddLine(0, "a b c d h k", vocab);
+  HgHypothesis modelHypo;
+  vector<FeatureStatsType> bg(kBleuNgramOrder*2+1, 0.1);
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+  Viterbi(graph, weights, 0, references, 0, bg, &modelHypo);
+  BOOST_CHECK_CLOSE(3.0,modelHypo.featureVector.get(f1), 0.0001);
+  BOOST_CHECK_CLOSE(4.0,modelHypo.featureVector.get(f2), 0.0001);
+
+  BOOST_CHECK_EQUAL(6, modelHypo.text.size());
+
+  //expect ab jk
+  BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[11]->first, modelHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[12]->first, modelHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[5]->first);
+
+
+  HgHypothesis hopeHypo;
+  Viterbi(graph, weights, 1, references, 0, bg, &hopeHypo);
+  //expect abcdhk
+  BOOST_CHECK_EQUAL(8, hopeHypo.text.size());
+
+  BOOST_CHECK_EQUAL(words[0]->first, hopeHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, hopeHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, hopeHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[4]->first, hopeHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[5]->first, hopeHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[9]->first, hopeHypo.text[5]->first);
+  BOOST_CHECK_EQUAL(words[12]->first, hopeHypo.text[6]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, hopeHypo.text[7]->first);
+
+  BOOST_CHECK_EQUAL(kBleuNgramOrder*2+1, hopeHypo.bleuStats.size());
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[0]);
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[1]);
+  BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[2]);
+  BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[3]);
+  BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[4]);
+  BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[5]);
+  BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[6]);
+  BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[7]);
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
+}
+
+
+
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
new file mode 100644
index 000000000..5f5c59966
--- /dev/null
+++ b/mert/HopeFearDecoder.cpp
@@ -0,0 +1,328 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <cmath>
+#include <iterator>
+
+#include <boost/filesystem.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+
+#include "BleuScorer.h"
+#include "HopeFearDecoder.h"
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+namespace MosesTuning {
+
+static const ValType BLEU_RATIO = 5;
+
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+  vector<ValType> stats(kBleuNgramOrder*2+1,0);
+  for(reset(); !finished(); next()) {
+    vector<ValType> sent;
+    MaxModel(wv,&sent);
+    for(size_t i=0; i<sent.size(); i++) {
+      stats[i]+=sent[i];
+    }
+  }
+  return unsmoothedBleu(stats);
+}
+
+NbestHopeFearDecoder::NbestHopeFearDecoder(
+      const vector<string>& featureFiles,
+      const vector<string>&  scoreFiles,
+      bool streaming,
+      bool  no_shuffle,
+      bool safe_hope
+      ) : safe_hope_(safe_hope) {
+  if (streaming) {
+    train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
+  } else {
+    train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
+  }
+}
+
+
+void NbestHopeFearDecoder::next() {
+  train_->next();
+}
+
+bool NbestHopeFearDecoder::finished() {
+  return train_->finished();
+}
+
+void NbestHopeFearDecoder::reset() {
+  train_->reset();
+}
+
+void NbestHopeFearDecoder::HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              ) {
+
+  
+  // Hope / fear decode
+  ValType hope_scale = 1.0;
+  size_t hope_index=0, fear_index=0, model_index=0;
+  ValType hope_score=0, fear_score=0, model_score=0;
+  for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+    ValType hope_bleu, hope_model;
+    for(size_t i=0; i< train_->cur_size(); i++) {
+      const MiraFeatureVector& vec=train_->featuresAt(i);
+      ValType score = wv.score(vec);
+      ValType bleu = sentenceLevelBackgroundBleu(train_->scoresAt(i),backgroundBleu);
+      // Hope
+      if(i==0 || (hope_scale*score + bleu) > hope_score) {
+        hope_score = hope_scale*score + bleu;
+        hope_index = i;
+        hope_bleu = bleu;
+        hope_model = score;
+      }
+      // Fear
+      if(i==0 || (score - bleu) > fear_score) {
+        fear_score = score - bleu;
+        fear_index = i;
+      }
+      // Model
+      if(i==0 || score > model_score) {
+        model_score = score;
+        model_index = i;
+      }
+    }
+    // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+    // where model score is having far more influence than BLEU
+    hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+    if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+      hope_scale = abs(hope_bleu) / abs(hope_model);
+    else break;
+  }
+  hopeFear->modelFeatures = train_->featuresAt(model_index);
+  hopeFear->hopeFeatures = train_->featuresAt(hope_index);
+  hopeFear->fearFeatures = train_->featuresAt(fear_index);
+
+  hopeFear->hopeStats = train_->scoresAt(hope_index);
+  hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+  const vector<float>& fear_stats = train_->scoresAt(fear_index);
+  hopeFear->fearBleu = sentenceLevelBackgroundBleu(fear_stats, backgroundBleu);
+
+  hopeFear->modelStats = train_->scoresAt(model_index);
+  hopeFear->hopeFearEqual = (hope_index == fear_index);
+}
+
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+  // Find max model
+  size_t max_index=0;
+  ValType max_score=0;
+  for(size_t i=0; i<train_->cur_size(); i++) {
+    MiraFeatureVector vec(train_->featuresAt(i));
+    ValType score = wv.score(vec);
+    if(i==0 || score > max_score) {
+      max_index = i;
+      max_score = score;
+    }
+  }
+  *stats = train_->scoresAt(max_index);
+}
+
+
+
+HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
+                          (
+                            const string& hypergraphDir,
+                            const vector<string>& referenceFiles,
+                            size_t num_dense,
+                            bool streaming,
+                            bool no_shuffle,
+                            bool safe_hope,
+                            size_t hg_pruning,
+                            const MiraWeightVector& wv
+                          ) :
+                          num_dense_(num_dense) {
+
+  UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
+  UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
+  UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
+  references_.Load(referenceFiles, vocab_);
+
+  SparseVector weights;
+  wv.ToSparse(&weights);
+
+  static const string kWeights = "weights";
+  fs::directory_iterator dend;
+  size_t fileCount = 0;
+  cerr << "Reading hypergraphs" << endl;
+  for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
+    if (di->path().filename() == kWeights) continue;
+    Graph graph(vocab_);
+    size_t id = boost::lexical_cast<size_t>(di->path().stem().string());
+    util::FilePiece file(di->path().string().c_str());
+    ReadGraph(file,graph);
+
+    //cerr << "ref length " << references_.Length(id) << endl;
+    size_t edgeCount = hg_pruning * references_.Length(id);
+    boost::shared_ptr<Graph> prunedGraph;
+    prunedGraph.reset(new Graph(vocab_));
+    graph.Prune(prunedGraph.get(), weights, edgeCount);
+    graphs_[id] = prunedGraph;
+    //cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize()  << endl;
+    ++fileCount;
+    if (fileCount % 10 == 0) cerr << ".";
+    if (fileCount % 400 ==  0) cerr << " [count=" << fileCount << "]\n";
+  }
+  cerr << endl << "Done" << endl;
+
+
+}
+
+void HypergraphHopeFearDecoder::reset() {
+  graphIter_ = graphs_.begin();
+}
+
+void HypergraphHopeFearDecoder::next() {
+  ++graphIter_;
+}
+
+bool HypergraphHopeFearDecoder::finished() {
+  return graphIter_ == graphs_.end();
+}
+
+void HypergraphHopeFearDecoder::HopeFear(
+            const vector<ValType>& backgroundBleu,
+            const MiraWeightVector& wv,
+            HopeFearData* hopeFear
+            ) {
+  size_t sentenceId = graphIter_->first;
+  SparseVector weights;
+  wv.ToSparse(&weights);
+  const Graph& graph = *(graphIter_->second);
+
+  ValType hope_scale = 1.0;
+  HgHypothesis hopeHypo, fearHypo, modelHypo;
+  for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+
+    //hope decode
+    Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);
+
+    //fear decode
+    Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);
+
+    //Model decode
+    Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
+
+
+  // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+    // where model score is having far more influence than BLEU
+  //  hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+  //  if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+  //    hope_scale = abs(hope_bleu) / abs(hope_model);
+  //  else break;
+    //TODO: Don't currently get model and bleu so commented this out for now.
+    break;
+  }
+  //modelFeatures, hopeFeatures and fearFeatures
+  hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
+  hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
+  hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);
+
+  //Need to know which are to be mapped to dense features!
+
+  //Only C++11
+  //hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
+  vector<ValType> fearStats(kBleuNgramOrder*2+1);
+  hopeFear->hopeStats.reserve(kBleuNgramOrder*2+1);
+  hopeFear->modelStats.reserve(kBleuNgramOrder*2+1);
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
+    hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);
+
+    fearStats[i] = fearHypo.bleuStats[i];
+  }
+  /*
+  cerr << "hope" << endl;;
+  for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
+    cerr << hopeHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr << hopeHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  cerr << "fear";
+  for (size_t i = 0; i < fearHypo.text.size(); ++i) {
+    cerr << fearHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr  << fearHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  cerr << "model";
+  for (size_t i = 0; i < modelHypo.text.size(); ++i) {
+    cerr << modelHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr << modelHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  */
+  hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+  hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);
+
+  //If fv and bleu stats are equal, then assume equal
+  hopeFear->hopeFearEqual = true; //(hopeFear->hopeBleu - hopeFear->fearBleu) >= 1e-8;
+  if (hopeFear->hopeFearEqual) {
+    for (size_t i = 0; i < fearStats.size(); ++i) {
+      if (fearStats[i] != hopeFear->hopeStats[i]) {
+         hopeFear->hopeFearEqual = false;
+         break;
+      }
+    }
+  }
+  hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
+}
+
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+  assert(!finished());
+  HgHypothesis bestHypo;
+  size_t sentenceId = graphIter_->first;
+  SparseVector weights;
+  wv.ToSparse(&weights);
+  vector<ValType> bg(kBleuNgramOrder*2+1);
+  Viterbi(*(graphIter_->second), weights, 0, references_, sentenceId, bg, &bestHypo);
+  stats->resize(bestHypo.bleuStats.size());
+  /*
+  for (size_t i = 0; i < bestHypo.text.size(); ++i) {
+    cerr << bestHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  */
+  for (size_t i = 0; i < bestHypo.bleuStats.size(); ++i) {
+    (*stats)[i] = bestHypo.bleuStats[i];
+  }
+}
+
+
+
+};
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
new file mode 100644
index 000000000..e8323fc76
--- /dev/null
+++ b/mert/HopeFearDecoder.h
@@ -0,0 +1,151 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#ifndef MERT_HOPEFEARDECODER_H
+#define MERT_HOPEFEARDECODER_H
+
+#include <vector>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "ForestRescore.h"
+#include "Hypergraph.h"
+#include "HypPackEnumerator.h"
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+//
+// Used by batch mira to get the hope, fear and model hypothesis. This wraps
+// the n-best list and lattice/hypergraph implementations
+//
+
+namespace MosesTuning {
+
+/** To be filled in by the decoder */
+struct HopeFearData {
+  MiraFeatureVector modelFeatures;
+  MiraFeatureVector hopeFeatures;
+  MiraFeatureVector fearFeatures;
+  
+  std::vector<float> modelStats;
+  std::vector<float> hopeStats;
+
+  ValType hopeBleu;
+  ValType fearBleu;
+
+  bool hopeFearEqual;
+};
+
+//Abstract base class
+class HopeFearDecoder {
+public:
+  //iterator methods
+  virtual void reset() = 0;
+  virtual void next() = 0;
+  virtual bool finished() = 0;
+
+  /**
+    * Calculate hope, fear and model hypotheses
+    **/
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              ) = 0;
+
+  /** Max score decoding */
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
+    = 0;
+
+  /** Calculate bleu on training set */
+  ValType Evaluate(const AvgWeightVector& wv);
+
+};
+
+
+/** Gets hope-fear from nbest lists */
+class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+  NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
+                         const std::vector<std::string>&  scoreFiles,
+                         bool streaming,
+                         bool  no_shuffle,
+                         bool safe_hope
+                         );
+
+  virtual void reset();
+  virtual void next();
+  virtual bool finished();
+
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              );
+
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+  boost::scoped_ptr<HypPackEnumerator> train_;
+  bool safe_hope_;
+
+};
+
+
+
+/** Gets hope-fear from hypergraphs */
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+  HypergraphHopeFearDecoder(
+                            const std::string& hypergraphDir,
+                            const std::vector<std::string>& referenceFiles,
+                            size_t num_dense,
+                            bool streaming,
+                            bool no_shuffle,
+                            bool safe_hope,
+                            size_t hg_pruning,
+                            const MiraWeightVector& wv
+                            );
+
+  virtual void reset();
+  virtual void next();
+  virtual bool finished();
+
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              );
+
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+  size_t num_dense_;
+  //maps sentence Id to graph ptr
+  typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
+  GraphColl graphs_;
+  GraphColl::const_iterator graphIter_;
+  ReferenceSet references_;
+  Vocab vocab_;
+};
+
+};
+
+#endif
+
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
new file mode 100644
index 000000000..d80a5c12c
--- /dev/null
+++ b/mert/Hypergraph.cpp
@@ -0,0 +1,286 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#include <iostream>
+#include <set>
+
+#include <boost/lexical_cast.hpp>
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "Hypergraph.h"
+
+using namespace std;
+static const string kBOS = "<s>";
+static const string kEOS = "</s>";
+
+namespace MosesTuning {
+
+StringPiece NextLine(util::FilePiece& from) {
+  StringPiece line;
+  while ((line = from.ReadLine()).starts_with("#"));
+  return line;
+}
+
+Vocab::Vocab() :  eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+}
+
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+#if BOOST_VERSION >= 104200
+  Map::const_iterator i= map_.find(str, Hash(), Equals());
+#else
+  std::string copied_str(str.data(), str.size());
+  Map::const_iterator i = map_.find(copied_str.c_str());
+#endif
+  if (i != map_.end()) return *i;
+  char *copied = static_cast<char*>(piece_backing_.Allocate(str.size() + 1));
+  memcpy(copied, str.data(), str.size());
+  copied[str.size()] = 0;
+  return *map_.insert(Entry(copied, map_.size())).first;
+}
+
+double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+
+/**
+ * Reads an incoming edge. Returns edge and source words covered.
+**/
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+  Edge* edge = graph.NewEdge();
+  StringPiece line = NextLine(from);
+  util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
+  //Target
+  for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+    StringPiece got = *i;
+    if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) {
+      // non-terminal
+      char *end_ptr;
+      unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10);
+      UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, HypergraphException, "Bad non-terminal" << got);
+      UTIL_THROW_IF(child >= graph.VertexSize(), HypergraphException, "Reference to vertex " << child << " but we only have " << graph.VertexSize() << " vertices.  Is the file in bottom-up format?");
+      edge->AddWord(NULL);
+      edge->AddChild(child);
+    } else {
+      const Vocab::Entry &found = graph.MutableVocab().FindOrAdd(got);
+      edge->AddWord(&found);
+    }
+  }
+ 
+  //Features
+  ++pipes;
+  for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+    StringPiece fv = *i;
+    if (!fv.size()) break;
+    size_t equals = fv.find_last_of("=");
+    UTIL_THROW_IF(equals == fv.npos, HypergraphException, "Failed to parse feature '" << fv << "'");
+    StringPiece name = fv.substr(0,equals);
+    StringPiece value = fv.substr(equals+1);
+    int processed;
+    float score = converter.StringToFloat(value.data(), value.length(), &processed);
+    UTIL_THROW_IF(isnan(score), HypergraphException, "Failed to parse weight '" << value << "'");
+    edge->AddFeature(name,score);
+  }
+  //Covered words
+  ++pipes;
+  size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
+  return pair<Edge*,size_t>(edge,sourceCovered); 
+}
+
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+
+  Graph& newGraph = *pNewGraph;
+  //TODO: Optimise case where no pruning required
+
+  //For debug
+  
+  /*
+  map<const Edge*, string> edgeIds;
+  for (size_t i = 0; i < edges_.Size(); ++i) {
+    stringstream str;
+    for (size_t j = 0; j < edges_[i].Words().size(); ++j) {
+      if (edges_[i].Words()[j]) str << edges_[i].Words()[j]->first;
+    }
+    edgeIds[&(edges_[i])] = str.str();
+  }*/
+
+  //end For debug
+
+  map<const Edge*, FeatureStatsType> edgeBackwardScores;
+  map<const Edge*, size_t> edgeHeads;
+  vector<FeatureStatsType> vertexBackwardScores(vertices_.Size(), kMinScore);
+  vector<vector<const Edge*> > outgoing(vertices_.Size());
+
+  //Compute backward scores
+  for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
+    //cerr << "Vertex " << vi << endl;
+    const Vertex& vertex = vertices_[vi];
+    const vector<const Edge*>& incoming = vertex.GetIncoming();
+    if (!incoming.size()) {
+      vertexBackwardScores[vi] = 0;
+    } else {
+      for (size_t ei = 0; ei < incoming.size(); ++ei) {
+        //cerr << "Edge " << edgeIds[incoming[ei]] << endl;
+        edgeHeads[incoming[ei]]= vi;
+        FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+        for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+       //   cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
+          size_t childId = incoming[ei]->Children()[i];
+          UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
+            HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+          outgoing[childId].push_back(incoming[ei]);
+          incomingScore += vertexBackwardScores[childId];
+        }
+        edgeBackwardScores[incoming[ei]]= incomingScore;
+     //   cerr << "Backward score: " << incomingScore << endl;
+        if (incomingScore > vertexBackwardScores[vi]) vertexBackwardScores[vi] = incomingScore;
+      }
+    }
+  }
+
+  //Compute forward scores
+  vector<FeatureStatsType> vertexForwardScores(vertices_.Size(), kMinScore);
+  map<const Edge*, FeatureStatsType> edgeForwardScores;
+  for (size_t i = 1; i <= vertices_.Size(); ++i) {
+    size_t vi = vertices_.Size() - i;
+    //cerr << "Vertex " << vi << endl;
+    if (!outgoing[vi].size()) {
+      vertexForwardScores[vi] = 0;
+    } else {
+      for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
+        //cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
+        FeatureStatsType outgoingScore = 0; 
+        //sum scores of siblings
+        for (size_t i = 0; i < outgoing[vi][ei]->Children().size(); ++i) {
+          size_t siblingId = outgoing[vi][ei]->Children()[i];
+          if (siblingId != vi) {
+         //   cerr << "\tSibling " << siblingId << endl;
+            outgoingScore += vertexBackwardScores[siblingId];
+          }
+        }
+        //add score of head
+        outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
+        //cerr << "Forward score " << outgoingScore << endl;
+        edgeForwardScores[outgoing[vi][ei]] = outgoingScore;
+        outgoingScore += outgoing[vi][ei]->GetScore(weights);
+        if (outgoingScore > vertexForwardScores[vi]) vertexForwardScores[vi] = outgoingScore;
+      }
+    }
+  }
+
+
+
+  multimap<FeatureStatsType, const Edge*> edgeScores;
+  for (size_t i = 0; i < edges_.Size(); ++i) {
+    const Edge* edge = &(edges_[i]);
+    if (edgeForwardScores.find(edge) == edgeForwardScores.end()) {
+      //This edge has no children, so didn't get a forward score. Its forward score
+      //is that of its head
+      edgeForwardScores[edge] = vertexForwardScores[edgeHeads[edge]];
+    }
+    FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
+    edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
+  //  cerr << edgeIds[edge] << " " << score << endl;
+  }
+
+
+  
+  multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
+  size_t edgeCount = 1;
+  while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
+    ++ei;
+    ++edgeCount;
+  }
+  multimap<FeatureStatsType, const Edge*>::const_iterator lowest = edgeScores.begin();
+  if (ei != edgeScores.rend())  lowest = edgeScores.lower_bound(ei->first);
+
+  //cerr << "Retained edges" << endl;
+  set<size_t> retainedVertices;
+  set<const Edge*> retainedEdges;
+  for (; lowest != edgeScores.end(); ++lowest) {
+    //cerr << lowest->first << " " << edgeIds[lowest->second] << endl;
+    retainedEdges.insert(lowest->second);
+    retainedVertices.insert(edgeHeads[lowest->second]);
+    for (size_t i = 0; i < lowest->second->Children().size(); ++i) {
+      retainedVertices.insert(lowest->second->Children()[i]);
+    }
+  }
+  newGraph.SetCounts(retainedVertices.size(), retainedEdges.size());
+
+  //cerr << "Retained vertices" << endl;
+  map<size_t,size_t> oldIdToNew;
+  size_t vi = 0;
+  for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
+    //cerr << *i << endl;
+    oldIdToNew[*i] = vi;
+    Vertex* vertex = newGraph.NewVertex();
+    vertex->SetSourceCovered(vertices_[*i].SourceCovered()); 
+  }
+
+  for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
+    Edge* newEdge = newGraph.NewEdge();
+    const Edge* oldEdge = *i;
+    for (size_t j = 0; j < oldEdge->Words().size(); ++j) {
+      newEdge->AddWord(oldEdge->Words()[j]);
+    }
+    for (size_t j = 0; j < oldEdge->Children().size(); ++j) {
+      newEdge->AddChild(oldIdToNew[oldEdge->Children()[j]]);
+    }
+    newEdge->SetFeatures(oldEdge->Features());
+    Vertex& newHead = newGraph.vertices_[oldIdToNew[edgeHeads[oldEdge]]];
+    newHead.AddEdge(newEdge);
+  }
+
+  
+
+
+}
+
+/**
+  * Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
+**/
+void ReadGraph(util::FilePiece &from, Graph &graph) {
+
+  //First line should contain field names
+  StringPiece line = from.ReadLine();
+  UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
+  line = NextLine(from);
+  
+  //Then expect numbers of vertices
+  util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
+  unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
+  ++i;
+  unsigned long int edges = boost::lexical_cast<unsigned long int>(*i);
+  graph.SetCounts(vertices, edges);
+  //cerr << "vertices: " << vertices << "; edges: " << edges << endl;
+  for (size_t i = 0; i < vertices; ++i) {
+    line = NextLine(from);
+    unsigned long int edge_count = boost::lexical_cast<unsigned long int>(line);
+    Vertex* vertex = graph.NewVertex();
+    for (unsigned long int e = 0; e < edge_count; ++e) {
+      pair<Edge*,size_t> edge = ReadEdge(from, graph);
+      vertex->AddEdge(edge.first);
+      //Note: the file format attaches this to the edge, but it's really a property 
+      //of the vertex.
+      if (!e) {vertex->SetSourceCovered(edge.second);}
+    }
+  }
+}
+
+};
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
new file mode 100644
index 000000000..b6ee6c3f8
--- /dev/null
+++ b/mert/Hypergraph.h
@@ -0,0 +1,251 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef MERT_HYPERGRAPH_H
+#define MERT_HYPERGRAPH_H
+
+#include <string>
+
+#include <boost/noncopyable.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/functional/hash/hash.hpp>
+#include <boost/unordered_map.hpp>
+
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
+#include "FeatureStats.h"
+
+namespace MosesTuning {
+
+typedef unsigned int WordIndex;
+const WordIndex kMaxWordIndex = UINT_MAX;
+const FeatureStatsType kMinScore = -std::numeric_limits<FeatureStatsType>::max();
+
+template <class T> class FixedAllocator : boost::noncopyable {
+  public:
+    FixedAllocator() : current_(NULL), end_(NULL) {}
+
+    void Init(std::size_t count) {
+      assert(!current_);
+      array_.reset(new T[count]);
+      current_ = array_.get();
+      end_ = current_ + count;
+    }
+
+    T &operator[](std::size_t idx) {
+      return array_.get()[idx];
+    }
+    const T &operator[](std::size_t idx) const {
+      return array_.get()[idx];
+    }
+
+    T *New() {
+      T *ret = current_++;
+      UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
+      return ret;
+    }
+
+    std::size_t Capacity() const {
+      return end_ - array_.get();
+    }
+
+    std::size_t Size() const {
+      return current_ - array_.get();
+    }
+
+  private:
+    boost::scoped_array<T> array_;
+    T *current_, *end_;
+};
+
+
+class Vocab {
+  public:
+    Vocab();
+
+    typedef std::pair<const char *const, WordIndex> Entry;
+
+    const Entry &FindOrAdd(const StringPiece &str);
+
+    const Entry& Bos() const {return bos_;}
+
+    const Entry& Eos() const {return eos_;}
+
+  private:
+    util::Pool piece_backing_;
+
+    struct Hash : public std::unary_function<const char *, std::size_t> {
+      std::size_t operator()(StringPiece str) const {
+        return util::MurmurHashNative(str.data(), str.size());
+      }
+    };
+
+    struct Equals : public std::binary_function<const char *, const char *, bool> {
+      bool operator()(StringPiece first, StringPiece second) const {
+        return first == second;
+      }
+    };
+
+    typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+    Map map_;
+    Entry eos_;
+    Entry bos_;
+
+};
+
+typedef std::vector<const Vocab::Entry*> WordVec;
+
+class Vertex;
+
+//Use shared pointer to save copying when we prune
+typedef boost::shared_ptr<SparseVector> FeaturePtr;
+
+/**
+ * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
+**/
+class Edge {
+  public:
+    Edge() {features_.reset(new SparseVector());}
+
+    void AddWord(const Vocab::Entry *word) {
+      words_.push_back(word);
+    }
+
+    void AddChild(size_t child) {
+      children_.push_back(child);
+    }
+
+    void AddFeature(const StringPiece& name, FeatureStatsType value) {
+      //TODO StringPiece interface
+      features_->set(name.as_string(),value);
+    }
+
+
+    const WordVec &Words() const {
+      return words_;
+    }
+    
+    const FeaturePtr& Features() const {
+      return features_;
+    }
+
+    void SetFeatures(const FeaturePtr& features) {
+      features_ = features;
+    }
+
+    const std::vector<size_t>& Children() const {
+      return children_;
+    }
+
+    FeatureStatsType GetScore(const SparseVector& weights) const {
+      return inner_product(*(features_.get()), weights);
+    }
+
+  private:
+    // NULL for non-terminals.  
+    std::vector<const Vocab::Entry*> words_;
+    std::vector<size_t> children_;
+    boost::shared_ptr<SparseVector> features_;
+};
+
+/*
+ * A vertex has 0..n incoming edges
+ **/
+class Vertex {
+  public:
+    Vertex() : sourceCovered_(0) {}
+
+    void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
+
+    void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
+
+    const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
+
+    size_t SourceCovered() const {return sourceCovered_;}
+
+  private:
+    std::vector<const Edge*> incoming_;
+    size_t sourceCovered_;
+};
+
+
+class Graph : boost::noncopyable {
+  public:
+    Graph(Vocab& vocab) : vocab_(vocab) {}
+
+    void SetCounts(std::size_t vertices, std::size_t edges) {
+      vertices_.Init(vertices);
+      edges_.Init(edges);
+    }
+
+    Vocab &MutableVocab() { return vocab_; }
+
+    Edge *NewEdge() {      
+      return edges_.New();
+    }
+
+    Vertex *NewVertex() {
+      return vertices_.New();
+    }
+
+    const Vertex &GetVertex(std::size_t index) const {
+      return vertices_[index];
+    }
+
+    Edge &GetEdge(std::size_t index) {
+      return edges_[index];
+    }
+
+    /* Created a pruned copy of this graph with minEdgeCount edges. Uses
+    the scores in the max-product semiring to rank edges, as suggested by
+    Colin Cherry */
+    void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
+
+    std::size_t VertexSize() const { return vertices_.Size(); }
+    std::size_t EdgeSize() const { return edges_.Size(); }
+
+    bool IsBoundary(const Vocab::Entry* word) const {
+      return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+    }
+
+  private:
+    FixedAllocator<Edge> edges_;    
+    FixedAllocator<Vertex> vertices_;
+    Vocab& vocab_;
+};
+
+class HypergraphException : public util::Exception {
+  public:
+    HypergraphException() {}
+    ~HypergraphException() throw() {}
+};
+
+
+void ReadGraph(util::FilePiece &from, Graph &graph);
+
+
+};
+
+#endif
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
new file mode 100644
index 000000000..345a445f0
--- /dev/null
+++ b/mert/HypergraphTest.cpp
@@ -0,0 +1,151 @@
+#include <iostream>
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+#include "Hypergraph.h"
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(prune) 
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+  for (size_t i = 0; i < 13; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,8);
+
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1,1);
+  e1->AddFeature(f2,1);
+  Edge* e5 = graph.NewEdge();
+  e5->AddWord(NULL);
+  e5->AddChild(0);
+  e5->AddWord(words[9]);
+  e5->AddWord(words[10]);
+  e5->AddFeature(f1,2);
+  e5->AddFeature(f2,-2);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+  v1->AddEdge(e5);
+  v1->SetSourceCovered(1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2,3);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+  v2->SetSourceCovered(3);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1,1);
+  Edge* e6 = graph.NewEdge();
+  e6->AddWord(NULL);
+  e6->AddChild(2);
+  e6->AddWord(words[9]);
+  e6->AddWord(words[12]);
+  e6->AddFeature(f2,1);
+  Edge* e7 = graph.NewEdge();
+  e7->AddWord(NULL);
+  e7->AddChild(1);
+  e7->AddWord(words[11]);
+  e7->AddWord(words[12]);
+  e7->AddFeature(f1,2);
+  e7->AddFeature(f2,3);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+  v3->AddEdge(e6);
+  v3->AddEdge(e7);
+  v3->SetSourceCovered(5);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+  v4->SetSourceCovered(6);
+
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+
+  Graph pruned(vocab);
+  graph.Prune(&pruned, weights, 5);
+
+  BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
+  BOOST_CHECK_EQUAL(4, pruned.VertexSize());
+  
+  //edges retained should be best path (<s> ab jk </s>) and hi
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
+  BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(2).GetIncoming().size());
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(3).GetIncoming().size());
+
+  const Edge* edge;
+
+  edge =  pruned.GetVertex(0).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(1, edge->Words().size()); 
+  BOOST_CHECK_EQUAL(words[0], edge->Words()[0]); 
+
+  edge =  pruned.GetVertex(1).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(3, edge->Words().size()); 
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]); 
+  BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first); 
+
+  edge =  pruned.GetVertex(1).GetIncoming()[1];
+  BOOST_CHECK_EQUAL(3, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
+
+  edge =  pruned.GetVertex(2).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(3, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
+
+  edge =  pruned.GetVertex(3).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(2, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first); 
+
+
+
+  
+
+//  BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);   
+  
+
+}
diff --git a/mert/Jamfile b/mert/Jamfile
index 34c640b06..d848c258f 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -15,6 +15,9 @@ FeatureStats.cpp
 FeatureArray.cpp
 FeatureData.cpp
 FeatureDataIterator.cpp
+ForestRescore.cpp
+HopeFearDecoder.cpp
+Hypergraph.cpp
 MiraFeatureVector.cpp
 MiraWeightVector.cpp
 HypPackEnumerator.cpp
@@ -62,13 +65,15 @@ exe sentence-bleu : sentence-bleu.cpp mert_lib ;
 
 exe pro : pro.cpp mert_lib ..//boost_program_options ;
 
-exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
+exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
 
 alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
 
 unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index dea9b9b83..347ad488e 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,18 +9,17 @@ namespace MosesTuning
 {
 
 
-MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
-  : m_dense(vec.dense)
-{
-  vector<size_t> sparseFeats = vec.sparse.feats();
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+  vector<size_t> sparseFeats = sparse.feats();
   bool bFirst = true;
   size_t lastFeat = 0;
   m_sparseFeats.reserve(sparseFeats.size());
   m_sparseVals.reserve(sparseFeats.size());
   for(size_t i=0; i<sparseFeats.size(); i++) {
+    if (sparseFeats[i] < ignoreLimit) continue;
     size_t feat = m_dense.size() + sparseFeats[i];
     m_sparseFeats.push_back(feat);
-    m_sparseVals.push_back(vec.sparse.get(sparseFeats[i]));
+    m_sparseVals.push_back(sparse.get(sparseFeats[i]));
 
     // Check ordered property
     if(bFirst) {
@@ -35,6 +34,21 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
   }
 }
 
+MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
+  : m_dense(vec.dense)
+{
+  InitSparse(vec.sparse);
+}
+
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+  m_dense.resize(num_dense);
+  //Assume that features with id [0,num_dense) are the dense features
+  for (size_t id = 0; id < num_dense; ++id) {
+    m_dense[id] = sparse.get(id);
+  }
+  InitSparse(sparse,num_dense);
+}
+
 MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
   : m_dense(other.m_dense),
     m_sparseFeats(other.m_sparseFeats),
@@ -148,6 +162,22 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
   return MiraFeatureVector(dense,sparseFeats,sparseVals);
 }
 
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+  ValType eps = 1e-8;
+  //dense features
+  if (a.m_dense.size() != b.m_dense.size()) return false;
+  for (size_t i = 0; i < a.m_dense.size(); ++i) {
+    if (fabs(a.m_dense[i]-b.m_dense[i]) < eps) return false;
+  }
+  if (a.m_sparseFeats.size() != b.m_sparseFeats.size()) return false;
+  for (size_t i = 0; i < a.m_sparseFeats.size(); ++i) {
+    if (a.m_sparseFeats[i] != b.m_sparseFeats[i]) return false;
+    if (fabs(a.m_sparseVals[i] != b.m_sparseVals[i])) return false;
+  }
+  return true;
+
+}
+
 ostream& operator<<(ostream& o, const MiraFeatureVector& e)
 {
   for(size_t i=0; i<e.size(); i++) {
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index cb2b1c87d..48aa496b5 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -26,7 +26,10 @@ typedef FeatureStatsType ValType;
 class MiraFeatureVector
 {
 public:
+  MiraFeatureVector() {}
   MiraFeatureVector(const FeatureDataItem& vec);
+  //Assumes that features in sparse with id < num_dense are dense features
+  MiraFeatureVector(const SparseVector& sparse, size_t num_dense);
   MiraFeatureVector(const MiraFeatureVector& other);
   MiraFeatureVector(const std::vector<ValType>& dense,
                     const std::vector<std::size_t>& sparseFeats,
@@ -42,7 +45,12 @@ public:
 
   friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
 
+  friend bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b);
+
 private:
+  //Ignore any sparse features with id < ignoreLimit
+  void InitSparse(const SparseVector& sparse, size_t ignoreLimit = 0);
+
   std::vector<ValType> m_dense;
   std::vector<std::size_t> m_sparseFeats;
   std::vector<ValType> m_sparseVals;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index e23804cbf..3b7b1780c 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,6 +93,14 @@ void MiraWeightVector::update(size_t index, ValType delta)
   m_lastUpdated[index] = m_numUpdates;
 }
 
+void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+  for (size_t i = 0; i < m_weights.size(); ++i) {
+    if(abs(m_weights[i])>1e-8) {
+      sparse->set(i,m_weights[i]);
+    }
+  }
+}
+
 /**
  * Make sure everyone's total is up-to-date
  */
@@ -163,6 +171,15 @@ size_t AvgWeightVector::size() const
   return m_wv.m_weights.size();
 }
 
+void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+  for (size_t i = 0; i < size(); ++i) {
+    ValType w = weight(i);
+    if(abs(w)>1e-8) {
+      sparse->set(i,w);
+    }
+  }
+}
+
 // --Emacs trickery--
 // Local Variables:
 // mode:c++
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index eb27e8a6d..bbc28704b 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -63,6 +63,11 @@ public:
    */
   AvgWeightVector avg();
 
+  /**
+    * Convert to sparse vector, interpreting all features as sparse.
+   **/
+  void ToSparse(SparseVector* sparse) const;
+
   friend class AvgWeightVector;
 
   friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
@@ -99,12 +104,12 @@ public:
   ValType score(const MiraFeatureVector& fv) const;
   ValType weight(std::size_t index) const;
   std::size_t size() const;
+  void ToSparse(SparseVector* sparse) const;
 private:
   const MiraWeightVector& m_wv;
 };
 
 
-#endif // MERT_WEIGHT_VECTOR_H
 
 // --Emacs trickery--
 // Local Variables:
@@ -113,3 +118,4 @@ private:
 // End:
 
 }
+#endif // MERT_WEIGHT_VECTOR_H
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index a2665ac13..da552fa36 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -39,8 +39,10 @@ de recherches du Canada
 #include <boost/program_options.hpp>
 #include <boost/scoped_ptr.hpp>
 
+#include "util/exception.hh"
+
 #include "BleuScorer.h"
-#include "HypPackEnumerator.h"
+#include "HopeFearDecoder.h"
 #include "MiraFeatureVector.h"
 #include "MiraWeightVector.h"
 
@@ -49,38 +51,16 @@ using namespace MosesTuning;
 
 namespace po = boost::program_options;
 
-ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
-{
-  vector<ValType> stats(kBleuNgramOrder*2+1,0);
-  for(train->reset(); !train->finished(); train->next()) {
-    // Find max model
-    size_t max_index=0;
-    ValType max_score=0;
-    for(size_t i=0; i<train->cur_size(); i++) {
-      MiraFeatureVector vec(train->featuresAt(i));
-      ValType score = wv.score(vec);
-      if(i==0 || score > max_score) {
-        max_index = i;
-        max_score = score;
-      }
-    }
-    // Update stats
-    const vector<float>& sent = train->scoresAt(max_index);
-    for(size_t i=0; i<sent.size(); i++) {
-      stats[i]+=sent[i];
-    }
-  }
-  return unsmoothedBleu(stats);
-}
-
 int main(int argc, char** argv)
 {
-  const ValType BLEU_RATIO = 5;
   bool help;
   string denseInitFile;
   string sparseInitFile;
+  string type = "nbest";
   vector<string> scoreFiles;
   vector<string> featureFiles;
+  vector<string> referenceFiles; //for hg mira
+  string hgDir;
   int seed;
   string outputFile;
   float c = 0.01;      // Step-size cap C
@@ -91,25 +71,30 @@ int main(int argc, char** argv)
   bool model_bg = false; // Use model for background corpus
   bool verbose = false; // Verbose updates
   bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
+  size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word 
 
   // Command-line processing follows pro.cpp
   po::options_description desc("Allowed options");
   desc.add_options()
   ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+  ("type,t", po::value<string>(&type), "Either nbest or hypergraph")
   ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
   ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+  ("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
+  ("reference,R", po::value<vector<string> > (&referenceFiles), "Reference files, only required for hypergraph mira")
   ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
   ("output-file,o", po::value<string>(&outputFile), "Output file")
   ("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
   ("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
   ("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
-  ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
+  ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features. This should have 'name= value' on each line, or (legacy) should be the Moses mert 'init.opt' format.")
   ("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
   ("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
   ("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
   ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
   ("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
   ("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
+  ("hg-prune", po::value<size_t>(&hgPruning), "Prune hypergraphs to have this many edges per reference word")
   ;
 
   po::options_description cmdline_options;
@@ -145,12 +130,56 @@ int main(int argc, char** argv)
       cerr << "could not open dense initfile: " << denseInitFile << endl;
       exit(3);
     }
+    if (verbose) cerr << "Reading dense features:" << endl;
     parameter_t val;
     getline(opt,buffer);
-    istringstream strstrm(buffer);
-    while(strstrm >> val) {
-      initParams.push_back(val);
+    if (buffer.find_first_of("=") == buffer.npos) {
+      UTIL_THROW_IF(type == "hypergraph", util::Exception, "For hypergraph version, require dense features in 'name= value' format");
+      cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
+      istringstream strstrm(buffer);
+      while(strstrm >> val) {
+        initParams.push_back(val);
+        if(verbose) cerr << val << endl;
+      }
+    } else {
+      vector<string> names;
+      string last_name = "";
+      size_t feature_ctr = 0;
+      do {
+        size_t equals = buffer.find_last_of("=");
+        UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
+          << buffer << "'");
+        string name = buffer.substr(0,equals);
+        names.push_back(name);
+        initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
+
+        //Names for features with several values need to have their id added
+        if (name != last_name) feature_ctr = 0;
+        last_name = name;
+        if (feature_ctr) {
+          stringstream namestr;
+          namestr << names.back() << feature_ctr;
+          names[names.size()-1] = namestr.str();
+          if (feature_ctr == 1) {
+            stringstream namestr;
+            namestr << names[names.size()-2] << (feature_ctr-1);
+            names[names.size()-2] = namestr.str();
+          }
+        }
+        ++feature_ctr;
+
+      } while(getline(opt,buffer));
+
+
+      //Make sure that SparseVector encodes dense feature names as 0..n-1.
+      for (size_t i = 0; i < names.size(); ++i) {
+        size_t id = SparseVector::encode(names[i]);
+        assert(id == i);     
+        if (verbose) cerr << names[i] << " " << initParams[i] << endl;
+      }
+
     }
+
     opt.close();
   }
   size_t initDenseSize = initParams.size();
@@ -188,82 +217,45 @@ int main(int argc, char** argv)
   }
   bg.push_back(kBleuNgramOrder);
 
+  boost::scoped_ptr<HopeFearDecoder> decoder;
+  if (type == "nbest") {
+    decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope));
+  } else if (type == "hypergraph") {
+    decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv));
+  } else {
+    UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
+  }
+
   // Training loop
-  boost::scoped_ptr<HypPackEnumerator> train;
-  if(streaming)
-    train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
-  else
-    train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
-  cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
+  cerr << "Initial BLEU = " << decoder->Evaluate(wv.avg()) << endl;
   ValType bestBleu = 0;
   for(int j=0; j<n_iters; j++) {
     // MIRA train for one epoch
-    int iNumHyps = 0;
     int iNumExamples = 0;
     int iNumUpdates = 0;
     ValType totalLoss = 0.0;
-    for(train->reset(); !train->finished(); train->next()) {
-      // Hope / fear decode
-      ValType hope_scale = 1.0;
-      size_t hope_index=0, fear_index=0, model_index=0;
-      ValType hope_score=0, fear_score=0, model_score=0;
-      int iNumHypsBackup = iNumHyps;
-      for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
-        iNumHyps = iNumHypsBackup;
-        ValType hope_bleu, hope_model;
-        for(size_t i=0; i< train->cur_size(); i++) {
-          const MiraFeatureVector& vec=train->featuresAt(i);
-          ValType score = wv.score(vec);
-          ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
-          // Hope
-          if(i==0 || (hope_scale*score + bleu) > hope_score) {
-            hope_score = hope_scale*score + bleu;
-            hope_index = i;
-            hope_bleu = bleu;
-            hope_model = score;
-          }
-          // Fear
-          if(i==0 || (score - bleu) > fear_score) {
-            fear_score = score - bleu;
-            fear_index = i;
-          }
-          // Model
-          if(i==0 || score > model_score) {
-            model_score = score;
-            model_index = i;
-          }
-          iNumHyps++;
-        }
-        // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
-        // where model score is having far more influence than BLEU
-        hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
-        if(safe_hope && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
-          hope_scale = abs(hope_bleu) / abs(hope_model);
-        else break;
-      }
+    size_t sentenceIndex = 0;
+    for(decoder->reset();!decoder->finished(); decoder->next()) {
+      HopeFearData hfd;
+      decoder->HopeFear(bg,wv,&hfd);
+    
       // Update weights
-      if(hope_index!=fear_index) {
+      if (!hfd.hopeFearEqual && hfd.hopeBleu  > hfd.fearBleu) { 
         // Vector difference
-        const MiraFeatureVector& hope=train->featuresAt(hope_index);
-        const MiraFeatureVector& fear=train->featuresAt(fear_index);
-        MiraFeatureVector diff = hope - fear;
+        MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
         // Bleu difference
-        const vector<float>& hope_stats = train->scoresAt(hope_index);
-        ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
-        const vector<float>& fear_stats = train->scoresAt(fear_index);
-        ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
-        assert(hopeBleu + 1e-8 >= fearBleu);
-        ValType delta = hopeBleu - fearBleu;
+        //assert(hfd.hopeBleu + 1e-8 >= hfd.fearBleu);
+        ValType delta = hfd.hopeBleu - hfd.fearBleu;
         // Loss and update
         ValType diff_score = wv.score(diff);
         ValType loss = delta - diff_score;
         if(verbose) {
-          cerr << "Updating sent " << train->cur_id() << endl;
+          cerr << "Updating sent " << sentenceIndex << endl;
           cerr << "Wght: " << wv << endl;
-          cerr << "Hope: " << hope << " BLEU:" << hopeBleu << " Score:" << wv.score(hope) << endl;
-          cerr << "Fear: " << fear << " BLEU:" << fearBleu << " Score:" << wv.score(fear) << endl;
+          cerr << "Hope: " << hfd.hopeFeatures << " BLEU:" << hfd.hopeBleu << " Score:" << wv.score(hfd.hopeFeatures) << endl;
+          cerr << "Fear: " << hfd.fearFeatures << " BLEU:" << hfd.fearBleu << " Score:" << wv.score(hfd.fearFeatures) << endl;
           cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
-          cerr << "Loss: " << loss << " Scale: " << hope_scale << endl;
+          cerr << "Loss: " << loss <<  " Scale: " << 1 << endl;
           cerr << endl;
         }
         if(loss > 0) {
@@ -273,16 +265,16 @@ int main(int argc, char** argv)
           iNumUpdates++;
         }
         // Update BLEU statistics
-        const vector<float>& model_stats = train->scoresAt(model_index);
         for(size_t k=0; k<bg.size(); k++) {
           bg[k]*=decay;
           if(model_bg)
-            bg[k]+=model_stats[k];
+            bg[k]+=hfd.modelStats[k];
           else
-            bg[k]+=hope_stats[k];
+            bg[k]+=hfd.hopeStats[k];
         }
       }
       iNumExamples++;
+      ++sentenceIndex;
     }
     // Training Epoch summary
     cerr << iNumUpdates << "/" << iNumExamples << " updates"
@@ -291,15 +283,16 @@ int main(int argc, char** argv)
 
     // Evaluate current average weights
     AvgWeightVector avg = wv.avg();
-    ValType bleu = evaluate(train.get(), avg);
+    ValType bleu = decoder->Evaluate(avg);
     cerr << ", BLEU = " << bleu << endl;
     if(bleu > bestBleu) {
+      /*
       size_t num_dense = train->num_dense();
       if(initDenseSize>0 && initDenseSize!=num_dense) {
         cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
              << initDenseSize << "!=" << num_dense << endl;
         exit(1);
-      }
+      }*/
       // Write to a file
       ostream* out;
       ofstream outFile;
@@ -314,11 +307,11 @@ int main(int argc, char** argv)
         out = &cout;
       }
       for(size_t i=0; i<avg.size(); i++) {
-        if(i<num_dense)
+        if(i<initDenseSize)
           *out << "F" << i << " " << avg.weight(i) << endl;
         else {
           if(abs(avg.weight(i))>1e-8)
-            *out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
+            *out << SparseVector::decode(i-initDenseSize) << " " << avg.weight(i) << endl;
         }
       }
       outFile.close();