Merge branch 'master' of ssh://github.com/moses-smt/mosesdecoder

author: Prashant Mathur <pramathur@ebay.com> 2016-06-15 15:33:42 +0300
committer: Prashant Mathur <pramathur@ebay.com> 2016-06-15 15:33:42 +0300
commit: e31bc247ead9f2b0e048b2394f7726d77b889736 (patch)
tree: f391d01d64b972dca9c977ae5f81a91eb16a47a2
parent: dee124b70aed617e62fff8810cc80986d4f050b9 (diff)
parent: bc5f8d15c6ce4bc678ba992860bfd4be6719cee8 (diff)
71 files changed, 3284 insertions, 539 deletions
diff --git a/compile.sh b/compile.sh
index 45c10325c..f47a697d6 100755
--- a/compile.sh
+++ b/compile.sh
@@ -3,6 +3,6 @@
 # you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
 
 set -e -o pipefail
-OPT=${OPT:-$(pwd)/OPT}
+OPT=${OPT:-$(pwd)/opt}
 ./bjam --with-irstlm=$OPT/irstlm-5.80.08 --with-boost=$OPT --with-cmph=$OPT --with-xmlrpc-c=$OPT --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
 
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 26b838df9..222f19365 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1106,6 +1106,16 @@
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.h</locationURI>
 		</link>
 		<link>
+			<name>FF/CorrectionPattern.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CorrectionPattern.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/CorrectionPattern.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CorrectionPattern.h</locationURI>
+		</link>
+		<link>
 			<name>FF/CountNonTerms.cpp</name>
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/CountNonTerms.cpp</locationURI>
@@ -1171,6 +1181,16 @@
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/DynamicCacheBasedLanguageModel.h</locationURI>
 		</link>
 		<link>
+			<name>FF/EditOps.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/EditOps.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/EditOps.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/EditOps.h</locationURI>
+		</link>
+		<link>
 			<name>FF/FFState.cpp</name>
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/FFState.cpp</locationURI>
diff --git a/mert/Jamfile b/mert/Jamfile
index e5adce76e..e3f083864 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -31,6 +31,8 @@ Point.cpp
 PerScorer.cpp
 HwcmScorer.cpp
 InternalTree.cpp
+M2.cpp
+M2Scorer.cpp
 Scorer.cpp
 ScorerFactory.cpp
 Optimizer.cpp
diff --git a/mert/M2.cpp b/mert/M2.cpp
new file mode 100644
index 000000000..58181d38e
--- /dev/null
+++ b/mert/M2.cpp
@@ -0,0 +1,61 @@
+
+#include <boost/algorithm/string.hpp>
+
+#include "M2.h"
+
+namespace MosesTuning
+{
+
+namespace M2
+{
+
+bool Annot::lowercase = true;  // process-wide switch read by Annot::transform; the four-argument M2 constructor clears it when truecase is requested
+// Normalizes a token or edit string before comparison.
+std::string Annot::transform(const std::string& e)
+{
+  std::string temp = e;
+  if(lowercase) {
+    boost::erase_all(temp, " ");  // spaces are dropped only in lowercase mode
+    return ToLower(temp);
+  } else
+    return e;  // truecase mode: the input is returned untouched, spaces included
+}
+
+const std::string ToLower(const std::string& str)  // returns a copy of str with every char mapped through std::tolower
+{
+  std::string lc(str);
+  std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower);  // cast picks the int(int) overload; NOTE(review): bytes >= 0x80 arrive as negative ints where char is signed - confirm inputs
+  return lc;
+}
+
+
+Edit operator+(Edit& e1, Edit& e2)  // combines two edits: cost, changed and unchanged are summed, edit texts are joined
+{
+  std::string edit;
+  if(e1.edit.size() > 0 && e2.edit.size() > 0)
+    edit = e1.edit + " " + e2.edit;  // a single space separates the texts only when both are non-empty
+  else if(e1.edit.size() > 0)
+    edit = e1.edit;
+  else if(e2.edit.size() > 0)
+    edit = e2.edit;
+
+  return Edit(e1.cost + e2.cost, e1.changed + e2.changed, e1.unchanged + e2.unchanged, edit);
+}
+
+
+Edge operator+(Edge e1, Edge e2)  // chains two edges into one that runs from e1.v to e2.u and carries the combined edit
+{
+  return Edge(e1.v, e2.u, e1.edit + e2.edit);  // e1.u and e2.v are not checked here; BuildGraph only calls this when they are equal
+}
+
+std::ostream& operator<<(std::ostream& o, Sentence s)  // prints each token followed by one space, so the output ends with a space
+{
+  for(Sentence::iterator it = s.begin(); it != s.end(); it++)
+    o << *it << " ";
+  return o;
+}
+
+
+}
+
+}
+\ No newline at end of file
diff --git a/mert/M2.h b/mert/M2.h
new file mode 100644
index 000000000..76f1aed6e
--- /dev/null
+++ b/mert/M2.h
@@ -0,0 +1,480 @@
+#pragma once
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <queue>
+#include <iostream>
+#include <fstream>
+#include <iterator>
+#include <algorithm>
+#include <limits>
+#include <sstream>
+#include <boost/algorithm/string.hpp>
+
+
+
+namespace MosesTuning
+{
+
+namespace M2
+{
+
+typedef std::vector<float> Stats;
+
+typedef std::vector<std::string> Sentence;
+
+std::ostream& operator<<(std::ostream& o, Sentence s);
+
+const std::string ToLower(const std::string& str);
+
+struct Annot {  // one annotation (A line) of an M2 file
+  size_t i;  // first number of the span field
+  size_t j;  // second number of the span field
+
+  std::string type;  // second field of the A line
+  std::string edit;  // third field of the A line
+
+  size_t annotator;  // sixth field of the A line
+  // Orders by i, then j, then annotator, then transformed edit text; type takes no part in the ordering.
+  bool operator<(Annot a) const {
+    return i < a.i || (i == a.i && j < a.j)
+           || (i == a.i && j == a.j && annotator < a.annotator)
+           || (i == a.i && j == a.j && annotator == a.annotator && transform(edit) < transform(a.edit));
+  }
+  // Equivalence derived from operator<, so annotations that differ only in type compare equal.
+  bool operator==(Annot a) const {
+    return (!(*this < a) && !(a < *this));
+  }
+  // Normalization applied to edit text before comparison (defined in M2.cpp).
+  static std::string transform(const std::string& e);
+  // When true, transform() strips spaces and lower-cases its input.
+  static bool lowercase;
+};
+
+typedef std::set<Annot> Annots;
+typedef std::set<size_t> Users;
+
+struct Unit {  // one entry of an M2 file as filled in by M2::ReadM2
+  Sentence first;  // tokens of the S line
+  Annots second;  // annotations whose type is not noop
+  Users third;  // annotator ids collected from A lines
+};
+
+typedef std::vector<Unit> M2File;
+
+struct Edit {  // payload carried by graph edges and backtrack entries
+  Edit(float c = 1.0, size_t ch = 0, size_t unch = 1, std::string e = "")  // defaults: cost 1, nothing changed, one unchanged token, empty text
+    : cost(c), changed(ch), unchanged(unch), edit(e) {}
+
+  float cost;  // edge weight used by BellmanFord
+  size_t changed;  // number of changed steps folded into this edit
+  size_t unchanged;  // number of unchanged steps folded into this edit
+  std::string edit;  // produced text
+};
+
+Edit operator+(Edit& e1, Edit& e2);
+
+struct Vertex {  // a cell (i, j) of the Levenshtein matrix
+  Vertex(size_t a = 0, size_t b = 0) : i(a), j(b) {}
+
+  bool operator<(const Vertex &v) const {  // lexicographic order on (i, j)
+    return i < v.i || (i == v.i && j < v.j);
+  }
+
+  bool operator==(const Vertex &v) const {
+    return i == v.i && j == v.j;
+  }
+
+  size_t i;  // row index (position in the first sentence passed to LevenshteinMatrix)
+  size_t j;  // column index (position in the second sentence)
+};
+
+struct Edge {  // directed edge from v to u labelled with an Edit
+  Edge(Vertex vv = Vertex(), Vertex uu = Vertex(), Edit editt = Edit())
+    : v(vv), u(uu), edit(editt) {}
+
+  bool operator<(const Edge &e) const {  // compares endpoints only, so a std::set<Edge> holds at most one edge per (v, u) pair
+    return v < e.v || (v == e.v && u < e.u);
+  }
+
+  Vertex v;  // tail
+  Vertex u;  // head
+  Edit edit;  // not part of the ordering
+};
+
+Edge operator+(Edge e1, Edge e2);
+
+typedef std::vector<size_t> Row;
+typedef std::vector<Row> Matrix;
+
+struct Info {  // one backtrack entry: predecessor cell plus the edit that leads out of it
+  Info(Vertex vv = Vertex(), Edit editt = Edit())
+    : v(vv), edit(editt) {}
+
+  bool operator<(const Info &i) const {  // ordered by predecessor vertex only
+    return v < i.v;
+  }
+
+  Vertex v;  // predecessor cell
+  Edit edit;  // edit taken from v to the current cell
+};
+
+typedef std::set<Info> Track;
+typedef std::vector<Track> TrackRow;
+typedef std::vector<TrackRow> TrackMatrix;
+
+typedef std::set<Vertex> Vertices;
+typedef std::set<Edge> Edges;
+
+class M2
+{
+private:
+  M2File m_m2;
+
+  size_t m_max_unchanged;
+  float m_beta;
+  bool m_lowercase;
+  bool m_verbose;
+
+public:
+  M2() : m_max_unchanged(2), m_beta(0.5), m_lowercase(true), m_verbose(false) { }  // defaults: max_unchanged 2, beta 0.5, lowercase matching, no diagnostics
+  M2(size_t max_unchanged, float beta, bool truecase, bool verbose = false)  // truecase == true turns lowercase matching off
+    : m_max_unchanged(max_unchanged), m_beta(beta), m_lowercase(!truecase), m_verbose(verbose) {
+    if(!m_lowercase) {
+      Annot::lowercase = false;  // static flag shared by every Annot; this constructor never sets it back to true
+    }
+  }
+
+  float Beta() {  // returns the beta value used by FScore
+    return m_beta;
+  }
+
+  void ReadM2(const std::string& filename) {  // parses an M2 file and appends one Unit per S line to m_m2
+    std::ifstream m2file(filename.c_str());
+    std::string line;
+
+    Unit unit;
+    bool first = true;  // true until the first S line has been seen
+
+    while(std::getline(m2file, line)) {
+      if(line.size() > 2) {
+        if(line.substr(0, 2) == "S ") {
+          if(!first) {
+            if(unit.third.empty())
+              unit.third.insert(0);  // a sentence with no A lines is attributed to annotator 0
+            m_m2.push_back(unit);
+          }
+          first = false;
+
+          unit.first = Sentence();
+          unit.second = Annots();
+          // NOTE(review): unit.third is not cleared here, so annotator ids from earlier sentences stay in the set - confirm intended.
+          std::string sentenceLine = line.substr(2);
+          boost::split(unit.first, sentenceLine, boost::is_any_of(" "), boost::token_compress_on);
+        }
+        if(line.substr(0, 2) == "A ") {
+          std::string annotLine = line.substr(2);
+
+          std::vector<std::string> annot;
+          boost::iter_split(annot, annotLine, boost::algorithm::first_finder("|||"));
+          if(annot.size() < 6) continue;  // skip malformed A lines instead of indexing past the end of annot
+          if(annot[1] != "noop") {
+            Annot a;
+            std::stringstream rangeStr(annot[0]);
+            rangeStr >> a.i >> a.j;
+            a.type = annot[1];
+            a.edit = annot[2];
+
+            std::stringstream annotStr(annot[5]);
+            annotStr >> a.annotator;
+
+            unit.third.insert(a.annotator);
+            unit.second.insert(a);
+          } else {
+            std::stringstream annotStr(annot[5]);  // noop lines only register the annotator id
+            size_t annotator;
+            annotStr >> annotator;
+            unit.third.insert(annotator);
+          }
+        }
+      }
+    }
+    if(unit.third.empty())
+      unit.third.insert(0);
+    m_m2.push_back(unit);  // flush the last unit; this also runs when no S line was read at all
+  }
+
+  size_t LevenshteinMatrix(const Sentence &s1, const Sentence &s2, Matrix &d, TrackMatrix &bt) {  // fills distance matrix d and backtrack sets bt, returns d[n][m]
+    size_t n = s1.size();
+    size_t m = s2.size();
+
+    if (n == 0)
+      return m;  // early exit: d and bt are left untouched
+    if (m == 0)
+      return n;  // early exit: d and bt are left untouched
+
+    d.resize(n + 1, Row(m + 1, 0));
+    bt.resize(n + 1, TrackRow(m + 1));
+
+    for(size_t i = 0; i <= n; ++i) {
+      d[i][0] = i;
+      if(i > 0)
+        bt[i][0].insert(Info(Vertex(i - 1, 0), Edit(1, 1, 0, "")));  // first column: consume a token of s1, produce no text
+    }
+    for(size_t j = 0; j <= m; ++j) {
+      d[0][j] = j;
+      if(j > 0)
+        bt[0][j].insert(Info(Vertex(0, j - 1), Edit(1, 1, 0, s2[j - 1])));  // first row: produce a token of s2
+    }
+
+    int cost;
+    for(size_t i = 1; i <= n; ++i) {
+      for(size_t j = 1; j <= m; ++j) {
+        if(Annot::transform(s1[i-1]) == Annot::transform(s2[j-1]))
+          cost = 0;
+        else
+          cost = 2;  // a mismatch on the diagonal costs as much as one step left plus one step down
+
+        size_t left = d[i][j - 1] + 1;
+        size_t down = d[i - 1][j] + 1;
+        size_t diag = d[i - 1][j - 1] + cost;
+
+        d[i][j] = std::min(left, std::min(down, diag));
+        // every predecessor that achieves the minimum is recorded, so bt can hold several entries per cell
+        if(d[i][j] == left)
+          bt[i][j].insert(Info(Vertex(i, j - 1), Edit(1, 1, 0, s2[j - 1])));
+        if(d[i][j] == down)
+          bt[i][j].insert(Info(Vertex(i - 1, j), Edit(1, 1, 0, "")));
+        if(d[i][j] == diag)
+          bt[i][j].insert(Info(Vertex(i - 1, j - 1), cost ? Edit(1, 1, 0, s2[j - 1]) : Edit(1, 0, 1, s2[j - 1]) ));  // match: changed 0, unchanged 1
+      }
+    }
+    return d[n][m];
+  }
+
+
+  void BuildGraph(const TrackMatrix &bt, Vertices &V, Edges &E) {  // turns the backtrack sets into a graph, then adds combined edges
+    Vertex start(bt.size() - 1, bt[0].size() - 1);  // bottom-right cell; requires a non-empty bt
+
+    std::queue<Vertex> Q;
+    Q.push(start);
+    while(!Q.empty()) {  // breadth-first walk from the bottom-right cell along backtrack entries
+      Vertex v = Q.front();
+      Q.pop();
+      if(V.count(v) > 0)
+        continue;
+      V.insert(v);
+      for(Track::iterator it = bt[v.i][v.j].begin();
+          it != bt[v.i][v.j].end(); ++it) {
+        Edge e(it->v, v, it->edit);  // edge points from the predecessor cell to v
+        E.insert(e);
+        if(V.count(e.v) == 0)
+          Q.push(e.v);
+      }
+    }
+
+    Edges newE;
+    do {  // repeat until a pass adds no edge
+      newE.clear();
+      for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
+        for(Edges::iterator it2 = E.begin(); it2 != E.end(); ++it2) {
+          if(it1->u == it2->v) {
+            Edge e = *it1 + *it2;
+            if(e.edit.changed > 0 &&
+                e.edit.unchanged <= m_max_unchanged &&
+                E.count(e) == 0)  // Edge ordering ignores the edit, so this tests for any existing edge with the same endpoints
+              newE.insert(e);
+          }
+        }
+      }
+      E.insert(newE.begin(), newE.end());
+    } while(newE.size() > 0);
+  }
+
+  void AddWeights(Edges &E, const Unit &u, size_t aid) {  // re-weights changing edges; edges that match an annotation of annotator aid get a large negative cost
+    for(Edges::iterator it1 = E.begin(); it1 != E.end(); ++it1) {
+      if(it1->edit.changed > 0) {
+        const_cast<float&>(it1->edit.cost) += 0.001;  // set elements are const; cost is not part of Edge ordering, so the set stays consistent
+        for(Annots::iterator it2 = u.second.begin(); it2 != u.second.end(); ++it2) {
+          // if matches an annotator
+          if(it1->v.i == it2->i && it1->u.i == it2->j
+              && Annot::transform(it1->edit.edit) == Annot::transform(it2->edit)
+              && it2->annotator == aid) {
+            int newWeight = -static_cast<int>((m_max_unchanged + 1) * E.size());  // compute the magnitude first, then negate as int (no unsigned negation)
+            const_cast<float&>(it1->edit.cost) = newWeight;
+          }
+        }
+      }
+    }
+  }
+
+  void BellmanFord(Vertices &V, Edges &E) {  // finds a cheapest path from (0, 0) to the largest vertex and replaces E with the edges on that path
+    Vertex source(0, 0);
+    std::map<Vertex, float> distance;
+    std::map<Vertex, Vertex> predecessor;
+
+    for(Vertices::iterator it = V.begin(); it != V.end(); ++it) {
+      if(*it == source)
+        distance[*it] = 0;
+      else {
+        distance[*it] = std::numeric_limits<float>::infinity();
+      }
+    }
+
+    for(size_t i = 1; i < V.size(); ++i) {  // V.size() - 1 relaxation rounds over all edges
+      for(Edges::iterator it = E.begin(); it != E.end(); ++it) {
+        if(distance[it->v] + it->edit.cost < distance[it->u]) {
+          distance[it->u] = distance[it->v] + it->edit.cost;
+          predecessor[it->u] = it->v;
+        }
+      }
+    }
+
+    Edges newE;
+
+    Vertex v = *V.rbegin();  // largest vertex under Vertex::operator<; requires a non-empty V
+    while(true) {  // walk predecessors back towards the source
+      //std::cout << predecessor[v] << " -> " << v << std::endl;
+      Edges::iterator it = E.find(Edge(predecessor[v], v));  // lookup uses endpoints only; predecessor[v] default-inserts Vertex(0, 0) when v has no entry
+      if(it != E.end()) {
+        Edge f = *it;
+        //std::cout << f << std::endl;
+        newE.insert(f);
+
+        v = predecessor[v];
+        if(v == source)
+          break;
+      } else {
+        std::cout << "Error" << std::endl;  // no edge for this step: report on stdout and stop with a partial path
+        break;
+      }
+    }
+    E.clear();
+    E.insert(newE.begin(), newE.end());
+  }
+
+  void AddStats(const std::vector<Edges> &Es, const Unit &u, Stats &stats, size_t line) {
+    // Adds to stats[0..2] the counts of the annotator whose counts give the best F-score: [0] matched, [1] proposed, [2] annotated.
+    std::map<size_t, Stats> statsPerAnnotator;
+    for(std::set<size_t>::iterator it = u.third.begin();
+        it != u.third.end(); ++it) {
+      statsPerAnnotator[*it] = Stats(4, 0);
+    }
+
+    for(Annots::iterator it = u.second.begin(); it != u.second.end(); it++)
+      statsPerAnnotator[it->annotator][2]++;  // one count per annotation of that annotator
+
+    for(std::set<size_t>::iterator ait = u.third.begin();
+        ait != u.third.end(); ++ait) {
+      for(Edges::iterator eit = Es[*ait].begin(); eit != Es[*ait].end(); ++eit) {  // NOTE(review): Es is indexed by annotator id, which assumes every id is below Es.size() - confirm against M2 data
+        if(eit->edit.changed > 0) {
+          statsPerAnnotator[*ait][1]++;  // every changing edge on the path counts as a proposed edit
+          Annot f;
+          f.i = eit->v.i;
+          f.j = eit->u.i;
+          f.annotator = *ait;
+          f.edit = eit->edit.edit;
+          for(Annots::iterator fit = u.second.begin(); fit != u.second.end(); fit++) {
+            if(f == *fit)  // Annot equality ignores type, which is left unset on f
+              statsPerAnnotator[*ait][0]++;
+          }
+        }
+      }
+    }
+    size_t bestAnnot = 0;
+    float  bestF = -1;
+    for(std::set<size_t>::iterator it = u.third.begin();
+        it != u.third.end(); ++it) {
+      Stats localStats = stats;  // running totals plus the counts of this candidate annotator
+      localStats[0] += statsPerAnnotator[*it][0];
+      localStats[1] += statsPerAnnotator[*it][1];
+      localStats[2] += statsPerAnnotator[*it][2];
+      if(m_verbose)
+        std::cerr << *it << " : " << localStats[0] << " " << localStats[1] << " " << localStats[2] << std::endl;
+      float f = FScore(localStats);
+      if(m_verbose)
+        std::cerr << f << std::endl;
+      if(f > bestF) {  // strict comparison: on ties the first annotator in set order wins
+        bestF = f;
+        bestAnnot = *it;
+      }
+    }
+    if(m_verbose)
+      std::cerr << ">> Chosen Annotator for line " << line + 1 << " : " << bestAnnot << std::endl;
+    stats[0] += statsPerAnnotator[bestAnnot][0];
+    stats[1] += statsPerAnnotator[bestAnnot][1];
+    stats[2] += statsPerAnnotator[bestAnnot][2];
+  }
+
+  void SufStats(const std::string &sStr, size_t i, Stats &stats) {  // computes the four statistics for hypothesis sStr against M2 entry i
+    std::string temp = sStr;
+
+    Sentence s;
+    boost::split(s, temp, boost::is_any_of(" "), boost::token_compress_on);
+
+    Unit &unit = m_m2[i];  // no bounds check on i
+
+    Matrix d;
+    TrackMatrix bt;
+    size_t distance = LevenshteinMatrix(unit.first, s, d, bt);
+
+    std::vector<Vertices> Vs(unit.third.size());
+    std::vector<Edges> Es(unit.third.size());
+
+    if(distance > unit.first.size()) {  // hypothesis too far from the source: report distance as proposed edits and skip the graph search
+      std::cerr << "Levenshtein distance is greater than source size." << std::endl;
+      stats[0] = 0;
+      stats[1] = distance;
+      stats[2] = 0;
+      stats[3] = unit.first.size();
+      return;
+    } else if(distance > 0) {
+      for(size_t j = 0; j < unit.third.size(); j++) {  // NOTE(review): j is used both as index and as annotator id, which assumes ids are 0..size-1 - confirm
+        BuildGraph(bt, Vs[j], Es[j]);
+        AddWeights(Es[j], unit, j);
+        BellmanFord(Vs[j], Es[j]);
+      }
+    }
+    AddStats(Es, unit, stats, i);  // with distance == 0 every edge set is empty, so only stats[2] can grow
+    stats[3] = unit.first.size();  // source sentence length in tokens
+  }
+
+
+  float FScore(const Stats& stats) {  // F-beta from stats[0] (matched), stats[1] (proposed), stats[2] (annotated)
+    float p = 1.0;  // precision defaults to 1 when nothing was proposed
+    if(stats[1] != 0)
+      p = (float)stats[0] / (float)stats[1];
+
+    float r = 1.0;  // recall defaults to 1 when nothing was annotated
+    if(stats[2] != 0)
+      r = (float)stats[0] / (float)stats[2];
+
+    float denom = (m_beta * m_beta * p + r);
+    float f = 0.0;  // stays 0 when the denominator is 0
+    if(denom != 0)
+      f = ((1 + m_beta * m_beta) * p * r) / denom;
+    return f;
+  }
+
+  void FScore(const Stats& stats, float &p, float &r, float &f) {  // same computation as the single-argument overload, also returning precision and recall
+    p = 1.0;  // precision defaults to 1 when nothing was proposed
+    if(stats[1] != 0)
+      p = (float)stats[0] / (float)stats[1];
+
+    r = 1.0;  // recall defaults to 1 when nothing was annotated
+    if(stats[2] != 0)
+      r = (float)stats[0] / (float)stats[2];
+
+    float denom = (m_beta * m_beta * p + r);
+    f = 0.0;  // stays 0 when the denominator is 0
+    if(denom != 0)
+      f = ((1 + m_beta * m_beta) * p * r) / denom;
+  }
+};
+
+}
+
+}
+\ No newline at end of file
diff --git a/mert/M2Scorer.cpp b/mert/M2Scorer.cpp
new file mode 100644
index 000000000..f7e276631
--- /dev/null
+++ b/mert/M2Scorer.cpp
@@ -0,0 +1,137 @@
+#include "M2Scorer.h"
+
+#include <algorithm>
+#include <fstream>
+#include <stdexcept>
+#include <sstream>
+#include <cstdlib>
+
+#include <boost/lexical_cast.hpp>
+
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+M2Scorer::M2Scorer(const string& config)  // reads beta, max_unchanged_words, truecase and verbose from config and builds the M2 engine
+  : StatisticsBasedScorer("M2Scorer", config),
+    beta_(Scan<float>(getConfig("beta", "0.5"))),  // default 0.5
+    max_unchanged_words_(Scan<int>(getConfig("max_unchanged_words", "2"))),  // default 2
+    truecase_(Scan<bool>(getConfig("truecase", "false"))),  // default false
+    verbose_(Scan<bool>(getConfig("verbose", "false"))),  // default false
+    m2_(max_unchanged_words_, beta_, truecase_, verbose_)  // verbose_ is forwarded so the M2 diagnostics follow the same switch; it is declared before m2_
+{}
+
+void M2Scorer::setReferenceFiles(const vector<string>& referenceFiles)  // loads the M2 reference data
+{
+  for(size_t i = 0; i < referenceFiles.size(); ++i) {
+    m2_.ReadM2(referenceFiles[i]);
+    break;  // only the first file is read; any further entries are ignored
+  }
+}
+
+void M2Scorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)  // computes the four statistics for hypothesis text against reference entry sid
+{
+  string sentence = trimStr(this->preprocessSentence(text));
+  std::vector<ScoreStatsType> stats(4, 0);  // four slots, filled by M2::SufStats
+  m2_.SufStats(sentence, sid, stats);
+  entry.set(stats);
+}
+
+float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
+{
+  // Computes F-beta from comps[0] (matched), comps[1] (proposed) and comps[2] (annotated); throws runtime_error on a wrong vector size.
+  if (comps.size() != NumberOfScores()) {
+    throw runtime_error("Size of stat vector for M2Scorer is not " + boost::lexical_cast<string>(NumberOfScores()));  // convert the number to text; adding it to the literal would be pointer arithmetic
+  }
+
+  float beta = beta_;
+
+
+  float p = 0.0;
+  float r = 0.0;
+  float f = 0.0;
+
+  if(comps[1] != 0)
+    p = comps[0] / (double)comps[1];
+  else
+    p = 1.0;  // nothing proposed: precision counts as perfect
+
+  if(comps[2] != 0)
+    r = comps[0] / (double)comps[2];
+  else
+    r = 1.0;  // nothing annotated: recall counts as perfect
+
+  float denom = beta * beta * p + r;
+  if(denom != 0)
+    f = (1.0 + beta * beta) * p * r / denom;
+  else
+    f = 0.0;
+
+  if(verbose_)
+    std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl;
+
+  if(verbose_)
+    std::cerr << p << " " << r << " " << f << std::endl;
+
+  return f;
+}
+
+float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const  // returns the fourth statistic, which M2::SufStats sets to the source sentence length
+{
+  return comps[3];
+}
+
+std::vector<ScoreStatsType> randomStats(float decay, int max)  // builds three pseudo-random statistics scaled by decay; only referenced from a commented-out initializer in sentenceM2
+{
+  int gold = rand() % max;  // requires max != 0
+  int prop = rand() % max;
+  int corr = 0.0;
+
+  if(std::min(prop, gold) > 0)
+    corr = rand() % std::min(prop, gold);  // corr stays strictly below both prop and gold
+
+  //std::cerr << corr << " " << prop << " " << gold << std::endl;
+
+  std::vector<ScoreStatsType> stats(3, 0.0);
+  stats[0] = corr * decay;
+  stats[1] = prop * decay;
+  stats[2] = gold * decay;
+
+  return stats;
+}
+
+float sentenceM2(const std::vector<ScoreStatsType>& stats)  // F-score of one statistics vector with beta fixed at 0.5
+{
+  float beta = 0.5;  // hard-coded; the beta setting of an M2Scorer instance is not consulted here
+
+  std::vector<ScoreStatsType> smoothStats(3, 0.0); // = randomStats(0.001, 5);
+  smoothStats[0] += stats[0];
+  smoothStats[1] += stats[1];
+  smoothStats[2] += stats[2];
+
+  float p = 0.0;
+  float r = 0.0;
+  float f = 0.0;
+
+  if(smoothStats[1] != 0)
+    p = smoothStats[0] / smoothStats[1];  // NOTE(review): no floating-point cast here, unlike calculateScore - result depends on whether ScoreStatsType is an integer type
+  else
+    p = 1.0;
+
+  if(smoothStats[2] != 0)
+    r = smoothStats[0] / smoothStats[2];
+  else
+    r = 1.0;
+
+  float denom = beta * beta * p + r;
+  if(denom != 0)
+    f = (1.0 + beta * beta) * p * r / denom;
+  else
+    f = 0.0;
+
+  return f;
+}
+
+}
diff --git a/mert/M2Scorer.h b/mert/M2Scorer.h
new file mode 100644
index 000000000..2a807e447
--- /dev/null
+++ b/mert/M2Scorer.h
@@ -0,0 +1,52 @@
+#ifndef MERT_M2_SCORER_H_
+#define MERT_M2_SCORER_H_
+
+#include <string>
+#include <vector>
+#include <functional>
+
+#include "Types.h"
+#include "Util.h"
+#include "StatisticsBasedScorer.h"
+#include "M2.h"
+
+namespace MosesTuning
+{
+
+/**
+ * M2Scorer class can compute CoNLL m2 F-score.
+ */
+class M2Scorer: public StatisticsBasedScorer
+{
+public:
+  explicit M2Scorer(const std::string& config);  // reads beta, max_unchanged_words, truecase and verbose from config
+
+  virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);  // reads the first file as M2 data
+  virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);  // fills entry with the four statistics for one hypothesis
+
+  virtual std::size_t NumberOfScores() const {  // statistics per sentence: matched, proposed, annotated, source length
+    return 4;
+  }
+
+  virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;  // F-beta over the first three statistics
+  virtual float getReferenceLength(const std::vector<ScoreStatsType>& comps) const;  // returns comps[3]
+
+private:
+  float beta_;
+  int max_unchanged_words_;
+  bool truecase_;
+  bool verbose_;
+  M2::M2 m2_;  // declared after the settings above because its constructor arguments come from them
+
+  std::map<std::pair<size_t, std::string>, std::vector<ScoreStatsType> > seen_;  // not referenced in M2Scorer.cpp as shown in this patch
+
+  // no copying allowed
+  M2Scorer(const M2Scorer&);
+  M2Scorer& operator=(const M2Scorer&);
+};
+
+float sentenceM2 (const std::vector<ScoreStatsType>& stats);
+
+}
+
+#endif  // MERT_M2_SCORER_H_
diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp
index 02573091c..8827f3e5d 100644
--- a/mert/ScorerFactory.cpp
+++ b/mert/ScorerFactory.cpp
@@ -11,6 +11,7 @@
 #include "SemposScorer.h"
 #include "PermutationScorer.h"
 #include "MeteorScorer.h"
+#include "M2Scorer.h"
 #include "HwcmScorer.h"
 #include "Reference.h"
 
@@ -34,6 +35,7 @@ vector<string> ScorerFactory::getTypes()
   types.push_back(string("LRSCORE"));
   types.push_back(string("METEOR"));
   types.push_back(string("HWCM"));
+  types.push_back(string("M2SCORER"));
   return types;
 }
 
@@ -54,6 +56,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config)
     return new CderScorer(config, false);
   } else if (type == "SEMPOS") {
     return new SemposScorer(config);
+  } else if (type == "M2SCORER") {
+    return new M2Scorer(config);
   } else if ((type == "HAMMING") || (type == "KENDALL")) {
     return (PermutationScorer*) new PermutationScorer(type, config);
   } else if (type == "METEOR") {
diff --git a/moses/FF/CorrectionPattern.cpp b/moses/FF/CorrectionPattern.cpp
new file mode 100644
index 000000000..915eaff2c
--- /dev/null
+++ b/moses/FF/CorrectionPattern.cpp
@@ -0,0 +1,354 @@
+#include <sstream>
+#include "CorrectionPattern.h"
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+#include "moses/InputPath.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TranslationOption.h"
+#include "util/string_piece_hash.hh"
+#include "util/exception.hh"
+
+#include <functional>
+#include <algorithm>
+
+#include <boost/foreach.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "Diffs.h"
+
+namespace Moses
+{
+
+using namespace std;
+
+std::string MakePair(const std::string &s1, const std::string &s2, bool general)  // renders a substitution feature name of the form sub(«source»,«target»)
+{
+  std::vector<std::string> sourceList;
+  std::vector<std::string> targetList;
+
+  if(general) {  // generalized form: matching runs of three or more characters become a capture group and a numbered back-reference
+    Diffs diffs = CreateDiff(s1, s2);  // presumably one code per character step; the loop below handles the codes m, d and i
+
+    size_t i = 0, j = 0;  // read positions in s1 and s2
+    char lastType = 'm';
+
+    std::string source, target;
+    std::string match;  // characters matched since the last flush
+
+    int count = 1;  // number of the next back-reference
+
+    BOOST_FOREACH(Diff type, diffs) {
+      if(type == 'm') {
+        if(lastType != 'm') {
+          sourceList.push_back(source);
+          targetList.push_back(target);
+        }
+        source.clear();
+        target.clear();
+
+        if(s1[i] == '+') {  // plus is the token separator placed by CreateSinglePattern; it is written out as a middle dot
+          if(match.size() >= 3) {
+            sourceList.push_back("(\\w{3,})·");
+            std::ostringstream temp;
+            temp << count;  // stream formatting keeps every digit of count and writes to no const buffer
+            targetList.push_back("\\" + temp.str() + "·");
+            count++;
+          } else {
+            sourceList.push_back(match + "·");
+            targetList.push_back(match + "·");
+          }
+          match.clear();
+        } else
+          match.push_back(s1[i]);
+
+        i++;
+        j++;
+      } else if(type == 'd') {
+        if(s1[i] == '+')
+          source += "·";
+        else
+          source.push_back(s1[i]);
+        i++;
+      } else if(type == 'i') {
+        if(s2[j] == '+')
+          target += "·";
+        else
+          target.push_back(s2[j]);
+        j++;
+      }
+      if(type != 'm' && !match.empty()) {  // a non-matching step ends the current matched run
+        if(match.size() >= 3) {
+          sourceList.push_back("(\\w{3,})");
+          std::ostringstream temp;
+          temp << count;
+          targetList.push_back("\\" + temp.str());
+          count++;
+        } else {
+          sourceList.push_back(match);
+          targetList.push_back(match);
+        }
+
+        match.clear();
+      }
+
+      lastType = type;
+    }
+    if(lastType != 'm') {  // flush a trailing deletion or insertion
+      sourceList.push_back(source);
+      targetList.push_back(target);
+    }
+
+    if(!match.empty()) {  // flush a trailing matched run
+      if(match.size() >= 3) {
+        sourceList.push_back("(\\w{3,})");
+        std::ostringstream temp;
+        temp << count;
+        targetList.push_back("\\"+ temp.str());
+        count++;
+      } else {
+        sourceList.push_back(match);
+        targetList.push_back(match);
+      }
+    }
+    match.clear();
+  } else {  // literal form: only the separators are rewritten
+    std::string cs1 = s1;
+    std::string cs2 = s2;
+    boost::replace_all(cs1, "+", "·");
+    boost::replace_all(cs2, "+", "·");
+
+    sourceList.push_back(cs1);
+    targetList.push_back(cs2);
+  }
+
+  std::stringstream out;
+  out << "sub(«";
+  out << boost::join(sourceList, "");
+  out << "»,«";
+  out << boost::join(targetList, "");
+  out << "»)";
+
+  return out.str();
+}
+
+std::string CorrectionPattern::CreateSinglePattern(const Tokens &s1, const Tokens &s2) const  // names one edit: ins(...) when s1 is empty, del(...) when s2 is empty, otherwise sub(...)
+{
+  std::stringstream out;
+  if(s1.empty()) {
+    out << "ins(«" << boost::join(s2, "·") << "»)";
+    return out.str();
+  } else if(s2.empty()) {
+    out << "del(«" << boost::join(s1, "·") << "»)";
+    return out.str();
+  } else {
+    Tokens::value_type v1 = boost::join(s1, "+");  // plus marks token boundaries for MakePair; no typename keyword is needed outside a template
+    Tokens::value_type v2 = boost::join(s2, "+");
+    out << MakePair(v1, v2, m_general);
+    return out.str();
+  }
+}
+
+std::vector<std::string> GetContext(size_t pos,
+                                    size_t len,
+                                    size_t window,
+                                    const InputType &input,
+                                    const InputPath &inputPath,
+                                    const std::vector<FactorType>& factorTypes,
+                                    bool isRight)
+{
+  // Builds context strings of 1..window words to the left (isRight false) or right (isRight true) of an edit inside the input sentence.
+  const Sentence& sentence = static_cast<const Sentence&>(input);  // assumes input really is a Sentence - the cast is unchecked
+  const Range& range = inputPath.GetWordsRange();
+
+  int leftPos  = range.GetStartPos() + pos - len - 1;  // word just before the edited source span; the arithmetic is unsigned before conversion to int
+  int rightPos = range.GetStartPos() + pos;  // first word after the edited source span
+
+  std::vector<std::string> contexts;
+
+  for(int length = 1; length <= (int)window; ++length) {
+    std::vector<std::string> current;
+    if(!isRight) {
+      for(int i = 0; i < length; i++) {
+        if(leftPos - i >= 0) {
+          current.push_back(sentence.GetWord(leftPos - i).GetString(factorTypes, false));
+        } else {
+          current.push_back("<s>");  // positions before the sentence start
+        }
+      }
+
+      if(current.back() == "<s>" && current.size() >= 2 && current[current.size()-2] == "<s>")
+        continue;  // skip windows that would contain more than one start marker
+
+      std::reverse(current.begin(), current.end());  // words were collected right to left
+      contexts.push_back("left(«" + boost::join(current, "·") + "»)_");
+    }
+    if(isRight) {
+      for(int i = 0; i < length; i++) {
+        if(rightPos + i < (int)sentence.GetSize()) {
+          current.push_back(sentence.GetWord(rightPos + i).GetString(factorTypes, false));
+        } else {
+          current.push_back("</s>");  // positions past the sentence end
+        }
+      }
+
+      if(current.back() == "</s>" && current.size() >= 2 && current[current.size()-2] == "</s>")
+        continue;  // skip windows that would contain more than one end marker
+
+      contexts.push_back("_right(«" + boost::join(current, "·") + "»)");
+    }
+  }
+  return contexts;
+}
+
+std::vector<std::string>
+CorrectionPattern::CreatePattern(const Tokens &s1,
+                                 const Tokens &s2,
+                                 const InputType &input,
+                                 const InputPath &inputPath) const
+{
+  // Returns one pattern per contiguous edit between s1 and s2, plus context-decorated copies when m_context > 0.
+  Diffs diffs = CreateDiff(s1, s2);  // presumably one code per token step; the loop below handles the codes m, d and i
+  size_t i = 0, j = 0;  // read positions in s1 and s2
+  char lastType = 'm';
+  std::vector<std::string> patternList;
+  Tokens source, target;  // tokens of the edit currently being collected
+  BOOST_FOREACH(Diff type, diffs) {
+    if(type == 'm') {
+      if(lastType != 'm') {  // a match closes the edit collected so far
+        std::string pattern = CreateSinglePattern(source, target);
+        patternList.push_back(pattern);
+
+        if(m_context > 0) {
+          std::vector<std::string> leftContexts =  GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
+          std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);
+
+          BOOST_FOREACH(std::string left, leftContexts)
+          patternList.push_back(left + pattern);
+
+          BOOST_FOREACH(std::string right, rightContexts)
+          patternList.push_back(pattern + right);
+
+          BOOST_FOREACH(std::string left, leftContexts)
+          BOOST_FOREACH(std::string right, rightContexts)
+          patternList.push_back(left + pattern + right);  // every left and right combination
+        }
+      }
+      source.clear();
+      target.clear();
+      if(s1[i] != s2[j]) {  // a step coded as a match whose tokens still differ starts a new edit
+        source.push_back(s1[i]);
+        target.push_back(s2[j]);
+      }
+      i++;
+      j++;
+    } else if(type == 'd') {
+      source.push_back(s1[i]);
+      i++;
+    } else if(type == 'i') {
+      target.push_back(s2[j]);
+      j++;
+    }
+    lastType = type;
+  }
+  if(lastType != 'm') {  // flush an edit that runs to the end of the phrase
+    std::string pattern = CreateSinglePattern(source, target);
+    patternList.push_back(pattern);
+
+    if(m_context > 0) {
+      std::vector<std::string> leftContexts =  GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, false);
+      std::vector<std::string> rightContexts = GetContext(i, source.size(), m_context, input, inputPath, m_contextFactors, true);
+
+      BOOST_FOREACH(std::string left, leftContexts)
+      patternList.push_back(left + pattern);
+
+      BOOST_FOREACH(std::string right, rightContexts)
+      patternList.push_back(pattern + right);
+
+      BOOST_FOREACH(std::string left, leftContexts)
+      BOOST_FOREACH(std::string right, rightContexts)
+      patternList.push_back(left + pattern + right);
+    }
+  }
+
+  return patternList;
+}
+
+CorrectionPattern::CorrectionPattern(const std::string &line)  // defaults: factor 0, literal (non-general) patterns, no context, context factor 0
+  : StatelessFeatureFunction(0, line), m_factors(1, 0), m_general(false),
+    m_context(0), m_contextFactors(1, 0)
+{
+  std::cerr << "Initializing correction pattern feature.." << std::endl;
+  ReadParameters();  // the key=value settings are handled one by one in SetParameter
+}
+
+void CorrectionPattern::SetParameter(const std::string& key, const std::string& value)  // handles factor, context-factor, general and context; other keys go to the base class
+{
+  if (key == "factor") {
+    m_factors = std::vector<FactorType>(1, Scan<FactorType>(value));  // a single factor replaces the list
+  } else if (key == "context-factor") {
+    m_contextFactors = std::vector<FactorType>(1, Scan<FactorType>(value));
+  } else if (key == "general") {
+    m_general = Scan<bool>(value);  // selects the generalized form in MakePair
+  } else if (key == "context") {
+    m_context = Scan<size_t>(value);  // largest context window; 0 disables context features
+  } else {
+    StatelessFeatureFunction::SetParameter(key, value);
+  }
+}
+
+void CorrectionPattern::EvaluateWithSourceContext(const InputType &input  // forwards to ComputeFeatures
+    , const InputPath &inputPath
+    , const TargetPhrase &targetPhrase
+    , const StackVec *stackVec  // unused
+    , ScoreComponentCollection &scoreBreakdown
+    , ScoreComponentCollection *estimatedFutureScore) const  // unused
+{
+  ComputeFeatures(input, inputPath, targetPhrase, &scoreBreakdown);
+}
+
+void CorrectionPattern::ComputeFeatures(  // adds 1 to the accumulator for every pattern found between the source phrase of inputPath and target
+  const InputType &input,
+  const InputPath &inputPath,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
+{
+  const Phrase &source = inputPath.GetPhrase();
+
+  std::vector<std::string> sourceTokens;
+  for(size_t i = 0; i < source.GetSize(); ++i)
+    sourceTokens.push_back(source.GetWord(i).GetString(m_factors, false));
+
+  std::vector<std::string> targetTokens;
+  for(size_t i = 0; i < target.GetSize(); ++i)
+    targetTokens.push_back(target.GetWord(i).GetString(m_factors, false));
+
+  std::vector<std::string> patternList = CreatePattern(sourceTokens, targetTokens, input, inputPath);
+  for(size_t i = 0; i < patternList.size(); ++i)
+    accumulator->PlusEquals(this, patternList[i], 1);  // the pattern string is the feature name
+
+  /*
+  BOOST_FOREACH(std::string w, sourceTokens)
+    std::cerr << w << " ";
+  std::cerr << std::endl;
+  BOOST_FOREACH(std::string w, targetTokens)
+    std::cerr << w << " ";
+  std::cerr << std::endl;
+  BOOST_FOREACH(std::string w, patternList)
+    std::cerr << w << " ";
+  std::cerr << std::endl << std::endl;
+  */
+}
+
+bool CorrectionPattern::IsUseable(const FactorMask &mask) const  // true only when mask has every pattern factor and every context factor set
+{
+  bool ret = true;
+  for(size_t i = 0; i < m_factors.size(); ++i)
+    ret = ret && mask[m_factors[i]];
+  for(size_t i = 0; i < m_contextFactors.size(); ++i)
+    ret = ret && mask[m_contextFactors[i]];
+  return ret;
+}
+
+}
diff --git a/moses/FF/CorrectionPattern.h b/moses/FF/CorrectionPattern.h
new file mode 100644
index 000000000..516a56ce2
--- /dev/null
+++ b/moses/FF/CorrectionPattern.h
@@ -0,0 +1,73 @@
+#ifndef moses_CorrectionPattern_h
+#define moses_CorrectionPattern_h
+
+#include <string>
+#include <boost/unordered_set.hpp>
+
+#include "StatelessFeatureFunction.h"
+#include "moses/FactorCollection.h"
+#include "moses/AlignmentInfo.h"
+
+namespace Moses
+{
+
+typedef std::vector<std::string> Tokens;
+
+/** Adds 1 to a named feature for every pattern that CreatePattern() derives from the source and target phrase tokens.
+ */
+class CorrectionPattern : public StatelessFeatureFunction
+{
+private:
+  std::vector<FactorType> m_factors;
+  bool m_general;
+  size_t m_context;
+  std::vector<FactorType> m_contextFactors;
+
+public:
+  CorrectionPattern(const std::string &line);
+
+  bool IsUseable(const FactorMask &mask) const;
+
+  void EvaluateInIsolation(const Phrase &source
+                           , const TargetPhrase &targetPhrase
+                           , ScoreComponentCollection &scoreBreakdown
+                           , ScoreComponentCollection &estimatedFutureScore) const
+  {}
+
+  virtual void EvaluateWithSourceContext(const InputType &input
+                                         , const InputPath &inputPath
+                                         , const TargetPhrase &targetPhrase
+                                         , const StackVec *stackVec
+                                         , ScoreComponentCollection &scoreBreakdown
+                                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const
+  {}
+
+  void EvaluateWhenApplied(const Hypothesis& hypo,
+                           ScoreComponentCollection* accumulator) const
+  {}
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
+                           ScoreComponentCollection* accumulator) const
+  {}
+
+  void ComputeFeatures(const InputType &input,
+                       const InputPath &inputPath,
+                       const TargetPhrase& targetPhrase,
+                       ScoreComponentCollection* accumulator) const;
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  std::vector<std::string> CreatePattern(const Tokens &s1,
+                                         const Tokens &s2,
+                                         const InputType &input,
+                                         const InputPath &inputPath) const;
+
+  std::string CreateSinglePattern(const Tokens &s1, const Tokens &s2) const;
+
+};
+
+}
+
+#endif // moses_CorrectionPattern_h
diff --git a/moses/FF/Diffs.h b/moses/FF/Diffs.h
new file mode 100644
index 000000000..8935d1fb9
--- /dev/null
+++ b/moses/FF/Diffs.h
@@ -0,0 +1,150 @@
+#ifndef moses_Diffs_h
+#define moses_Diffs_h
+
+#include <cmath>
+
+namespace Moses
+{
+
+typedef char Diff;
+typedef std::vector<Diff> Diffs;
+
+template <class Sequence, class Pred>
+void CreateDiffRec(size_t** c,
+                   const Sequence &s1,
+                   const Sequence &s2,
+                   size_t start,
+                   size_t i,
+                   size_t j,
+                   Diffs& diffs,
+                   Pred pred)
+{
+  if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
+    CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
+    diffs.push_back(Diff('m'));
+  } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
+    CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
+    diffs.push_back(Diff('i'));
+  } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
+    CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
+    diffs.push_back(Diff('d'));
+  }
+}
+
+template <class Sequence, class Pred>
+Diffs CreateDiff(const Sequence& s1,
+                 const Sequence& s2,
+                 Pred pred)
+{
+
+  Diffs diffs;
+
+  size_t n = s2.size();
+
+  int start = 0;
+  int m_end = s1.size() - 1;
+  int n_end = s2.size() - 1;
+
+  while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
+    diffs.push_back(Diff('m'));
+    start++;
+  }
+  while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) {
+    m_end--;
+    n_end--;
+  }
+
+  size_t m_new = m_end - start + 1;
+  size_t n_new = n_end - start + 1;
+
+  size_t** c = new size_t*[m_new + 1];
+  for(size_t i = 0; i <= m_new; ++i) {
+    c[i] = new size_t[n_new + 1];
+    c[i][0] = 0;
+  }
+  for(size_t j = 0; j <= n_new; ++j)
+    c[0][j] = 0;
+  for(size_t i = 1; i <= m_new; ++i)
+    for(size_t j = 1; j <= n_new; ++j)
+      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
+        c[i][j] = c[i-1][j-1] + 1;
+      else
+        c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];
+
+  CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);
+
+  for(size_t i = 0; i <= m_new; ++i)
+    delete[] c[i];
+  delete[] c;
+
+  for (size_t i = n_end + 1; i < n; ++i)
+    diffs.push_back(Diff('m'));
+
+  return diffs;
+}
+
+template <class Sequence>
+Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
+{
+  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
+}
+
+template <class Sequence, class Sig, class Stats>
+void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
+{
+  if(sig.size() != stats.size())
+    throw "Signature size differs from score array size.";
+
+  size_t m = 0, d = 0, i = 0, s = 0;
+  Diffs diff = CreateDiff(s1, s2);
+
+  for(int j = 0; j < (int)diff.size(); ++j) {
+    if(diff[j] == 'm')
+      m++;
+    else if(diff[j] == 'd') {
+      d++;
+      int k = 0;
+      while(j - k >= 0 && j + 1 + k < (int)diff.size() &&
+            diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
+        d--;
+        s++;
+        k++;
+      }
+      j += k;
+    } else if(diff[j] == 'i')
+      i++;
+  }
+
+  for(size_t j = 0; j < sig.size(); ++j) {
+    switch (sig[j]) {
+    case 'l':
+      stats[j] += d + i + s;
+      break;
+    case 'm':
+      stats[j] += m;
+      break;
+    case 'd':
+      stats[j] += d;
+      break;
+    case 'i':
+      stats[j] += i;
+      break;
+    case 's':
+      stats[j] += s;
+      break;
+    case 'r':
+      float macc = 1;
+      if (d + i + s + m)
+        macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
+      if(macc > 0)
+        stats[j] += log(macc);
+      else
+        stats[j] += log(1.0/(float)(d + i + s + m + 1));
+      break;
+    }
+  }
+}
+
+}
+
+#endif
diff --git a/moses/FF/EditOps.cpp b/moses/FF/EditOps.cpp
new file mode 100644
index 000000000..fa66acf1c
--- /dev/null
+++ b/moses/FF/EditOps.cpp
@@ -0,0 +1,119 @@
+#include <sstream>
+#include "EditOps.h"
+#include "moses/Phrase.h"
+#include "moses/TargetPhrase.h"
+#include "moses/Hypothesis.h"
+#include "moses/ChartHypothesis.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/TranslationOption.h"
+#include "util/string_piece_hash.hh"
+#include "util/exception.hh"
+
+#include <functional>
+
+#include <boost/foreach.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "Diffs.h"
+
+namespace Moses
+{
+
+using namespace std;
+
+std::string ParseScores(const std::string &line, const std::string& defaultScores)
+{
+  std::vector<std::string> toks = Tokenize(line);
+  UTIL_THROW_IF2(toks.empty(), "Empty line");
+
+  for (size_t i = 1; i < toks.size(); ++i) {
+    std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
+    UTIL_THROW_IF2(args.size() != 2,
+                   "Incorrect format for feature function arg: " << toks[i]);
+
+    if (args[0] == "scores") {
+      return args[1];
+    }
+  }
+  return defaultScores;
+}
+
+EditOps::EditOps(const std::string &line)
+  : StatelessFeatureFunction(ParseScores(line, "dis").size(), line)
+  , m_factorType(0), m_chars(false), m_scores(ParseScores(line, "dis"))
+{
+  std::cerr << "Initializing EditOps feature.." << std::endl;
+  ReadParameters();
+}
+
+void EditOps::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "factor") {
+    m_factorType = Scan<FactorType>(value);
+  } else if (key == "chars") {
+    m_chars = Scan<bool>(value);
+  } else if (key == "scores") {
+    m_scores = value;
+  } else {
+    StatelessFeatureFunction::SetParameter(key, value);
+  }
+}
+
+void EditOps::Load()
+{ }
+
+void EditOps::EvaluateInIsolation(const Phrase &source
+                                  , const TargetPhrase &target
+                                  , ScoreComponentCollection &scoreBreakdown
+                                  , ScoreComponentCollection &estimatedFutureScore) const
+{
+  ComputeFeatures(source, target, &scoreBreakdown);
+}
+
+void EditOps::ComputeFeatures(
+  const Phrase &source,
+  const TargetPhrase& target,
+  ScoreComponentCollection* accumulator) const
+{
+  std::vector<float> ops(GetNumScoreComponents(), 0);
+
+  if(m_chars) {
+    std::vector<FactorType> factors;
+    factors.push_back(m_factorType);
+
+    std::string sourceStr = source.GetStringRep(factors);
+    std::string targetStr = target.GetStringRep(factors);
+
+    AddStats(sourceStr, targetStr, m_scores, ops);
+  } else {
+    std::vector<std::string> sourceTokens;
+    //std::cerr << "Ed src: ";
+    for(size_t i = 0; i < source.GetSize(); ++i) {
+      if(!source.GetWord(i).IsNonTerminal())
+        sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
+      //std::cerr << sourceTokens.back() << " ";
+    }
+    //std::cerr << std::endl;
+
+    std::vector<std::string> targetTokens;
+    //std::cerr << "Ed trg: ";
+    for(size_t i = 0; i < target.GetSize(); ++i) {
+      if(!target.GetWord(i).IsNonTerminal())
+        targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
+      //std::cerr << targetTokens.back() << " ";
+    }
+    //std::cerr << std::endl;
+
+    AddStats(sourceTokens, targetTokens, m_scores, ops);
+  }
+
+  accumulator->PlusEquals(this, ops);
+}
+
+bool EditOps::IsUseable(const FactorMask &mask) const
+{
+  bool ret = mask[m_factorType];
+  return ret;
+}
+
+}
diff --git a/moses/FF/EditOps.h b/moses/FF/EditOps.h
new file mode 100644
index 000000000..e7e7dd315
--- /dev/null
+++ b/moses/FF/EditOps.h
@@ -0,0 +1,64 @@
+#ifndef moses_EditOps_h
+#define moses_EditOps_h
+
+#include <string>
+#include <boost/unordered_set.hpp>
+
+#include "StatelessFeatureFunction.h"
+#include "moses/FactorCollection.h"
+#include "moses/AlignmentInfo.h"
+
+namespace Moses
+{
+
+typedef std::vector<std::string> Tokens;
+
+/** Calculates string edit operations that transform source phrase into target
+ * phrase using the LCS algorithm. Potentially useful for monolingual tasks
+ * like paraphrasing, summarization, correction.
+ */
+class EditOps : public StatelessFeatureFunction
+{
+private:
+  FactorType m_factorType;
+  bool m_chars;
+  std::string m_scores;
+
+public:
+  EditOps(const std::string &line);
+
+  bool IsUseable(const FactorMask &mask) const;
+
+  void Load();
+
+  virtual void EvaluateInIsolation(const Phrase &source
+                                   , const TargetPhrase &targetPhrase
+                                   , ScoreComponentCollection &scoreBreakdown
+                                   , ScoreComponentCollection &estimatedFutureScore) const;
+
+  void EvaluateWithSourceContext(const InputType &input
+                                 , const InputPath &inputPath
+                                 , const TargetPhrase &targetPhrase
+                                 , const StackVec *stackVec
+                                 , ScoreComponentCollection &scoreBreakdown
+                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
+  {}
+  void EvaluateWhenApplied(const Hypothesis& hypo,
+                           ScoreComponentCollection* accumulator) const
+  {}
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
+                           ScoreComponentCollection* accumulator) const
+  {}
+  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const
+  {}
+
+  void ComputeFeatures(const Phrase &source,
+                       const TargetPhrase& targetPhrase,
+                       ScoreComponentCollection* accumulator) const;
+  void SetParameter(const std::string& key, const std::string& value);
+};
+
+}
+
+#endif // moses_EditOps_h
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 3d9be2fa3..9312f9779 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -73,8 +73,14 @@
 #include "moses/Syntax/InputWeightFF.h"
 #include "moses/Syntax/RuleTableFF.h"
 
+#include "moses/FF/EditOps.h"
+#include "moses/FF/CorrectionPattern.h"
+
 #ifdef HAVE_VW
 #include "moses/FF/VW/VW.h"
+#include "moses/FF/VW/VWFeatureContextBigrams.h"
+#include "moses/FF/VW/VWFeatureContextBilingual.h"
+#include "moses/FF/VW/VWFeatureContextWindow.h"
 #include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
 #include "moses/FF/VW/VWFeatureSourceBigrams.h"
 #include "moses/FF/VW/VWFeatureSourceIndicator.h"
@@ -294,8 +300,14 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SkeletonTranslationOptionListFeature);
   MOSES_FNAME(SkeletonPT);
 
+  MOSES_FNAME(EditOps);
+  MOSES_FNAME(CorrectionPattern);
+
 #ifdef HAVE_VW
   MOSES_FNAME(VW);
+  MOSES_FNAME(VWFeatureContextBigrams);
+  MOSES_FNAME(VWFeatureContextBilingual);
+  MOSES_FNAME(VWFeatureContextWindow);
   MOSES_FNAME(VWFeatureSourceBagOfWords);
   MOSES_FNAME(VWFeatureSourceBigrams);
   MOSES_FNAME(VWFeatureSourceIndicator);
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 6957d7d7c..8391609a2 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -76,7 +76,7 @@ public:
                            , const TargetPhrase &targetPhrase
                            , ScoreComponentCollection &scoreBreakdown
                            , ScoreComponentCollection &estimatedScores) const {
-	}
+  }
 
   void EvaluateWhenApplied(const Hypothesis& hypo,
                            ScoreComponentCollection* accumulator) const {
diff --git a/moses/FF/OSM-Feature/KenOSM.cpp b/moses/FF/OSM-Feature/KenOSM.cpp
index 25a1e6a93..d20e762f6 100644
--- a/moses/FF/OSM-Feature/KenOSM.cpp
+++ b/moses/FF/OSM-Feature/KenOSM.cpp
@@ -3,10 +3,11 @@
 namespace Moses
 {
 
-OSMLM* ConstructOSMLM(const char *file)
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
 {
   lm::ngram::ModelType model_type;
   lm::ngram::Config config;
+  config.load_method = load_method;
   if (lm::ngram::RecognizeBinary(file, model_type)) {
     switch(model_type) {
     case lm::ngram::PROBING:
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
index 53268442b..ce3872a35 100644
--- a/moses/FF/OSM-Feature/KenOSM.h
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -47,7 +47,7 @@ private:
 
 typedef KenOSMBase OSMLM;
 
-OSMLM* ConstructOSMLM(const char *file);
+OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
 
 
 } // namespace
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index 4118c8690..1c889e329 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -17,6 +17,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
   tFactor = 0;
   numFeatures = 5;
   ReadParameters();
+  load_method = util::READ;
 }
 
 OpSequenceModel::~OpSequenceModel()
@@ -27,7 +28,7 @@ OpSequenceModel::~OpSequenceModel()
 void OpSequenceModel :: readLanguageModel(const char *lmFile)
 {
   string unkOp = "_TRANS_SLF_";
-  OSM = ConstructOSMLM(m_lmPath.c_str());
+  OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
 
   State startState = OSM->NullContextState();
   State endState;
@@ -248,6 +249,20 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& va
     sFactor = Scan<int>(value);
   } else if (key == "output-factor") {
     tFactor = Scan<int>(value);
+  } else if (key == "load") {
+    if (value == "lazy") {
+      load_method = util::LAZY;
+    } else if (value == "populate_or_lazy") {
+      load_method = util::POPULATE_OR_LAZY;
+    } else if (value == "populate_or_read" || value == "populate") {
+      load_method = util::POPULATE_OR_READ;
+    } else if (value == "read") {
+      load_method = util::READ;
+    } else if (value == "parallel_read") {
+      load_method = util::PARALLEL_READ;
+    } else {
+      UTIL_THROW2("Unknown KenLM load method " << value);
+    }
   } else {
     StatefulFeatureFunction::SetParameter(key, value);
   }
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 925f9c83a..94beac5aa 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -20,6 +20,7 @@ public:
   int sFactor;	// Source Factor ...
   int tFactor;	// Target Factor ...
   int numFeatures;   // Number of features used ...
+  util::LoadMethod load_method; // method to load model
 
   OpSequenceModel(const std::string &line);
   ~OpSequenceModel();
diff --git a/moses/FF/VW/AlignmentConstraint.h b/moses/FF/VW/AlignmentConstraint.h
new file mode 100644
index 000000000..28ba7d4f3
--- /dev/null
+++ b/moses/FF/VW/AlignmentConstraint.h
@@ -0,0 +1,40 @@
+#pragma once
+
+namespace Moses
+{
+
+/**
+ * Helper class for storing alignment constraints.
+ */
+class AlignmentConstraint
+{
+public:
+  AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
+
+  AlignmentConstraint(int min, int max) : m_min(min), m_max(max) {}
+
+  /**
+   * We are aligned to point => our min cannot be larger, our max cannot be smaller.
+   */
+  void Update(int point) {
+    if (m_min > point) m_min = point;
+    if (m_max < point) m_max = point;
+  }
+
+  bool IsSet() const {
+    return m_max != -1;
+  }
+
+  int GetMin() const {
+    return m_min;
+  }
+
+  int GetMax() const {
+    return m_max;
+  }
+
+private:
+  int m_min, m_max;
+};
+
+}
diff --git a/moses/FF/VW/VW.cpp b/moses/FF/VW/VW.cpp
new file mode 100644
index 000000000..e5e5316b6
--- /dev/null
+++ b/moses/FF/VW/VW.cpp
@@ -0,0 +1,637 @@
+#include <string>
+#include <map>
+#include <limits>
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/PP/CountsPhraseProperty.h"
+#include "moses/TranslationOptionList.h"
+#include "moses/TranslationOption.h"
+#include "moses/Util.h"
+#include "moses/TypeDef.h"
+#include "moses/StaticData.h"
+#include "moses/Phrase.h"
+#include "moses/AlignmentInfo.h"
+#include "moses/AlignmentInfoCollection.h"
+#include "moses/Word.h"
+#include "moses/FactorCollection.h"
+
+#include "Normalizer.h"
+#include "Classifier.h"
+#include "VWFeatureBase.h"
+#include "TabbedSentence.h"
+#include "ThreadLocalByFeatureStorage.h"
+#include "TrainingLoss.h"
+#include "VWTargetSentence.h"
+#include "VWState.h"
+#include "VW.h"
+
+namespace Moses
+{
+
+VW::VW(const std::string &line)
+  : StatefulFeatureFunction(1, line)
+  , TLSTargetSentence(this)
+  , m_train(false)
+  , m_sentenceStartWord(Word())
+{
+  ReadParameters();
+  Discriminative::ClassifierFactory *classifierFactory = m_train
+      ? new Discriminative::ClassifierFactory(m_modelPath)
+      : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
+
+  m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
+
+  m_tlsFutureScores = new TLSFloatHashMap(this);
+  m_tlsComputedStateExtensions = new TLSStateExtensions(this);
+  m_tlsTranslationOptionFeatures = new TLSFeatureVectorMap(this);
+  m_tlsTargetContextFeatures = new TLSFeatureVectorMap(this);
+
+  if (! m_normalizer) {
+    VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
+    m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
+  }
+
+  if (! m_trainingLoss) {
+    VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
+    m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
+  }
+
+  // create a virtual beginning-of-sentence word with all factors replaced by <S>
+  const Factor *bosFactor = FactorCollection::Instance().AddFactor(BOS_);
+  for (size_t i = 0; i < MAX_NUM_FACTORS; i++)
+    m_sentenceStartWord.SetFactor(i, bosFactor);
+}
+
+VW::~VW()
+{
+  delete m_tlsClassifier;
+  delete m_normalizer;
+  // TODO delete more stuff
+}
+
+FFState* VW::EvaluateWhenApplied(
+  const Hypothesis& curHypo,
+  const FFState* prevState,
+  ScoreComponentCollection* accumulator) const
+{
+  VERBOSE(3, "VW :: Evaluating translation options\n");
+
+  const VWState& prevVWState = *static_cast<const VWState *>(prevState);
+
+  const std::vector<VWFeatureBase*>& contextFeatures =
+    VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
+
+  if (contextFeatures.empty()) {
+    // no target context features => we already evaluated everything in
+    // EvaluateTranslationOptionListWithSourceContext(). Nothing to do now,
+    // no state information to track.
+    return new VWState();
+  }
+
+  size_t spanStart = curHypo.GetTranslationOption().GetStartPos();
+  size_t spanEnd   = curHypo.GetTranslationOption().GetEndPos();
+
+  // compute our current key
+  size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);
+
+  boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
+  = *m_tlsComputedStateExtensions->GetStored();
+
+  if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
+    // we have not computed this set of translation options yet
+    const TranslationOptionList *topts =
+      curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);
+
+    const InputType& input = curHypo.GetManager().GetSource();
+
+    Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
+
+    // extract target context features
+    size_t contextHash = prevVWState.hash();
+
+    FeatureVectorMap &contextFeaturesCache = *m_tlsTargetContextFeatures->GetStored();
+
+    FeatureVectorMap::const_iterator contextIt = contextFeaturesCache.find(contextHash);
+    if (contextIt == contextFeaturesCache.end()) {
+      // we have not extracted features for this context yet
+
+      const Phrase &targetContext = prevVWState.GetPhrase();
+      Discriminative::FeatureVector contextVector;
+      const AlignmentInfo *alignInfo = TransformAlignmentInfo(curHypo, targetContext.GetSize());
+      for(size_t i = 0; i < contextFeatures.size(); ++i)
+        (*contextFeatures[i])(input, targetContext, *alignInfo, classifier, contextVector);
+
+      contextFeaturesCache[contextHash] = contextVector;
+      VERBOSE(3, "VW :: context cache miss\n");
+    } else {
+      // context already in cache, simply put feature IDs in the classifier object
+      classifier.AddLabelIndependentFeatureVector(contextIt->second);
+      VERBOSE(3, "VW :: context cache hit\n");
+    }
+
+    std::vector<float> losses(topts->size());
+
+    for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
+      const TranslationOption *topt = topts->Get(toptIdx);
+      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+      size_t toptHash = hash_value(*topt);
+
+      // start with pre-computed source-context-only VW scores
+      losses[toptIdx] = m_tlsFutureScores->GetStored()->find(toptHash)->second;
+
+      // add all features associated with this translation option
+      // (pre-computed when evaluated with source context)
+      const Discriminative::FeatureVector &targetFeatureVector =
+        m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
+
+      classifier.AddLabelDependentFeatureVector(targetFeatureVector);
+
+      // add classifier score with context+target features only to the total loss
+      losses[toptIdx] += classifier.Predict(MakeTargetLabel(targetPhrase));
+    }
+
+    // normalize classifier scores to get a probability distribution
+    (*m_normalizer)(losses);
+
+    // fill our cache with the results
+    FloatHashMap &toptScores = computedStateExtensions[cacheKey];
+    for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
+      const TranslationOption *topt = topts->Get(toptIdx);
+      size_t toptHash = hash_value(*topt);
+      toptScores[toptHash] = FloorScore(TransformScore(losses[toptIdx]));
+    }
+
+    VERBOSE(3, "VW :: cache miss\n");
+  } else {
+    VERBOSE(3, "VW :: cache hit\n");
+  }
+
+  // now our cache is guaranteed to contain the required score, simply look it up
+  std::vector<float> newScores(m_numScoreComponents);
+  size_t toptHash = hash_value(curHypo.GetTranslationOption());
+  newScores[0] = computedStateExtensions[cacheKey][toptHash];
+  VERBOSE(3, "VW :: adding score: " << newScores[0] << "\n");
+  accumulator->PlusEquals(this, newScores);
+
+  return new VWState(prevVWState, curHypo);
+}
+
+const FFState* VW::EmptyHypothesisState(const InputType &input) const
+{
+  size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
+  Phrase initialPhrase;
+  for (size_t i = 0; i < maxContextSize; i++)
+    initialPhrase.AddWord(m_sentenceStartWord);
+
+  return new VWState(initialPhrase);
+}
+
+void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+    , const TranslationOptionList &translationOptionList) const
+{
+  Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
+
+  if (translationOptionList.size() == 0)
+    return; // nothing to do
+
+  VERBOSE(3, "VW :: Evaluating translation options\n");
+
+  // which feature functions do we use (on the source and target side)
+  const std::vector<VWFeatureBase*>& sourceFeatures =
+    VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
+
+  const std::vector<VWFeatureBase*>& contextFeatures =
+    VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
+
+  const std::vector<VWFeatureBase*>& targetFeatures =
+    VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
+
+  size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
+
+  // only use stateful score computation when needed
+  bool haveTargetContextFeatures = ! contextFeatures.empty();
+
+  const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
+
+  if (m_train) {
+    //
+    // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
+    //
+
+    // find which topts are correct
+    std::vector<bool> correct(translationOptionList.size());
+    std::vector<int> startsAt(translationOptionList.size());
+    std::set<int> uncoveredStartingPositions;
+
+    for (size_t i = 0; i < translationOptionList.size(); i++) {
+      std::pair<bool, int> isCorrect = IsCorrectTranslationOption(* translationOptionList.Get(i));
+      correct[i] = isCorrect.first;
+      startsAt[i] = isCorrect.second;
+      if (isCorrect.first) {
+        uncoveredStartingPositions.insert(isCorrect.second);
+      }
+    }
+
+    // optionally update translation options using leave-one-out
+    std::vector<bool> keep = (m_leaveOneOut.size() > 0)
+                             ? LeaveOneOut(translationOptionList, correct)
+                             : std::vector<bool>(translationOptionList.size(), true);
+
+    while (! uncoveredStartingPositions.empty()) {
+      int currentStart = *uncoveredStartingPositions.begin();
+      uncoveredStartingPositions.erase(uncoveredStartingPositions.begin());
+
+      // check whether we (still) have some correct translation
+      int firstCorrect = -1;
+      for (size_t i = 0; i < translationOptionList.size(); i++) {
+        if (keep[i] && correct[i] && startsAt[i] == currentStart) {
+          firstCorrect = i;
+          break;
+        }
+      }
+
+      // do not train if there are no positive examples
+      if (firstCorrect == -1) {
+        VERBOSE(3, "VW :: skipping topt collection, no correct translation for span at current tgt start position\n");
+        continue;
+      }
+
+      // the first correct topt can be used by some loss functions
+      const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
+
+      // feature extraction *at prediction time* outputs feature hashes which can be cached;
+      // this is training time, simply store everything in this dummyVector
+      Discriminative::FeatureVector dummyVector;
+
+      // extract source side features
+      for(size_t i = 0; i < sourceFeatures.size(); ++i)
+        (*sourceFeatures[i])(input, sourceRange, classifier, dummyVector);
+
+      // build target-side context
+      Phrase targetContext;
+      for (size_t i = 0; i < maxContextSize; i++)
+        targetContext.AddWord(m_sentenceStartWord);
+
+      const Phrase *targetSent = GetStored()->m_sentence;
+
+      // word alignment info shifted by context size
+      AlignmentInfo contextAlignment = TransformAlignmentInfo(*GetStored()->m_alignment, maxContextSize, currentStart);
+
+      if (currentStart > 0)
+        targetContext.Append(targetSent->GetSubString(Range(0, currentStart - 1)));
+
+      // extract target-context features
+      for(size_t i = 0; i < contextFeatures.size(); ++i)
+        (*contextFeatures[i])(input, targetContext, contextAlignment, classifier, dummyVector);
+
+      // go over topts, extract target side features and train the classifier
+      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+
+        // this topt was discarded by leaving one out
+        if (! keep[toptIdx])
+          continue;
+
+        // extract target-side features for each topt
+        const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
+        for(size_t i = 0; i < targetFeatures.size(); ++i)
+          (*targetFeatures[i])(input, targetPhrase, classifier, dummyVector);
+
+        bool isCorrect = correct[toptIdx] && startsAt[toptIdx] == currentStart;
+        float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, isCorrect);
+
+        // train classifier on current example
+        classifier.Train(MakeTargetLabel(targetPhrase), loss);
+      }
+    }
+  } else {
+    //
+    // predict using a trained classifier, use this in decoding (=at test time)
+    //
+
+    std::vector<float> losses(translationOptionList.size());
+
+    Discriminative::FeatureVector outFeaturesSourceNamespace;
+
+    // extract source side features
+    for(size_t i = 0; i < sourceFeatures.size(); ++i)
+      (*sourceFeatures[i])(input, sourceRange, classifier, outFeaturesSourceNamespace);
+
+    for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+      const TranslationOption *topt = translationOptionList.Get(toptIdx);
+      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+      Discriminative::FeatureVector outFeaturesTargetNamespace;
+
+      // extract target-side features for each topt
+      for(size_t i = 0; i < targetFeatures.size(); ++i)
+        (*targetFeatures[i])(input, targetPhrase, classifier, outFeaturesTargetNamespace);
+
+      // cache the extracted target features (i.e. features associated with given topt)
+      // for future use at decoding time
+      size_t toptHash = hash_value(*topt);
+      m_tlsTranslationOptionFeatures->GetStored()->insert(
+        std::make_pair(toptHash, outFeaturesTargetNamespace));
+
+      // get classifier score
+      losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
+    }
+
+    // normalize classifier scores to get a probability distribution
+    std::vector<float> rawLosses = losses;
+    (*m_normalizer)(losses);
+
+    // update scores of topts
+    for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
+      TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
+      if (! haveTargetContextFeatures) {
+        // no target context features; evaluate the FF now
+        std::vector<float> newScores(m_numScoreComponents);
+        newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
+
+        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
+        scoreBreakDown.PlusEquals(this, newScores);
+
+        topt->UpdateScore();
+      } else {
+        // We have target context features => this is just a partial score,
+        // do not add it to the score component collection.
+        size_t toptHash = hash_value(*topt);
+
+        // Subtract the score contribution of target-only features, otherwise it would
+        // be included twice.
+        Discriminative::FeatureVector emptySource;
+        const Discriminative::FeatureVector &targetFeatureVector =
+          m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
+        classifier.AddLabelIndependentFeatureVector(emptySource);
+        classifier.AddLabelDependentFeatureVector(targetFeatureVector);
+        float targetOnlyLoss = classifier.Predict(VW_DUMMY_LABEL);
+
+        float futureScore = rawLosses[toptIdx] - targetOnlyLoss;
+        m_tlsFutureScores->GetStored()->insert(std::make_pair(toptHash, futureScore));
+      }
+    }
+  }
+}
+
+void VW::SetParameter(const std::string& key, const std::string& value)
+{ // stores one key/value option; keys not handled here go to StatefulFeatureFunction
+  if (key == "train") {
+    m_train = Scan<bool>(value); // false means predict
+  } else if (key == "path") {
+    m_modelPath = value; // VW model file; at training time, extracted features are stored here
+  } else if (key == "vw-options") {
+    m_vwOptions = value; // options for Vowpal Wabbit
+  } else if (key == "leave-one-out-from") {
+    m_leaveOneOut = value; // name of the phrase-table feature rescored with leaving one out
+  } else if (key == "training-loss") {
+    // which type of loss to use for training
+    if (value == "basic") {
+      m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
+    } else if (value == "bleu") {
+      m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
+    } else {
+      UTIL_THROW2("Unknown training loss type:" << value);
+    }
+  } else if (key == "loss") {
+    // which normalizer to use (theoretically depends on the loss function used for training the
+    // classifier (squared/logistic/hinge/...), hence the name "loss")
+    if (value == "logistic") {
+      m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
+    } else if (value == "squared") {
+      m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
+    } else {
+      UTIL_THROW2("Unknown loss type:" << value);
+    }
+  } else {
+    StatefulFeatureFunction::SetParameter(key, value); // not a VW-specific key
+  }
+}
+
+void VW::InitializeForInput(ttasksptr const& ttask)
+{ // per-sentence setup: clear the caches; in training also load the target sentence and alignment
+  // do not keep future cost estimates across sentences!
+  m_tlsFutureScores->GetStored()->clear();
+
+  // invalidate our caches after each sentence
+  m_tlsComputedStateExtensions->GetStored()->clear();
+
+  // it's not certain that we should clear these caches; we do it
+  // because they shouldn't be allowed to grow indefinitely large but
+  // target contexts and translation options will have identical features
+  // the next time we extract them...
+  m_tlsTargetContextFeatures->GetStored()->clear();
+  m_tlsTranslationOptionFeatures->GetStored()->clear();
+
+  InputType const& source = *(ttask->GetSource().get());
+  // tabbed sentence is assumed only in training
+  if (! m_train)
+    return; // prediction: nothing else to prepare
+
+  UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
+                 "This feature function requires the TabbedSentence input type");
+
+  const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
+  UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
+                 "TabbedSentence must contain target<tab>alignment");
+
+  // target sentence represented as a phrase
+  Phrase *target = new Phrase();
+  target->CreateFromString(
+    Output
+    , StaticData::Instance().options()->output.factor_order
+    , tabbedSentence.GetColumns()[0] // column 0: target sentence
+    , NULL);
+
+  // word alignment between source and target sentence (column 1)
+  // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
+  // sentences, not phrases
+  AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
+
+  VWTargetSentence &targetSent = *GetStored();
+  targetSent.Clear(); // presumably frees the previous sentence's objects - confirm in VWTargetSentence.h
+  targetSent.m_sentence = target; // raw pointers are handed over to targetSent
+  targetSent.m_alignment = alignment;
+
+  // pre-compute max- and min- aligned points for faster translation option checking
+  targetSent.SetConstraints(source.GetSize());
+}
+
+/*************************************************************************************
+ * private methods
+ ************************************************************************************/
+
+const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const
+{ // collects alignment points of the last contextSize target words preceding curHypo's phrase
+  std::set<std::pair<size_t, size_t> > alignmentPoints; // (source position, index in context)
+  const Hypothesis *contextHypo = curHypo.GetPrevHypo(); // context starts before the current phrase
+  int idxInContext = contextSize - 1; // context slots are filled from right to left
+  int processedWordsInHypo = 0; // words already taken from the end of contextHypo's phrase
+  while (idxInContext >= 0 && contextHypo) {
+    int idxInHypo = contextHypo->GetCurrTargetLength() - 1 - processedWordsInHypo;
+    if (idxInHypo >= 0) {
+      const AlignmentInfo &hypoAlign = contextHypo->GetCurrTargetPhrase().GetAlignTerm();
+      std::set<size_t> alignedToTgt = hypoAlign.GetAlignmentsForTarget(idxInHypo);
+      size_t srcOffset = contextHypo->GetCurrSourceWordsRange().GetStartPos();
+      BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
+        alignmentPoints.insert(std::make_pair(srcOffset + srcIdx, idxInContext)); // phrase-internal source index + span start
+      }
+      processedWordsInHypo++;
+      idxInContext--;
+    } else {
+      processedWordsInHypo = 0; // this phrase is used up, move on to the preceding hypothesis
+      contextHypo = contextHypo->GetPrevHypo();
+    }
+  }
+
+  return AlignmentInfoCollection::Instance().Add(alignmentPoints);
+}
+
+AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const
+{ // keeps the alignment points of the (at most) contextSize target words preceding currentStart
+  std::set<std::pair<size_t, size_t> > alignmentPoints;
+  for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) { // clamped at sentence start
+    std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
+    BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
+      alignmentPoints.insert(std::make_pair(srcIdx, i + contextSize)); // target index shifted by contextSize
+    }
+  }
+  return AlignmentInfo(alignmentPoints);
+}
+
+std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const
+{
+  // Checks whether topt's target phrase occurs in the stored target sentence at a position
+  // compatible with the word alignment; returns (true, start index) or (false, -1).
+  int sourceStart = topt.GetSourceWordsRange().GetStartPos();
+  int sourceEnd   = topt.GetSourceWordsRange().GetEndPos();
+
+  const VWTargetSentence &targetSentence = *GetStored();
+  const int sentenceSize = targetSentence.m_sentence->GetSize();
+  // [targetStart, targetEnd] spans aligned target words
+  int targetStart = sentenceSize;
+  int targetEnd   = -1;
+
+  // get the left-most and right-most alignment point within source span
+  for(int i = sourceStart; i <= sourceEnd; ++i) {
+    if(targetSentence.m_sourceConstraints[i].IsSet()) {
+      if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
+        targetStart = targetSentence.m_sourceConstraints[i].GetMin();
+      if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
+        targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
+    }
+  }
+  // there was no alignment
+  if(targetEnd == -1)
+    return std::make_pair(false, -1);
+
+  // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd].
+  // Scanning starts at the neighbours of the inner span: targetStart and targetEnd are aligned
+  // positions themselves, so a scan beginning on them would stop at once and never widen the span.
+  int targetStart2 = targetStart;
+  for(int i = targetStart - 1; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
+    targetStart2 = i;
+
+  int targetEnd2   = targetEnd;
+  for(int i = targetEnd + 1;
+      i < sentenceSize && !targetSentence.m_targetConstraints[i].IsSet();
+      ++i)
+    targetEnd2 = i;
+
+  const TargetPhrase &tphrase = topt.GetTargetPhrase();
+  const int phraseSize = tphrase.GetSize();
+
+  // if target phrase is shorter than inner span return false
+  if(phraseSize < targetEnd - targetStart + 1)
+    return std::make_pair(false, -1);
+
+  // if target phrase is longer than outer span return false
+  if(phraseSize > targetEnd2 - targetStart2 + 1)
+    return std::make_pair(false, -1);
+
+  // Admissible starts keep the phrase inside the outer span and make it cover the whole inner
+  // span; the two size checks above guarantee that this range is not empty.
+  const int firstStart = std::max(targetStart2, targetEnd - phraseSize + 1);
+  const int lastStart  = std::min(targetStart, targetEnd2 - phraseSize + 1);
+
+  // for each possible starting point
+  for(int tempStart = firstStart; tempStart <= lastStart; tempStart++) {
+    bool found = true;
+    // compare the phrase word by word with the target sentence
+    for(int i = 0; i < phraseSize; ++i) {
+      if(tphrase.GetWord(i) != targetSentence.m_sentence->GetWord(tempStart + i)) {
+        found = false;
+        break;
+      }
+    }
+    // return true if there was a match
+    if(found)
+      return std::make_pair(true, tempStart);
+  }
+
+  return std::make_pair(false, -1);
+}
+
+std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const
+{ // discounts counts of correct topts and rescores them; result[i] says whether topt i is kept
+  UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
+
+  float sourceRawCount = 0.0; // 0.0 doubles as the "not computed yet" marker
+  const float ONE = 1.0001; // a bit more than 1 - presumably so inexact counts near 1 still drop to <= 0; TODO confirm
+
+  std::vector<bool> keepOpt;
+
+  for (size_t i = 0; i < topts.size(); i++) {
+    TranslationOption *topt = *(topts.begin() + i);
+    const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
+
+    // extract raw counts from phrase-table property
+    const CountsPhraseProperty *property =
+      static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
+
+    if (! property) {
+      VERBOSE(2, "VW :: Counts not found for topt! Is this an OOV?\n");
+      // keep all translation opts, this is either OOV or bad usage...
+      // (topts processed in earlier iterations have already had their scores updated)
+      keepOpt.assign(topts.size(), true);
+      return keepOpt;
+    }
+
+    if (sourceRawCount == 0.0) { // only true on the first topt: later we either returned or hold a positive value
+      sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
+      if (sourceRawCount <= 0) {
+        // no translation options survived, source phrase was a singleton
+        keepOpt.assign(topts.size(), false);
+        return keepOpt;
+      }
+    }
+
+    float discount = correct[i] ? ONE : 0.0; // only topts marked correct are discounted
+    float target = property->GetTargetMarginal() - discount;
+    float joint  = property->GetJointCount() - discount;
+    if (discount != 0.0) VERBOSE(3, "VW :: leaving one out!\n");
+
+    if (joint > 0) {
+      // topt survived leaving one out, update its scores
+      const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
+      std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
+      UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
+      scores[0] = TransformScore(joint / target); // P(f|e)
+      scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
+
+      ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
+      scoreBreakDown.Assign(feature, scores); // scores[1] and scores[3] are written back unchanged
+      topt->UpdateScore();
+      keepOpt.push_back(true);
+    } else {
+      // they only occurred together once, discard topt
+      VERBOSE(2, "VW :: discarded topt when leaving one out\n");
+      keepOpt.push_back(false);
+    }
+  }
+
+  return keepOpt;
+}
+
+} // namespace Moses
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index da8a5cfb8..d94cce502 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -3,8 +3,12 @@
 #include <string>
 #include <map>
 #include <limits>
+#include <vector>
 
-#include "moses/FF/StatelessFeatureFunction.h"
+#include <boost/unordered_map.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "moses/FF/StatefulFeatureFunction.h"
 #include "moses/PP/CountsPhraseProperty.h"
 #include "moses/TranslationOptionList.h"
 #include "moses/TranslationOption.h"
@@ -13,6 +17,8 @@
 #include "moses/StaticData.h"
 #include "moses/Phrase.h"
 #include "moses/AlignmentInfo.h"
+#include "moses/Word.h"
+#include "moses/FactorCollection.h"
 
 #include "Normalizer.h"
 #include "Classifier.h"
@@ -20,119 +26,50 @@
 #include "TabbedSentence.h"
 #include "ThreadLocalByFeatureStorage.h"
 #include "TrainingLoss.h"
+#include "VWTargetSentence.h"
+
+/*
+ * VW classifier feature. See vw/README.md for further information.
+ *
+ * TODO: say which paper to cite.
+ */
 
 namespace Moses
 {
 
-const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might
+// dummy class label; VW does not use the actual label, other classifiers might
+const std::string VW_DUMMY_LABEL = "1111";
 
-/**
- * Helper class for storing alignment constraints.
- */
-class Constraint
-{
-public:
-  Constraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
+// thread-specific classifier instance
+typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
 
-  Constraint(int min, int max) : m_min(min), m_max(max) {}
+// current target sentence, used in VW training (vwtrainer), not in decoding (prediction time)
+typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
 
-  /**
-   * We are aligned to point => our min cannot be larger, our max cannot be smaller.
-   */
-  void Update(int point) {
-    if (m_min > point) m_min = point;
-    if (m_max < point) m_max = point;
-  }
+// hash table of feature vectors
+typedef boost::unordered_map<size_t, Discriminative::FeatureVector> FeatureVectorMap;
 
-  bool IsSet() const {
-    return m_max != -1;
-  }
+// thread-specific feature vector hash
+typedef ThreadLocalByFeatureStorage<FeatureVectorMap> TLSFeatureVectorMap;
 
-  int GetMin() const {
-    return m_min;
-  }
+// hash table of partial scores
+typedef boost::unordered_map<size_t, float> FloatHashMap;
 
-  int GetMax() const {
-    return m_max;
-  }
+// thread-specific score hash table, used for caching
+typedef ThreadLocalByFeatureStorage<FloatHashMap> TLSFloatHashMap;
 
-private:
-  int m_min, m_max;
-};
+// thread-specific hash table for caching full classifier outputs
+typedef ThreadLocalByFeatureStorage<boost::unordered_map<size_t, FloatHashMap> > TLSStateExtensions;
 
-/**
- * VW thread-specific data about target sentence.
+/*
+ * VW feature function. A discriminative classifier with source and target context features.
  */
-struct VWTargetSentence {
-  VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
-
-  void Clear() {
-    if (m_sentence) delete m_sentence;
-    if (m_alignment) delete m_alignment;
-  }
-
-  ~VWTargetSentence() {
-    Clear();
-  }
-
-  void SetConstraints(size_t sourceSize) {
-    // initialize to unconstrained
-    m_sourceConstraints.assign(sourceSize, Constraint());
-    m_targetConstraints.assign(m_sentence->GetSize(), Constraint());
-
-    // set constraints according to alignment points
-    AlignmentInfo::const_iterator it;
-    for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
-      int src = it->first;
-      int tgt = it->second;
-
-      if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
-        UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
-      }
-
-      m_sourceConstraints[src].Update(tgt);
-      m_targetConstraints[tgt].Update(src);
-    }
-  }
-
-  Phrase *m_sentence;
-  AlignmentInfo *m_alignment;
-  std::vector<Constraint> m_sourceConstraints, m_targetConstraints;
-};
-
-typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
-
-typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
-
-class VW : public StatelessFeatureFunction, public TLSTargetSentence
+class VW : public StatefulFeatureFunction, public TLSTargetSentence
 {
 public:
-  VW(const std::string &line)
-    : StatelessFeatureFunction(1, line)
-    , TLSTargetSentence(this)
-    , m_train(false) {
-    ReadParameters();
-    Discriminative::ClassifierFactory *classifierFactory = m_train
-        ? new Discriminative::ClassifierFactory(m_modelPath)
-        : new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
-
-    m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
-
-    if (! m_normalizer) {
-      VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
-      m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
-    }
-
-    if (! m_trainingLoss) {
-      VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
-      m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
-    }
-  }
+  VW(const std::string &line);
 
-  virtual ~VW() {
-    delete m_tlsClassifier;
-    delete m_normalizer;
-  }
+  virtual ~VW();
 
   bool IsUseable(const FactorMask &mask) const {
     return true;
@@ -152,335 +89,89 @@ public:
                                  , ScoreComponentCollection *estimatedFutureScore = NULL) const {
   }
 
-  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
-      , const TranslationOptionList &translationOptionList) const {
-    Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
-
-    if (translationOptionList.size() == 0)
-      return; // nothing to do
-
-    VERBOSE(2, "VW :: Evaluating translation options\n");
-
-    // which feature functions do we use (on the source and target side)
-    const std::vector<VWFeatureBase*>& sourceFeatures =
-      VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
-
-    const std::vector<VWFeatureBase*>& targetFeatures =
-      VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
-
-    const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
-    const InputPath  &inputPath   = translationOptionList.Get(0)->GetInputPath();
-
-    if (m_train) {
-      //
-      // extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
-      //
-
-      // find which topts are correct
-      std::vector<bool> correct(translationOptionList.size());
-      for (size_t i = 0; i < translationOptionList.size(); i++)
-        correct[i] = IsCorrectTranslationOption(* translationOptionList.Get(i));
-
-      // optionally update translation options using leave-one-out
-      std::vector<bool> keep = (m_leaveOneOut.size() > 0)
-                               ? LeaveOneOut(translationOptionList, correct)
-                               : std::vector<bool>(translationOptionList.size(), true);
-
-      // check whether we (still) have some correct translation
-      int firstCorrect = -1;
-      for (size_t i = 0; i < translationOptionList.size(); i++) {
-        if (keep[i] && correct[i]) {
-          firstCorrect = i;
-          break;
-        }
-      }
-
-      // do not train if there are no positive examples
-      if (firstCorrect == -1) {
-        VERBOSE(2, "VW :: skipping topt collection, no correct translation for span\n");
-        return;
-      }
-
-      // the first correct topt can be used by some loss functions
-      const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
-
-      // extract source side features
-      for(size_t i = 0; i < sourceFeatures.size(); ++i)
-        (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
-
-      // go over topts, extract target side features and train the classifier
-      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
-
-        // this topt was discarded by leaving one out
-        if (! keep[toptIdx])
-          continue;
-
-        // extract target-side features for each topt
-        const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
-        for(size_t i = 0; i < targetFeatures.size(); ++i)
-          (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
-
-        float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, correct[toptIdx]);
-
-        // train classifier on current example
-        classifier.Train(MakeTargetLabel(targetPhrase), loss);
-      }
-    } else {
-      //
-      // predict using a trained classifier, use this in decoding (=at test time)
-      //
-
-      std::vector<float> losses(translationOptionList.size());
-
-      // extract source side features
-      for(size_t i = 0; i < sourceFeatures.size(); ++i)
-        (*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
-
-      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
-        const TranslationOption *topt = translationOptionList.Get(toptIdx);
-        const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
-
-        // extract target-side features for each topt
-        for(size_t i = 0; i < targetFeatures.size(); ++i)
-          (*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
-
-        // get classifier score
-        losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
-      }
-
-      // normalize classifier scores to get a probability distribution
-      (*m_normalizer)(losses);
-
-      // update scores of topts
-      for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
-        TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
-        std::vector<float> newScores(m_numScoreComponents);
-        newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
-
-        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
-        scoreBreakDown.PlusEquals(this, newScores);
-
-        topt->UpdateScore();
-      }
-    }
-  }
-
-  void EvaluateWhenApplied(const Hypothesis& hypo,
-                           ScoreComponentCollection* accumulator) const {
-  }
+  // The behavior of this method depends on whether it's called during VW
+  // training (feature extraction) by vwtrainer or during decoding (prediction
+  // time) by Moses.
+  //
+  // When predicting, it evaluates all translation options with the VW model;
+  // if no target-context features are defined, this is the final score and it
+  // is added directly to the TranslationOption score. If there are target
+  // context features, the score is a partial score and it is only stored in
+  // cache; the final score is computed based on target context in
+  // EvaluateWhenApplied().
+  //
+  // This method is also used in training by vwtrainer in which case features
+  // are written to a file, no classifier predictions take place. Target-side
+  // context is constant at training time (we know the true target sentence),
+  // so target-context features are extracted here as well.
+  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
+      , const TranslationOptionList &translationOptionList) const;
+
+  // Evaluate VW during decoding. This is only used at prediction time (not in training).
+  // When no target-context features are defined, VW predictions were already fully calculated
+  // in EvaluateTranslationOptionListWithSourceContext() and the scores were added to the model.
+  // If there are target-context features, we compute the context-dependent part of the
+  // classifier score and combine it with the source-context only partial score which was computed
+  // in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
+  // method more efficient.
+  virtual FFState* EvaluateWhenApplied(
+    const Hypothesis& curHypo,
+    const FFState* prevState,
+    ScoreComponentCollection* accumulator) const;
+
+  virtual FFState* EvaluateWhenApplied( // chart-decoding overload: always throws, VW has no chart support
+    const ChartHypothesis&,
+    int,
+    ScoreComponentCollection* accumulator) const {
+    throw std::logic_error("hiearchical/syntax not supported"); // by value, so catch (const std::exception&) sees it
+  }
+
+  // Initial VW state; contains unaligned BOS symbols.
+  const FFState* EmptyHypothesisState(const InputType &input) const;
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+  // At prediction time, this clears our caches. At training time, we load the next sentence, its
+  // translation and word alignment.
+  virtual void InitializeForInput(ttasksptr const& ttask);
 
-  void EvaluateWhenApplied(const ChartHypothesis &hypo,
-                           ScoreComponentCollection* accumulator) const {
+private:
+  inline std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
+    return VW_DUMMY_LABEL; // VW does not care about class labels in our setting (--csoaa_ldf mc).
   }
 
-  void SetParameter(const std::string& key, const std::string& value) {
-    if (key == "train") {
-      m_train = Scan<bool>(value);
-    } else if (key == "path") {
-      m_modelPath = value;
-    } else if (key == "vw-options") {
-      m_vwOptions = value;
-    } else if (key == "leave-one-out-from") {
-      m_leaveOneOut = value;
-    } else if (key == "training-loss") {
-      // which type of loss to use for training
-      if (value == "basic") {
-        m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
-      } else if (value == "bleu") {
-        m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
-      } else {
-        UTIL_THROW2("Unknown training loss type:" << value);
-      }
-    } else if (key == "loss") {
-      // which normalizer to use (theoretically depends on the loss function used for training the
-      // classifier (squared/logistic/hinge/...), hence the name "loss"
-      if (value == "logistic") {
-        m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
-      } else if (value == "squared") {
-        m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
-      } else {
-        UTIL_THROW2("Unknown loss type:" << value);
-      }
-    } else {
-      StatelessFeatureFunction::SetParameter(key, value);
-    }
+  inline size_t MakeCacheKey(const FFState *prevState, size_t spanStart, size_t spanEnd) const { // key from state + span
+    size_t key = 0;
+    boost::hash_combine(key, prevState); // hashes the pointer itself, not the state's contents
+    boost::hash_combine(key, spanStart);
+    boost::hash_combine(key, spanEnd);
+    return key;
   }
 
-  virtual void InitializeForInput(ttasksptr const& ttask) {
-    InputType const& source = *(ttask->GetSource().get());
-    // tabbed sentence is assumed only in training
-    if (! m_train)
-      return;
-
-    UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
-                   "This feature function requires the TabbedSentence input type");
-
-    const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
-    UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
-                   "TabbedSentence must contain target<tab>alignment");
-
-    // target sentence represented as a phrase
-    Phrase *target = new Phrase();
-    target->CreateFromString(
-      Output
-      , StaticData::Instance().options()->output.factor_order
-      , tabbedSentence.GetColumns()[0]
-      , NULL);
-
-    // word alignment between source and target sentence
-    // we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
-    // sentences, not phrases
-    AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
-
-    VWTargetSentence &targetSent = *GetStored();
-    targetSent.Clear();
-    targetSent.m_sentence = target;
-    targetSent.m_alignment = alignment;
-
-    // pre-compute max- and min- aligned points for faster translation option checking
-    targetSent.SetConstraints(source.GetSize());
-  }
+  // used in decoding to transform the global word alignment information into
+  // context-phrase internal alignment information (i.e., with target indices corresponding
+  // to positions in contextPhrase)
+  const AlignmentInfo *TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const;
 
+  // used during training to extract relevant alignment points from the full sentence alignment
+  // and shift them by target context size
+  AlignmentInfo TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const;
 
-private:
-  std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
-    return VW_DUMMY_LABEL;
-  }
+  // At training time, determine whether a translation option is correct for the current target sentence
+  // based on word alignment. This is a bit complicated because we need to handle various corner-cases
+  // where some word(s) on phrase borders are unaligned.
+  std::pair<bool, int> IsCorrectTranslationOption(const TranslationOption &topt) const;
 
-  bool IsCorrectTranslationOption(const TranslationOption &topt) const {
-
-    //std::cerr << topt.GetSourceWordsRange() << std::endl;
-
-    int sourceStart = topt.GetSourceWordsRange().GetStartPos();
-    int sourceEnd   = topt.GetSourceWordsRange().GetEndPos();
-
-    const VWTargetSentence &targetSentence = *GetStored();
-
-    // [targetStart, targetEnd] spans aligned target words
-    int targetStart = targetSentence.m_sentence->GetSize();
-    int targetEnd   = -1;
-
-    // get the left-most and right-most alignment point within source span
-    for(int i = sourceStart; i <= sourceEnd; ++i) {
-      if(targetSentence.m_sourceConstraints[i].IsSet()) {
-        if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
-          targetStart = targetSentence.m_sourceConstraints[i].GetMin();
-        if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
-          targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
-      }
-    }
-    // there was no alignment
-    if(targetEnd == -1)
-      return false;
-
-    //std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
-
-    // [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
-    int targetStart2 = targetStart;
-    for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
-      targetStart2 = i;
-
-    int targetEnd2   = targetEnd;
-    for(int i = targetEnd2;
-        i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
-        ++i)
-      targetEnd2 = i;
-
-    //std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
-
-    const TargetPhrase &tphrase = topt.GetTargetPhrase();
-    //std::cerr << tphrase << std::endl;
-
-    // if target phrase is shorter than inner span return false
-    if(tphrase.GetSize() < targetEnd - targetStart + 1)
-      return false;
-
-    // if target phrase is longer than outer span return false
-    if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
-      return false;
-
-    // for each possible starting point
-    for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
-      bool found = true;
-      // check if the target phrase is within longer span
-      for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
-        if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
-          found = false;
-          break;
-        }
-      }
-      // return true if there was a match
-      if(found) {
-        //std::cerr << "Found" << std::endl;
-        return true;
-      }
-    }
-
-    return false;
-  }
-
-  std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
-    UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
-
-    float sourceRawCount = 0.0;
-    const float ONE = 1.0001; // I don't understand floating point numbers
-
-    std::vector<bool> keepOpt;
-
-    for (size_t i = 0; i < topts.size(); i++) {
-      TranslationOption *topt = *(topts.begin() + i);
-      const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
-
-      // extract raw counts from phrase-table property
-      const CountsPhraseProperty *property =
-        static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
-
-      if (! property) {
-        VERBOSE(1, "VW :: Counts not found for topt! Is this an OOV?\n");
-        // keep all translation opts without updating, this is either OOV or bad usage...
-        keepOpt.assign(topts.size(), true);
-        return keepOpt;
-      }
-
-      if (sourceRawCount == 0.0) {
-        sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
-        if (sourceRawCount <= 0) {
-          // no translation options survived, source phrase was a singleton
-          keepOpt.assign(topts.size(), false);
-          return keepOpt;
-        }
-      }
-
-      float discount = correct[i] ? ONE : 0.0;
-      float target = property->GetTargetMarginal() - discount;
-      float joint  = property->GetJointCount() - discount;
-      if (discount != 0.0) VERBOSE(2, "VW :: leaving one out!\n");
-
-      if (joint > 0) {
-        // topt survived leaving one out, update its scores
-        const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
-        std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
-        UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
-        scores[0] = TransformScore(joint / target); // P(f|e)
-        scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
-
-        ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
-        scoreBreakDown.Assign(feature, scores);
-        topt->UpdateScore();
-        keepOpt.push_back(true);
-      } else {
-        // they only occurred together once, discard topt
-        VERBOSE(2, "VW :: discarded topt when leaving one out\n");
-        keepOpt.push_back(false);
-      }
-    }
-
-    return keepOpt;
-  }
+  // At training time, optionally discount occurrences of phrase pairs from the current sentence, helps prevent
+  // over-fitting.
+  std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const;
 
   bool m_train; // false means predict
-  std::string m_modelPath;
-  std::string m_vwOptions;
+  std::string m_modelPath; // path to the VW model file; at training time, this is where extracted features are stored
+  std::string m_vwOptions; // options for Vowpal Wabbit
+
+  // BOS token, all factors
+  Word m_sentenceStartWord;
 
   // calculator of training loss
   TrainingLoss *m_trainingLoss = NULL;
@@ -488,9 +179,16 @@ private:
   // optionally contains feature name of a phrase table where we recompute scores with leaving one out
   std::string m_leaveOneOut;
 
+  // normalizer, typically this means softmax
   Discriminative::Normalizer *m_normalizer = NULL;
+
+  // thread-specific classifier instance
   TLSClassifier *m_tlsClassifier;
+
+  // caches for partial scores and feature vectors
+  TLSFloatHashMap *m_tlsFutureScores;
+  TLSStateExtensions *m_tlsComputedStateExtensions;
+  TLSFeatureVectorMap *m_tlsTranslationOptionFeatures, *m_tlsTargetContextFeatures;
 };
 
 }
-
diff --git a/moses/FF/VW/VWFeatureBase.cpp b/moses/FF/VW/VWFeatureBase.cpp
index 874544203..e51396b3f 100644
--- a/moses/FF/VW/VWFeatureBase.cpp
+++ b/moses/FF/VW/VWFeatureBase.cpp
@@ -2,11 +2,26 @@
 #include <string>
 
 #include "VWFeatureBase.h"
+#include "VWFeatureContext.h"
 
 namespace Moses
 {
 std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
 std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
+std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetContextFeatures;
 std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
+
+std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;
+
+
+void VWFeatureBase::UpdateContextSize(const std::string &usedBy)
+{
+  // Only reached for vwft_targetContext features, hence the unchecked downcast below;
+  // operator[] zero-initializes the entry the first time a classifier name is seen.
+  size_t &longestSoFar = s_targetContextLength[usedBy];
+  const size_t ownSize = static_cast<VWFeatureContext *const>(this)->GetContextSize();
+  if (longestSoFar < ownSize) longestSoFar = ownSize;
+}
+
 }
 
diff --git a/moses/FF/VW/VWFeatureBase.h b/moses/FF/VW/VWFeatureBase.h
index c8bd60a81..ca3317d31 100644
--- a/moses/FF/VW/VWFeatureBase.h
+++ b/moses/FF/VW/VWFeatureBase.h
@@ -12,11 +12,17 @@
 namespace Moses
 {
 
+enum VWFeatureType { // selects the static registry a VW feature is added to
+  vwft_source,        // added to s_sourceFeatures
+  vwft_target,        // added to s_targetFeatures
+  vwft_targetContext  // added to s_targetContextFeatures; also updates the required context size
+};
+
 class VWFeatureBase : public StatelessFeatureFunction
 {
 public:
-  VWFeatureBase(const std::string &line, bool isSource = true)
-    : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
+  VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
+    : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
     // defaults
     m_sourceFactors.push_back(0);
     m_targetFactors.push_back(0);
@@ -71,26 +77,47 @@ public:
     return s_sourceFeatures[name];
   }
 
+  // Return only the target-context classifier features registered for classifier `name`
+  static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
+    // no exception when nothing is registered (this feature type is optional): operator[] yields an empty vector
+    return s_targetContextFeatures[name];
+  }
+
   // Return only target-dependent classifier features
   static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
     UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
     return s_targetFeatures[name];
   }
 
+  // Required context length (maximum context size over the target-context features registered for `name`)
+  static size_t GetMaximumContextSize(std::string name = "VW0") {
+    return s_targetContextLength[name]; // 0 by default (operator[] value-initializes a missing entry)
+  }
+
   // Overload to process source-dependent data, create features once for every
   // source sentence word range.
   virtual void operator()(const InputType &input
-                          , const InputPath &inputPath
                           , const Range &sourceRange
-                          , Discriminative::Classifier &classifier) const = 0;
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const = 0;
 
   // Overload to process target-dependent features, create features once for
-  // every target phrase. One source word range will have at leat one target
+  // every target phrase. One source word range will have at least one target
   // phrase, but may have more.
   virtual void operator()(const InputType &input
-                          , const InputPath &inputPath
                           , const TargetPhrase &targetPhrase
-                          , Discriminative::Classifier &classifier) const = 0;
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const = 0;
+
+  // Overload to process target-context dependent features; these features are
+  // evaluated during decoding. For efficiency, features are not fed directly into
+  // the classifier object but instead output in the vector "outFeatures" and managed
+  // separately in VW.h.
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const = 0;
 
 protected:
   std::vector<FactorType> m_sourceFactors, m_targetFactors;
@@ -99,10 +126,15 @@ protected:
     for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
         it != m_usedBy.end(); it++) {
       s_features[*it].push_back(this);
-      if(m_isSource)
+
+      if(m_featureType == vwft_source) {
         s_sourceFeatures[*it].push_back(this);
-      else
+      } else if (m_featureType == vwft_targetContext) {
+        s_targetContextFeatures[*it].push_back(this);
+        UpdateContextSize(*it);
+      } else {
         s_targetFeatures[*it].push_back(this);
+      }
     }
   }
 
@@ -112,11 +144,16 @@ private:
     Tokenize(m_usedBy, usedBy, ",");
   }
 
+  void UpdateContextSize(const std::string &usedBy);
+
   std::vector<std::string> m_usedBy;
-  bool m_isSource;
+  VWFeatureType m_featureType;
   static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
   static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
+  static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
   static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
+
+  static std::map<std::string, size_t> s_targetContextLength;
 };
 
 }
diff --git a/moses/FF/VW/VWFeatureContext.h b/moses/FF/VW/VWFeatureContext.h
new file mode 100644
index 000000000..18632d91b
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContext.h
@@ -0,0 +1,116 @@
+#pragma once
+
+#include <string>
+#include <boost/foreach.hpp>
+#include "VWFeatureBase.h"
+#include "moses/InputType.h"
+#include "moses/TypeDef.h"
+#include "moses/Word.h"
+
+namespace Moses
+{
+
+// Inherit from this for target-context classifier features. They will
+// automatically register with the classifier class named VW0 or one or more
+// names specified by the used-by=name1,name2,... parameter.
+//
+// The classifier gets a full list by calling
+// VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription())
+
+
+class VWFeatureContext : public VWFeatureBase
+{
+public:
+  VWFeatureContext(const std::string &line, size_t contextSize)
+    : VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
+  }
+
+  // Empty implementations of the VWFeatureBase overloads that target-context features do not use
+
+  virtual void operator()(const InputType &input
+                          , const TargetPhrase &targetPhrase
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+  }
+
+  virtual void operator()(const InputType &input
+                          , const Range &sourceRange
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+  }
+
+  virtual void SetParameter(const std::string& key, const std::string& value) {
+    if (key == "size") { // replaces the default context size given to the constructor
+      m_contextSize = Scan<size_t>(value);
+    } else if (key == "factor-positions") {
+      // factor positions: assuming a factor such as positional morphological tag, use this
+      // option to select only certain positions; this assumes that only a single
+      // target-side factor is defined
+      Tokenize<size_t>(m_factorPositions, value, ",");
+    } else {
+      VWFeatureBase::SetParameter(key, value);
+    }
+  }
+
+  size_t GetContextSize() const { // const: can be queried on const objects as well
+    return m_contextSize;
+  }
+
+protected:
+  // Get word with the correct subset of factors as string. Because we're target
+  // context features, we look at a limited number of words to the left of the
+  // current translation. posFromEnd is interpreted like this:
+  // 0 = last word of the hypothesis
+  // 1 = next to last word
+  // ...etc.; posFromEnd must be smaller than the phrase length
+  inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
+    const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
+    if (m_factorPositions.empty()) {
+      return word.GetString(m_targetFactors, false);
+    } else {
+      if (m_targetFactors.size() != 1)
+        UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
+      const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
+
+      // corner cases: at sentence beginning/end, we don't have the correct factors set up
+      // similarly for UNK
+      if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
+        return fullFactor;
+
+      std::string subFactor(m_factorPositions.size(), 'x'); // placeholder chars; kept for positions past the end of the factor
+      for (size_t i = 0; i < m_factorPositions.size(); i++)
+        if (m_factorPositions[i] < fullFactor.size()) // never index beyond the factor string
+          subFactor[i] = fullFactor[m_factorPositions[i]];
+      return subFactor;
+    }
+  }
+
+  // some target-context feature functions also look at the source
+  inline std::string GetSourceWord(const InputType &input, size_t pos) const {
+    return input.GetWord(pos).GetString(m_sourceFactors, false);
+  }
+
+  // get source words aligned to a particular context word
+  std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
+      , const InputType &input
+      , const AlignmentInfo &alignInfo
+      , size_t posFromEnd) const {
+    size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
+    std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
+    std::vector<std::string> out;
+    out.reserve(alignedToTarget.size());
+    BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
+      out.push_back(GetSourceWord(input, srcIdx));
+    }
+    return out;
+  }
+
+  // required context size
+  size_t m_contextSize;
+
+  // factor positions: assuming a factor such as positional morphological tag, use this
+  // option to select only certain positions
+  std::vector<size_t> m_factorPositions;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextBigrams.h b/moses/FF/VW/VWFeatureContextBigrams.h
new file mode 100644
index 000000000..92b652123
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextBigrams.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextBigrams : public VWFeatureContext
+{
+public:
+  VWFeatureContextBigrams(const std::string &line)
+    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+    ReadParameters();
+    VWFeatureBase::UpdateRegister(); // must come last: registration records the context size
+  }
+
+  // One feature per pair of neighbouring context words; positions count back from the end (see GetWord)
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+    for (size_t farther = 1; farther < m_contextSize; ++farther) {
+      const std::string nearWord = GetWord(contextPhrase, farther - 1), farWord = GetWord(contextPhrase, farther);
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(farther + 1) + "^" + nearWord + "^" + farWord));
+    }
+  }
+
+  virtual void SetParameter(const std::string& key, const std::string& value) {
+    VWFeatureContext::SetParameter(key, value);
+  }
+
+private:
+  static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextBilingual.h b/moses/FF/VW/VWFeatureContextBilingual.h
new file mode 100644
index 000000000..f681fcb78
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextBilingual.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <string>
+#include <boost/foreach.hpp>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextBilingual : public VWFeatureContext
+{
+public:
+  VWFeatureContextBilingual(const std::string &line)
+    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+    ReadParameters();
+    VWFeatureBase::UpdateRegister(); // must come last: registration records the context size
+  }
+
+  // Pairs every context word with each source word it is aligned to; one feature per
+  // (context word, source word) pair, positions counting back from the end (see GetWord).
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+    for (size_t dist = 0; dist < m_contextSize; ++dist) {
+      const std::string prefix = "tcblng^-" + SPrint(dist + 1) + "^" + GetWord(contextPhrase, dist) + "^";
+      const std::vector<std::string> sources = GetAlignedSourceWords(contextPhrase, input, alignmentInfo, dist);
+      for (std::vector<std::string>::const_iterator src = sources.begin(); src != sources.end(); ++src) {
+        outFeatures.push_back(classifier.AddLabelIndependentFeature(prefix + *src));
+      }
+    }
+  }
+
+  virtual void SetParameter(const std::string& key, const std::string& value) {
+    VWFeatureContext::SetParameter(key, value);
+  }
+
+private:
+  static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureContextWindow.h b/moses/FF/VW/VWFeatureContextWindow.h
new file mode 100644
index 000000000..66c9c3ec5
--- /dev/null
+++ b/moses/FF/VW/VWFeatureContextWindow.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureContext.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+
+class VWFeatureContextWindow : public VWFeatureContext
+{
+public:
+  VWFeatureContextWindow(const std::string &line)
+    : VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
+    ReadParameters();
+    VWFeatureBase::UpdateRegister(); // must come last: registration records the context size
+  }
+
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+    for (size_t dist = 0; dist < m_contextSize; ++dist) { // one feature per context word, dist 0 = last word
+      const std::string word = GetWord(contextPhrase, dist);
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("tcwin^-" + SPrint(dist + 1) + "^" + word));
+    }
+  }
+
+  virtual void SetParameter(const std::string& key, const std::string& value) {
+    VWFeatureContext::SetParameter(key, value);
+  }
+
+private:
+  static const int DEFAULT_WINDOW_SIZE = 1;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSource.h b/moses/FF/VW/VWFeatureSource.h
index 564f4a3b6..7a306b59c 100644
--- a/moses/FF/VW/VWFeatureSource.h
+++ b/moses/FF/VW/VWFeatureSource.h
@@ -19,15 +19,22 @@ class VWFeatureSource : public VWFeatureBase
 {
 public:
   VWFeatureSource(const std::string &line)
-    : VWFeatureBase(line, true) {
+    : VWFeatureBase(line, vwft_source) {
   }
 
   // Gets its pure virtual functions from VWFeatureBase
 
   virtual void operator()(const InputType &input
-                          , const InputPath &inputPath
                           , const TargetPhrase &targetPhrase
-                          , Discriminative::Classifier &classifier) const {
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+  }
+
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
   }
 
   virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureSourceBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h
index 97a1cc6c3..b815b4d0e 100644
--- a/moses/FF/VW/VWFeatureSourceBagOfWords.h
+++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h
@@ -18,11 +18,11 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     for (size_t i = 0; i < input.GetSize(); i++) {
-      classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureSourceBigrams.h b/moses/FF/VW/VWFeatureSourceBigrams.h
index ce5430ab8..5de3ab2c3 100644
--- a/moses/FF/VW/VWFeatureSourceBigrams.h
+++ b/moses/FF/VW/VWFeatureSourceBigrams.h
@@ -18,11 +18,11 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     for (size_t i = 1; i < input.GetSize(); i++) {
-      classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureSourceExternalFeatures.h b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
index bacc5d231..9995ad1b2 100644
--- a/moses/FF/VW/VWFeatureSourceExternalFeatures.h
+++ b/moses/FF/VW/VWFeatureSourceExternalFeatures.h
@@ -23,12 +23,12 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     const Features& features = *m_tls.GetStored();
     for (size_t i = 0; i < features.size(); i++) {
-      classifier.AddLabelIndependentFeature("srcext^" + features[i]);
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h
index fda929f13..b0d43eb0f 100644
--- a/moses/FF/VW/VWFeatureSourceIndicator.h
+++ b/moses/FF/VW/VWFeatureSourceIndicator.h
@@ -20,9 +20,9 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     size_t begin = sourceRange.GetStartPos();
     size_t end   = sourceRange.GetEndPos() + 1;
 
@@ -31,7 +31,7 @@ public:
     for (size_t i = 0; i < end - begin; i++)
       words[i] = GetWord(input, begin + i);
 
-    classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
+    outFeatures.push_back(classifier.AddLabelIndependentFeature("sind^" + Join(" ", words)));
   }
 
   virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
index 4e7f6e8d1..b346660a0 100644
--- a/moses/FF/VW/VWFeatureSourcePhraseInternal.h
+++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
@@ -20,14 +20,14 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     size_t begin = sourceRange.GetStartPos();
     size_t end   = sourceRange.GetEndPos() + 1;
 
     while (begin < end) {
-      classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureSourceSenseWindow.h b/moses/FF/VW/VWFeatureSourceSenseWindow.h
index 614f7ff52..e7b1e1a71 100644
--- a/moses/FF/VW/VWFeatureSourceSenseWindow.h
+++ b/moses/FF/VW/VWFeatureSourceSenseWindow.h
@@ -51,9 +51,9 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     int begin = sourceRange.GetStartPos();
     int end   = sourceRange.GetEndPos() + 1;
     int inputLen = input.GetSize();
@@ -64,24 +64,24 @@ public:
     // before current phrase
     for (int i = std::max(0, begin - m_size); i < begin; i++) {
       BOOST_FOREACH(const Sense &sense, senses[i]) {
-        classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
-        classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob);
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob));
       }
     }
 
     // within current phrase
     for (int i = begin; i < end; i++) {
       BOOST_FOREACH(const Sense &sense, senses[i]) {
-        classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
-        classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob);
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob));
       }
     }
 
     // after current phrase
     for (int i = end; i < std::min(end + m_size, inputLen); i++) {
       BOOST_FOREACH(const Sense &sense, senses[i]) {
-        classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
-        classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob);
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
+        outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob));
       }
     }
   }
diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h
index 5205e4f2f..14c617586 100644
--- a/moses/FF/VW/VWFeatureSourceWindow.h
+++ b/moses/FF/VW/VWFeatureSourceWindow.h
@@ -20,19 +20,19 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const Range &sourceRange
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     int begin = sourceRange.GetStartPos();
     int end   = sourceRange.GetEndPos() + 1;
     int inputLen = input.GetSize();
 
     for (int i = std::max(0, begin - m_size); i < begin; i++) {
-      classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i)));
     }
 
     for (int i = end; i < std::min(end + m_size, inputLen); i++) {
-      classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
+      outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h
index 2935b2b4e..ed936ebf3 100644
--- a/moses/FF/VW/VWFeatureTarget.h
+++ b/moses/FF/VW/VWFeatureTarget.h
@@ -17,15 +17,22 @@ class VWFeatureTarget : public VWFeatureBase
 {
 public:
   VWFeatureTarget(const std::string &line)
-    : VWFeatureBase(line, false) {
+    : VWFeatureBase(line, vwft_target) {
   }
 
   // Gets its pure virtual functions from VWFeatureBase
 
   virtual void operator()(const InputType &input
-                          , const InputPath &inputPath
                           , const Range &sourceRange
-                          , Discriminative::Classifier &classifier) const {
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
+  }
+
+  virtual void operator()(const InputType &input
+                          , const Phrase &contextPhrase
+                          , const AlignmentInfo &alignmentInfo
+                          , Discriminative::Classifier &classifier
+                          , Discriminative::FeatureVector &outFeatures) const {
   }
 
   virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureTargetBigrams.h b/moses/FF/VW/VWFeatureTargetBigrams.h
index 6f3f35270..30264dbf5 100644
--- a/moses/FF/VW/VWFeatureTargetBigrams.h
+++ b/moses/FF/VW/VWFeatureTargetBigrams.h
@@ -17,11 +17,11 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const TargetPhrase &targetPhrase
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     for (size_t i = 1; i < targetPhrase.GetSize(); i++) {
-      classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i));
+      outFeatures.push_back(classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureTargetIndicator.h b/moses/FF/VW/VWFeatureTargetIndicator.h
index 39d8a37a0..0195990d0 100644
--- a/moses/FF/VW/VWFeatureTargetIndicator.h
+++ b/moses/FF/VW/VWFeatureTargetIndicator.h
@@ -17,10 +17,10 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const TargetPhrase &targetPhrase
-                  , Discriminative::Classifier &classifier) const {
-    classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
+    outFeatures.push_back(classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors)));
   }
 
   virtual void SetParameter(const std::string& key, const std::string& value) {
diff --git a/moses/FF/VW/VWFeatureTargetPhraseInternal.h b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
index e376a1ed3..8a9928aaa 100644
--- a/moses/FF/VW/VWFeatureTargetPhraseInternal.h
+++ b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
@@ -17,11 +17,11 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const TargetPhrase &targetPhrase
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
-      classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
+      outFeatures.push_back(classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i)));
     }
   }
 
diff --git a/moses/FF/VW/VWFeatureTargetPhraseScores.h b/moses/FF/VW/VWFeatureTargetPhraseScores.h
index 5a4519fb1..6c9ab63d2 100644
--- a/moses/FF/VW/VWFeatureTargetPhraseScores.h
+++ b/moses/FF/VW/VWFeatureTargetPhraseScores.h
@@ -20,9 +20,9 @@ public:
   }
 
   void operator()(const InputType &input
-                  , const InputPath &inputPath
                   , const TargetPhrase &targetPhrase
-                  , Discriminative::Classifier &classifier) const {
+                  , Discriminative::Classifier &classifier
+                  , Discriminative::FeatureVector &outFeatures) const {
     std::vector<FeatureFunction*> features = FeatureFunction::GetFeatureFunctions();
     for (size_t i = 0; i < features.size(); i++) {
       std::string fname = features[i]->GetScoreProducerDescription();
@@ -31,7 +31,7 @@ public:
 
       std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(features[i]);
       for(size_t j = 0; j < scores.size(); ++j)
-        classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]);
+        outFeatures.push_back(classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]));
     }
   }
 
diff --git a/moses/FF/VW/VWState.cpp b/moses/FF/VW/VWState.cpp
new file mode 100644
index 000000000..000b8532b
--- /dev/null
+++ b/moses/FF/VW/VWState.cpp
@@ -0,0 +1,77 @@
+#include "VWState.h"
+
+#include "moses/FF/FFState.h"
+#include "moses/Phrase.h"
+#include "moses/Hypothesis.h"
+#include "moses/Util.h"
+#include "moses/TypeDef.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationOption.h"
+#include <boost/functional/hash.hpp>
+
+namespace Moses
+{
+
+VWState::VWState() : m_spanStart(0), m_spanEnd(0)
+{
+  ComputeHash(); // m_phrase is default-constructed here; cache the hash of that state
+}
+
+VWState::VWState(const Phrase &phrase)
+  : m_phrase(phrase), m_spanStart(0), m_spanEnd(0)
+{
+  ComputeHash(); // cache the hash of the copied phrase and the zero span
+}
+
+VWState::VWState(const VWState &prevState, const Hypothesis &curHypo)
+{
+  VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");
+
+  // the window keeps the length of the previous context (identical to VWFeatureBase::GetMaximumContextSize())
+  Phrase extended = prevState.GetPhrase();
+  const size_t windowLength = extended.GetSize();
+
+  // extend the previous context by the words of the current hypothesis
+  const TargetPhrase &appended = curHypo.GetCurrTargetPhrase();
+  extended.Append(appended);
+
+  VERBOSE(3, ">> current hypo: " << appended << "\n");
+
+  // keep only the last windowLength words
+  const size_t totalLength = extended.GetSize();
+  m_phrase = extended.GetSubString(Range(totalLength - windowLength, totalLength - 1));
+
+  // remember the source span of the current translation option, then cache the hash
+  const TranslationOption &topt = curHypo.GetTranslationOption();
+  m_spanStart = topt.GetStartPos();
+  m_spanEnd   = topt.GetEndPos();
+  ComputeHash();
+
+  VERBOSE(3, ">> updated state: " << *this << "\n");
+}
+
+bool VWState::operator==(const FFState& o) const
+{
+  // unchecked downcast: o is assumed to be a VWState
+  const VWState &rhs = static_cast<const VWState &>(o);
+
+  if (!(m_phrase == rhs.m_phrase)) return false;
+  return m_spanStart == rhs.m_spanStart && m_spanEnd == rhs.m_spanEnd;
+}
+
+void VWState::ComputeHash()
+{
+  size_t seed = 0; // combine phrase, span start and span end, in this order
+  boost::hash_combine(seed, m_phrase);
+  boost::hash_combine(seed, m_spanStart);
+  boost::hash_combine(seed, m_spanEnd);
+  m_hash = seed;
+}
+
+std::ostream &operator<<(std::ostream &out, const VWState &state)
+{
+  // format: <context phrase>::<span start>-<span end>
+  return out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
+}
+
+}
diff --git a/moses/FF/VW/VWState.h b/moses/FF/VW/VWState.h
new file mode 100644
index 000000000..d83035553
--- /dev/null
+++ b/moses/FF/VW/VWState.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <ostream>
+
+#include "moses/FF/FFState.h"
+#include "moses/Phrase.h"
+#include "moses/Hypothesis.h"
+
+namespace Moses
+{
+
+/**
+ * VW state, used in decoding (when target context is enabled).
+ *
+ * Holds a target-side phrase, the source span of the hypothesis it was
+ * built from, and a hash over those three values.
+ */
+class VWState : public FFState
+{
+public:
+  // placeholder with an empty phrase and a 0-0 span, used only when VWState is ignored
+  VWState();
+
+  // initial VW state: stores a copy of the given phrase, span is 0-0
+  VWState(const Phrase &phrase);
+
+  // successor of prevState after extending it with the words of curHypo
+  VWState(const VWState &prevState, const Hypothesis &curHypo);
+
+  // true when phrase and both span ends match; o is cast to VWState unchecked
+  virtual bool operator==(const FFState& o) const;
+
+  // value cached by ComputeHash()
+  virtual size_t hash() const { return m_hash; }
+
+  // read-only accessors
+  const Phrase &GetPhrase() const { return m_phrase; }
+  size_t GetSpanStart() const { return m_spanStart; }
+  size_t GetSpanEnd() const { return m_spanEnd; }
+
+private:
+  // recomputes m_hash from m_phrase, m_spanStart and m_spanEnd
+  void ComputeHash();
+
+  // context words carried from state to state
+  Phrase m_phrase;
+  // start/end position taken from the hypothesis' translation option
+  size_t m_spanStart, m_spanEnd;
+  // cached result of ComputeHash()
+  size_t m_hash;
+};
+
+// how to print a VW state
+std::ostream &operator<<(std::ostream &out, const VWState &state);
+
+}
diff --git a/moses/FF/VW/VWTargetSentence.h b/moses/FF/VW/VWTargetSentence.h
new file mode 100644
index 000000000..1387bc042
--- /dev/null
+++ b/moses/FF/VW/VWTargetSentence.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <vector>
+
+#include "moses/AlignmentInfo.h"
+#include "moses/Phrase.h"
+
+#include "AlignmentConstraint.h"
+
+namespace Moses
+{
+
+/**
+ * VW thread-specific data about target sentence; owns m_sentence and m_alignment.
+ */
+class VWTargetSentence
+{
+public:
+  VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
+
+  // Frees the owned objects and resets the pointers, so a second Clear() (or the
+  // destructor running after Clear()) cannot delete the same object twice.
+  void Clear() {
+    delete m_sentence; // delete on NULL is a no-op, so no guard is needed
+    m_sentence = NULL;
+    delete m_alignment;
+    m_alignment = NULL;
+  }
+
+  ~VWTargetSentence() { Clear(); }
+
+  // Rebuilds both constraint vectors from m_alignment; m_sentence and m_alignment must be set.
+  void SetConstraints(size_t sourceSize) {
+    // initialize to unconstrained
+    m_sourceConstraints.assign(sourceSize, AlignmentConstraint());
+    m_targetConstraints.assign(m_sentence->GetSize(), AlignmentConstraint());
+    // set constraints according to alignment points
+    AlignmentInfo::const_iterator it;
+    for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
+      int src = it->first;
+      int tgt = it->second;
+      if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
+        UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
+      }
+      m_sourceConstraints[src].Update(tgt);
+      m_targetConstraints[tgt].Update(src);
+    }
+  }
+
+  Phrase *m_sentence; // owned; NOTE(review): copying this class is not disabled and would double-delete
+  AlignmentInfo *m_alignment; // owned
+  std::vector<AlignmentConstraint> m_sourceConstraints, m_targetConstraints;
+};
+
+}
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index ada728919..67267ce90 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -59,6 +59,7 @@ Parameter::Parameter()
   AddParam(main_opts,"version", "show version of Moses and libraries used");
   AddParam(main_opts,"show-weights", "print feature weights and exit");
   AddParam(main_opts,"time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
+  AddParam(main_opts,"segment-time-out", "seconds for single segment after which is interrupted (-1=no time-out, default is -1)");
 
   ///////////////////////////////////////////////////////////////////////////////////////
   // factorization options
diff --git a/moses/ReorderingConstraint.cpp b/moses/ReorderingConstraint.cpp
index a5627508f..c4950daad 100644
--- a/moses/ReorderingConstraint.cpp
+++ b/moses/ReorderingConstraint.cpp
@@ -54,8 +54,8 @@ void ReorderingConstraint::SetWall( size_t pos, bool value )
 void ReorderingConstraint::FinalizeWalls()
 {
   for(size_t z = 0; z < m_zone.size(); z++ ) {
-    const size_t startZone = m_zone[z][0];
-    const size_t endZone = m_zone[z][1];// note: wall after endZone is not local
+    const size_t startZone = m_zone[z].first;
+    const size_t endZone = m_zone[z].second;// note: wall after endZone is not local
     for( size_t pos = startZone; pos < endZone; pos++ ) {
       if (m_wall[ pos ]) {
         m_localWall[ pos ] = z;
@@ -65,8 +65,8 @@ void ReorderingConstraint::FinalizeWalls()
       // enforce that local walls only apply to innermost zone
       else if (m_localWall[ pos ] != NOT_A_ZONE) {
         size_t assigned_z = m_localWall[ pos ];
-        if ((m_zone[assigned_z][0] < startZone) ||
-            (m_zone[assigned_z][1] > endZone)) {
+        if ((m_zone[assigned_z].first < startZone) ||
+            (m_zone[assigned_z].second > endZone)) {
           m_localWall[ pos ] = z;
         }
       }
@@ -97,9 +97,9 @@ void ReorderingConstraint::SetMonotoneAtPunctuation( const Phrase &sentence )
 void ReorderingConstraint::SetZone( size_t startPos, size_t endPos )
 {
   VERBOSE(3,"SETTING zone " << startPos << "-" << endPos << std::endl);
-  std::vector< size_t > newZone;
-  newZone.push_back( startPos );
-  newZone.push_back( endPos );
+  std::pair<size_t,size_t> newZone; // first = zone start, second = zone end
+  newZone.first = startPos;
+  newZone.second = endPos;
   m_zone.push_back( newZone );
   m_active = true;
 }
@@ -138,8 +138,8 @@ bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t
 
   // check zones
   for(size_t z = 0; z < m_zone.size(); z++ ) {
-    const size_t startZone = m_zone[z][0];
-    const size_t endZone = m_zone[z][1];
+    const size_t startZone = m_zone[z].first;
+    const size_t endZone = m_zone[z].second;
 
     // fine, if translation has not reached zone yet and phrase outside zone
     if (lastPos < startZone && ( endPos < startZone || startPos > endZone ) ) {
@@ -236,4 +236,25 @@ bool ReorderingConstraint::Check( const Bitmap &bitmap, size_t startPos, size_t
   return true;
 }
 
+std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj)
+{
+  // zones as "start-end " pairs, in the order they are stored
+  out << "Zones:";
+  typedef std::vector< std::pair<size_t,size_t> >::const_iterator ZoneIter;
+  for (ZoneIter z = obj.m_zone.begin(); z != obj.m_zone.end(); ++z) {
+    out << z->first << "-" << z->second << " ";
+  }
+  // one wall flag per word, written without separators
+  out << "Walls:";
+  for (size_t pos = 0; pos < obj.m_size; ++pos) {
+    out << obj.m_wall[pos];
+  }
+  // one local-wall entry per word, each followed by a space
+  out << " Local walls:";
+  for (size_t pos = 0; pos < obj.m_size; ++pos) {
+    out << obj.m_localWall[pos] << " ";
+  }
+  return out;
+}
+
 }
diff --git a/moses/ReorderingConstraint.h b/moses/ReorderingConstraint.h
index fc74dea7d..047382076 100644
--- a/moses/ReorderingConstraint.h
+++ b/moses/ReorderingConstraint.h
@@ -45,13 +45,13 @@ class Bitmap;
  */
 class ReorderingConstraint
 {
-  friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint& reorderingConstraint);
+  friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint &obj);
 protected:
   // const size_t m_size; /**< number of words in sentence */
   size_t m_size; /**< number of words in sentence */
   bool *m_wall;	/**< flag for each word if it is a wall */
   size_t *m_localWall;	/**< flag for each word if it is a local wall */
-  std::vector< std::vector< size_t > > m_zone; /** zones that limit reordering */
+  std::vector< std::pair<size_t,size_t> > m_zone; /** zones that limit reordering */
   bool   m_active; /**< flag indicating, if there are any active constraints */
   int m_max_distortion;
 public:
@@ -93,7 +93,7 @@ public:
   void SetZone( size_t startPos, size_t endPos );
 
   //! returns the vector of zones
-  std::vector< std::vector< size_t > > & GetZones() {
+  std::vector< std::pair<size_t,size_t> > & GetZones() {
     return m_zone;
   }
 
diff --git a/moses/Search.cpp b/moses/Search.cpp
index 2d8c74b5f..caf9425cf 100644
--- a/moses/Search.cpp
+++ b/moses/Search.cpp
@@ -17,21 +17,34 @@ Search::Search(Manager& manager)
   , interrupted_flag(0)
 {
   m_initialTransOpt.SetInputPath(m_inputPath);
+  m_timer.start();
 }
 
-
 bool
 Search::
 out_of_time()
 {
   int const& timelimit = m_options.search.timeout;
-  if (!timelimit) return false;
-  double elapsed_time = GetUserTime();
-  if (elapsed_time <= timelimit) return false;
-  VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
-          << timelimit << ")" << std::endl);
-  interrupted_flag = 1;
-  return true;
+  if (timelimit > 0) { // zero or negative means no overall time limit
+    double elapsed_time = GetUserTime();
+    if (elapsed_time > timelimit) {
+      VERBOSE(1,"Decoding is out of time (" << elapsed_time << ","
+              << timelimit << ")" << std::endl);
+      interrupted_flag = 1; // remember that this search was cut short
+      return true;
+    }
+  }
+  int const& segment_timelimit = m_options.search.segment_timeout;
+  if (segment_timelimit > 0) { // zero or negative means no per-segment time limit
+    double elapsed_time = m_timer.get_elapsed_time(); // m_timer is started in the Search constructor
+    if (elapsed_time > segment_timelimit) {
+      VERBOSE(1,"Decoding for segment is out of time (" << elapsed_time << ","
+              << segment_timelimit << ")" << std::endl);
+      interrupted_flag = 1; // remember that this search was cut short
+      return true;
+    }
+  }
+  return false; // neither limit is active or exceeded
 }
 
 }
diff --git a/moses/Search.h b/moses/Search.h
index a0e07870d..7797f07a0 100644
--- a/moses/Search.h
+++ b/moses/Search.h
@@ -7,6 +7,7 @@
 #include "Phrase.h"
 #include "InputPath.h"
 #include "Bitmaps.h"
+#include "Timer.h"
 
 namespace Moses
 {
@@ -48,6 +49,7 @@ protected:
   /** flag indicating that decoder ran out of time (see switch -time-out) */
   size_t interrupted_flag;
 
+  Timer m_timer;
   bool out_of_time();
 };
 
diff --git a/moses/SearchCubePruning.cpp b/moses/SearchCubePruning.cpp
index 9984ecadb..f921b9860 100644
--- a/moses/SearchCubePruning.cpp
+++ b/moses/SearchCubePruning.cpp
@@ -97,7 +97,6 @@ void SearchCubePruning::Decode()
 
   // go through each stack
   size_t stackNo = 1;
-  int timelimit = m_options.search.timeout;
   std::vector < HypothesisStack* >::iterator iterStack;
   for (iterStack = m_hypoStackColl.begin() + 1 ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
     // BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 4db022e5e..98bfb9e0a 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -155,7 +155,9 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
                                      m_xmlOptions,
                                      m_reorderingConstraint,
                                      xmlWalls, placeholders);
-    UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
+    if (!OK) {
+      TRACE_ERR("Unable to parse XML in line: " << line);
+    }
   }
 }
 
diff --git a/moses/TranslationModel/CompactPT/CanonicalHuffman.h b/moses/TranslationModel/CompactPT/CanonicalHuffman.h
index 9f6c14e56..10f3019b1 100644
--- a/moses/TranslationModel/CompactPT/CanonicalHuffman.h
+++ b/moses/TranslationModel/CompactPT/CanonicalHuffman.h
@@ -76,8 +76,9 @@ private:
     MinHeapSorter hs(A);
     std::make_heap(A.begin(), A.begin() + n, hs);
 
-    size_t h = n;
-    size_t m1, m2;
+    // marked volatile to prevent the intel compiler from generating bad code
+    volatile size_t h = n;
+    volatile size_t m1, m2;
     while(h > 1) {
       m1 = A[0];
       std::pop_heap(A.begin(), A.begin() + h, hs);
diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp
index 678f9bfe0..958569e94 100644
--- a/moses/parameters/SearchOptions.cpp
+++ b/moses/parameters/SearchOptions.cpp
@@ -38,6 +38,7 @@ namespace Moses
     param.SetParameter(early_discarding_threshold, "early-discarding-threshold", 
                        DEFAULT_EARLY_DISCARDING_THRESHOLD);
     param.SetParameter(timeout, "time-out", 0);
+    param.SetParameter(segment_timeout, "segment-time-out", 0);
     param.SetParameter(max_phrase_length, "max-phrase-length", 
                        DEFAULT_MAX_PHRASE_LENGTH);
     param.SetParameter(trans_opt_threshold, "translation-option-threshold", 
diff --git a/moses/parameters/SearchOptions.h b/moses/parameters/SearchOptions.h
index 46c53e95b..30a612f05 100644
--- a/moses/parameters/SearchOptions.h
+++ b/moses/parameters/SearchOptions.h
@@ -25,6 +25,7 @@ namespace Moses
     float beam_width;
 
     int timeout;
+    int segment_timeout;
 
     bool consensus; //! Use Consensus decoding  (DeNero et al 2009)
     
diff --git a/scripts/Transliteration/train-transliteration-module.pl b/scripts/Transliteration/train-transliteration-module.pl
index d072719d1..8d22ae6ce 100755
--- a/scripts/Transliteration/train-transliteration-module.pl
+++ b/scripts/Transliteration/train-transliteration-module.pl
@@ -240,7 +240,7 @@ sub train_transliteration_module{
 
     `$MOSES_SRC_DIR/scripts/ems/support/substitute-filtered-tables.perl $OUT_DIR/tuning/filtered/moses.ini < $OUT_DIR/model/moses.ini > $OUT_DIR/tuning/moses.filtered.ini`;
 
-    `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp  --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/mert -threads=16 --no-filter-phrase-table`;
+    `$MOSES_SRC_DIR/scripts/training/mert-moses.pl $OUT_DIR/tuning/input $OUT_DIR/tuning/reference $DECODER $OUT_DIR/tuning/moses.filtered.ini --nbest 100 --working-dir $OUT_DIR/tuning/tmp  --decoder-flags "-threads 16 -drop-unknown -v 0 -distortion-limit 0" --rootdir $MOSES_SRC_DIR/scripts -mertdir $MOSES_SRC_DIR/bin -threads=16 --no-filter-phrase-table`;
 
     `cp $OUT_DIR/tuning/tmp/moses.ini $OUT_DIR/tuning/moses.ini`;
 
diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index 257166721..e6b2d4a5c 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 
 # truecasers - comment out if you do not use the truecaser
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index 6f7beb438..7e1004db6 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 # truecasers - comment out if you do not use the truecaser
 input-truecaser = $moses-script-dir/recaser/truecase.perl
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index 6fb77a18a..3d00ffd79 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -57,7 +57,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 # truecasers - comment out if you do not use the truecaser
 input-truecaser = $moses-script-dir/recaser/truecase.perl
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index ddde6baad..bdbd2b4e0 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -57,7 +57,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 # truecasers - comment out if you do not use the truecaser
 input-truecaser = $moses-script-dir/recaser/truecase.perl
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index dff4ed10d..6667a9744 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 # truecasers - comment out if you do not use the truecaser
 input-truecaser = $moses-script-dir/recaser/truecase.perl
diff --git a/scripts/ems/example/config.toy.bilinguallm b/scripts/ems/example/config.toy.bilinguallm
index f4730a80f..9bf94613f 100644
--- a/scripts/ems/example/config.toy.bilinguallm
+++ b/scripts/ems/example/config.toy.bilinguallm
@@ -54,7 +54,7 @@ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-ext
 # For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
 #  Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
 #  "Farasa: A Fast and Furious Segmenter for Arabic"
-input-tokenizer = "$farasa-dir/farasa_moses.sh"
+#input-tokenizer = "$farasa-dir/farasa_moses.sh"
 
 # truecasers - comment out if you do not use the truecaser
 input-truecaser = $moses-script-dir/recaser/truecase.perl
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 11c69eab4..8713af8bf 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -827,7 +827,7 @@ create-config
 	in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table-pruned sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
 	out: config
 	ignore-if: use-hiero thot
-	rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature
+	rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature operation-sequence-model-load-method
 	default-name: model/moses.ini
 	error: Unknown option
 	error: requires an argument
@@ -1540,6 +1540,150 @@ analysis-precision
 	rerun-on-change: precision-by-coverage-base
 	final-model: yes
 
+[QUALITY-ESTIMATION] single
+tokenize-input
+	in: raw-input
+	out: tokenized-input
+	default-name: quality-estimation/input.tok
+	pass-unless: input-tokenizer
+	template: $input-tokenizer < IN > OUT
+tokenize-input-devtest
+	in: raw-input-devtest
+	out: tokenized-input-devtest
+	default-name: quality-estimation/input.devtest.tok
+	pass-unless: input-tokenizer
+	template: $input-tokenizer < IN > OUT
+lowercase-input
+	in: tokenized-input
+	out: truecased-input
+	default-name: quality-estimation/input.lc
+	pass-unless: input-lowercaser
+	ignore-if: input-truecaser
+	template: $input-lowercaser < IN > OUT
+lowercase-input-devtest
+	in: tokenized-input-devtest
+	out: truecased-input-devtest
+	default-name: quality-estimation/input.devtest.lc
+	pass-unless: input-lowercaser
+	ignore-if: input-truecaser
+	template: $input-lowercaser < IN > OUT
+truecase-input
+	in: tokenized-input TRUECASER:truecase-model
+	out: truecased-input
+	rerun-on-change: input-truecaser
+	default-name: quality-estimation/input.tc
+	ignore-unless: input-truecaser
+        template: $input-truecaser -model IN1.$input-extension < IN > OUT
+truecase-input-devtest
+	in: tokenized-input-devtest TRUECASER:truecase-model
+	out: truecased-input-devtest
+	rerun-on-change: input-truecaser
+	ignore-unless: input-truecaser
+	default-name: quality-estimation/input.devtest.tc
+        template: $input-truecaser -model IN1.$input-extension < IN > OUT
+split-input 
+	in: truecased-input SPLITTER:splitter-model
+	out: split-input
+	rerun-on-change: input-splitter
+	default-name: quality-estimation/input.split
+	pass-unless: input-splitter
+	template: $input-splitter -model IN1.$input-extension < IN > OUT
+split-input-devtest
+	in: truecased-input-devtest SPLITTER:splitter-model
+	out: split-input-devtest
+	rerun-on-change: input-splitter
+	default-name: quality-estimation/input.devtest.split
+	pass-unless: input-splitter
+	template: $input-splitter -model IN1.$input-extension < IN > OUT
+tokenize-reference
+	in: raw-reference
+	out: tokenized-reference
+	default-name: quality-estimation/reference.tok
+	pass-unless: output-tokenizer
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-tokenizer < IN > OUT
+tokenize-reference-devtest
+	in: raw-reference-devtest
+	out: tokenized-reference-devtest
+	default-name: quality-estimation/reference.devtest.tok
+	pass-unless: output-tokenizer
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-tokenizer < IN > OUT
+lowercase-reference
+	in: tokenized-reference
+	out: truecased-reference
+	default-name: quality-estimation/reference.lc
+	pass-unless: output-lowercaser
+	ignore-if: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-lowercaser < IN > OUT	
+lowercase-reference-devtest
+	in: tokenized-reference-devtest
+	out: truecased-reference-devtest
+	default-name: quality-estimation/reference.devtest.lc
+	pass-unless: output-lowercaser
+	ignore-if: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+	template: $output-lowercaser < IN > OUT	
+truecase-reference
+	in: tokenized-reference TRUECASER:truecase-model
+	out: truecased-reference
+	rerun-on-change: output-truecaser
+	default-name: quality-estimation/reference.tc
+	ignore-unless: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+        template: $output-truecaser -model IN1.$output-extension < IN > OUT
+truecase-reference-devtest
+	in: tokenized-reference-devtest TRUECASER:truecase-model
+	out: truecased-reference-devtest
+	rerun-on-change: output-truecaser
+	default-name: quality-estimation/reference.devtest.tc
+	ignore-unless: output-truecaser
+	multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
+        template: $output-truecaser -model IN1.$output-extension < IN > OUT
+decode
+	in: TUNING:config-with-reused-weights split-input
+	out: rich-output
+	default-name: quality-estimation/output
+	template: $decoder -v 0 -tt -f IN < IN1 > OUT
+	error: Translation was not performed correctly
+	not-error: trans: No such file or directory
+decode-devtest
+	in: TUNING:config-with-reused-weights split-input-devtest
+	out: rich-output-devtest
+	default-name: quality-estimation/output-devtest
+	template: $decoder -v 0 -tt -f IN < IN1 > OUT
+	error: Translation was not performed correctly
+	not-error: trans: No such file or directory
+remove-markup
+	in: rich-output
+	out: cleaned-output
+	default-name: quality-estimation/tokenized-output
+	template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
+remove-markup-devtest
+	in: rich-output-devtest
+	out: cleaned-output-devtest
+	default-name: quality-estimation/tokenized-output-devtest
+	template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT
+score-output
+	in: cleaned-output truecased-reference
+	out: scored-output
+	default-name: quality-estimation/output-scored
+	tmp-name: quality-estimation/ter
+	template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
+score-output-devtest
+	in: cleaned-output-devtest truecased-reference-devtest
+	out: scored-output-devtest
+	default-name: quality-estimation/output-scored-devtest
+	tmp-name: quality-estimation/ter-devtest
+	template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT
+train
+	in: input rich-output scored-output input-devtest rich-output-devtest scored-output-devtest
+	out: quality-estimation-model
+	default-name: quality-estimation/model
+	template: $trainer --train-rich IN1 --train-ter IN2 --eval-rich IN4 --eval-ter IN5 --model OUT
+	final-model: yes
+
 [REPORTING] single
 report
 	in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:multi-bleu-detok-score EVALUATION:multi-bleu-c-detok-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model EVALUATION:wade-analysis
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 6d0019838..e52c82319 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2660,12 +2660,16 @@ sub define_training_create_config {
 
     if ($osm) {
       my $osm_settings = &get("TRAINING:operation-sequence-model-settings");
-      if ($osm_settings =~ /-factor *(\S+)/){
+      if ($osm_settings =~ /-factor *(\S+)/) {
         $cmd .= "-osm-model $osm/ -osm-setting $1 ";
       }
       else {
         $cmd .= "-osm-model $osm/operationLM.bin ";
       }
+      my $osm_load_method = &get("TRAINING:operation-sequence-model-load-method");
+      if (defined($osm_load_method)) {
+        $cmd .= "-osm-load-method $osm_load_method ";
+      }
     }
 
     if (&get("TRAINING:phrase-orientation")) {
diff --git a/scripts/ems/support/create-xml.perl b/scripts/ems/support/create-xml.perl
new file mode 100755
index 000000000..610c2ccf8
--- /dev/null
+++ b/scripts/ems/support/create-xml.perl
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+use warnings;
+use strict;
+
+# Wrap STDIN (one segment per line) in the XML set selected by the first
+# argument: s... = source set, t... = test set, r... = reference set.
+my ($type) = @ARGV;
+my ($set, $set_attr, $doc_attr); # set element name, its attributes, <doc> attributes
+if ($type =~ /^s/i) {
+  ($set, $set_attr, $doc_attr) = ("srcset", "setid=\"test\" srclang=\"any\"", "docid=\"doc\"");
+}
+elsif ($type =~ /^t/i) {
+  ($set, $set_attr, $doc_attr) = ("tstset", "setid=\"test\" tgtlang=\"any\" srclang=\"any\"", "sysid=\"moses\" docid=\"doc\"");
+}
+elsif ($type =~ /^r/i) {
+  ($set, $set_attr, $doc_attr) = ("refset", "setid=\"test\" tgtlang=\"any\" srclang=\"any\"", "sysid=\"ref\" docid=\"doc\"");
+}
+else {
+  die("ERROR: specify source / target / ref");
+}
+
+# opening tags: the set element first, then the single document inside it
+print "<$set $set_attr>\n";
+print "<doc $doc_attr>\n";
+
+# one <seg> element per input line; ids count from 1 in input order,
+# and the line text is copied through unchanged apart from its newline
+my $segment_id = 0;
+while (my $line = <STDIN>) {
+  chomp($line);
+  ++$segment_id;
+  print "<seg id=\"$segment_id\">$line</seg>\n";
+}
+
+# closing tags, innermost first
+print "</doc>\n";
+print "</$set>\n";
+
diff --git a/scripts/ems/support/remove-segmentation-markup.perl b/scripts/ems/support/remove-segmentation-markup.perl
index 3b02bceaf..1e5820dd5 100755
--- a/scripts/ems/support/remove-segmentation-markup.perl
+++ b/scripts/ems/support/remove-segmentation-markup.perl
@@ -9,7 +9,16 @@ use strict;
 $|++;
 
 while(<STDIN>) {
-  s/ \|\d+\-\d+\| / /g;
-  s/ \|\d+\-\d+\|$//;
-  print $_;
+  chomp; # not chop: a final line without a newline must keep its last character
+  s/\|[^\|]+\|//g; # drop every |...| markup token
+  s/\s+/ /g; # collapse the whitespace runs left behind
+  s/^ //;
+  s/ $//;
+  print $_."\n";
 }
+
+#while(<STDIN>) {
+#  s/ \|\d+\-\d+\| / /g;
+#  s/ \|\d+\-\d+\|$//;
+#  print $_;
+#}
diff --git a/scripts/ems/support/ter.perl b/scripts/ems/support/ter.perl
new file mode 100644
index 000000000..1bae6f146
--- /dev/null
+++ b/scripts/ems/support/ter.perl
@@ -0,0 +1,15 @@
+#!/usr/bin/env perl
+#
+# This file is part of moses.  Its use is licensed under the GNU Lesser General
+# Public License version 2.1 or, at your option, any later version.
+
+use strict;
+use FindBin qw($RealBin);
+# Args: tercom jar, hypothesis, reference, tmp dir. Prints <tmp dir>/out.ter; every step must succeed first.
+my ($jar, $hyp,$ref,$tmp) = @ARGV;
+sub run_step { my ($cmd) = @_; `$cmd`; die("ERROR: command failed (status $?): $cmd\n") if $? != 0; } # backticks keep the step's STDOUT out of ours
+run_step("mkdir -p $tmp");
+run_step("$RealBin/create-xml.perl test < $hyp > $tmp/hyp");
+run_step("$RealBin/create-xml.perl ref  < $ref > $tmp/ref");
+run_step("java -jar $jar -h $tmp/hyp -r $tmp/ref -o ter -n $tmp/out");
+print `cat $tmp/out.ter`;
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 3e8dabb79..9fae8ec8b 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -83,6 +83,7 @@ my($_EXTERNAL_BINDIR,
    	$_CONFIG,
    	$_OSM,
    	$_OSM_FACTORS,
+   	$_OSM_LOAD_METHOD,
    	$_POST_DECODING_TRANSLIT,
    	$_TRANSLITERATION_PHRASE_TABLE,
    	$_HIERARCHICAL,
@@ -238,6 +239,7 @@ $_HELP = 1
 		       'config=s' => \$_CONFIG,
 		       'osm-model=s' => \$_OSM,
 		       'osm-setting=s' => \$_OSM_FACTORS,
+		       'osm-load-method=s' => \$_OSM_LOAD_METHOD,
 		       'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
 		       'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
 		       'mmsapt' => \$_MMSAPT,
@@ -2249,6 +2251,8 @@ sub create_ini {
 
   if($_OSM)
   {
+    my $load_method = "";
+    $load_method = " load=$_OSM_LOAD_METHOD" if defined($_OSM_LOAD_METHOD);
     if (defined($_OSM_FACTORS))
     {
 	my $count = 0;
@@ -2258,11 +2262,11 @@ sub create_ini {
 		my ($factor_f,$factor_e) = split(/\-/,$factor_val);
 
 		if($count == 0){
-		$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
+		$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
 	       $weight_spec  .= "OpSequenceModel$count= 0.08 -0.02 0.02 -0.001 0.03\n";
 		}
 		else{
-			$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
+			$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
 	       	$weight_spec  .= "OpSequenceModel$count= 0.08 \n";
 
 		}
@@ -2271,7 +2275,7 @@ sub create_ini {
     }
     else
     {
-      $feature_spec .= "OpSequenceModel name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
+      $feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
       $weight_spec  .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
     }
   }
@@ -2292,7 +2296,9 @@ sub create_ini {
     }
     $type = "KENLM" unless defined $type; # default to KENLM if no type given
 
-    if ($type =~ /^\d+$/) {
+    if ($type =~ /^8-(.+)/) {
+      $type = "KENLM load=$1";
+    } elsif ($type =~ /^\d+$/) {
       # backwards compatibility if the type is given not as string but as a number
       if ($type == 0) {
         $type = "SRILM";
diff --git a/vw/Classifier.h b/vw/Classifier.h
index 39b3461ad..cb2c8b227 100644
--- a/vw/Classifier.h
+++ b/vw/Classifier.h
@@ -24,6 +24,8 @@ class ezexample;
 
 namespace Discriminative
 {
+typedef std::pair<uint32_t, float> FeatureType; // feature hash (=ID) and value
+typedef std::vector<FeatureType> FeatureVector;
 
 /**
 * Abstract class to be implemented by classifiers.
@@ -34,12 +36,22 @@ public:
   /**
    * Add a feature that does not depend on the class (label).
    */
-  virtual void AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
+  virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
 
   /**
    * Add a feature that is specific for the given class.
    */
-  virtual void AddLabelDependentFeature(const StringPiece &name, float value) = 0;
+  virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value) = 0;
+
+  /**
+   * Efficient addition of features when their IDs are already computed.
+   */
+  virtual void AddLabelIndependentFeatureVector(const FeatureVector &features) = 0;
+
+  /**
+   * Efficient addition of features when their IDs are already computed.
+   */
+  virtual void AddLabelDependentFeatureVector(const FeatureVector &features) = 0;
 
   /**
    * Train using current example. Use loss to distinguish positive and negative training examples.
@@ -54,12 +66,12 @@ public:
   virtual float Predict(const StringPiece &label) = 0;
 
   // helper methods for indicator features
-  void AddLabelIndependentFeature(const StringPiece &name) {
-    AddLabelIndependentFeature(name, 1.0);
+  FeatureType AddLabelIndependentFeature(const StringPiece &name) {
+    return AddLabelIndependentFeature(name, 1.0);
   }
 
-  void AddLabelDependentFeature(const StringPiece &name) {
-    AddLabelDependentFeature(name, 1.0);
+  FeatureType AddLabelDependentFeature(const StringPiece &name) {
+    return AddLabelDependentFeature(name, 1.0);
   }
 
   virtual ~Classifier() {}
@@ -95,8 +107,10 @@ public:
   VWTrainer(const std::string &outputFile);
   virtual ~VWTrainer();
 
-  virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
-  virtual void AddLabelDependentFeature(const StringPiece &name, float value);
+  virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
+  virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
+  virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
+  virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
   virtual void Train(const StringPiece &label, float loss);
   virtual float Predict(const StringPiece &label);
 
@@ -121,15 +135,17 @@ public:
   VWPredictor(const std::string &modelFile, const std::string &vwOptions);
   virtual ~VWPredictor();
 
-  virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
-  virtual void AddLabelDependentFeature(const StringPiece &name, float value);
+  virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
+  virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
+  virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
+  virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
   virtual void Train(const StringPiece &label, float loss);
   virtual float Predict(const StringPiece &label);
 
   friend class ClassifierFactory;
 
 protected:
-  void AddFeature(const StringPiece &name, float values);
+  FeatureType AddFeature(const StringPiece &name, float values);
 
   ::vw *m_VWInstance, *m_VWParser;
   ::ezexample *m_ex;
diff --git a/vw/Normalizer.h b/vw/Normalizer.h
index 74d94a79f..210b29060 100644
--- a/vw/Normalizer.h
+++ b/vw/Normalizer.h
@@ -2,6 +2,7 @@
 #define moses_Normalizer_h
 
 #include <vector>
+#include <algorithm>
 #include "Util.h"
 
 namespace Discriminative
@@ -45,16 +46,29 @@ public:
   virtual ~SquaredLossNormalizer() {}
 };
 
+// Safe softmax over negated losses: every value is shifted by the maximum
+// before exp(), so for non-empty finite input the largest term is exp(0) = 1.
 class LogisticLossNormalizer : public Normalizer
 {
 public:
   virtual void operator()(std::vector<float> &losses) const {
-    float sum = 0;
     std::vector<float>::iterator it;
+
+    float sum = 0;
+    // Seed the maximum with a real element rather than 0: with a 0 seed no
+    // shift is applied when all negated losses are negative, exp() can
+    // underflow for every entry and the division below then yields NaN.
+    float max = losses.empty() ? 0.0f : -losses.front();
     for (it = losses.begin(); it != losses.end(); it++) {
-      *it = exp(-*it);
+      *it = -*it;
+      max = std::max(max, *it);
+    }
+
+    for (it = losses.begin(); it != losses.end(); it++) {
+      *it = exp(*it - max);
       sum += *it;
     }
+
     for (it = losses.begin(); it != losses.end(); it++) {
       *it /= sum;
     }
diff --git a/vw/VWPredictor.cpp b/vw/VWPredictor.cpp
index 01192a9c6..88d8cfa7f 100644
--- a/vw/VWPredictor.cpp
+++ b/vw/VWPredictor.cpp
@@ -36,7 +36,7 @@ VWPredictor::~VWPredictor()
     VW::finish(*m_VWInstance);
 }
 
-void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
+FeatureType VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
 {
   // label-independent features are kept in a different feature namespace ('s' = source)
 
@@ -48,10 +48,10 @@ void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float valu
     m_ex->addns('s');
     if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
   }
-  AddFeature(name, value); // namespace 's' is set up, add the feature
+  return AddFeature(name, value); // namespace 's' is set up, add the feature
 }
 
-void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
+FeatureType VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
 {
   // VW does not use the label directly, instead, we do a Cartesian product between source and target feature
   // namespaces, where the source namespace ('s') contains label-independent features and the target
@@ -63,7 +63,37 @@ void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
     m_ex->addns('t');
     if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
   }
-  AddFeature(name, value);
+  return AddFeature(name, value);
+}
+
+void VWPredictor::AddLabelIndependentFeatureVector(const FeatureVector &features)
+{
+  if (m_isFirstSource) {
+    // the first feature of a new example => create the source namespace for
+    // label-independent features to live in
+    m_isFirstSource = false;
+    m_ex->finish();
+    m_ex->addns('s');
+    if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
+  }
+
+  // add each feature index using this "low level" call to VW
+  for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
+    m_ex->addf(it->first, it->second);
+}
+
+void VWPredictor::AddLabelDependentFeatureVector(const FeatureVector &features)
+{
+  if (m_isFirstTarget) {
+    // the first target-side feature => create namespace 't'
+    m_isFirstTarget = false;
+    m_ex->addns('t');
+    if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
+  }
+
+  // add each feature index using this "low level" call to VW
+  for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
+    m_ex->addf(it->first, it->second);
 }
 
 void VWPredictor::Train(const StringPiece &label, float loss)
@@ -82,10 +112,10 @@ float VWPredictor::Predict(const StringPiece &label)
   return loss;
 }
 
-void VWPredictor::AddFeature(const StringPiece &name, float value)
+FeatureType VWPredictor::AddFeature(const StringPiece &name, float value)
 {
   if (DEBUG) std::cerr << "VW :: Adding feature: " << EscapeSpecialChars(name.as_string()) << ":" << value << "\n";
-  m_ex->addf(EscapeSpecialChars(name.as_string()), value);
+  return std::make_pair(m_ex->addf(EscapeSpecialChars(name.as_string()), value), value);
 }
 
 } // namespace Discriminative
diff --git a/vw/VWTrainer.cpp b/vw/VWTrainer.cpp
index e513de3d2..c019bc0c6 100644
--- a/vw/VWTrainer.cpp
+++ b/vw/VWTrainer.cpp
@@ -25,7 +25,7 @@ VWTrainer::~VWTrainer()
   close(m_bfos);
 }
 
-void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
+FeatureType VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
 {
   if (m_isFirstSource) {
     if (m_isFirstExample) {
@@ -43,9 +43,11 @@ void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
   }
 
   AddFeature(name, value);
+
+  return std::make_pair(0, value); // we don't hash features
 }
 
-void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
+FeatureType VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
 {
   if (m_isFirstTarget) {
     m_isFirstTarget = false;
@@ -56,6 +58,18 @@ void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
   }
 
   AddFeature(name, value);
+
+  return std::make_pair(0, value); // we don't hash features
+}
+
+void VWTrainer::AddLabelIndependentFeatureVector(const FeatureVector &features)
+{
+  throw std::logic_error("VW trainer does not support feature IDs.");
+}
+
+void VWTrainer::AddLabelDependentFeatureVector(const FeatureVector &features)
+{
+  throw std::logic_error("VW trainer does not support feature IDs.");
 }
 
 void VWTrainer::Train(const StringPiece &label, float loss)
author	Prashant Mathur <pramathur@ebay.com>	2016-06-15 15:33:42 +0300
committer	Prashant Mathur <pramathur@ebay.com>	2016-06-15 15:33:42 +0300
commit	e31bc247ead9f2b0e048b2394f7726d77b889736 (patch)
tree	f391d01d64b972dca9c977ae5f81a91eb16a47a2
parent	dee124b70aed617e62fff8810cc80986d4f050b9 (diff)
parent	bc5f8d15c6ce4bc678ba992860bfd4be6719cee8 (diff)