Merged in upstream.

author: Christian Federmann <cfedermann@gmail.com> 2012-05-09 23:10:52 +0400
committer: Christian Federmann <cfedermann@gmail.com> 2012-05-09 23:10:52 +0400
commit: 25f43d13b8f3cc6cc0be19028605efe15eaa416b (patch)
tree: 5e2a7e22a17f88e0dc53032a473bb20ff368375a /mert
parent: d9e77ed5b1676956978e221427c8c39f372e7612 (diff)
parent: 440650bd6e03cdea9aa2d3f11b32697bb9340ca0 (diff)
104 files changed, 4351 insertions, 1879 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 09b0d292f..32c192a5c 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -1,24 +1,33 @@
 #include "BleuScorer.h"
 
 #include <algorithm>
+#include <cassert>
 #include <cmath>
 #include <climits>
 #include <fstream>
-#include <iterator>
+#include <iostream>
 #include <stdexcept>
+
+#include "util/check.hh"
+#include "Ngram.h"
+#include "Reference.h"
 #include "Util.h"
+#include "Vocabulary.h"
+
+namespace {
+
+// Config key and accepted values for the reference-length strategy.
+const char KEY_REFLEN[] = "reflen";
+const char REFLEN_AVERAGE[] = "average";
+const char REFLEN_SHORTEST[] = "shortest";
+const char REFLEN_CLOSEST[] = "closest";
+
+} // namespace
 
 BleuScorer::BleuScorer(const string& config)
-    : StatisticsBasedScorer("BLEU",config),
-      kLENGTH(4),
+    : StatisticsBasedScorer("BLEU", config),
       m_ref_length_type(CLOSEST) {
-  //configure regularisation
-  static string KEY_REFLEN = "reflen";
-  static string REFLEN_AVERAGE = "average";
-  static string REFLEN_SHORTEST = "shortest";
-  static string REFLEN_CLOSEST = "closest";
-
-  string reflen = getConfig(KEY_REFLEN,REFLEN_CLOSEST);
+  const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
   if (reflen == REFLEN_AVERAGE) {
     m_ref_length_type = AVERAGE;
   } else if (reflen == REFLEN_SHORTEST) {
@@ -28,18 +37,16 @@ BleuScorer::BleuScorer(const string& config)
   } else {
     throw runtime_error("Unknown reference length strategy: " + reflen);
   }
-  //    cerr << "Using reference length strategy: " << reflen << endl;
 }
 
 BleuScorer::~BleuScorer() {}
 
-size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned int n)
+size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
+                               unsigned int n)
 {
+  assert(n > 0);
   vector<int> encoded_tokens;
-  //cerr << line << endl;
   TokenizeAndEncode(line, encoded_tokens);
-  //copy(encoded_tokens.begin(), encoded_tokens.end(), ostream_iterator<int>(cerr," "));
-  //cerr << endl;
   for (size_t k = 1; k <= n; ++k) {
     //ngram order longer than sentence - no point
     if (k > encoded_tokens.size()) {
@@ -50,168 +57,176 @@ size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned in
       for (size_t j = i; j < i+k && j < encoded_tokens.size(); ++j) {
         ngram.push_back(encoded_tokens[j]);
       }
-      int count = 1;
-      counts_iterator oldcount = counts.find(ngram);
-      if (oldcount != counts.end()) {
-        count = (oldcount->second) + 1;
-      }
-      //cerr << count << endl;
-      counts[ngram] = count;
-      //cerr << endl;
+      counts.Add(ngram);
     }
   }
-  //cerr << "counted ngrams" << endl;
-  //dump_counts(counts);
   return encoded_tokens.size();
 }
 
 void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
-  //make sure reference data is clear
-  m_ref_counts.reset();
-  m_ref_lengths.clear();
-  ClearEncoder();
+  // Make sure any previously loaded reference data is cleared.
+  m_references.reset();
+  mert::VocabularyFactory::GetVocabulary()->clear();
 
   //load reference data
   for (size_t i = 0; i < referenceFiles.size(); ++i) {
     TRACE_ERR("Loading reference from " << referenceFiles[i] << endl);
-    ifstream refin(referenceFiles[i].c_str());
-    if (!refin) {
-      throw runtime_error("Unable to open: " + referenceFiles[i]);
+
+    if (!OpenReference(referenceFiles[i].c_str(), i)) {
+      throw runtime_error("Unable to open " + referenceFiles[i]);
     }
-    string line;
-    size_t sid = 0; //sentence counter
-    while (getline(refin,line)) {
-      //cerr << line << endl;
-      if (i == 0) {
-        counts_t *counts = new counts_t; //these get leaked
-        m_ref_counts.push_back(counts);
-        vector<size_t> lengths;
-        m_ref_lengths.push_back(lengths);
-      }
-      if (m_ref_counts.size() <= sid) {
-        throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
-      }
-      counts_t counts;
-      size_t length = countNgrams(line,counts,kLENGTH);
-      //for any counts larger than those already there, merge them in
-      for (counts_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
-        counts_iterator oldcount_it = m_ref_counts[sid]->find(ci->first);
-        int oldcount = 0;
-        if (oldcount_it != m_ref_counts[sid]->end()) {
-          oldcount = oldcount_it->second;
-        }
-        int newcount = ci->second;
-        if (newcount > oldcount) {
-          m_ref_counts[sid]->operator[](ci->first) = newcount;
-        }
-      }
-      //add in the length
-      m_ref_lengths[sid].push_back(length);
-      if (sid > 0 && sid % 100 == 0) {
-        TRACE_ERR(".");
+  }
+}
+
+bool BleuScorer::OpenReference(const char* filename, size_t file_id) {
+  ifstream ifs(filename);
+  if (!ifs) {
+    cerr << "Cannot open " << filename << endl;
+    return false;
+  }
+  return OpenReferenceStream(&ifs, file_id);
+}
+
+bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id) {
+  if (is == NULL) return false;
+
+  string line;
+  size_t sid = 0;
+  while (getline(*is, line)) {
+    line = preprocessSentence(line);
+    if (file_id == 0) {
+      Reference* ref = new Reference;
+      m_references.push_back(ref);    // Take ownership of the Reference object.
+    }
+    if (m_references.size() <= sid) {
+      cerr << "Reference " << file_id << "has too many sentences." << endl;
+      return false;
+    }
+    NgramCounts counts;
+    size_t length = CountNgrams(line, counts, kBleuNgramOrder);
+
+    //for any counts larger than those already there, merge them in
+    for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
+      const NgramCounts::Key& ngram = ci->first;
+      const NgramCounts::Value newcount = ci->second;
+
+      NgramCounts::Value oldcount = 0;
+      m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
+      if (newcount > oldcount) {
+        m_references[sid]->get_counts()->operator[](ngram) = newcount;
       }
-      ++sid;
     }
-    TRACE_ERR(endl);
+    //add in the length
+    m_references[sid]->push_back(length);
+    if (sid > 0 && sid % 100 == 0) {
+      TRACE_ERR(".");
+    }
+    ++sid;
   }
+  return true;
 }
 
-
 void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
-//      cerr << text << endl;
-//      cerr << sid << endl;
-  //dump_counts(*m_ref_counts[sid]);
-  if (sid >= m_ref_counts.size()) {
+  if (sid >= m_references.size()) {
     stringstream msg;
     msg << "Sentence id (" << sid << ") not found in reference set";
     throw runtime_error(msg.str());
   }
-  counts_t testcounts;
-  //stats for this line
-  vector<float> stats(kLENGTH*2);;
-  size_t length = countNgrams(text,testcounts,kLENGTH);
-  //dump_counts(testcounts);
-  if (m_ref_length_type == SHORTEST) {
-    //cerr << reflengths.size() << " " << sid << endl;
-    int shortest = *min_element(m_ref_lengths[sid].begin(), m_ref_lengths[sid].end());
-    stats.push_back(shortest);
-  } else if (m_ref_length_type == AVERAGE) {
-    int total = 0;
-    for (size_t i = 0; i < m_ref_lengths[sid].size(); ++i) {
-      total += m_ref_lengths[sid][i];
-    }
-    const float mean = static_cast<float>(total) / m_ref_lengths[sid].size();
-    stats.push_back(mean);
-  } else if (m_ref_length_type == CLOSEST)  {
-    int min_diff = INT_MAX;
-    int min_idx = 0;
-    for (size_t i = 0; i < m_ref_lengths[sid].size(); ++i) {
-      const int reflength = m_ref_lengths[sid][i];
-      const int diff = reflength - static_cast<int>(length);
-      const int absolute_diff = abs(diff) - abs(min_diff);
-
-      if (absolute_diff < 0) { //look for the closest reference
-        min_diff = diff;
-        min_idx = i;
-      } else if (absolute_diff == 0) { // if two references has the same closest length, take the shortest
-        if (reflength < static_cast<int>(m_ref_lengths[sid][min_idx])) {
-          min_idx = i;
-        }
-      }
-    }
-    stats.push_back(m_ref_lengths[sid][min_idx]);
-  } else {
-    throw runtime_error("Unsupported reflength strategy");
-  }
-  //cerr << "computed length" << endl;
+  NgramCounts testcounts;
+  // stats for this line
+  vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
+  string sentence = preprocessSentence(text);
+  const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);
+
+  const int reference_len = CalcReferenceLength(sid, length);
+  stats.push_back(reference_len);
+
   //precision on each ngram type
-  for (counts_iterator testcounts_it = testcounts.begin();
+  for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
        testcounts_it != testcounts.end(); ++testcounts_it) {
-    counts_iterator refcounts_it = m_ref_counts[sid]->find(testcounts_it->first);
-    int correct = 0;
-    int guess = testcounts_it->second;
-    if (refcounts_it != m_ref_counts[sid]->end()) {
-      correct = min(refcounts_it->second,guess);
+    const NgramCounts::Value guess = testcounts_it->second;
+    const size_t len = testcounts_it->first.size();
+    NgramCounts::Value correct = 0;
+
+    NgramCounts::Value v = 0;
+    if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
+      correct = min(v, guess);
     }
-    size_t len = testcounts_it->first.size();
-    stats[len*2-2] += correct;
-    stats[len*2-1] += guess;
+    stats[len * 2 - 2] += correct;
+    stats[len * 2 - 1] += guess;
   }
-  stringstream sout;
-  copy(stats.begin(),stats.end(),ostream_iterator<float>(sout," "));
-  //TRACE_ERR(sout.str() << endl);
-  string stats_str = sout.str();
-  entry.set(stats_str);
+  entry.set(stats);
 }
 
 float BleuScorer::calculateScore(const vector<int>& comps) const
 {
-  //cerr << "BLEU: ";
-  //copy(comps.begin(),comps.end(), ostream_iterator<int>(cerr," "));
+  CHECK(comps.size() == kBleuNgramOrder * 2 + 1);
+
   float logbleu = 0.0;
-  for (int i = 0; i < kLENGTH; ++i) {
+  for (int i = 0; i < kBleuNgramOrder; ++i) {
     if (comps[2*i] == 0) {
       return 0.0;
     }
     logbleu += log(comps[2*i]) - log(comps[2*i+1]);
 
   }
-  logbleu /= kLENGTH;
-  const float brevity = 1.0 - static_cast<float>(comps[kLENGTH*2]) / comps[1];//reflength divided by test length
+  logbleu /= kBleuNgramOrder;
+  // reflength divided by test length
+  const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
   if (brevity < 0.0) {
     logbleu += brevity;
   }
-  //cerr << " " << exp(logbleu) << endl;
   return exp(logbleu);
 }
 
-void BleuScorer::dump_counts(counts_t& counts) const {
-  for (counts_const_iterator i = counts.begin(); i != counts.end(); ++i) {
-    cerr << "(";
-    copy(i->first.begin(), i->first.end(), ostream_iterator<int>(cerr," "));
-    cerr << ") " << i->second << ", ";
+int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length) {
+  switch (m_ref_length_type) {
+    case AVERAGE:
+      return m_references[sentence_id]->CalcAverage();
+      break;
+    case CLOSEST:
+      return m_references[sentence_id]->CalcClosest(length);
+      break;
+    case SHORTEST:
+      return m_references[sentence_id]->CalcShortest();
+      break;
+    default:
+      cerr << "unknown reference types." << endl;
+      exit(1);
   }
-  cerr << endl;
+}
+
+void BleuScorer::DumpCounts(ostream* os,
+                            const NgramCounts& counts) const {
+  for (NgramCounts::const_iterator it = counts.begin();
+       it != counts.end(); ++it) {
+    *os << "(";
+    const NgramCounts::Key& keys = it->first;
+    for (size_t i = 0; i < keys.size(); ++i) {
+      if (i != 0) {
+        *os << " ";
+      }
+      *os << keys[i];
+    }
+    *os << ") : " << it->second << ", ";
+  }
+  *os << endl;
+}
+
+float sentenceLevelBleuPlusOne(const vector<float>& stats) {
+  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);
+
+  float logbleu = 0.0;
+  for (int j = 0; j < kBleuNgramOrder; j++) {
+    logbleu += log(stats[2 * j] + 1.0) - log(stats[2 * j + 1] + 1.0);
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
+
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+  return exp(logbleu);
 }
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 5f105add2..b6503ba9b 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -1,7 +1,7 @@
-#ifndef __BLEUSCORER_H__
-#define __BLEUSCORER_H__
+#ifndef MERT_BLEU_SCORER_H_
+#define MERT_BLEU_SCORER_H_
 
-#include <iostream>
+#include <ostream>
 #include <string>
 #include <vector>
 
@@ -12,72 +12,64 @@
 
 using namespace std;
 
+const int kBleuNgramOrder = 4;
+
+class NgramCounts;
+class Reference;
+
 /**
  * Bleu scoring
  */
 class BleuScorer: public StatisticsBasedScorer
 {
 public:
+  enum ReferenceLengthType {
+    AVERAGE,
+    CLOSEST,
+    SHORTEST
+  };
+
   explicit BleuScorer(const string& config = "");
   ~BleuScorer();
 
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
   virtual float calculateScore(const vector<int>& comps) const;
+  virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
 
-  virtual size_t NumberOfScores() const {
-    return 2 * kLENGTH + 1;
-  }
+  int CalcReferenceLength(size_t sentence_id, size_t length);
 
-private:
-  enum ReferenceLengthType {
-    AVERAGE,
-    SHORTEST,
-    CLOSEST,
-  };
+  ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
+  void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
 
-  //Used to construct the ngram map
-  struct CompareNgrams {
-    bool operator()(const vector<int>& a, const vector<int>& b) const {
-      size_t i;
-      const size_t as = a.size();
-      const size_t bs = b.size();
-      for (i = 0; i < as && i < bs; ++i) {
-        if (a[i] < b[i]) {
-          //cerr << "true" << endl;
-          return true;
-        }
-        if (a[i] > b[i]) {
-          //cerr << "false" << endl;
-          return false;
-        }
-      }
-      //entries are equal, shortest wins
-      return as < bs;;
-    }
-  };
-
-  typedef map<vector<int>,int,CompareNgrams> counts_t;
-  typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator;
-  typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator;
+  const std::vector<Reference*>& GetReferences() const { return m_references.get(); }
 
   /**
    * Count the ngrams of each type, up to the given length in the input line.
    */
-  size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
+  size_t CountNgrams(const string& line, NgramCounts& counts, unsigned int n);
+
+  void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
 
-  void dump_counts(counts_t& counts) const;
+  bool OpenReference(const char* filename, size_t file_id);
 
-  const int kLENGTH;
+  // NOTE: this function is used for unit testing.
+  bool OpenReferenceStream(std::istream* is, size_t file_id);
+
+private:
   ReferenceLengthType m_ref_length_type;
 
-  // data extracted from reference files
-  ScopedVector<counts_t> m_ref_counts;
-  vector<vector<size_t> > m_ref_lengths;
+  // reference translations.
+  ScopedVector<Reference> m_references;
 
   // no copying allowed
   BleuScorer(const BleuScorer&);
   BleuScorer& operator=(const BleuScorer&);
 };
 
-#endif  // __BLEUSCORER_H__
+/** Computes sentence-level BLEU+1 score.
+ * This function is used in PRO.
+ */
+float sentenceLevelBleuPlusOne(const vector<float>& stats);
+
+#endif  // MERT_BLEU_SCORER_H_
diff --git a/mert/BleuScorerTest.cpp b/mert/BleuScorerTest.cpp
new file mode 100644
index 000000000..5da4cfc6c
--- /dev/null
+++ b/mert/BleuScorerTest.cpp
@@ -0,0 +1,272 @@
+#include "BleuScorer.h"
+
+#define BOOST_TEST_MODULE MertBleuScorer
+#include <boost/test/unit_test.hpp>
+
+#include <cmath>
+#include "Ngram.h"
+#include "Vocabulary.h"
+#include "Util.h"
+
+namespace {
+
+NgramCounts* g_counts = NULL;
+
+NgramCounts* GetNgramCounts() {
+  assert(g_counts);
+  return g_counts;
+}
+
+void SetNgramCounts(NgramCounts* counts) {
+  g_counts = counts;
+}
+
+struct Unigram {
+  Unigram(const std::string& a) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Bigram {
+  Bigram(const std::string& a, const std::string& b) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Trigram {
+  Trigram(const std::string& a, const std::string& b, const std::string& c) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
+  }
+  NgramCounts::Key instance;
+};
+
+struct Fourgram {
+  Fourgram(const std::string& a, const std::string& b,
+           const std::string& c, const std::string& d) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(d));
+  }
+  NgramCounts::Key instance;
+};
+
+bool CheckUnigram(const std::string& str) {
+  Unigram unigram(str);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(unigram.instance, &v);
+}
+
+bool CheckBigram(const std::string& a, const std::string& b) {
+  Bigram bigram(a, b);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(bigram.instance, &v);
+}
+
+bool CheckTrigram(const std::string& a, const std::string& b,
+                  const std::string& c) {
+  Trigram trigram(a, b, c);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(trigram.instance, &v);
+}
+
+bool CheckFourgram(const std::string& a, const std::string& b,
+                   const std::string& c, const std::string& d) {
+  Fourgram fourgram(a, b, c, d);
+  NgramCounts::Value v;
+  return GetNgramCounts()->Lookup(fourgram.instance, &v);
+}
+
+void SetUpReferences(BleuScorer& scorer) {
+  // The following examples are taken from Koehn, "Statistical Machine Translation",
+  // Cambridge University Press, 2010.
+  {
+    std::stringstream ref1;
+    ref1 << "israeli officials are responsible for airport security" << std::endl;
+    BOOST_CHECK(scorer.OpenReferenceStream(&ref1, 0));
+  }
+
+  {
+    std::stringstream ref2;
+    ref2 << "israel is in charge of the security at this airport" << std::endl;
+    BOOST_CHECK(scorer.OpenReferenceStream(&ref2, 1));
+  }
+
+  {
+    std::stringstream ref3;
+    ref3 << "the security work for this airport is the responsibility of the israel government"
+         << std::endl;
+    BOOST_CHECK(scorer.OpenReferenceStream(&ref3, 2));
+  }
+
+  {
+    std::stringstream ref4;
+    ref4 << "israli side was in charge of the security of this airport" << std::endl;
+    BOOST_CHECK(scorer.OpenReferenceStream(&ref4, 3));
+  }
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(bleu_reference_type) {
+  BleuScorer scorer;
+  // BleuScorer will use "closest" by default.
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
+
+  scorer.SetReferenceLengthType(BleuScorer::AVERAGE);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
+
+  scorer.SetReferenceLengthType(BleuScorer::SHORTEST);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
+}
+
+BOOST_AUTO_TEST_CASE(bleu_reference_type_with_config) {
+  {
+    BleuScorer scorer("reflen:average");
+    BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
+  }
+
+  {
+    BleuScorer scorer("reflen:shortest");
+    BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
+  BleuScorer scorer;
+
+  std::string line = "I saw a girl with a telescope .";
+  // The above string yields 25 distinct n-grams (7 + 7 + 6 + 5):
+  //
+  // unigram: "I", "saw", "a", "girl", "with", "telescope", "."
+  // bigram:  "I saw", "saw a", "a girl", "girl with", "with a", "a telescope",
+  //          "telescope ."
+  // trigram: "I saw a", "saw a girl", "a girl with", "girl with a",
+  //          "with a telescope", "a telescope ."
+  // 4-gram:  "I saw a girl", "saw a girl with", "a girl with a",
+  //          "girl with a telescope", "with a telescope ."
+  NgramCounts counts;
+  BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
+  BOOST_CHECK_EQUAL(25, counts.size());
+
+  mert::Vocabulary* vocab = scorer.GetVocab();
+  BOOST_CHECK_EQUAL(7, vocab->size());
+
+  std::vector<std::string> res;
+  Tokenize(line.c_str(), ' ', &res);
+  std::vector<int> ids(res.size());
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    BOOST_CHECK(vocab->Lookup(res[i], &ids[i]));
+  }
+
+  SetNgramCounts(&counts);
+
+  // unigram
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    BOOST_CHECK(CheckUnigram(res[i]));
+  }
+
+  // bigram
+  BOOST_CHECK(CheckBigram("I", "saw"));
+  BOOST_CHECK(CheckBigram("saw", "a"));
+  BOOST_CHECK(CheckBigram("a", "girl"));
+  BOOST_CHECK(CheckBigram("girl", "with"));
+  BOOST_CHECK(CheckBigram("with", "a"));
+  BOOST_CHECK(CheckBigram("a", "telescope"));
+  BOOST_CHECK(CheckBigram("telescope", "."));
+
+  // trigram
+  BOOST_CHECK(CheckTrigram("I", "saw", "a"));
+  BOOST_CHECK(CheckTrigram("saw", "a", "girl"));
+  BOOST_CHECK(CheckTrigram("a", "girl", "with"));
+  BOOST_CHECK(CheckTrigram("girl", "with", "a"));
+  BOOST_CHECK(CheckTrigram("with", "a", "telescope"));
+  BOOST_CHECK(CheckTrigram("a", "telescope", "."));
+
+  // 4-gram
+  BOOST_CHECK(CheckFourgram("I", "saw", "a", "girl"));
+  BOOST_CHECK(CheckFourgram("saw", "a", "girl", "with"));
+  BOOST_CHECK(CheckFourgram("a", "girl", "with", "a"));
+  BOOST_CHECK(CheckFourgram("girl", "with", "a", "telescope"));
+  BOOST_CHECK(CheckFourgram("with", "a", "telescope", "."));
+}
+
+BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
+  BleuScorer scorer;
+  SetUpReferences(scorer);
+  std::string line("israeli officials responsibility of airport safety");
+  ScoreStats entry;
+  scorer.prepareStats(0, line, entry);
+
+  BOOST_CHECK_EQUAL(entry.size(), 2 * kBleuNgramOrder + 1);
+
+  // Test hypothesis ngram counts
+  BOOST_CHECK_EQUAL(entry.get(0), 5);  // unigram
+  BOOST_CHECK_EQUAL(entry.get(2), 2);  // bigram
+  BOOST_CHECK_EQUAL(entry.get(4), 0);  // trigram
+  BOOST_CHECK_EQUAL(entry.get(6), 0);  // fourgram
+
+  // Test reference ngram counts.
+  BOOST_CHECK_EQUAL(entry.get(1), 6);  // unigram
+  BOOST_CHECK_EQUAL(entry.get(3), 5);  // bigram
+  BOOST_CHECK_EQUAL(entry.get(5), 4);  // trigram
+  BOOST_CHECK_EQUAL(entry.get(7), 3);  // fourgram
+}
+
+BOOST_AUTO_TEST_CASE(calculate_actual_score) {
+  BOOST_REQUIRE(4 == kBleuNgramOrder);
+  vector<int> stats(2 * kBleuNgramOrder + 1);
+  BleuScorer scorer;
+
+  // unigram
+  stats[0] = 6;
+  stats[1] = 6;
+
+  // bigram
+  stats[2] = 4;
+  stats[3] = 5;
+
+  // trigram
+  stats[4] = 2;
+  stats[5] = 4;
+
+  // fourgram
+  stats[6] = 1;
+  stats[7] = 3;
+
+  // reference-length
+  stats[8] = 7;
+
+  BOOST_CHECK(IsAlmostEqual(0.5115f, scorer.calculateScore(stats)));
+}
+
+BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
+  BOOST_REQUIRE(4 == kBleuNgramOrder);
+  vector<float> stats(2 * kBleuNgramOrder + 1);
+
+  // unigram
+  stats[0] = 6.0;
+  stats[1] = 6.0;
+
+  // bigram
+  stats[2] = 4.0;
+  stats[3] = 5.0;
+
+  // trigram
+  stats[4] = 2.0;
+  stats[5] = 4.0;
+
+  // fourgram
+  stats[6] = 1.0;
+  stats[7] = 3.0;
+
+  // reference-length
+  stats[8] = 7.0;
+
+  BOOST_CHECK(IsAlmostEqual(0.5985f, sentenceLevelBleuPlusOne(stats)));
+}
diff --git a/mert/CderScorer.cpp b/mert/CderScorer.cpp
index ef1f6195f..896017056 100644
--- a/mert/CderScorer.cpp
+++ b/mert/CderScorer.cpp
@@ -1,6 +1,6 @@
 #include "CderScorer.h"
 
-#include <iterator>
+#include <algorithm>
 #include <fstream>
 #include <stdexcept>
 
@@ -12,8 +12,9 @@ inline int CalcDistance(int word1, int word2) {
 
 } // namespace
 
-CderScorer::CderScorer(const string& config)
-    : StatisticsBasedScorer("CDER",config) {}
+CderScorer::CderScorer(const string& config, bool allowed_long_jumps)
+    : StatisticsBasedScorer("CDER", config),
+      m_allowed_long_jumps(allowed_long_jumps) {}
 
 CderScorer::~CderScorer() {}
 
@@ -31,6 +32,7 @@ void CderScorer::setReferenceFiles(const vector<string>& referenceFiles)
     m_ref_sentences.push_back(vector<sent_t>());
     string line;
     while (getline(refin,line)) {
+      line = this->preprocessSentence(line);
       sent_t encoded;
       TokenizeAndEncode(line, encoded);
       m_ref_sentences[rid].push_back(encoded);
@@ -40,13 +42,11 @@ void CderScorer::setReferenceFiles(const vector<string>& referenceFiles)
 
 void CderScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
-  vector<int> stats;
-  prepareStatsVector(sid, text, stats);
+  string sentence = this->preprocessSentence(text);
 
-  stringstream sout;
-  copy(stats.begin(), stats.end(), ostream_iterator<float>(sout," "));
-  string stats_str = sout.str();
-  entry.set(stats_str);
+  vector<int> stats;
+  prepareStatsVector(sid, sentence, stats);
+  entry.set(stats);
 }
 
 void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>& stats)
@@ -55,9 +55,11 @@ void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>&
   TokenizeAndEncode(text, cand);
 
   float max = -2;
+  vector<int> tmp;
   for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
-    sent_t& ref = m_ref_sentences[rid][sid];
-    vector<int> tmp = computeCD(cand, ref);
+    const sent_t& ref = m_ref_sentences[rid][sid];
+    tmp.clear();
+    computeCD(cand, ref, tmp);
     if (calculateScore(tmp) > max) {
       stats = tmp;
     }
@@ -66,16 +68,15 @@ void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>&
 
 float CderScorer::calculateScore(const vector<int>& comps) const
 {
-  if (comps.size() != 2)
-  {
+  if (comps.size() != 2) {
     throw runtime_error("Size of stat vector for CDER is not 2");
   }
-
-  return 1 - (comps[0] / static_cast<float>(comps[1]));
+  if (comps[1] == 0) return 1.0f;
+  return 1.0f - (comps[0] / static_cast<float>(comps[1]));
 }
 
-vector<int> CderScorer::computeCD(const sent_t& cand, const sent_t& ref) const
-{
+void CderScorer::computeCD(const sent_t& cand, const sent_t& ref,
+                           vector<int>& stats) const {
   int I = cand.size() + 1; // Number of inter-words positions in candidate sentence
   int L = ref.size() + 1; // Number of inter-words positions in reference sentence
 
@@ -102,21 +103,22 @@ vector<int> CderScorer::computeCD(const sent_t& cand, const sent_t& ref) const
       (*nextRow)[i] = *min_element(possibleCosts.begin(), possibleCosts.end());
     }
 
-    // Cost of LongJumps is the same for all in the row
-    int LJ = 1 + *min_element(nextRow->begin(), nextRow->end());
+    if (m_allowed_long_jumps) {
+      // Cost of LongJumps is the same for all in the row
+      int LJ = 1 + *min_element(nextRow->begin(), nextRow->end());
 
-    for (int i = 0; i < I; ++i) {
-      (*nextRow)[i] = min((*nextRow)[i], LJ); // LongJumps
+      for (int i = 0; i < I; ++i) {
+        (*nextRow)[i] = min((*nextRow)[i], LJ); // LongJumps
+      }
     }
 
     delete row;
     row = nextRow;
   }
 
-  vector<int> stats(2);
+  stats.resize(2);
   stats[0] = *(row->rbegin());  // CD distance is the cost of path from (0,0) to (I,L)
   stats[1] = ref.size();
 
   delete row;
-  return stats;
 }
diff --git a/mert/CderScorer.h b/mert/CderScorer.h
index bcc4946dc..dc6714115 100644
--- a/mert/CderScorer.h
+++ b/mert/CderScorer.h
@@ -1,8 +1,6 @@
-#ifndef __CDERSCORER_H__
-#define __CDERSCORER_H__
+#ifndef MERT_CDER_SCORER_H_
+#define MERT_CDER_SCORER_H_
 
-#include <algorithm>
-#include <iostream>
 #include <string>
 #include <vector>
 #include "Types.h"
@@ -10,10 +8,12 @@
 
 using namespace std;
 
-class CderScorer: public StatisticsBasedScorer
-{
-public:
-  explicit CderScorer(const string& config);
+/**
+ * The CderScorer class can compute both the CDER and WER metrics.
+ */
+class CderScorer: public StatisticsBasedScorer {
+ public:
+  explicit CderScorer(const string& config, bool allowed_long_jumps = true);
   ~CderScorer();
 
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
@@ -22,21 +22,22 @@ public:
 
   virtual void prepareStatsVector(size_t sid, const string& text, vector<int>& stats);
 
-  virtual size_t NumberOfScores() const {
-    return 2;
-  }
+  virtual size_t NumberOfScores() const { return 2; }
 
   virtual float calculateScore(const vector<int>& comps) const;
 
-private:
+ private:
+  bool m_allowed_long_jumps;
+
   typedef vector<int> sent_t;
   vector<vector<sent_t> > m_ref_sentences;
 
-  vector<int> computeCD(const sent_t& cand, const sent_t& ref) const;
+  void computeCD(const sent_t& cand, const sent_t& ref,
+                 vector<int>& stats) const;
 
   // no copying allowed
   CderScorer(const CderScorer&);
   CderScorer& operator=(const CderScorer&);
 };
 
-#endif  // __CDERSCORER_H__
+#endif  // MERT_CDER_SCORER_H_
diff --git a/mert/Data.cpp b/mert/Data.cpp
index a4e6c2b24..19a89f754 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -1,13 +1,12 @@
 /*
  *  Data.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
 #include <algorithm>
-#include "util/check.hh"
 #include <cmath>
 #include <fstream>
 
@@ -16,87 +15,84 @@
 #include "Scorer.h"
 #include "ScorerFactory.h"
 #include "Util.h"
+#include "util/check.hh"
+
+using namespace std;
 
 Data::Data()
-  : theScorer(NULL),
-    number_of_scores(0),
-    _sparse_flag(false),
-    scoredata(),
-    featdata() {}
-
-Data::Data(Scorer& ptr)
-    : theScorer(&ptr),
-      score_type(theScorer->getName()),
-      number_of_scores(0),
-      _sparse_flag(false),
-      scoredata(new ScoreData(*theScorer)),
-      featdata(new FeatureData)
+  : m_scorer(NULL),
+    m_num_scores(0),
+    m_sparse_flag(false),
+    m_score_data(),
+    m_feature_data() {}
+
+Data::Data(Scorer* scorer)
+    : m_scorer(scorer),
+      m_score_type(m_scorer->getName()),
+      m_num_scores(0),
+      m_sparse_flag(false),
+      m_score_data(new ScoreData(m_scorer)),
+      m_feature_data(new FeatureData)
 {
-  TRACE_ERR("Data::score_type " << score_type << std::endl);
-  TRACE_ERR("Data::Scorer type from Scorer: " << theScorer->getName() << endl);
+  TRACE_ERR("Data::m_score_type " << m_score_type << endl);
+  TRACE_ERR("Data::Scorer type from Scorer: " << m_scorer->getName() << endl);
 }
 
 //ADDED BY TS
-void Data::remove_duplicates() {
-
-  size_t nSentences = featdata->size();
-  assert(scoredata->size() == nSentences);
-
-  for (size_t s=0; s < nSentences; s++) {
+// TODO: This is too long; consider creating additional functions to
+// reduce the lines of this function.
+void Data::removeDuplicates() {
+  size_t nSentences = m_feature_data->size();
+  assert(m_score_data->size() == nSentences);
 
-    FeatureArray& feat_array =  featdata->get(s);
-    ScoreArray& score_array =  scoredata->get(s);
+  for (size_t s = 0; s < nSentences; s++) {
+    FeatureArray& feat_array =  m_feature_data->get(s);
+    ScoreArray& score_array =  m_score_data->get(s);
 
     assert(feat_array.size() == score_array.size());
 
     //serves as a hash-map:
-    std::map<double, std::vector<size_t> > lookup;
+    map<double, vector<size_t> > lookup;
 
     size_t end_pos = feat_array.size() - 1;
 
     size_t nRemoved = 0;
-    for (size_t k=0; k <= end_pos; k++) {
 
+    for (size_t k = 0; k <= end_pos; k++) {
       const FeatureStats& cur_feats = feat_array.get(k);
-
       double sum = 0.0;
-      for (size_t l=0; l < cur_feats.size(); l++)
-	sum += cur_feats.get(l);
+      for (size_t l = 0; l < cur_feats.size(); l++)
+        sum += cur_feats.get(l);
 
       if (lookup.find(sum) != lookup.end()) {
 
-	//std::cerr << "hit" << std::endl;
-
-	std::vector<size_t>& cur_list = lookup[sum];
-
-	size_t l=0;
-	for (l=0; l < cur_list.size(); l++) {
-	  
-	  size_t j=cur_list[l];
-
-	  if (cur_feats == feat_array.get(j)
-	      && score_array.get(k) == score_array.get(j)) {
-
-	    if (k < end_pos) {
-	      
-	      feat_array.swap(k,end_pos);
-	      score_array.swap(k,end_pos);
-	      
-	      k--;
-	    }
-	    
-	    end_pos--;
-	    nRemoved++;
-	    break;
-	  }
-	}
-
-	if (l == lookup[sum].size())
-	  cur_list.push_back(k);
+        //cerr << "hit" << endl;
+        vector<size_t>& cur_list = lookup[sum];
+
+        // TODO: 'l' is also the index of the summation loop above (a separate
+        // variable, scoped to that loop). If renaming it does not affect the
+        // duplicate removal, give this index a distinct name.
+        size_t l = 0;
+        for (l = 0; l < cur_list.size(); l++) {
+          size_t j = cur_list[l];
+
+          if (cur_feats == feat_array.get(j)
+              && score_array.get(k) == score_array.get(j)) {
+            if (k < end_pos) {
+              feat_array.swap(k,end_pos);
+              score_array.swap(k,end_pos);
+              k--;
+            }
+            end_pos--;
+            nRemoved++;
+            break;
+          }
+        }
+        if (l == lookup[sum].size())
+          cur_list.push_back(k);
+      } else {
+        lookup[sum].push_back(k);
       }
-      else
-	lookup[sum].push_back(k);
-
       // for (size_t j=0; j < k; j++) {
 
       // 	if (feat_array.get(k) == feat_array.get(j)
@@ -115,11 +111,9 @@ void Data::remove_duplicates() {
       //          break;
       // 	}
       // }
-    }
-
+    } // end for k
 
     if (nRemoved > 0) {
-
       feat_array.resize(end_pos+1);
       score_array.resize(end_pos+1);
     }
@@ -127,124 +121,131 @@ void Data::remove_duplicates() {
 }
 //END_ADDED
 
+// Loads both files and raises the sparse flag if sparse features were found.
+void Data::load(const std::string &featfile, const std::string &scorefile) {
+  m_feature_data->load(featfile);
+  m_score_data->load(scorefile);
+  m_sparse_flag = m_feature_data->hasSparseFeatures() || m_sparse_flag;
+}
 
-void Data::loadnbest(const std::string &file)
+void Data::loadNBest(const string &file)
 {
-  TRACE_ERR("loading nbest from " << file << std::endl);
-
-  FeatureStats featentry;
-  ScoreStats scoreentry;
-  std::string sentence_index;
-
+  TRACE_ERR("loading nbest from " << file << endl);
   inputfilestream inp(file); // matches a stream with a file. Opens the file
-
   if (!inp.good())
     throw runtime_error("Unable to open: " + file);
 
-  std::string substring, subsubstring, stringBuf;
-  std::string theSentence;
-  std::string::size_type loc;
-
-  while (getline(inp,stringBuf,'\n')) {
-    if (stringBuf.empty()) continue;
-
-//              TRACE_ERR("stringBuf: " << stringBuf << std::endl);
-
-    getNextPound(stringBuf, substring, "|||"); //first field
-    sentence_index = substring;
-
-    getNextPound(stringBuf, substring, "|||"); //second field
-    theSentence = substring;
+  ScoreStats scoreentry;
+  string line, sentence_index, sentence, feature_str;
 
+  while (getline(inp, line, '\n')) {
+    if (line.empty()) continue;
     // adding statistics for error measures
-    featentry.reset();
     scoreentry.clear();
 
-    theScorer->prepareStats(sentence_index, theSentence, scoreentry);
-
-    scoredata->add(scoreentry, sentence_index);
+    getNextPound(line, sentence_index, "|||"); // first field
+    getNextPound(line, sentence, "|||");       // second field
+    getNextPound(line, feature_str, "|||");    // third field
 
-    getNextPound(stringBuf, substring, "|||"); //third field
+    m_scorer->prepareStats(sentence_index, sentence, scoreentry);
+    m_score_data->add(scoreentry, sentence_index);
 
     // examine first line for name of features
     if (!existsFeatureNames()) {
-      std::string stringsupport=substring;
-      std::string features="";
-      std::string tmpname="";
-
-      size_t tmpidx=0;
-      while (!stringsupport.empty()) {
-        //                      TRACE_ERR("Decompounding: " << substring << std::endl);
-        getNextPound(stringsupport, subsubstring);
-
-        // string ending with ":" are skipped, because they are the names of the features
-        if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
-          features+=tmpname+"_"+stringify(tmpidx)+" ";
-          tmpidx++;
-        }
-        // ignore sparse feature name
-        else if (subsubstring.find("_") != string::npos) {
-          // also ignore its value
-          getNextPound(stringsupport, subsubstring);
-        }
-        // update current feature name
-        else {
-          tmpidx=0;
-          tmpname=subsubstring.substr(0,subsubstring.size() - 1);
-        }
-      }
+      InitFeatureMap(feature_str);
+    }
+    AddFeatures(feature_str, sentence_index);
+  }
+  inp.close();
+}
 
-      featdata->setFeatureMap(features);
+// Logs the chosen write mode to stderr, then saves features and scores to
+// featfile and scorefile in that mode.
+void Data::save(const std::string &featfile, const std::string &scorefile, bool bin) {
+  const char* const mode_message =
+      bin ? "Binary write mode is selected" : "Binary write mode is NOT selected";
+  cerr << mode_message << endl;
+  m_feature_data->save(featfile, bin);
+  m_score_data->save(scorefile, bin);
+}
+
+// Builds the space-separated list of dense feature names ("<name>_<k> ") from
+// one n-best feature string and stores it as the feature map.
+void Data::InitFeatureMap(const string& str) {
+  string remaining = str;
+  string token;
+  string current_name;
+  size_t position = 0;
+  stringstream feature_names;
+  while (!remaining.empty()) {
+    getNextPound(remaining, token);
+    if (!EndsWith(token, ":")) {
+      // A plain value: name it after the current group and its position.
+      feature_names << current_name << "_" << position << " ";
+      ++position;
+      continue;
+    }
+    if (token.find("_") != string::npos) {
+      // A sparse feature name: skip it together with its value.
+      getNextPound(remaining, token);
+      continue;
     }
+    // A group name such as "lm:": drop the colon and restart the numbering.
+    position = 0;
+    current_name = token.substr(0, token.size() - 1);
+  }
+  m_feature_data->setFeatureMap(feature_names.str());
+}
 
-    // adding features
-    while (!substring.empty()) {
-//                      TRACE_ERR("Decompounding: " << substring << std::endl);
-      getNextPound(substring, subsubstring);
+void Data::AddFeatures(const string& str,
+                       const string& sentence_index) {
+  string buf = str;
+  string substr;
+  FeatureStats feature_entry;
+  feature_entry.reset();
 
-      // no ':' -> feature value that needs to be stored
-      if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
-        featentry.add(ConvertStringToFeatureStatsType(subsubstring));
-      }
+  while (!buf.empty()) {
+    getNextPound(buf, substr);
+
+    // no ':' -> feature value that needs to be stored
+    if (!EndsWith(substr, ":")) {
+      feature_entry.add(ConvertStringToFeatureStatsType(substr));
+    } else if (substr.find("_") != string::npos) {
       // sparse feature name? store as well
-      else if (subsubstring.find("_") != string::npos) {
-        std::string name = subsubstring;
-        getNextPound(substring, subsubstring);
-        featentry.addSparse( name, atof(subsubstring.c_str()) );
-        _sparse_flag = true;
-      }
+      string name = substr;
+      getNextPound(buf, substr);
+      feature_entry.addSparse(name, atof(substr.c_str()));
+      m_sparse_flag = true;
     }
-    //cerr << "number of sparse features: " << featentry.getSparse().size() << endl;
-    featdata->add(featentry,sentence_index);
   }
-
-  inp.close();
+  m_feature_data->add(feature_entry, sentence_index);
 }
 
 // TODO
 void Data::mergeSparseFeatures() {
-  std::cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
+  cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
   exit(1);
 }
 
 void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
-                        std::vector<Data>& shards)
+                        vector<Data>& shards)
 {
   CHECK(shard_count);
   CHECK(shard_size >= 0);
   CHECK(shard_size <= 1);
 
-  size_t data_size = scoredata->size();
-  CHECK(data_size == featdata->size());
+  size_t data_size = m_score_data->size();
+  CHECK(data_size == m_feature_data->size());
 
   shard_size *= data_size;
+  const float coeff = static_cast<float>(data_size) / shard_count;
 
   for (size_t shard_id = 0; shard_id < shard_count; ++shard_id) {
     vector<size_t> shard_contents;
     if (shard_size == 0) {
       //split into roughly equal size shards
-      const size_t shard_start = floor(0.5 + shard_id * static_cast<float>(data_size) / shard_count);
-      const size_t shard_end = floor(0.5 + (shard_id + 1) * static_cast<float>(data_size) / shard_count);
+      const size_t shard_start = floor(0.5 + shard_id * coeff);
+      const size_t shard_end = floor(0.5 + (shard_id + 1) * coeff);
       for (size_t i = shard_start; i < shard_end; ++i) {
         shard_contents.push_back(i);
       }
@@ -255,15 +256,15 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
       }
     }
 
-    Scorer* scorer = ScorerFactory::getScorer(score_type, scorerconfig);
+    Scorer* scorer = ScorerFactory::getScorer(m_score_type, scorerconfig);
 
-    shards.push_back(Data(*scorer));
-    shards.back().score_type = score_type;
-    shards.back().number_of_scores = number_of_scores;
-    shards.back()._sparse_flag = _sparse_flag;
+    shards.push_back(Data(scorer));
+    shards.back().m_score_type = m_score_type;
+    shards.back().m_num_scores = m_num_scores;
+    shards.back().m_sparse_flag = m_sparse_flag;
     for (size_t i = 0; i < shard_contents.size(); ++i) {
-      shards.back().featdata->add(featdata->get(shard_contents[i]));
-      shards.back().scoredata->add(scoredata->get(shard_contents[i]));
+      shards.back().m_feature_data->add(m_feature_data->get(shard_contents[i]));
+      shards.back().m_score_data->add(m_score_data->get(shard_contents[i]));
     }
     //cerr << endl;
   }
diff --git a/mert/Data.h b/mert/Data.h
index 171c6db41..37d4b5473 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -1,21 +1,16 @@
 /*
  *  Data.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef DATA_H
-#define DATA_H
+#ifndef MERT_DATA_H_
+#define MERT_DATA_H_
 
-using namespace std;
-
-#include <limits>
 #include <vector>
-#include <iostream>
-
-#include<boost/shared_ptr.hpp>
+#include <boost/shared_ptr.hpp>
 
 #include "Util.h"
 #include "FeatureData.h"
@@ -26,90 +21,65 @@ class Scorer;
 typedef boost::shared_ptr<ScoreData> ScoreDataHandle;
 typedef boost::shared_ptr<FeatureData> FeatureDataHandle;
 
+// NOTE: there is no copy constructor implemented, so only the
+// compiler synthesised shallow copy is available.
 class Data
 {
 private:
-  Scorer* theScorer;
-  std::string score_type;
-  size_t number_of_scores;
-  bool _sparse_flag;
-
-protected:
-  ScoreDataHandle scoredata;
-  FeatureDataHandle featdata;
+  Scorer* m_scorer;
+  std::string m_score_type;
+  std::size_t m_num_scores;
+  bool m_sparse_flag;
+  ScoreDataHandle m_score_data;
+  FeatureDataHandle m_feature_data;
 
 public:
-  explicit Data(Scorer& sc);
+  explicit Data(Scorer* scorer);
   Data();
 
-  //Note that there is no copy constructor implemented, so only the 
-  //compiler synthesised shallow copy is available
-
-  inline void clear() {
-    scoredata->clear();
-    featdata->clear();
+  void clear() {
+    m_score_data->clear();
+    m_feature_data->clear();
   }
 
-  ScoreDataHandle getScoreData() {
-    return scoredata;
-  }
+  ScoreDataHandle getScoreData() { return m_score_data; }
 
-  FeatureDataHandle getFeatureData() {
-    return featdata;
-  }
+  FeatureDataHandle getFeatureData() { return m_feature_data; }
 
-  Scorer* getScorer() {
-    return theScorer;
-  }
+  Scorer* getScorer() { return m_scorer; }
 
-  inline size_t NumberOfFeatures() const {
-    return featdata->NumberOfFeatures();
-  }
-  inline void NumberOfFeatures(size_t v) {
-    featdata->NumberOfFeatures(v);
-  }
-  inline std::string Features() const {
-    return featdata->Features();
-  }
-  inline void Features(const std::string &f) {
-    featdata->Features(f);
+  std::size_t NumberOfFeatures() const {
+    return m_feature_data->NumberOfFeatures();
   }
 
-  inline bool hasSparseFeatures() const { return _sparse_flag; }
-  void mergeSparseFeatures();
+  void NumberOfFeatures(std::size_t v) { m_feature_data->NumberOfFeatures(v); }
 
-  void loadnbest(const std::string &file);
-  
-  void load(const std::string &featfile,const std::string &scorefile) {
-    featdata->load(featfile);
-    scoredata->load(scorefile);
-    if (featdata->hasSparseFeatures())
-      _sparse_flag = true;
-  }
+  std::string Features() const { return m_feature_data->Features(); }
+  void Features(const std::string &f) { m_feature_data->Features(f); }
 
-  //ADDED BY TS
-  void remove_duplicates();
-  //END_ADDED
+  bool hasSparseFeatures() const { return m_sparse_flag; }
+  void mergeSparseFeatures();
 
-  void save(const std::string &featfile,const std::string &scorefile, bool bin=false) {
+  void loadNBest(const std::string &file);
 
-    if (bin) cerr << "Binary write mode is selected" << endl;
-    else cerr << "Binary write mode is NOT selected" << endl;
+  void load(const std::string &featfile, const std::string &scorefile);
 
-    featdata->save(featfile, bin);
-    scoredata->save(scorefile, bin);
-  }
+  void save(const std::string &featfile, const std::string &scorefile, bool bin=false);
+
+  //ADDED BY TS
+  void removeDuplicates();
+  //END_ADDED
 
   inline bool existsFeatureNames() const {
-    return featdata->existsFeatureNames();
+    return m_feature_data->existsFeatureNames();
   }
 
-  inline std::string getFeatureName(size_t idx) const {
-    return featdata->getFeatureName(idx);
+  inline std::string getFeatureName(std::size_t idx) const {
+    return m_feature_data->getFeatureName(idx);
   }
 
-  inline size_t getFeatureIndex(const std::string& name) const {
-    return featdata->getFeatureIndex(name);
+  inline std::size_t getFeatureIndex(const std::string& name) const {
+    return m_feature_data->getFeatureIndex(name);
   }
 
   /**
@@ -118,8 +88,13 @@ public:
    * the data (with replacement) and shard_size is interpreted as the proportion
    * of the total size.
    */
-  void createShards(size_t shard_count, float shard_size, const std::string& scorerconfig,
+  void createShards(std::size_t shard_count, float shard_size, const std::string& scorerconfig,
                     std::vector<Data>& shards);
+
+  // Helper functions for loadNBest().
+  void InitFeatureMap(const std::string& str);
+  void AddFeatures(const std::string& str,
+                   const std::string& sentence_index);
 };
 
-#endif  // DATA_H
+#endif  // MERT_DATA_H_
diff --git a/mert/DataTest.cpp b/mert/DataTest.cpp
index 0f02d64a0..43cf9bb24 100644
--- a/mert/DataTest.cpp
+++ b/mert/DataTest.cpp
@@ -5,12 +5,12 @@
 #define BOOST_TEST_MODULE MertData
 #include <boost/test/unit_test.hpp>
 
-
+#include <boost/scoped_ptr.hpp>
 
 //very basic test of sharding
 BOOST_AUTO_TEST_CASE(shard_basic) {
-  Scorer* scorer = ScorerFactory::getScorer("BLEU", "");
-  Data data(*scorer);
+  boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
+  Data data(scorer.get());
   FeatureArray fa1, fa2, fa3, fa4;
   ScoreArray sa1, sa2, sa3, sa4;
   fa1.setIndex("1");
@@ -36,3 +36,13 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
   BOOST_CHECK_EQUAL(shards.size(),2);
   BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),2);
 }
+
+// InitFeatureMap should turn each "name:" group into name_0, name_1, ...
+BOOST_AUTO_TEST_CASE(init_feature_map_test) {
+  boost::scoped_ptr<Scorer> bleu(ScorerFactory::getScorer("BLEU", ""));
+  Data data(bleu.get());
+  const std::string nbest_features = " d: 0 -7.66174 0 0 -3.51621 0 0 lm: -41.3435 -40.3647 tm: -67.6349 -100.438 -27.6817 -23.4685 8.99907 w: -9 ";
+  const std::string expected_names = "d_0 d_1 d_2 d_3 d_4 d_5 d_6 lm_0 lm_1 tm_0 tm_1 tm_2 tm_3 tm_4 w_0 ";
+  data.InitFeatureMap(nbest_features);
+  BOOST_CHECK_EQUAL(expected_names, data.Features());
+}
diff --git a/mert/Fdstream.h b/mert/Fdstream.h
new file mode 100644
index 000000000..92ccd355f
--- /dev/null
+++ b/mert/Fdstream.h
@@ -0,0 +1,167 @@
+/*
+ * These classes provide C++ stream-like wrappers around a file descriptor.
+ */
+
+#ifndef _FDSTREAM_
+#define _FDSTREAM_
+
+#include <iostream>
+#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#include <ext/stdio_filebuf.h>
+
+#define BUFFER_SIZE (1024)
+
+using namespace std;
+
+// Base of ifdstream/ofdstream: owns a stdio_filebuf wrapped around an open
+// POSIX file descriptor. NOTE: holds a raw owning pointer; do not copy.
+class _fdstream
+{
+protected:
+  _fdstream() :
+      _file_descriptor(-1), _filebuf(NULL), _openmode(ios_base::openmode())
+  { }
+
+  _fdstream(int file_descriptor, ios_base::openmode openmode) :
+      _file_descriptor(-1), _filebuf(NULL), _openmode(ios_base::openmode())
+  {
+    open(file_descriptor, openmode);
+  }
+
+  ios_base::openmode openmode() const { return _openmode; }
+
+  // Attaches to file_descriptor unless a buffer is already attached.
+  void open(int file_descriptor, ios_base::openmode openmode)
+  {
+    if (_filebuf) return;
+    // stdio_filebuf is not synced with stdio and takes over the descriptor.
+    _filebuf = new __gnu_cxx::stdio_filebuf<char> (file_descriptor, openmode);
+    _file_descriptor = file_descriptor;
+    _openmode = openmode;
+  }
+
+  // Destroying the filebuf flushes it and closes the descriptor, so there is
+  // no separate close(): closing first lost buffered output and closed twice.
+  ~_fdstream()
+  {
+    delete _filebuf;
+    _filebuf = NULL;
+  }
+
+  int _file_descriptor;
+  __gnu_cxx::stdio_filebuf<char>* _filebuf;
+  ios_base::openmode _openmode;
+};
+
+// Input stream reading from an already-open POSIX file descriptor.
+class ifdstream : public _fdstream
+{
+public:
+  ifdstream() : _fdstream(), _stream(NULL) { }
+
+  ifdstream(int file_descriptor) :
+      _fdstream(file_descriptor, ios_base::in)
+  {
+    _stream = new istream (_filebuf);
+  }
+
+  // Attaches to file_descriptor; ignored if a stream is already attached.
+  void open(int file_descriptor)
+  {
+    if (_stream) return;
+    _fdstream::open(file_descriptor, ios_base::in);
+    _stream = new istream (_filebuf);
+  }
+
+  // Extracts one token with istream's operator>>; no-op if nothing is attached.
+  ifdstream& operator>> (string& str)
+  {
+    if (_stream)
+      (*_stream) >> str;
+    return *this;
+  }
+
+  // Reads up to BUFFER_SIZE-1 characters of a line; the rest of a longer
+  // line is returned by the following call. Returns the number stored.
+  size_t getline(string& str)
+  {
+    char tmp[BUFFER_SIZE];
+    size_t ret = getline(tmp, BUFFER_SIZE);
+    str = tmp;
+    return ret;
+  }
+
+  size_t getline (char* s, streamsize n) { return getline(s, n, '\n'); }
+
+  // Stores at most n-1 characters in s, stopping at (and consuming) delim or
+  // NUL, or at end of input. s is always NUL-terminated when n > 0.
+  size_t getline (char* s, streamsize n, char delim)
+  {
+    if (!s || n <= 0) return 0;
+    streamsize i = 0;
+    while (_stream && i < n - 1) {
+      const istream::int_type c = _stream->get();
+      // Test for EOF before narrowing so it is never stored as a character.
+      if (c == istream::traits_type::eof()) break;
+      const char ch = istream::traits_type::to_char_type(c);
+      if (ch == delim || ch == '\0') break;
+      s[i++] = ch;
+    }
+    s[i] = '\0';
+    return static_cast<size_t>(i);
+  }
+
+  // The base class destructor releases the file buffer and descriptor.
+  ~ifdstream() { delete _stream; }
+
+private:
+  istream* _stream;
+};
+
+// Output stream writing to an already-open POSIX file descriptor.
+class ofdstream : public _fdstream
+{
+public:
+  ofdstream() :
+      _fdstream(), _stream(NULL)
+  { }
+
+  ofdstream(int file_descriptor) :
+      _fdstream(file_descriptor, ios_base::out)
+  {
+    _stream = new ostream (_filebuf);
+  }
+
+  // Attaches to file_descriptor; ignored if a stream is already attached.
+  void open(int file_descriptor)
+  {
+    if (_stream) return;
+    _fdstream::open(file_descriptor, ios_base::out);
+    _stream = new ostream (_filebuf);
+  }
+
+  // Writes str if the stream is still good, then flushes. Does nothing when
+  // no descriptor has been attached yet (previously a NULL dereference).
+  ofdstream& operator<< (const string& str)
+  {
+    if (!_stream) return *this;
+    if (_stream->good())
+      (*_stream) << str;
+    _stream->flush();
+    return *this;
+  }
+
+  ~ofdstream()
+  {
+    delete _stream;
+  }
+
+private:
+  ostream* _stream;
+};
+
+#else
+#error "Not supported"
+#endif
+
+#endif // _FDSTREAM_
diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp
index 854bcef79..62f9ceda5 100644
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@@ -1,140 +1,152 @@
 /*
  *  FeatureArray.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
+#include <fstream>
 #include "FeatureArray.h"
 #include "FileStream.h"
 #include "Util.h"
 
-
 FeatureArray::FeatureArray()
-    : idx(""), number_of_features(0), _sparse_flag(false) {}
+    : m_index(""), m_num_features(0), m_sparse_flag(false) {}
 
 FeatureArray::~FeatureArray() {}
 
-void FeatureArray::savetxt(std::ofstream& outFile)
+void FeatureArray::savetxt(ostream* os)
 {
-  outFile << FEATURES_TXT_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_features << " " << features << std::endl;
-  for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->savetxt(outFile);
-    outFile << std::endl;
+  *os << FEATURES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_features << " " << m_features << endl;
+  for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+    i->savetxt(os);
+    *os << endl;
   }
-  outFile << FEATURES_TXT_END << std::endl;
+  *os << FEATURES_TXT_END << endl;
 }
 
-void FeatureArray::savebin(std::ofstream& outFile)
+void FeatureArray::savebin(ostream* os)
 {
-  outFile << FEATURES_BIN_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_features << " " << features << std::endl;
-  for (featarray_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->savebin(outFile);
+  *os << FEATURES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_features << " " << m_features << endl;
+  for (featarray_t::iterator i = m_array.begin(); i != m_array.end(); ++i)
+    i->savebin(os);
 
-  outFile << FEATURES_BIN_END << std::endl;
+  *os << FEATURES_BIN_END << endl;
 }
 
 
-void FeatureArray::save(std::ofstream& inFile, bool bin)
+void FeatureArray::save(ostream* os, bool bin)
 {
-  if (size()>0)
-    (bin)?savebin(inFile):savetxt(inFile);
+  if (size() <= 0) return;
+  if (bin) {
+    savebin(os);
+  } else {
+    savetxt(os);
+  }
 }
 
-void FeatureArray::save(const std::string &file, bool bin)
+void FeatureArray::save(const string &file, bool bin)
 {
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile);
-
-  outFile.close();
+  ofstream ofs(file.c_str(), ios::out);
+  if (!ofs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  ostream *os = &ofs;
+  save(os, bin);
+  ofs.close();
 }
 
-void FeatureArray::loadbin(ifstream& inFile, size_t n)
+void FeatureArray::save(bool bin)
 {
-  FeatureStats entry(number_of_features);
+  save(&cout, bin);
+}
 
-  for (size_t i=0 ; i < n; i++) {
-    entry.loadbin(inFile);
+void FeatureArray::loadbin(istream* is, size_t n)
+{
+  FeatureStats entry(m_num_features);
+  for (size_t i = 0 ; i < n; i++) {
+    entry.loadbin(is);
     add(entry);
   }
 }
 
-void FeatureArray::loadtxt(ifstream& inFile, size_t n)
+void FeatureArray::loadtxt(istream* is, size_t n)
 {
-  FeatureStats entry(number_of_features);
+  FeatureStats entry(m_num_features);
 
-  for (size_t i=0 ; i < n; i++) {
-    entry.loadtxt(inFile);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadtxt(is);
     add(entry);
     if (entry.getSparse().size()>0)
-      _sparse_flag = true;
+      m_sparse_flag = true;
   }
 }
 
-void FeatureArray::load(ifstream& inFile)
+void FeatureArray::load(istream* is)
 {
-  size_t number_of_entries=0;
-  bool binmode=false;
+  size_t number_of_entries = 0;
+  bool binmode = false;
 
-  std::string substring, stringBuf;
-  std::string::size_type loc;
+  string substring, stringBuf;
+  string::size_type loc;
 
-  std::getline(inFile, stringBuf);
-  if (!inFile.good()) {
+  getline(*is, stringBuf);
+  if (!is->good()) {
     return;
   }
 
   if (!stringBuf.empty()) {
     if ((loc = stringBuf.find(FEATURES_TXT_BEGIN)) == 0) {
-      binmode=false;
+      binmode = false;
     } else if ((loc = stringBuf.find(FEATURES_BIN_BEGIN)) == 0) {
-      binmode=true;
+      binmode = true;
     } else {
       TRACE_ERR("ERROR: FeatureArray::load(): Wrong header");
       return;
     }
     getNextPound(stringBuf, substring);
     getNextPound(stringBuf, substring);
-    idx = substring;
+    m_index = substring;
     getNextPound(stringBuf, substring);
     number_of_entries = atoi(substring.c_str());
     getNextPound(stringBuf, substring);
-    number_of_features = atoi(substring.c_str());
-    features = stringBuf;
+    m_num_features = atoi(substring.c_str());
+    m_features = stringBuf;
   }
 
-  (binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
+  if (binmode) {
+    loadbin(is, number_of_entries);
+  } else {
+    loadtxt(is, number_of_entries);
+  }
 
-  std::getline(inFile, stringBuf);
+  getline(*is, stringBuf);
   if (!stringBuf.empty()) {
-    if ((loc = stringBuf.find(FEATURES_TXT_END)) != 0 && (loc = stringBuf.find(FEATURES_BIN_END)) != 0) {
+    if ((loc = stringBuf.find(FEATURES_TXT_END)) != 0 &&
+        (loc = stringBuf.find(FEATURES_BIN_END)) != 0) {
       TRACE_ERR("ERROR: FeatureArray::load(): Wrong footer");
       return;
     }
   }
 }
 
-void FeatureArray::load(const std::string &file)
+void FeatureArray::load(const string &file)
 {
-  TRACE_ERR("loading data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  load((ifstream&) inFile);
-
-  inFile.close();
-
+  TRACE_ERR("loading data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }
 
 void FeatureArray::merge(FeatureArray& e)
 {
   //dummy implementation
-  for (size_t i=0; i<e.size(); i++)
+  for (size_t i = 0; i < e.size(); i++)
     add(e.get(i));
 }
 
@@ -144,10 +156,9 @@ bool FeatureArray::check_consistency() const
   if (sz == 0)
     return true;
 
-  for (featarray_t::const_iterator i = array_.begin(); i != array_.end(); i++) {
+  for (featarray_t::const_iterator i = m_array.begin(); i != m_array.end(); i++) {
     if (i->size() != sz)
       return false;
   }
   return true;
 }
-
diff --git a/mert/FeatureArray.h b/mert/FeatureArray.h
index ee8ee1354..25ebbe866 100644
--- a/mert/FeatureArray.h
+++ b/mert/FeatureArray.h
@@ -1,17 +1,16 @@
 /*
  *  FeatureArray.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef FEATURE_ARRAY_H
-#define FEATURE_ARRAY_H
+#ifndef MERT_FEATURE_ARRAY_H_
+#define MERT_FEATURE_ARRAY_H_
 
 #include <vector>
 #include <iostream>
-#include <fstream>
 #include "FeatureStats.h"
 
 using namespace std;
@@ -26,85 +25,60 @@ class FeatureArray
 private:
   // idx to identify the utterance. It can differ from
   // the index inside the vector.
-  std::string idx;
-
-protected:
-  featarray_t array_;
-  size_t number_of_features;
-  std::string features;
-  bool _sparse_flag;
+  std::string m_index;
+  featarray_t m_array;
+  size_t m_num_features;
+  std::string m_features;
+  bool m_sparse_flag;
 
 public:
   FeatureArray();
   ~FeatureArray();
 
-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }
 
-  inline bool hasSparseFeatures() const {
-    return _sparse_flag;
-  }
+  bool hasSparseFeatures() const { return m_sparse_flag; }
 
-  inline std::string getIndex() const {
-    return idx;
-  }
-  inline void setIndex(const std::string& value) {
-    idx = value;
-  }
+  std::string getIndex() const { return m_index; }
+  void setIndex(const std::string& value) { m_index = value; }
 
-  inline FeatureStats& get(size_t i) {
-    return array_.at(i);
-  }
-  inline const FeatureStats& get(size_t i)const {
-    return array_.at(i);
-  }
-  void add(FeatureStats& e) {
-    array_.push_back(e);
-  }
+  FeatureStats& get(size_t i) { return m_array.at(i); }
+  const FeatureStats& get(size_t i) const { return m_array.at(i); }
+
+  void add(FeatureStats& e) { m_array.push_back(e); }
 
   //ADDED BY TS
   void swap(size_t i, size_t j) {
-    std::swap(array_[i],array_[j]);
+    std::swap(m_array[i], m_array[j]);
   }
-  
+
   void resize(size_t new_size) {
-    array_.resize(std::min(new_size,array_.size()));
+    m_array.resize(std::min(new_size, m_array.size()));
   }
   //END_ADDED
 
   void merge(FeatureArray& e);
 
-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfFeatures() const {
-    return number_of_features;
-  }
-  inline void NumberOfFeatures(size_t v) {
-    number_of_features = v;
-  }
-  inline std::string Features() const {
-    return features;
-  }
-  inline void Features(const std::string& f) {
-    features = f;
-  }
+  size_t size() const { return m_array.size(); }
+
+  size_t NumberOfFeatures() const { return m_num_features; }
+  void NumberOfFeatures(size_t v) { m_num_features = v; }
 
-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  void save(ofstream& outFile, bool bin=false);
+  std::string Features() const { return m_features; }
+  void Features(const std::string& f) { m_features = f; }
+
+  void savetxt(std::ostream* os);
+  void savebin(std::ostream* os);
+  void save(std::ostream* os, bool bin=false);
   void save(const std::string &file, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout",bin);
-  }
+  void save(bool bin=false);
 
-  void loadtxt(ifstream& inFile, size_t n);
-  void loadbin(ifstream& inFile, size_t n);
-  void load(ifstream& inFile);
+  void loadtxt(std::istream* is, size_t n);
+  void loadbin(std::istream* is, size_t n);
+  void load(std::istream* is);
   void load(const std::string &file);
 
   bool check_consistency() const;
 };
 
-#endif  // FEATURE_ARRAY_H
+#endif  // MERT_FEATURE_ARRAY_H_
diff --git a/mert/FeatureData.cpp b/mert/FeatureData.cpp
index ed76bca3b..acc144d1a 100644
--- a/mert/FeatureData.cpp
+++ b/mert/FeatureData.cpp
@@ -1,6 +1,6 @@
 /*
  *  FeatureData.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
@@ -12,44 +12,47 @@
 #include "FileStream.h"
 #include "Util.h"
 
-static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
-static const float MAX_FLOAT=numeric_limits<float>::max();
+using namespace std;
+
+static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
+static const float MAX_FLOAT = numeric_limits<float>::max();
 
 FeatureData::FeatureData()
-    : number_of_features(0),
-      _sparse_flag(false) {}
+    : m_num_features(0),
+      m_sparse_flag(false) {}
 
-void FeatureData::save(std::ofstream& outFile, bool bin)
+void FeatureData::save(ostream* os, bool bin)
 {
-  for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->save(outFile, bin);
+  for (featdata_t::iterator i = m_array.begin(); i != m_array.end(); i++)
+    i->save(os, bin);
 }
 
-void FeatureData::save(const std::string &file, bool bin)
+void FeatureData::save(const string &file, bool bin)
 {
   if (file.empty()) return;
+  TRACE_ERR("saving the array into " << file << endl);
+  ofstream ofs(file.c_str(), ios::out); // matches a stream with a file. Opens the file
+  ostream* os = &ofs;
+  save(os, bin);
+  ofs.close();
+}
 
-  TRACE_ERR("saving the array into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile, bin);
-
-  outFile.close();
+void FeatureData::save(bool bin) {
+  save(&cout, bin);
 }
 
-void FeatureData::load(ifstream& inFile)
+void FeatureData::load(istream* is)
 {
   FeatureArray entry;
 
-  while (!inFile.eof()) {
+  while (!is->eof()) {
 
-    if (!inFile.good()) {
-      std::cerr << "ERROR FeatureData::load inFile.good()" << std::endl;
+    if (!is->good()) {
+      cerr << "ERROR FeatureData::load inFile.good()" << endl;
     }
 
     entry.clear();
-    entry.load(inFile);
+    entry.load(is);
 
     if (entry.size() == 0)
       break;
@@ -58,26 +61,23 @@ void FeatureData::load(ifstream& inFile)
       setFeatureMap(entry.Features());
 
     if (entry.hasSparseFeatures())
-      _sparse_flag = true;
+      m_sparse_flag = true;
 
     add(entry);
   }
 }
 
 
-void FeatureData::load(const std::string &file)
+void FeatureData::load(const string &file)
 {
-  TRACE_ERR("loading feature data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  if (!inFile) {
+  TRACE_ERR("loading feature data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  if (!input_stream) {
     throw runtime_error("Unable to open feature file: " + file);
   }
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }
 
 void FeatureData::add(FeatureArray& e)
@@ -85,25 +85,25 @@ void FeatureData::add(FeatureArray& e)
   if (exists(e.getIndex())) { // array at position e.getIndex() already exists
     //enlarge array at position e.getIndex()
     size_t pos = getIndex(e.getIndex());
-    array_.at(pos).merge(e);
+    m_array.at(pos).merge(e);
   } else {
-    array_.push_back(e);
+    m_array.push_back(e);
     setIndex();
   }
 }
 
-void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
+void FeatureData::add(FeatureStats& e, const string& sent_idx)
 {
   if (exists(sent_idx)) { // array at position e.getIndex() already exists
     //enlarge array at position e.getIndex()
     size_t pos = getIndex(sent_idx);
 //              TRACE_ERR("Inserting " << e << " in array " << sent_idx << std::endl);
-    array_.at(pos).add(e);
+    m_array.at(pos).add(e);
   } else {
 //              TRACE_ERR("Creating a new entry in the array and inserting " << e << std::endl);
     FeatureArray a;
-    a.NumberOfFeatures(number_of_features);
-    a.Features(features);
+    a.NumberOfFeatures(m_num_features);
+    a.Features(m_features);
     a.setIndex(sent_idx);
     a.add(e);
     add(a);
@@ -112,10 +112,10 @@ void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
 
 bool FeatureData::check_consistency() const
 {
-  if (array_.size() == 0)
+  if (m_array.size() == 0)
     return true;
 
-  for (featdata_t::const_iterator i = array_.begin(); i != array_.end(); i++)
+  for (featdata_t::const_iterator i = m_array.begin(); i != m_array.end(); i++)
     if (!i->check_consistency()) return false;
 
   return true;
@@ -124,25 +124,53 @@ bool FeatureData::check_consistency() const
 void FeatureData::setIndex()
 {
   size_t j=0;
-  for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    idx2arrayname_[j]=(*i).getIndex();
-    arrayname2idx_[(*i).getIndex()] = j;
+  for (featdata_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
+    m_index_to_array_name[j]=(*i).getIndex();
+    m_array_name_to_index[(*i).getIndex()] = j;
     j++;
   }
 }
 
-void FeatureData::setFeatureMap(const std::string& feat)
+void FeatureData::setFeatureMap(const string& feat)
 {
-  number_of_features = 0;
-  features = feat;
+  m_num_features = 0;
+  m_features = feat;
+
+  vector<string> buf;
+  Tokenize(feat.c_str(), ' ', &buf);
+  for (vector<string>::const_iterator it = buf.begin();
+       it != buf.end(); ++it) {
+    const size_t size = m_index_to_feature_name.size();
+    m_feature_name_to_index[*it] = size;
+    m_index_to_feature_name[size] = *it;
+    ++m_num_features;
+  }
+}
 
-  std::string substring, stringBuf;
-  stringBuf = features;
-  while (!stringBuf.empty()) {
-    getNextPound(stringBuf, substring);
+string FeatureData::ToString() const {
+  string res;
+
+  {
+    stringstream ss;
+    ss << "number of features: " << m_num_features
+       << ", features: " << m_features
+       << ", sparse flag: ";
+    if (m_sparse_flag) {
+      ss << "yes, ";
+    } else {
+      ss << "no, ";
+    }
+    res.append(ss.str());
+  }
 
-    featname2idx_[substring] = idx2featname_.size();
-    idx2featname_[idx2featname_.size()] = substring;
-    number_of_features++;
+  res.append("feature_id_map = { ");
+  for (map<string, size_t>::const_iterator it = m_feature_name_to_index.begin();
+       it != m_feature_name_to_index.end(); ++it) {
+    stringstream ss;
+    ss << it->first << " => " << it->second << ", ";
+    res.append(ss.str());
   }
+  res.append("}");
+
+  return res;
 }
diff --git a/mert/FeatureData.h b/mert/FeatureData.h
index 8331fe2d2..aef1ef250 100644
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@@ -1,15 +1,13 @@
 /*
  *  FeatureData.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef FEATURE_DATA_H
-#define FEATURE_DATA_H
-
-using namespace std;
+#ifndef MERT_FEATURE_DATA_H_
+#define MERT_FEATURE_DATA_H_
 
 #include <vector>
 #include <iostream>
@@ -19,123 +17,116 @@ using namespace std;
 class FeatureData
 {
 private:
-  size_t number_of_features;
-  std::string features;
-  bool _sparse_flag;
-
-  map<std::string, size_t> featname2idx_; // map from name to index of features
-  map<size_t, std::string> idx2featname_; // map from index to name of features
-
-protected:
-  featdata_t array_;
-  idx2name idx2arrayname_; // map from index to name of array
-  name2idx arrayname2idx_; // map from name to index of array
+  std::size_t m_num_features;
+  std::string m_features;
+  bool m_sparse_flag;
+  std::map<std::string, std::size_t> m_feature_name_to_index; // map from name to index of features
+  std::map<std::size_t, std::string> m_index_to_feature_name; // map from index to name of features
+  featdata_t m_array;
+  idx2name m_index_to_array_name; // map from index to name of array
+  name2idx m_array_name_to_index; // map from name to index of array
 
 public:
   FeatureData();
   ~FeatureData() {}
 
-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }
 
-  inline bool hasSparseFeatures() const {
-    return _sparse_flag;
-  }
-  inline FeatureArray get(const std::string& idx) {
-    return array_.at(getIndex(idx));
-  }
-  inline FeatureArray& get(size_t idx) {
-    return array_.at(idx);
-  }
-  inline const FeatureArray& get(size_t idx) const {
-    return array_.at(idx);
+  bool hasSparseFeatures() const { return m_sparse_flag; }
+
+  FeatureArray get(const std::string& idx) {
+    return m_array.at(getIndex(idx));
   }
 
+  FeatureArray& get(std::size_t idx) { return m_array.at(idx); }
+  const FeatureArray& get(std::size_t idx) const { return m_array.at(idx); }
+
   inline bool exists(const std::string& sent_idx) const {
     return exists(getIndex(sent_idx));
   }
 
   inline bool exists(int sent_idx) const {
-    return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+    return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
   }
 
-  inline FeatureStats& get(size_t i, size_t j) {
-    return array_.at(i).get(j);
+  inline FeatureStats& get(std::size_t i, std::size_t j) {
+    return m_array.at(i).get(j);
   }
-  inline const FeatureStats&  get(size_t i, size_t j) const {
-    return array_.at(i).get(j);
+
+  inline const FeatureStats& get(std::size_t i, std::size_t j) const {
+    return m_array.at(i).get(j);
   }
 
   void add(FeatureArray& e);
   void add(FeatureStats& e, const std::string& sent_idx);
 
-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfFeatures() const {
-    return number_of_features;
-  }
-  inline void NumberOfFeatures(size_t v) {
-    number_of_features = v;
-  }
-  inline std::string Features() const {
-    return features;
-  }
-  inline void Features(const std::string& f) {
-    features = f;
-  }
+  std::size_t size() const { return m_array.size(); }
+
+  std::size_t NumberOfFeatures() const { return m_num_features; }
+  void NumberOfFeatures(std::size_t v) { m_num_features = v; }
+
+  std::string Features() const { return m_features; }
+  void Features(const std::string& f) { m_features = f; }
 
   void save(const std::string &file, bool bin=false);
-  void save(ofstream& outFile, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout", bin);
-  }
+  void save(std::ostream* os, bool bin=false);
+  void save(bool bin=false);
 
-  void load(ifstream& inFile);
+  void load(std::istream* is);
   void load(const std::string &file);
 
   bool check_consistency() const;
+
   void setIndex();
 
   inline int getIndex(const std::string& idx) const {
-    name2idx::const_iterator i = arrayname2idx_.find(idx);
-    if (i != arrayname2idx_.end())
+    name2idx::const_iterator i = m_array_name_to_index.find(idx);
+    if (i != m_array_name_to_index.end())
       return i->second;
     else
       return -1;
   }
 
-  inline std::string getIndex(size_t idx) const {
-    idx2name::const_iterator i = idx2arrayname_.find(idx);
-    if (i != idx2arrayname_.end())
-      throw runtime_error("there is no entry at index " + idx);
+  inline std::string getIndex(std::size_t idx) const {  // returns the array name stored for position idx; throws std::runtime_error if none
+    idx2name::const_iterator i = m_index_to_array_name.find(idx);
+    if (i == m_index_to_array_name.end())  // missing entry: end() must never be dereferenced
+      throw std::runtime_error("there is no entry at the requested index");  // literal + integer is pointer arithmetic, not concatenation
     return i->second;
   }
 
   bool existsFeatureNames() const {
-    return (idx2featname_.size() > 0) ? true : false;
+    return (m_index_to_feature_name.size() > 0) ? true : false;
   }
 
-  std::string getFeatureName(size_t idx) const {
-    if (idx >= idx2featname_.size())
+  std::string getFeatureName(std::size_t idx) const {
+    if (idx >= m_index_to_feature_name.size())
       throw runtime_error("Error: you required an too big index");
-    map<size_t, std::string>::const_iterator it = idx2featname_.find(idx);
-    if (it == idx2featname_.end()) {
+    std::map<std::size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
+    if (it == m_index_to_feature_name.end()) {
       throw runtime_error("Error: specified id is unknown: " + idx);
     } else {
       return it->second;
     }
   }
 
-  size_t getFeatureIndex(const std::string& name) const {
-    map<std::string, size_t>::const_iterator it = featname2idx_.find(name);
-    if (it == featname2idx_.end())
-      throw runtime_error("Error: feature " + name + " is unknown");
+  std::size_t getFeatureIndex(const std::string& name) const {
+    std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.find(name);
+    if (it == m_feature_name_to_index.end()) {
+      std::string msg = "Error: feature " + name + " is unknown. Known features: ";
+      for (std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.begin(); it != m_feature_name_to_index.end(); it++) {
+        msg += it->first;
+        msg += ", ";
+      }
+
+      throw std::runtime_error(msg);
+    }
     return it->second;
   }
 
   void setFeatureMap(const std::string& feat);
+
+  /* For debugging */
+  std::string ToString() const;
 };
 
-#endif  // FEATURE_DATA_H
+#endif  // MERT_FEATURE_DATA_H_
diff --git a/mert/FeatureDataIterator.h b/mert/FeatureDataIterator.h
index 81f072970..58345829c 100644
--- a/mert/FeatureDataIterator.h
+++ b/mert/FeatureDataIterator.h
@@ -17,8 +17,8 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 
-#ifndef _FEATURE_DATA_ITERATOR_
-#define _FEATURE_DATA_ITERATOR_
+#ifndef MERT_FEATURE_DATA_ITERATOR_H_
+#define MERT_FEATURE_DATA_ITERATOR_H_
 
 /**
   * For loading from the feature data file.
@@ -88,4 +88,4 @@ class FeatureDataIterator :
     std::vector<FeatureDataItem> m_next;
 };
 
-#endif
+#endif  // MERT_FEATURE_DATA_ITERATOR_H_
diff --git a/mert/FeatureDataTest.cpp b/mert/FeatureDataTest.cpp
new file mode 100644
index 000000000..42ac5996c
--- /dev/null
+++ b/mert/FeatureDataTest.cpp
@@ -0,0 +1,40 @@
+#include "FeatureData.h"
+
+#define BOOST_TEST_MODULE FeatureData
+#include <boost/test/unit_test.hpp>
+
+#include <sstream>
+
+namespace {
+
+void CheckFeatureMap(const FeatureData* feature_data,
+                     const char* str, int num_feature, int* cnt) {
+  for (int i = 0; i < num_feature; ++i) {
+    std::stringstream ss;
+    ss << str << "_" << i;
+    const string& s = ss.str();
+    BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
+    BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
+    ++(*cnt);
+  }
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(set_feature_map) {
+  std::string str("d_0 d_1 d_2 d_3 d_4 d_5 d_6 lm_0 lm_1 tm_0 tm_1 tm_2 tm_3 tm_4 w_0 ");
+  FeatureData feature_data;
+
+  feature_data.setFeatureMap(str);
+
+  BOOST_REQUIRE(feature_data.Features() == str);
+  BOOST_REQUIRE(feature_data.NumberOfFeatures() == 15);
+
+  int cnt = 0;
+  CheckFeatureMap(&feature_data, "d", 7, &cnt);
+  CheckFeatureMap(&feature_data, "lm", 2, &cnt);
+  CheckFeatureMap(&feature_data, "tm", 5, &cnt);
+
+  BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), cnt);
+  BOOST_CHECK_EQUAL(feature_data.getFeatureName(cnt).c_str(), "w_0");
+}
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 0fe003158..38aa31328 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -1,6 +1,6 @@
 /*
  *  FeatureStats.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
@@ -8,6 +8,7 @@
 
 #include "FeatureStats.h"
 
+#include <fstream>
 #include <cmath>
 #include "Util.h"
 
@@ -15,58 +16,58 @@ namespace {
 const int kAvailableSize = 8;
 } // namespace
 
-SparseVector::name2id_t SparseVector::name2id_;
-SparseVector::id2name_t SparseVector::id2name_;
+SparseVector::name2id_t SparseVector::m_name_to_id;
+SparseVector::id2name_t SparseVector::m_id_to_name;
 
 FeatureStatsType SparseVector::get(const string& name) const {
-  name2id_t::const_iterator name2id_iter = name2id_.find(name);
-  if (name2id_iter == name2id_.end()) return 0;
+  name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
+  if (name2id_iter == m_name_to_id.end()) return 0;
   size_t id = name2id_iter->second;
   return get(id);
 }
 
 FeatureStatsType SparseVector::get(size_t id) const {
-  fvector_t::const_iterator fvector_iter = fvector_.find(id);
-  if (fvector_iter == fvector_.end()) return 0;
+  fvector_t::const_iterator fvector_iter = m_fvector.find(id);
+  if (fvector_iter == m_fvector.end()) return 0;
   return fvector_iter->second;
 }
 
 void SparseVector::set(const string& name, FeatureStatsType value) {
-  name2id_t::const_iterator name2id_iter = name2id_.find(name);
+  name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
   size_t id = 0;
-  if (name2id_iter == name2id_.end()) {
-    id = id2name_.size();
-    id2name_.push_back(name);
-    name2id_[name] = id;
+  if (name2id_iter == m_name_to_id.end()) {
+    id = m_id_to_name.size();
+    m_id_to_name.push_back(name);
+    m_name_to_id[name] = id;
   } else {
     id = name2id_iter->second;
   }
-  fvector_[id] = value;
+  m_fvector[id] = value;
 }
 
 void SparseVector::write(ostream& out, const string& sep) const {
-  for (fvector_t::const_iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
+  for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
     if (abs(i->second) < 0.00001) continue;
-    string name = id2name_[i->first];
+    string name = m_id_to_name[i->first];
     out << name << sep << i->second << " ";
   }
 }
 
 void SparseVector::clear() {
-  fvector_.clear();
+  m_fvector.clear();
 }
 
 SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
   //All the elements that have values in *this
-  for (fvector_t::iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
-    fvector_[i->first] = i->second - rhs.get(i->first);
+  for (fvector_t::iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
+    m_fvector[i->first] = i->second - rhs.get(i->first);
   }
 
   //Any elements in rhs, that have no value in *this
-  for (fvector_t::const_iterator i = rhs.fvector_.begin();
-      i != rhs.fvector_.end(); ++i) {
-    if (fvector_.find(i->first) == fvector_.end()) {
-      fvector_[i->first] = -(i->second);
+  for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+      i != rhs.m_fvector.end(); ++i) {
+    if (m_fvector.find(i->first) == m_fvector.end()) {
+      m_fvector[i->first] = -(i->second);
     }
   }
   return *this;
@@ -79,37 +80,37 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
 }
 
 FeatureStats::FeatureStats()
-    : available_(kAvailableSize), entries_(0),
-      array_(new FeatureStatsType[available_]) {}
+    : m_available_size(kAvailableSize), m_entries(0),
+      m_array(new FeatureStatsType[m_available_size]) {}
 
 FeatureStats::FeatureStats(const size_t size)
-    : available_(size), entries_(size),
-      array_(new FeatureStatsType[available_])
+    : m_available_size(size), m_entries(size),
+      m_array(new FeatureStatsType[m_available_size])
 {
-  memset(array_, 0, GetArraySizeWithBytes());
+  memset(m_array, 0, GetArraySizeWithBytes());
 }
 
-FeatureStats::FeatureStats(std::string &theString)
-    : available_(0), entries_(0), array_(NULL)
+FeatureStats::FeatureStats(string &theString)
+    : m_available_size(0), m_entries(0), m_array(NULL)
 {
   set(theString);
 }
 
 FeatureStats::~FeatureStats()
 {
-  if (array_) {
-    delete [] array_;
-    array_ = NULL;
+  if (m_array) {
+    delete [] m_array;
+    m_array = NULL;
   }
 }
 
 void FeatureStats::Copy(const FeatureStats &stats)
 {
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new FeatureStatsType[available_];
-  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
-  map_ = stats.getSparse();
+  m_available_size = stats.available();
+  m_entries = stats.size();
+  m_array = new FeatureStatsType[m_available_size];
+  memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
+  m_map = stats.getSparse();
 }
 
 FeatureStats::FeatureStats(const FeatureStats &stats)
@@ -119,34 +120,34 @@ FeatureStats::FeatureStats(const FeatureStats &stats)
 
 FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
 {
-  delete [] array_;
+  delete [] m_array;
   Copy(stats);
   return *this;
 }
 
 void FeatureStats::expand()
 {
-  available_ *= 2;
-  featstats_t t_ = new FeatureStatsType[available_];
-  memcpy(t_, array_, GetArraySizeWithBytes());
-  delete [] array_;
-  array_ = t_;
+  m_available_size *= 2;
+  featstats_t t_ = new FeatureStatsType[m_available_size];
+  memcpy(t_, m_array, GetArraySizeWithBytes());
+  delete [] m_array;
+  m_array = t_;
 }
 
 void FeatureStats::add(FeatureStatsType v)
 {
   if (isfull()) expand();
-  array_[entries_++]=v;
+  m_array[m_entries++]=v;
 }
 
 void FeatureStats::addSparse(const string& name, FeatureStatsType v)
 {
-  map_.set(name,v);
+  m_map.set(name,v);
 }
 
-void FeatureStats::set(std::string &theString)
+void FeatureStats::set(string &theString)
 {
-  std::string substring, stringBuf;
+  string substring, stringBuf;
   reset();
 
   while (!theString.empty()) {
@@ -163,48 +164,50 @@ void FeatureStats::set(std::string &theString)
   }
 }
 
-
-void FeatureStats::loadbin(std::ifstream& inFile)
+void FeatureStats::loadbin(istream* is)
 {
-  inFile.read((char*) array_, GetArraySizeWithBytes());
+  is->read(reinterpret_cast<char*>(m_array),
+           static_cast<streamsize>(GetArraySizeWithBytes()));
 }
 
-void FeatureStats::loadtxt(std::ifstream& inFile)
+void FeatureStats::loadtxt(istream* is)
 {
-  std::string theString;
-  std::getline(inFile, theString);
-  set(theString);
+  string line;
+  getline(*is, line);
+  set(line);
 }
 
-void FeatureStats::loadtxt(const std::string &file)
+void FeatureStats::loadtxt(const string &file)
 {
-  //    TRACE_ERR("loading the stats from " << file << std::endl);
-
-  std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
-
-  loadtxt(inFile);
+  ifstream ifs(file.c_str(), ios::in);
+  if (!ifs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  istream* is = &ifs;
+  loadtxt(is);
 }
 
-
-void FeatureStats::savetxt(const std::string &file)
+void FeatureStats::savetxt(const string &file)
 {
-//      TRACE_ERR("saving the stats into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  savetxt(outFile);
+  ofstream ofs(file.c_str(), ios::out);
+  ostream* os = &ofs;
+  savetxt(os);
 }
 
-
-void FeatureStats::savetxt(std::ofstream& outFile)
+void FeatureStats::savetxt(ostream* os)
 {
-//      TRACE_ERR("saving the stats" << std::endl);
-  outFile << *this;
+  *os << *this;
 }
 
-void FeatureStats::savebin(std::ofstream& outFile)
+void FeatureStats::savetxt() {
+  savetxt(&cout);
+}
+
+void FeatureStats::savebin(ostream* os)
 {
-  outFile.write((char*) array_, GetArraySizeWithBytes());
+  os->write(reinterpret_cast<char*>(m_array),
+            static_cast<streamsize>(GetArraySizeWithBytes()));
 }
 
 ostream& operator<<(ostream& o, const FeatureStats& e)
@@ -230,7 +233,7 @@ bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
     if (f1.get(k) != f2.get(k))
       return false;
   }
-  
+
   return true;
 }
 //END_ADDED
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index 10ff31992..e2e63a714 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -1,16 +1,15 @@
 /*
  *  FeatureStats.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef FEATURE_STATS_H
-#define FEATURE_STATS_H
+#ifndef MERT_FEATURE_STATS_H_
+#define MERT_FEATURE_STATS_H_
 
 #include <cstring>
-#include <fstream>
 #include <iostream>
 #include <map>
 #include <string>
@@ -30,18 +29,16 @@ public:
   FeatureStatsType get(size_t id) const;
   void set(const std::string& name, FeatureStatsType value);
   void clear();
-  size_t size() const {
-    return fvector_.size();
-  }
+  size_t size() const { return m_fvector.size(); }
 
   void write(std::ostream& out, const std::string& sep = " ") const;
 
   SparseVector& operator-=(const SparseVector& rhs);
 
 private:
-  static name2id_t name2id_;
-  static id2name_t id2name_;
-  fvector_t fvector_;
+  static name2id_t m_name_to_id;
+  static id2name_t m_id_to_name;
+  fvector_t m_fvector;
 };
 
 SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
@@ -49,12 +46,12 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
 class FeatureStats
 {
 private:
-  size_t available_;
-  size_t entries_;
+  size_t m_available_size;
+  size_t m_entries;
 
   // TODO: Use smart pointer for exceptional-safety.
-  featstats_t array_;
-  SparseVector map_;
+  featstats_t m_array;
+  SparseVector m_map;
 
 public:
   FeatureStats();
@@ -69,64 +66,47 @@ public:
 
   void Copy(const FeatureStats &stats);
 
-  bool isfull() const {
-    return (entries_ < available_) ? 0 : 1;
-  }
+  bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
   void expand();
   void add(FeatureStatsType v);
   void addSparse(const string& name, FeatureStatsType v);
 
   void clear() {
-    memset((void*)array_, 0, GetArraySizeWithBytes());
-    map_.clear();
+    memset((void*)m_array, 0, GetArraySizeWithBytes());
+    m_map.clear();
   }
 
   void reset() {
-    entries_ = 0;
+    m_entries = 0;
     clear();
   }
 
-  inline FeatureStatsType get(size_t i) {
-    return array_[i];
-  }
-  inline FeatureStatsType get(size_t i)const {
-    return array_[i];
-  }
-  inline featstats_t getArray() const {
-    return array_;
-  }
-  inline const SparseVector& getSparse() const {
-    return map_;
-  }
+  FeatureStatsType get(size_t i) { return m_array[i]; }
+  FeatureStatsType get(size_t i)const { return m_array[i]; }
+  featstats_t getArray() const { return m_array; }
+
+  const SparseVector& getSparse() const { return m_map; }
 
   void set(std::string &theString);
 
-  inline size_t bytes() const {
-    return GetArraySizeWithBytes();
-  }
+  inline size_t bytes() const { return GetArraySizeWithBytes(); }
 
   size_t GetArraySizeWithBytes() const {
-    return entries_ * sizeof(FeatureStatsType);
+    return m_entries * sizeof(FeatureStatsType);
   }
 
-  inline size_t size() const {
-    return entries_;
-  }
+  size_t size() const { return m_entries; }
 
-  inline size_t available() const {
-    return available_;
-  }
+  size_t available() const { return m_available_size; }
 
   void savetxt(const std::string &file);
-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  inline void savetxt() {
-    savetxt("/dev/stdout");
-  }
+  void savetxt(std::ostream* os);
+  void savebin(std::ostream* os);
+  void savetxt();
 
   void loadtxt(const std::string &file);
-  void loadtxt(ifstream& inFile);
-  void loadbin(ifstream& inFile);
+  void loadtxt(std::istream* is);
+  void loadbin(std::istream* is);
 
   /**
    * Write the whole object to a stream.
@@ -138,4 +118,4 @@ public:
 bool operator==(const FeatureStats& f1, const FeatureStats& f2);
 //END_ADDED
 
-#endif  // FEATURE_STATS_H
+#endif  // MERT_FEATURE_STATS_H_
diff --git a/mert/FileStream.cpp b/mert/FileStream.cpp
index 11fd58e26..1a52e53fa 100644
--- a/mert/FileStream.cpp
+++ b/mert/FileStream.cpp
@@ -1,7 +1,7 @@
 #include "FileStream.h"
 
 #include <stdexcept>
-#include "gzfilebuf.h"
+#include "GzFileBuf.h"
 
 using namespace std;
 
@@ -13,16 +13,16 @@ bool IsGzipFile(const std::string &filename) {
 } // namespace
 
 inputfilestream::inputfilestream(const std::string &filePath)
-    : std::istream(0), m_streambuf(0), is_good(false)
+    : std::istream(0), m_streambuf(0), m_is_good(false)
 {
   // check if file is readable
   std::filebuf* fb = new std::filebuf();
-  is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);
+  m_is_good = (fb->open(filePath.c_str(), std::ios::in) != NULL);
 
   if (IsGzipFile(filePath)) {
     fb->close();
     delete fb;
-    m_streambuf = new gzfilebuf(filePath.c_str());
+    m_streambuf = new GzFileBuf(filePath.c_str());
   } else {
     m_streambuf = fb;
   }
@@ -40,11 +40,11 @@ void inputfilestream::close()
 }
 
 outputfilestream::outputfilestream(const std::string &filePath)
-    : std::ostream(0), m_streambuf(0), is_good(false)
+    : std::ostream(0), m_streambuf(0), m_is_good(false)
 {
   // check if file is readable
   std::filebuf* fb = new std::filebuf();
-  is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
+  m_is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
 
   if (IsGzipFile(filePath)) {
     throw runtime_error("Output to a zipped file not supported!");
diff --git a/mert/FileStream.h b/mert/FileStream.h
index afa8d9a29..3fd489cd7 100644
--- a/mert/FileStream.h
+++ b/mert/FileStream.h
@@ -1,7 +1,8 @@
-#ifndef FILESTREAM_H_
-#define FILESTREAM_H_
+#ifndef MERT_FILE_STREAM_H_
+#define MERT_FILE_STREAM_H_
 
 #include <fstream>
+#include <iostream>
 #include <streambuf>
 #include <string>
 
@@ -9,12 +10,13 @@ class inputfilestream : public std::istream
 {
 protected:
   std::streambuf *m_streambuf;
-  bool is_good;
+  bool m_is_good;
 
 public:
   explicit inputfilestream(const std::string &filePath);
-  ~inputfilestream();
-  bool good() const { return is_good; }
+  virtual ~inputfilestream();
+
+  bool good() const { return m_is_good; }
   void close();
 };
 
@@ -22,13 +24,14 @@ class outputfilestream : public std::ostream
 {
 protected:
   std::streambuf *m_streambuf;
-  bool is_good;
+  bool m_is_good;
 
 public:
   explicit outputfilestream(const std::string &filePath);
-  ~outputfilestream();
-  bool good() const { return is_good; }
+  virtual ~outputfilestream();
+
+  bool good() const { return m_is_good; }
   void close();
 };
 
-#endif // FILESTREAM_H_
+#endif // MERT_FILE_STREAM_H_
diff --git a/mert/GzFileBuf.cpp b/mert/GzFileBuf.cpp
new file mode 100644
index 000000000..9d3ccb588
--- /dev/null
+++ b/mert/GzFileBuf.cpp
@@ -0,0 +1,80 @@
+#include "GzFileBuf.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+
+GzFileBuf::GzFileBuf(const char* filename) {
+  m_gz_file = gzopen(filename, "rb");
+  if (m_gz_file == NULL) {
+    std::cerr << "ERROR: Failed to open " << filename << std::endl;
+    std::exit(1);
+  }
+  setg(m_buf + sizeof(int),     // beginning of putback area
+       m_buf + sizeof(int),     // read position
+       m_buf + sizeof(int));    // end position
+}
+
+GzFileBuf::~GzFileBuf() {
+  gzclose(m_gz_file);
+}
+
+int GzFileBuf::overflow(int_type c) {  // write hook; this buffer only reads, so every write attempt is rejected
+  throw std::ios_base::failure("GzFileBuf: writing is not supported");  // a bare `throw;` with no active exception calls std::terminate
+}
+
+// read one character
+int GzFileBuf::underflow() {
+  // is read position before end of m_buf?
+  if (gptr() < egptr()) {
+    return traits_type::to_int_type(*gptr());
+  }
+
+  /* process size of putback area
+   * - use number of characters read
+   * - but at most four
+   */
+  unsigned int num_put_back = static_cast<unsigned int>(gptr() - eback());
+  if (num_put_back > sizeof(int)) {
+    num_put_back = sizeof(int);
+  }
+
+  /* copy up to four characters previously read into
+   * the putback m_buf (area of first four characters)
+   */
+  std::memmove(m_buf + (sizeof(int) - num_put_back),
+               gptr() - num_put_back, num_put_back);
+
+  // read new characters
+  const int num = gzread(m_gz_file, m_buf + sizeof(int),
+                         kBufSize - sizeof(int));
+  if (num <= 0) {
+    // ERROR or EOF
+    return EOF;           // NOTE: the macro EOF defined in stdio.h
+  }
+
+  // reset m_buf pointers
+  setg(m_buf + (sizeof(int) - num_put_back),   // beginning of putback area
+       m_buf + sizeof(int),                // read position
+       m_buf + sizeof(int) + num);           // end of buffer
+
+  // return next character
+  return traits_type::to_int_type(*gptr());
+}
+
+std::streampos GzFileBuf::seekpos(  // seek hook; repositioning is not implemented for this buffer
+    std::streampos sp,
+    std::ios_base::openmode which) {
+  throw std::ios_base::failure("GzFileBuf: seeking is not supported");  // a bare `throw;` with no active exception calls std::terminate
+}
+
+std::streamsize GzFileBuf::xsgetn(char* s,  // bulk read of up to num bytes into s; returns the number copied
+                                  std::streamsize num) {
+  return std::streambuf::xsgetn(s, num);  // base version drains the get area filled by underflow() first; a direct gzread() skipped buffered bytes and could return -1
+}
+
+std::streamsize GzFileBuf::xsputn(const char* s,  // bulk write hook; this buffer only reads, so every write attempt is rejected
+                                  std::streamsize num) {
+  throw std::ios_base::failure("GzFileBuf: writing is not supported");  // a bare `throw;` with no active exception calls std::terminate
+}
diff --git a/mert/InterpolatedScorer.cpp b/mert/InterpolatedScorer.cpp
new file mode 100644
index 000000000..822cdbb78
--- /dev/null
+++ b/mert/InterpolatedScorer.cpp
@@ -0,0 +1,189 @@
+#include "InterpolatedScorer.h"
+#include "ScorerFactory.h"
+#include "Util.h"
+
+using namespace std;
+
+// TODO: This is too long. Consider creating a function for
+// initialization such as Init().
+//
+// Creates one sub-scorer per entry of "name" and reads the interpolation
+// weights from the "weights" configuration key ('+'-separated). Without
+// that key every sub-scorer gets the same weight; given weights are
+// rescaled so that they sum to 1.
+//
+// Throws runtime_error when no scorer was created, when the number of
+// weights differs from the number of scorers, or when the weights sum to
+// zero (they could not be normalised).
+InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
+    : Scorer(name,config)
+{
+  // name would be: HAMMING,BLEU or similar
+  string scorers = name;
+  while (scorers.length() > 0) {
+    string scorertype = "";
+    getNextPound(scorers, scorertype,",");
+    Scorer *scorer = ScorerFactory::getScorer(scorertype,config);
+    m_scorers.push_back(scorer);
+  }
+  if (m_scorers.size() == 0) {
+    throw runtime_error("There are no scorers");
+  }
+  cerr << "Number of scorers: " << m_scorers.size() << endl;
+
+  //TODO debug this
+  string wtype = getConfig("weights","");
+  if (wtype.length() == 0) {
+    // Default weights are uniform, i.e. 0.5 each for two scorers.
+    const float weight = 1.0 / m_scorers.size();
+    for (size_t i = 0; i < m_scorers.size(); ++i) {
+      m_scorer_weights.push_back(weight);
+    }
+  } else {
+    float tot = 0;
+    while (wtype.length() > 0) {
+      string scoreweight = "";
+      getNextPound(wtype,scoreweight,"+");
+      const float weight = atof(scoreweight.c_str());
+      m_scorer_weights.push_back(weight);
+      tot += weight;
+    }
+
+    // Validate before normalising so a malformed weight list is reported
+    // as such.
+    if (m_scorers.size() != m_scorer_weights.size()) {
+      throw runtime_error("The number of weights does not equal the number of scorers!");
+    }
+    // Dividing by a zero sum would turn every weight into inf/NaN.
+    if (tot == 0) {
+      throw runtime_error("The weights of the interpolated scorers sum to zero");
+    }
+
+    // Weights should add up to 1; rescale when they do not.
+    if (tot != float(1)) {
+      for (vector<float>::iterator it = m_scorer_weights.begin();
+           it != m_scorer_weights.end(); ++it) {
+        *it /= tot;
+      }
+    }
+  }
+  cerr << "The weights for the interpolated scorers are: " << endl;
+  for (vector<float>::const_iterator it = m_scorer_weights.begin();
+       it != m_scorer_weights.end(); ++it) {
+    cerr << *it << " " ;
+  }
+  cerr <<endl;
+}
+
+// Stores "data" and builds, for every sub-scorer, a separate ScoreData
+// holding only that scorer's slice of each combined ScoreStats entry.
+// The slices are consecutive: each scorer takes NumberOfScores() values,
+// starting where the previous scorer's slice ended.
+void InterpolatedScorer::setScoreData(ScoreData* data)
+{
+  m_score_data = data;
+
+  // Position of the current scorer's first value inside a combined entry.
+  size_t offset = 0;
+  for (ScopedVector<Scorer>::iterator scorer_it = m_scorers.begin();
+       scorer_it != m_scorers.end(); ++scorer_it) {
+    int width = (*scorer_it)->NumberOfScores();
+    ScoreData* slice = new ScoreData(*scorer_it);
+
+    for (size_t sent = 0; sent < data->size(); sent++) {
+      ScoreArray combined = data->get(sent);
+      size_t nbest_size = combined.size();
+
+      // The new array is indexed by the sentence number in decimal form.
+      std::stringstream index_stream;
+      index_stream << sent;
+      std::string index = index_stream.str();
+
+      ScoreArray sliced;
+      for (size_t hyp = 0; hyp < nbest_size; hyp++) {
+        ScoreStats all_stats = data->get(sent, hyp);
+        ScoreStats own_stats;
+        const size_t stop = size_t(width + offset);
+        for (size_t k = offset; k < stop; k++) {
+          ScoreStatsType value = all_stats.get(k);
+          own_stats.add(value);
+        }
+        sliced.add(own_stats);
+      }
+      sliced.setIndex(index);
+      slice->add(sliced);
+    }
+
+    // NOTE: This class takes the ownership of the heap allocated
+    // ScoreData objects to avoid the memory leak issues.
+    m_scorers_score_data.push_back(slice);
+
+    (*scorer_it)->setScoreData(slice);
+    offset += width;
+  }
+}
+
+
+/** Runs every sub-scorer on the candidates and accumulates the weighted
+    results into "scores". The first scorer appends its weighted values;
+    each later scorer adds to the entry at the same position. A weight of
+    zero is reported as a runtime_error. **/
+void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
+                               statscores_t& scores) const
+{
+  size_t scorer_index = 0;
+  for (ScopedVector<Scorer>::const_iterator scorer_it = m_scorers.begin();
+       scorer_it != m_scorers.end(); ++scorer_it, ++scorer_index) {
+    statscores_t partial;
+    (*scorer_it)->score(candidates,diffs,partial);
+
+    size_t position = 0;
+    for (statscores_t::iterator value_it = partial.begin();
+         value_it != partial.end(); ++value_it, ++position) {
+      const float weight = m_scorer_weights[scorer_index];
+      if (weight == 0) {
+        stringstream msg;
+        msg << "No weights for scorer" << scorer_index ;
+        throw runtime_error(msg.str());
+      }
+
+      if (scorer_index != 0) {
+        scores[position] += weight * (*value_it);
+      } else {
+        scores.push_back(weight * (*value_it));
+      }
+    }
+  }
+}
+
+// Passes the same list of reference files on to every sub-scorer.
+void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
+{
+  ScopedVector<Scorer>::iterator scorer_it = m_scorers.begin();
+  while (scorer_it != m_scorers.end()) {
+    (*scorer_it)->setReferenceFiles(referenceFiles);
+    ++scorer_it;
+  }
+}
+
+// Lets each sub-scorer compute its statistics for sentence "sid" and
+// hypothesis "text", joins their textual forms with single spaces (in
+// scorer order) and loads the joined string into "entry".
+void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+{
+  stringstream joined;
+  bool first = true;
+  for (ScopedVector<Scorer>::iterator scorer_it = m_scorers.begin();
+       scorer_it != m_scorers.end(); ++scorer_it) {
+    ScoreStats partial;
+    (*scorer_it)->prepareStats(sid, text, partial);
+    if (!first) {
+      joined <<  " ";
+    }
+    joined << partial;
+    first = false;
+  }
+  string serialized = joined.str();
+  entry.set(serialized);
+}
+
+// Splits "factors" at commas and hands the i-th piece to the i-th
+// sub-scorer. An empty string is a no-op; a piece count that differs
+// from the number of sub-scorers raises runtime_error.
+void InterpolatedScorer::setFactors(const string& factors)
+{
+  if (factors.empty()) {
+    return;
+  }
+
+  vector<string> per_scorer;
+  split(factors, ',', per_scorer);
+
+  const size_t num_scorers = m_scorers.size();
+  if (per_scorer.size() != num_scorers) {
+    throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
+  }
+
+  for (size_t idx = 0; idx != num_scorers; ++idx) {
+    m_scorers[idx]->setFactors(per_scorer[idx]);
+  }
+}
+
+// Applies the same filter command to every sub-scorer.
+void InterpolatedScorer::setFilter(const string& filterCommand)
+{
+  for (ScopedVector<Scorer>::iterator scorer_it = m_scorers.begin();
+       scorer_it != m_scorers.end(); ++scorer_it) {
+    (*scorer_it)->setFilter(filterCommand);
+  }
+}
diff --git a/mert/InterpolatedScorer.h b/mert/InterpolatedScorer.h
new file mode 100644
index 000000000..7ee7e5eba
--- /dev/null
+++ b/mert/InterpolatedScorer.h
@@ -0,0 +1,55 @@
+#ifndef MERT_INTERPOLATED_SCORER_H_
+#define MERT_INTERPOLATED_SCORER_H_
+
+#include <string>
+#include <vector>
+#include "Types.h"
+#include "ScoreData.h"
+#include "Scorer.h"
+#include "ScopedVector.h"
+
+/**
+  * Scorer that wraps several other scorers and combines their results,
+  * e.g. an interpolated HAMMING and BLEU scorer.
+  */
+class InterpolatedScorer : public Scorer
+{
+public:
+  // name would be: "HAMMING,BLEU" or similar
+  InterpolatedScorer(const string& name, const string& config);
+  virtual ~InterpolatedScorer() {}
+
+  virtual void score(const candidates_t& candidates, const diffs_t& diffs,
+                     statscores_t& scores) const;
+
+  virtual void setReferenceFiles(const vector<string>& referenceFiles);
+  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
+
+  // Total of NumberOfScores() over all wrapped scorers.
+  virtual size_t NumberOfScores() const {
+    size_t total = 0;
+    ScopedVector<Scorer>::const_iterator scorer_it = m_scorers.begin();
+    while (scorer_it != m_scorers.end()) {
+      total += (*scorer_it)->NumberOfScores();
+      ++scorer_it;
+    }
+    return total;
+  }
+
+  virtual void setScoreData(ScoreData* data);
+
+  /**
+   * Set the factors which should be used for this metric.
+   */
+  virtual void setFactors(const string& factors);
+
+  virtual void setFilter(const string& filterCommand);
+
+protected:
+  // The wrapped scorers, in the order given by the scorer name.
+  ScopedVector<Scorer> m_scorers;
+
+  // Takes ownership of the heap-allocated ScoreData objects that are
+  // handed to the wrapped scorers.
+  ScopedVector<ScoreData> m_scorers_score_data;
+
+  // One interpolation weight per wrapped scorer.
+  vector<float> m_scorer_weights;
+};
+
+#endif  // MERT_INTERPOLATED_SCORER_H_
diff --git a/mert/Jamfile b/mert/Jamfile
index b23078fbe..2eaa7143c 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -4,33 +4,44 @@ lib m ;
 
 lib mert_lib :
 Util.cpp
+GzFileBuf.cpp
 FileStream.cpp
 Timer.cpp
-ScoreStats.cpp ScoreArray.cpp ScoreData.cpp
+ScoreStats.cpp
+ScoreArray.cpp
+ScoreData.cpp
 ScoreDataIterator.cpp
-FeatureStats.cpp FeatureArray.cpp FeatureData.cpp
+FeatureStats.cpp
+FeatureArray.cpp
+FeatureData.cpp
 FeatureDataIterator.cpp
 Data.cpp
 BleuScorer.cpp
+SemposScorer.cpp
+SemposOverlapping.cpp
+InterpolatedScorer.cpp
 Point.cpp
 PerScorer.cpp
 Scorer.cpp
 ScorerFactory.cpp
 Optimizer.cpp
-TERsrc/alignmentStruct.cpp
-TERsrc/hashMap.cpp
-TERsrc/hashMapStringInfos.cpp
-TERsrc/stringHasher.cpp
-TERsrc/terAlignment.cpp
-TERsrc/terShift.cpp
-TERsrc/hashMapInfos.cpp
-TERsrc/infosHasher.cpp
-TERsrc/stringInfosHasher.cpp
-TERsrc/tercalc.cpp
-TERsrc/tools.cpp
+OptimizerFactory.cpp
+TER/alignmentStruct.cpp
+TER/hashMap.cpp
+TER/hashMapStringInfos.cpp
+TER/stringHasher.cpp
+TER/terAlignment.cpp
+TER/terShift.cpp
+TER/hashMapInfos.cpp
+TER/infosHasher.cpp
+TER/stringInfosHasher.cpp
+TER/tercalc.cpp
+TER/tools.cpp
 TerScorer.cpp
 CderScorer.cpp
 MergeScorer.cpp
+Vocabulary.cpp
+PreProcessFilter.cpp
 ../util//kenutil m ..//z ;
 
 exe mert : mert.cpp mert_lib ../moses/src//ThreadPool ;
@@ -43,6 +54,16 @@ exe pro : pro.cpp mert_lib ..//boost_program_options ;
 
 alias programs : mert extractor evaluator pro ;
 
+unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test reference_test : ReferenceTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test singleton_test : SingletonTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test timer_test : TimerTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test util_test : UtilTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test vocabulary_test : VocabularyTest.cpp mert_lib ..//boost_unit_test_framework ;
 
 install legacy : programs : <location>. ;
diff --git a/mert/MergeScorer.cpp b/mert/MergeScorer.cpp
index 0f0da39c3..7a80f1477 100644
--- a/mert/MergeScorer.cpp
+++ b/mert/MergeScorer.cpp
@@ -8,13 +8,14 @@
 #include "PerScorer.h"
 #include "CderScorer.h"
 
-#include "TERsrc/tercalc.h"
-#include "TERsrc/terAlignment.h"
+#include "TER/tercalc.h"
+#include "TER/terAlignment.h"
 
 using namespace TERCpp;
 
 MergeScorer::MergeScorer(const string& config)
-    : StatisticsBasedScorer("MERGE",config), kLENGTH(4) {}
+    : StatisticsBasedScorer("MERGE", config) {}
+
 MergeScorer::~MergeScorer() {}
 
 void MergeScorer::setReferenceFiles(const vector<string>& referenceFiles)
diff --git a/mert/MergeScorer.h b/mert/MergeScorer.h
index cc657b718..2d7030421 100644
--- a/mert/MergeScorer.h
+++ b/mert/MergeScorer.h
@@ -1,5 +1,5 @@
-#ifndef __MERGESCORER_H__
-#define __MERGESCORER_H__
+#ifndef MERT_MERGE_SCORER_H_
+#define MERT_MERGE_SCORER_H_
 
 #include <iostream>
 #include <set>
@@ -13,6 +13,8 @@ using namespace std;
 class PerScorer;
 class ScoreStats;
 
+const int kMergeScorerLength = 4;
+
 /**
  * Merge scoring.
  */
@@ -23,21 +25,16 @@ public:
 
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
-  void whoami() const {
-    cerr << "I AM MergeScorer" << endl;
-  }
+  virtual size_t NumberOfScores() const { return 0; }
 
 protected:
   friend class PerScorer;
   virtual float calculateScore(const vector<int>& comps) const;
 
  private:
-  const int kLENGTH;
-
   // no copying allowed
   MergeScorer(const MergeScorer&);
   MergeScorer& operator=(const MergeScorer&);
 };
 
-#endif  //__TERSCORER_H
+#endif  // MERT_MERGE_SCORER_H_
diff --git a/mert/Ngram.h b/mert/Ngram.h
new file mode 100644
index 000000000..846604f3f
--- /dev/null
+++ b/mert/Ngram.h
@@ -0,0 +1,98 @@
+#ifndef MERT_NGRAM_H_
+#define MERT_NGRAM_H_
+
+#include <vector>
+#include <map>
+#include <string>
+
+/** Simple n-gram counts built on std::map. The class provides the
+ * typical accessors and mutators, but intentionally does not allow
+ * erasing single elements.
+ */
+class NgramCounts {
+ public:
+  // Strict weak ordering used as the map comparator: n-grams are compared
+  // element by element; when one is a prefix of the other, the shorter
+  // one sorts first.
+  struct NgramComparator {
+    bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
+      std::size_t i;
+      const std::size_t as = a.size();
+      const std::size_t bs = b.size();
+      for (i = 0; i < as && i < bs; ++i) {
+        if (a[i] < b[i]) {
+          return true;
+        }
+        if (a[i] > b[i]) {
+          return false;
+        }
+      }
+      // entries are equal, shortest wins
+      return as < bs;
+    }
+  };
+
+  typedef std::vector<int> Key;
+  typedef int Value;
+  typedef std::map<Key, Value, NgramComparator>::iterator iterator;
+  typedef std::map<Key, Value, NgramComparator>::const_iterator const_iterator;
+
+  NgramCounts() : kDefaultCount(1) { }
+  virtual ~NgramCounts() { }
+
+  /**
+   * If the specified "ngram" is already stored, its count is incremented
+   * by one. Otherwise it is inserted with the default count.
+   */
+  void Add(const Key& ngram) {
+    // A single insert() both performs the lookup and, when the key is
+    // new, stores the default count; no second search is needed.
+    typedef std::map<Key, Value, NgramComparator>::value_type Entry;
+    const std::pair<iterator, bool> result =
+        m_counts.insert(Entry(ngram, kDefaultCount));
+    if (!result.second) {
+      ++result.first->second;
+    }
+  }
+
+  /**
+   * Return true iff the specified "ngram" is found in the container.
+   * On success the stored count is written to "*v"; otherwise "*v" is
+   * left untouched.
+   */
+  bool Lookup(const Key& ngram, Value* v) const {
+    const_iterator it = m_counts.find(ngram);
+    if (it == m_counts.end()) return false;
+    *v = it->second;
+    return true;
+  }
+
+  /**
+   * Clear all elements in the container.
+   */
+  void clear() { m_counts.clear(); }
+
+  /**
+   * Return true iff the container is empty.
+   */
+  bool empty() const { return m_counts.empty(); }
+
+  /**
+   * Return the number of elements in the container.
+   */
+  std::size_t size() const { return m_counts.size(); }
+
+  std::size_t max_size() const { return m_counts.max_size(); }
+
+  // Note: This is mainly used by unit tests.
+  int get_default_count() const { return kDefaultCount; }
+
+  iterator find(const Key& ngram) { return m_counts.find(ngram); }
+  const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }
+
+  // Like std::map::operator[]: inserts a zero count when "ngram" is absent.
+  Value& operator[](const Key& ngram) { return m_counts[ngram]; }
+
+  iterator begin() { return m_counts.begin(); }
+  const_iterator begin() const { return m_counts.begin(); }
+  iterator end() { return m_counts.end(); }
+  const_iterator end() const { return m_counts.end(); }
+
+ private:
+  // Count assigned to an n-gram the first time it is added.
+  const int kDefaultCount;
+  std::map<Key, Value, NgramComparator> m_counts;
+};
+
+#endif  // MERT_NGRAM_H_
diff --git a/mert/NgramTest.cpp b/mert/NgramTest.cpp
new file mode 100644
index 000000000..f2a8eb58b
--- /dev/null
+++ b/mert/NgramTest.cpp
@@ -0,0 +1,83 @@
+#include "Ngram.h"
+
+#define BOOST_TEST_MODULE MertNgram
+#include <boost/test/unit_test.hpp>
+
+// A single added n-gram must be retrievable with the expected contents
+// and a count of one.
+BOOST_AUTO_TEST_CASE(ngram_basic) {
+  const int kWords[] = {1, 2, 4};
+  const NgramCounts::Key key(kWords, kWords + 3);
+
+  NgramCounts counts;
+  counts.Add(key);
+
+  BOOST_REQUIRE(!counts.empty());
+  BOOST_CHECK_EQUAL(counts.size(), 1);
+
+  NgramCounts::const_iterator found = counts.find(key);
+  BOOST_CHECK(found != counts.end());
+  BOOST_CHECK_EQUAL(found->first.size(), key.size());
+  for (size_t pos = 0; pos < key.size(); ++pos) {
+    BOOST_CHECK_EQUAL(found->first[pos], key[pos]);
+  }
+  BOOST_CHECK_EQUAL(found->second, 1);
+}
+
+// Adding an equal key twice increments one entry; a different key gets
+// its own entry with the default count.
+BOOST_AUTO_TEST_CASE(ngram_Add) {
+  const int kPair[] = {1, 2};
+  NgramCounts counts;
+
+  const NgramCounts::Key key(kPair, kPair + 2);
+  counts.Add(key);
+  BOOST_REQUIRE(!counts.empty());
+  BOOST_CHECK_EQUAL(counts[key], counts.get_default_count());
+
+  // A second, separately built key with the same contents.
+  const NgramCounts::Key key2(kPair, kPair + 2);
+  counts.Add(key2);
+  BOOST_CHECK_EQUAL(counts.size(), 1);
+  BOOST_CHECK_EQUAL(counts[key], counts.get_default_count() + 1);
+  BOOST_CHECK_EQUAL(counts[key2], counts.get_default_count() + 1);
+
+  const NgramCounts::Key key3(1, 10);
+  counts.Add(key3);
+  BOOST_CHECK_EQUAL(counts.size(), 2);
+  BOOST_CHECK_EQUAL(counts[key3], counts.get_default_count());
+}
+
+// Lookup() must succeed for a stored key, fail for an unknown key, and
+// fail for every key once the container has been cleared.
+BOOST_AUTO_TEST_CASE(ngram_lookup) {
+  const int kStored[] = {1, 2, 4};
+  const NgramCounts::Key key(kStored, kStored + 3);
+
+  NgramCounts counts;
+  counts.Add(key);
+
+  {
+    NgramCounts::Value v;
+    BOOST_REQUIRE(counts.Lookup(key, &v));
+    BOOST_CHECK_EQUAL(v, 1);
+  }
+
+  // the case the key is not found.
+  {
+    const int kMissing[] = {0, 4};
+    const NgramCounts::Key key2(kMissing, kMissing + 2);
+    NgramCounts::Value v;
+    // Only the return value is checked; the content of "v" is
+    // meaningful only when the n-gram is found.
+    BOOST_REQUIRE(!counts.Lookup(key2, &v));
+  }
+
+  // test after clear
+  counts.clear();
+  BOOST_CHECK(counts.empty());
+  {
+    NgramCounts::Value v;
+    BOOST_CHECK(!counts.Lookup(key, &v));
+  }
+}
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 093c9ac1b..39e9aac1b 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -7,6 +7,7 @@
 #include <map>
 #include <cfloat>
 #include <iostream>
+#include <stdint.h>
 
 #include "Point.h"
 #include "Util.h"
@@ -32,36 +33,25 @@ inline float intersect(float m1, float b1, float m2, float b2)
 
 } // namespace
 
-
-void Optimizer::SetScorer(Scorer *_scorer)
-{
-  scorer = _scorer;
-}
-
-void Optimizer::SetFData(FeatureDataHandle _FData)
-{
-  FData = _FData;
-}
-
-Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
-    : scorer(NULL), FData(), number_of_random_directions(nrandom)
+Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
+  : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), m_positive(pos)
 {
-  // Warning: the init vector is a full set of parameters, of dimension pdim!
-  Point::pdim = Pd;
+  // Warning: the init vector is a full set of parameters, of dimension m_pdim!
+  Point::m_pdim = Pd;
 
   CHECK(start.size() == Pd);
-  Point::dim = i2O.size();
-  Point::optindices = i2O;
-  if (Point::pdim > Point::dim) {
-    for (unsigned int i = 0; i < Point::pdim; i++) {
+  Point::m_dim = i2O.size();
+  Point::m_opt_indices = i2O;
+  if (Point::m_pdim > Point::m_dim) {
+    for (unsigned int i = 0; i < Point::m_pdim; i++) {
       unsigned int j = 0;
-      while (j < Point::dim && i != i2O[j])
+      while (j < Point::m_dim && i != i2O[j])
         j++;
 
-      // The index i wasnt found on optindices, it is a fixed index,
+      // The index i wasnt found on m_opt_indices, it is a fixed index,
       // we use the value of the start vector.
-      if (j == Point::dim)
-        Point::fixedweights[i] = start[i];
+      if (j == Point::m_dim)
+        Point::m_fixed_weights[i] = start[i];
     }
   }
 }
@@ -72,12 +62,11 @@ statscore_t Optimizer::GetStatScore(const Point& param) const
 {
   vector<unsigned> bests;
   Get1bests(param, bests);
-  //copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
   statscore_t score = GetStatScore(bests);
   return score;
 }
 
-map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, pair<unsigned,unsigned> newdiff)
+map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, const pair<unsigned,unsigned>& newdiff)
 {
   map<float,diff_t>::iterator it = thresholdmap.find(newt);
   if (it != thresholdmap.end()) {
@@ -113,12 +102,12 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
     //cerr << "Sentence " << S << endl;
     multimap<float, unsigned> gradient;
     vector<float> f0;
-    f0.resize(FData->get(S).size());
-    for (unsigned j = 0; j < FData->get(S).size(); j++) {
+    f0.resize(m_feature_data->get(S).size());
+    for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
       // gradient of the feature function for this particular target sentence
-      gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
+      gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
       // compute the feature function at the origin point
-      f0[j] = origin * FData->get(S, j);
+      f0[j] = origin * m_feature_data->get(S, j);
     }
     // Now let's compute the 1best for each value of x.
 
@@ -255,7 +244,16 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
   CHECK(scores.size() == thresholdmap.size());
   for (unsigned int sc = 0; sc != scores.size(); sc++) {
     //cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
-    if (scores[sc] > bestscore) {
+
+    //enforce positivity
+    Point respoint = origin + direction * thrit->first;
+    bool is_valid = true;
+    for (unsigned int k=0; k < respoint.getdim(); k++) {
+      if (m_positive[k] && respoint[k] <= 0.0)
+        is_valid = false;
+    }
+
+    if (is_valid && scores[sc] > bestscore) {
       // This is the score for the interval [lit2->first, (lit2+1)->first]
       // unless we're at the last score, when it's the score
       // for the interval [lit2->first,+inf].
@@ -309,7 +307,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction,
 
 void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
 {
-  CHECK(FData);
+  CHECK(m_feature_data);
   bests.clear();
   bests.resize(size());
 
@@ -317,8 +315,8 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
     float bestfs = MIN_FLOAT;
     unsigned idx = 0;
     unsigned j;
-    for (j = 0; j < FData->get(i).size(); j++) {
-      float curfs = P * FData->get(i, j);
+    for (j = 0; j < m_feature_data->get(i).size(); j++) {
+      float curfs = P * m_feature_data->get(i, j);
       if (curfs > bestfs) {
         bestfs = curfs;
         idx = j;
@@ -331,15 +329,15 @@ void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
 
 statscore_t Optimizer::Run(Point& P) const
 {
-  if (!FData) {
+  if (!m_feature_data) {
     cerr << "error trying to optimize without Features loaded" << endl;
     exit(2);
   }
-  if (!scorer) {
+  if (!m_scorer) {
     cerr << "error trying to optimize without a Scorer loaded" << endl;
     exit(2);
   }
-  if (scorer->getReferenceSize() != FData->size()) {
+  if (m_scorer->getReferenceSize() != m_feature_data->size()) {
     cerr << "error length mismatch between feature file and score file" << endl;
     exit(2);
   }
@@ -360,13 +358,13 @@ statscore_t Optimizer::Run(Point& P) const
 }
 
 
-vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector <pair<unsigned,unsigned> > > thediffs) const
+vector<statscore_t> Optimizer::GetIncStatScore(const vector<unsigned>& thefirst, const vector<vector <pair<unsigned,unsigned> > >& thediffs) const
 {
-  CHECK(scorer);
+  CHECK(m_scorer);
 
   vector<statscore_t> theres;
 
-  scorer->score(thefirst, thediffs, theres);
+  m_scorer->score(thefirst, thediffs, theres);
   return theres;
 }
 
@@ -393,7 +391,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P) const
 
     Point  linebest;
 
-    for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
+    for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
       if (verboselevel() > 4) {
         //	cerr<<"minimizing along direction "<<d<<endl;
         cerr << "starting point: " << P << " => " << prevscore << endl;
@@ -441,7 +439,7 @@ statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
   // do specified number of random direction optimizations
   unsigned int nrun = 0;
   unsigned int nrun_no_change = 0;
-  for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++)
+  for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++)
   {
     // choose a random direction in which to optimize
     Point direction;
@@ -474,63 +472,3 @@ statscore_t RandomOptimizer::TrueRun(Point& P) const
   P.SetScore(score);
   return score;
 }
-
-//--------------------------------------
-
-vector<string> OptimizerFactory::typenames;
-
-void OptimizerFactory::SetTypeNames()
-{
-  if (typenames.empty()) {
-    typenames.resize(NOPTIMIZER);
-    typenames[POWELL]="powell";
-    typenames[RANDOM_DIRECTION]="random-direction";
-    typenames[RANDOM]="random";
-    // Add new type there
-  }
-}
-vector<string> OptimizerFactory::GetTypeNames()
-{
-  if (typenames.empty())
-    SetTypeNames();
-  return typenames;
-}
-
-OptimizerFactory::OptType OptimizerFactory::GetOType(const string& type)
-{
-  unsigned int thetype;
-  if (typenames.empty())
-    SetTypeNames();
-  for (thetype = 0; thetype < typenames.size(); thetype++)
-    if (typenames[thetype] == type)
-      break;
-  return((OptType)thetype);
-}
-
-Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o, vector<parameter_t> start, const string& type, unsigned int nrandom)
-{
-  OptType T = GetOType(type);
-  if (T == NOPTIMIZER) {
-    cerr << "Error: unknown Optimizer type " << type << endl;
-    cerr << "Known Algorithm are:" << endl;
-    unsigned int thetype;
-    for (thetype = 0; thetype < typenames.size(); thetype++)
-      cerr << typenames[thetype] << endl;
-    throw ("unknown Optimizer Type");
-  }
-
-  switch ((OptType)T) {
-    case POWELL:
-      return new SimpleOptimizer(dim, i2o, start, nrandom);
-      break;
-    case RANDOM_DIRECTION:
-      return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
-      break;
-    case RANDOM:
-      return new RandomOptimizer(dim, i2o, start, nrandom);
-      break;
-    default:
-      cerr << "Error: unknown optimizer" << type << endl;
-      return NULL;
-  }
-}
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index 69c7a7641..218a7b7e6 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -1,5 +1,5 @@
-#ifndef OPTIMIZER_H
-#define OPTIMIZER_H
+#ifndef MERT_OPTIMIZER_H_
+#define MERT_OPTIMIZER_H_
 
 #include <vector>
 #include <string>
@@ -10,7 +10,7 @@
 
 using namespace std;
 
-typedef float featurescore;
+static const float kMaxFloat = numeric_limits<float>::max();
 
 class Point;
 
@@ -20,18 +20,21 @@ class Point;
 class Optimizer
 {
 protected:
-  Scorer *scorer;      // no accessor for them only child can use them
-  FeatureDataHandle FData;  // no accessor for them only child can use them
-  unsigned int number_of_random_directions;
+  Scorer *m_scorer;      // no accessor for them only child can use them
+  FeatureDataHandle m_feature_data;  // no accessor for them only child can use them
+  unsigned int m_num_random_directions;
+
+  const vector<bool>& m_positive;
 
 public:
-  Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
-  void SetScorer(Scorer *_scorer);
-  void SetFData(FeatureDataHandle _FData);
+  Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& positive, const vector<parameter_t>& start, unsigned int nrandom);
+
+  void SetScorer(Scorer *scorer) { m_scorer = scorer; }
+  void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
   virtual ~Optimizer();
 
   unsigned size() const {
-    return FData ? FData->size() : 0;
+    return m_feature_data ? m_feature_data->size() : 0;
   }
 
   /**
@@ -53,12 +56,12 @@ public:
    * Given a set of nbests, get the Statistical score.
    */
   statscore_t GetStatScore(const vector<unsigned>& nbests) const {
-    return scorer->score(nbests);
+    return m_scorer->score(nbests);
   }
 
   statscore_t GetStatScore(const Point& param) const;
 
-  vector<statscore_t> GetIncStatScore(vector<unsigned> ref, vector<vector<pair<unsigned,unsigned> > >) const;
+  vector<statscore_t> GetIncStatScore(const vector<unsigned>& ref, const vector<vector<pair<unsigned,unsigned> > >& diffs) const;
 
   /**
    * Get the optimal Lambda and the best score in a particular direction from a given Point.
@@ -76,8 +79,9 @@ class SimpleOptimizer : public Optimizer
 private:
   const float kEPS;
 public:
-  SimpleOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
-      : Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
+  SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
+                  const vector<parameter_t>& start, unsigned int nrandom)
+    : Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001) {}
   virtual statscore_t TrueRun(Point&) const;
 };
 
@@ -89,8 +93,9 @@ class RandomDirectionOptimizer : public Optimizer
 private:
   const float kEPS;
 public:
-  RandomDirectionOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
-      : Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
+  RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
+                           const vector<parameter_t>& start, unsigned int nrandom)
+      : Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001) {}
   virtual statscore_t TrueRun(Point&) const;
 };
 
@@ -100,36 +105,10 @@ public:
 class RandomOptimizer : public Optimizer
 {
 public:
-  RandomOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
-      : Optimizer(dim, i2O, start, nrandom) {}
+  RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
+                  const vector<parameter_t>& start, unsigned int nrandom)
+      : Optimizer(dim, i2O, positive, start, nrandom) {}
   virtual statscore_t TrueRun(Point&) const;
 };
 
-class OptimizerFactory
-{
-public:
-  static vector<string> GetTypeNames();
-  static Optimizer* BuildOptimizer(unsigned dim, vector<unsigned> tooptimize, vector<parameter_t> start, const string& type, unsigned int nrandom);
-
-private:
-  OptimizerFactory() {}
-  ~OptimizerFactory() {}
-
-  // Add new optimizer here BEFORE NOPTIMZER
-  enum OptType {
-    POWELL = 0,
-    RANDOM_DIRECTION = 1,
-    RANDOM,
-    NOPTIMIZER
-  };
-
-  // Get optimizer type.
-  static OptType GetOType(const string& type);
-
-  // Setup optimization types.
-  static void SetTypeNames();
-
-  static vector<string> typenames;
-};
-
 #endif  // OPTIMIZER_H
diff --git a/mert/OptimizerFactory.cpp b/mert/OptimizerFactory.cpp
new file mode 100644
index 000000000..6cafd15b0
--- /dev/null
+++ b/mert/OptimizerFactory.cpp
@@ -0,0 +1,67 @@
+#include "OptimizerFactory.h"
+#include "Optimizer.h"
+
+using namespace std;
+
+// Definition of the static table of optimizer type names. It starts out
+// empty and is filled by SetTypeNames(), indexed by OptimizerType.
+vector<string> OptimizerFactory::m_type_names;
+
+// Fills the type-name table on first use; later calls leave it untouched.
+void OptimizerFactory::SetTypeNames()
+{
+  if (!m_type_names.empty()) {
+    return;
+  }
+
+  m_type_names.resize(NOPTIMIZER);
+  m_type_names[POWELL] = "powell";
+  m_type_names[RANDOM_DIRECTION] = "random-direction";
+  m_type_names[RANDOM] = "random";
+  // Add new type there
+}
+// Returns a copy of the known optimizer type names, initialising the
+// table first when it is still empty.
+vector<string> OptimizerFactory::GetTypeNames()
+{
+  const bool needs_init = m_type_names.empty();
+  if (needs_init) {
+    SetTypeNames();
+  }
+  return m_type_names;
+}
+
+// Maps a type name to its OptimizerType. The result is the position of
+// "type" in the name table; when the name is not listed, the result is
+// the table size converted to OptimizerType.
+OptimizerFactory::OptimizerType OptimizerFactory::GetOptimizerType(const string& type)
+{
+  if (m_type_names.empty()) {
+    SetTypeNames();
+  }
+
+  unsigned int index = 0;
+  while (index < m_type_names.size() && !(m_type_names[index] == type)) {
+    index++;
+  }
+  return((OptimizerType)index);
+}
+
+// Creates a new optimizer of the kind named by "type"; the remaining
+// arguments are passed through to the optimizer's constructor. The
+// returned object is allocated with new.
+//
+// For an unknown type name the known names are written to cerr and a
+// string literal is thrown.
+Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,
+                                            const vector<unsigned>& i2o,
+                                            const std::vector<bool>& positive,
+                                            const vector<parameter_t>& start,
+                                            const string& type,
+                                            unsigned int nrandom)
+{
+  const OptimizerType kind = GetOptimizerType(type);
+
+  if (kind == NOPTIMIZER) {
+    cerr << "Error: unknown Optimizer type " << type << endl;
+    cerr << "Known Algorithm are:" << endl;
+    for (vector<string>::const_iterator name_it = m_type_names.begin();
+         name_it != m_type_names.end(); ++name_it) {
+      cerr << *name_it << endl;
+    }
+    throw ("unknown Optimizer Type");
+  }
+
+  if (kind == POWELL) {
+    return new SimpleOptimizer(dim, i2o, positive, start, nrandom);
+  }
+  if (kind == RANDOM_DIRECTION) {
+    return new RandomDirectionOptimizer(dim, i2o, positive, start, nrandom);
+  }
+  if (kind == RANDOM) {
+    return new RandomOptimizer(dim, i2o, positive, start, nrandom);
+  }
+
+  // Fallback for an enumerator without a matching optimizer class.
+  cerr << "Error: unknown optimizer" << type << endl;
+  return NULL;
+}
diff --git a/mert/OptimizerFactory.h b/mert/OptimizerFactory.h
new file mode 100644
index 000000000..3d8716115
--- /dev/null
+++ b/mert/OptimizerFactory.h
@@ -0,0 +1,42 @@
+#ifndef MERT_OPTIMIZER_FACTORY_H_
+#define MERT_OPTIMIZER_FACTORY_H_
+
+#include <vector>
+#include "Types.h"
+
+class Optimizer;
+
+class OptimizerFactory
+{
+ public:
+  // NOTE: Add new optimizer here BEFORE NOPTIMIZER
+  enum OptimizerType {
+    POWELL = 0,
+    RANDOM_DIRECTION = 1,
+    RANDOM,
+    NOPTIMIZER
+  };
+
+  static std::vector<std::string> GetTypeNames();  // names, indexed by OptimizerType
+
+  // Setup optimization types.
+  static void SetTypeNames();
+
+  // Returns the type whose name equals `type`, or NOPTIMIZER for an unknown name.
+  static OptimizerType GetOptimizerType(const std::string& type);
+
+  // Builds the optimizer named `type`; the result is allocated with new.
+  static Optimizer* BuildOptimizer(unsigned dim,
+                                   const std::vector<unsigned>& to_optimize,
+                                   const std::vector<bool>& positive,
+                                   const std::vector<parameter_t>& start,
+                                   const std::string& type,
+                                   unsigned int nrandom);
+
+ private:
+  OptimizerFactory() {}   // static-only class: construction is private
+  ~OptimizerFactory() {}
+  static std::vector<std::string> m_type_names;
+};
+
+#endif  // MERT_OPTIMIZER_FACTORY_H_
diff --git a/mert/OptimizerFactoryTest.cpp b/mert/OptimizerFactoryTest.cpp
new file mode 100644
index 000000000..53c2d252a
--- /dev/null
+++ b/mert/OptimizerFactoryTest.cpp
@@ -0,0 +1,49 @@
+#include "OptimizerFactory.h"
+#include "Optimizer.h"
+
+#define BOOST_TEST_MODULE MertOptimizerFactory
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_ptr.hpp>
+
+namespace {
+
+// True iff BuildOptimizer returns a non-NULL optimizer; it is freed on return.
+inline bool CheckBuildOptimizer(unsigned dim, const vector<unsigned>& to_optimize,
+                                const vector<bool>& positive,
+                                const vector<parameter_t>& start,
+                                const string& type, unsigned int num_random) {
+  boost::scoped_ptr<Optimizer> built(OptimizerFactory::BuildOptimizer(
+      dim, to_optimize, positive, start, type, num_random));
+  return built.get() != NULL;
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(optimizer_type) {  // each known name maps to its enum value
+  BOOST_CHECK_EQUAL(OptimizerFactory::GetOptimizerType("powell"),
+                    OptimizerFactory::POWELL);
+  BOOST_CHECK_EQUAL(OptimizerFactory::GetOptimizerType("random"),
+                    OptimizerFactory::RANDOM);
+  BOOST_CHECK_EQUAL(OptimizerFactory::GetOptimizerType("random-direction"),
+                    OptimizerFactory::RANDOM_DIRECTION);
+}
+
+BOOST_AUTO_TEST_CASE(optimizer_build) {
+  const unsigned dim = 3;
+  std::vector<unsigned> to_optimize;
+  to_optimize.push_back(1);
+  to_optimize.push_back(2);
+  to_optimize.push_back(3);
+  std::vector<parameter_t> start;
+  start.push_back(0.3);
+  start.push_back(0.1);
+  start.push_back(0.2);
+  const unsigned int num_random = 1;
+  std::vector<bool> positive(dim);
+  for (unsigned int k = 0; k < dim; k++)
+    positive[k] = false;
+  // Every known optimizer name must produce a non-NULL optimizer.
+  BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "powell", num_random));
+  BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "random", num_random));
+  BOOST_CHECK(CheckBuildOptimizer(dim, to_optimize, positive, start, "random-direction", num_random));
+}
diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp
index 76c2765dd..67b633872 100644
--- a/mert/PerScorer.cpp
+++ b/mert/PerScorer.cpp
@@ -29,6 +29,7 @@ void PerScorer::setReferenceFiles(const vector<string>& referenceFiles)
   string line;
   int sid = 0;
   while (getline(in,line)) {
+    line = this->preprocessSentence(line);
     vector<int> tokens;
     TokenizeAndEncode(line, tokens);
     m_ref_tokens.push_back(multiset<int>());
@@ -52,10 +53,13 @@ void PerScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
     msg << "Sentence id (" << sid << ") not found in reference set";
     throw runtime_error(msg.str());
   }
+
+  string sentence = this->preprocessSentence(text);
+
   // Calculate correct, output_length and ref_length for
   // the line and store it in entry
   vector<int> testtokens;
-  TokenizeAndEncode(text, testtokens);
+  TokenizeAndEncode(sentence, testtokens);
   multiset<int> testtokens_all(testtokens.begin(),testtokens.end());
   set<int> testtokens_unique(testtokens.begin(),testtokens.end());
   int correct = 0;
diff --git a/mert/PerScorer.h b/mert/PerScorer.h
index f06e2955a..d32e14029 100644
--- a/mert/PerScorer.h
+++ b/mert/PerScorer.h
@@ -1,9 +1,7 @@
-#ifndef __PERSCORER_H__
-#define __PERSCORER_H__
+#ifndef MERT_PER_SCORER_H_
+#define MERT_PER_SCORER_H_
 
-#include <iostream>
 #include <set>
-#include <sstream>
 #include <string>
 #include <vector>
 #include "Types.h"
@@ -27,18 +25,9 @@ public:
 
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-
-  virtual size_t NumberOfScores() const {
-    // cerr << "PerScorer: 3" << endl;
-    return 3;
-  }
-
+  virtual size_t NumberOfScores() const { return 3; }
   virtual float calculateScore(const vector<int>& comps) const;
 
-  void whoami() const {
-    cerr << "I AM PerScorer" << std::endl;
-  }
-
 private:
   // no copying allowed
   PerScorer(const PerScorer&);
@@ -49,4 +38,4 @@ private:
   vector<multiset<int> > m_ref_tokens;
 };
 
-#endif  // __PERSCORER_H__
+#endif  // MERT_PER_SCORER_H_
diff --git a/mert/Point.cpp b/mert/Point.cpp
index f298647dd..299e2b4d0 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -3,45 +3,46 @@
 #include <cmath>
 #include <cstdlib>
 #include "util/check.hh"
-#include <limits>
 #include "FeatureStats.h"
+#include "Optimizer.h"
 
 using namespace std;
 
-vector<unsigned> Point::optindices;
+vector<unsigned> Point::m_opt_indices;
 
-unsigned Point::dim = 0;
+unsigned Point::m_dim = 0;
 
-map<unsigned,statscore_t> Point::fixedweights;
+map<unsigned,statscore_t> Point::m_fixed_weights;
 
-unsigned Point::pdim = 0;
-unsigned Point::ncall = 0;
+unsigned Point::m_pdim = 0;
+unsigned Point::m_ncall = 0;
 
 vector<parameter_t> Point::m_min;
 vector<parameter_t> Point::m_max;
 
-Point::Point() : vector<parameter_t>(dim), score_(0.0) {}
+Point::Point() : vector<parameter_t>(m_dim), m_score(0.0) {}
 
-//Can initialize from a vector of dim or pdim
+// Can initialize from a vector of size m_dim or m_pdim
 Point::Point(const vector<parameter_t>& init,
              const vector<parameter_t>& min,
              const vector<parameter_t>& max)
-    : vector<parameter_t>(Point::dim), score_(0.0)
+    : vector<parameter_t>(Point::m_dim), m_score(0.0)
 {
-  m_min.resize(Point::dim);
-  m_max.resize(Point::dim);
-  if(init.size()==dim) {
-    for (unsigned int i=0; i<Point::dim; i++) {
-      operator[](i)=init[i];
+  m_min.resize(Point::m_dim);
+  m_max.resize(Point::m_dim);
+  if (init.size() == m_dim) {
+    for (unsigned int i = 0; i < Point::m_dim; i++) {
+      operator[](i) = init[i];
       m_min[i] = min[i];
       m_max[i] = max[i];
     }
   } else {
-    CHECK(init.size()==pdim);
-    for (unsigned int i=0; i<Point::dim; i++) {
-      operator[](i)=init[optindices[i]];
-      m_min[i] = min[optindices[i]];
-      m_max[i] = max[optindices[i]];
+    CHECK(init.size() == m_pdim);
+    CHECK(m_opt_indices.size() == Point::m_dim);
+    for (unsigned int i = 0; i < Point::m_dim; i++) {
+      operator[](i) = init[m_opt_indices[i]];
+      m_min[i] = min[m_opt_indices[i]];
+      m_max[i] = max[m_opt_indices[i]];
     }
   }
 }
@@ -50,9 +51,9 @@ Point::~Point() {}
 
 void Point::Randomize()
 {
-  CHECK(m_min.size()==Point::dim);
-  CHECK(m_max.size()==Point::dim);
-  for (unsigned int i=0; i<size(); i++) {
+  CHECK(m_min.size() == Point::m_dim);
+  CHECK(m_max.size() == Point::m_dim);
+  for (unsigned int i = 0; i < size(); i++) {
     operator[](i) = m_min[i] +
                     static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]);
   }
@@ -60,21 +61,22 @@ void Point::Randomize()
 
 double Point::operator*(const FeatureStats& F) const
 {
-  ncall++; // to track performance
-  double prod=0.0;
-  if(OptimizeAll())
+  m_ncall++; // to track performance
+  double prod = 0.0;
+  if (OptimizeAll())
     for (unsigned i=0; i<size(); i++)
-      prod+= operator[](i)*F.get(i);
+      prod += operator[](i) * F.get(i);
   else {
-    for (unsigned i=0; i<size(); i++)
-      prod+= operator[](i)*F.get(optindices[i]);
-    for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
-      prod+=it->second*F.get(it->first);
+    for (unsigned i = 0; i < size(); i++)
+      prod += operator[](i) * F.get(m_opt_indices[i]);
+    for(map<unsigned, float>::iterator it = m_fixed_weights.begin();
+        it != m_fixed_weights.end(); ++it)
+      prod += it->second * F.get(it->first);
   }
   return prod;
 }
 
-Point Point::operator+(const Point& p2) const
+const Point Point::operator+(const Point& p2) const
 {
   CHECK(p2.size() == size());
   Point Res(*this);
@@ -82,7 +84,7 @@ Point Point::operator+(const Point& p2) const
     Res[i] += p2[i];
   }
 
-  Res.score_ = numeric_limits<statscore_t>::max();
+  Res.m_score = kMaxFloat;
   return Res;
 }
 
@@ -92,23 +94,24 @@ void Point::operator+=(const Point& p2)
   for (unsigned i = 0; i < size(); i++) {
     operator[](i) += p2[i];
   }
-  score_ = numeric_limits<statscore_t>::max();
+  m_score = kMaxFloat;
 }
 
-Point Point::operator*(float l) const
+const Point Point::operator*(float l) const
 {
   Point Res(*this);
   for (unsigned i = 0; i < size(); i++) {
     Res[i] *= l;
   }
-  Res.score_ = numeric_limits<statscore_t>::max();
+  Res.m_score = kMaxFloat;
   return Res;
 }
 
 ostream& operator<<(ostream& o, const Point& P)
 {
-  vector<parameter_t> w = P.GetAllWeights();
-  for (unsigned int i = 0; i < Point::pdim; i++) {
+  vector<parameter_t> w;
+  P.GetAllWeights(w);
+  for (unsigned int i = 0; i < Point::m_pdim; i++) {
     o << w[i] << " ";
   }
   return o;
@@ -117,39 +120,39 @@ ostream& operator<<(ostream& o, const Point& P)
 void Point::NormalizeL2()
 {
   parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= operator[](i)*operator[](i);
-  if(norm!=0.0) {
-    norm=sqrt(norm);
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
+  for (unsigned int i = 0; i < size(); i++)
+    norm += operator[](i) * operator[](i);
+  if (norm != 0.0) {
+    norm = sqrt(norm);
+    for (unsigned int i = 0; i < size(); i++)
+      operator[](i) /= norm;
   }
 }
 
 
 void Point::NormalizeL1()
 {
-  parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= abs(operator[](i));
-  if(norm!=0.0) {
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
+  parameter_t norm = 0.0;
+  for (unsigned int i = 0; i < size(); i++)
+    norm += abs(operator[](i));
+  if (norm != 0.0) {
+    for (unsigned int i = 0; i < size(); i++)
+      operator[](i) /= norm;
   }
 }
 
 
-vector<parameter_t> Point::GetAllWeights()const
+void Point::GetAllWeights(vector<parameter_t>& w) const
 {
-  vector<parameter_t> w;
-  if(OptimizeAll()) {
-    w=*this;
+  if (OptimizeAll()) {
+    w = *this;
   } else {
-    w.resize(pdim);
-    for (unsigned int i=0; i<size(); i++)
-      w[optindices[i]]=operator[](i);
-    for(map<unsigned,float >::iterator it=fixedweights.begin(); it!=fixedweights.end(); it++)
+    w.resize(m_pdim);
+    for (unsigned int i = 0; i < size(); i++)
+      w[m_opt_indices[i]] = operator[](i);
+    for (map<unsigned,float>::const_iterator it = m_fixed_weights.begin();
+         it != m_fixed_weights.end(); ++it) {
       w[it->first]=it->second;
+    }
   }
-  return w;
 }
diff --git a/mert/Point.h b/mert/Point.h
index 55d173215..46b23c9d9 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -1,7 +1,7 @@
-#ifndef POINT_H
-#define POINT_H
+#ifndef MERT_POINT_H_
+#define MERT_POINT_H_
 
-#include <fstream>
+#include <ostream>
 #include <map>
 #include <vector>
 #include "Types.h"
@@ -16,52 +16,55 @@ class Optimizer;
 class Point : public vector<parameter_t>
 {
   friend class Optimizer;
+
 private:
   /**
    * The indices over which we optimize.
    */
-  static vector<unsigned int> optindices;
+  static vector<unsigned int> m_opt_indices;
 
   /**
-   * Dimension of optindices and of the parent vector.
+   * Dimension of m_opt_indices and of the parent vector.
    */
-  static unsigned int dim;
+  static unsigned int m_dim;
 
   /**
    * Fixed weights in case of partial optimzation.
    */
-  static map<unsigned int,parameter_t> fixedweights;
+  static map<unsigned int,parameter_t> m_fixed_weights;
 
   /**
    * Total size of the parameter space; we have
-   * pdim = FixedWeight.size() + optinidices.size().
+   * m_pdim = m_fixed_weights.size() + m_opt_indices.size().
    */
-  static unsigned int pdim;
-  static unsigned int ncall;
+  static unsigned int m_pdim;
+  static unsigned int m_ncall;
 
   /**
-   * The limits for randomization, both vectors are of full length, pdim.
+   * The limits for randomization; the Point constructor resizes both to m_dim.
    */
   static vector<parameter_t> m_min;
   static vector<parameter_t> m_max;
 
-  statscore_t score_;
+  statscore_t m_score;
 
 public:
-  static unsigned int getdim() {
-    return dim;
-  }
-  static unsigned int getpdim() {
-    return pdim;
-  }
-  static void setpdim(size_t pd) {
-    pdim = pd;
+  static unsigned int getdim() { return m_dim; }
+  static void setdim(size_t d) { m_dim = d; }
+
+  static unsigned int getpdim() { return m_pdim; }
+  static void setpdim(size_t pd) { m_pdim = pd; }
+
+  static void set_optindices(const vector<unsigned int>& indices) {
+    m_opt_indices = indices;
   }
-  static void setdim(size_t d) {
-    dim = d;
+
+  static const vector<unsigned int>& get_optindices() {
+    return m_opt_indices;
   }
+
   static bool OptimizeAll() {
-    return fixedweights.empty();
+    return m_fixed_weights.empty();
   }
 
   Point();
@@ -74,12 +77,12 @@ public:
 
   // Compute the feature function
   double operator*(const FeatureStats&) const;
-  Point operator+(const Point&) const;
+  const Point operator+(const Point&) const;
   void operator+=(const Point&);
-  Point operator*(float) const;
+  const Point operator*(float) const;
 
   /**
-   * Write the Whole featureweight to a stream (ie pdim float).
+   * Write the Whole featureweight to a stream (ie m_pdim float).
    */
   friend ostream& operator<<(ostream& o,const Point& P);
 
@@ -88,16 +91,13 @@ public:
   void NormalizeL1();
 
   /**
+   * Fill w with a vector of size m_pdim where all weights have been
+   * Return a vector of size m_pdim where all weights have been
    * put (including fixed ones).
    */
-  vector<parameter_t> GetAllWeights() const;
-
-  statscore_t GetScore() const {
-    return score_;
-  }
+  void GetAllWeights(vector<parameter_t>& w) const;
 
-  void SetScore(statscore_t score) { score_ = score; }
+  statscore_t GetScore() const { return m_score; }
+  void SetScore(statscore_t score) { m_score = score; }
 };
 
-#endif  // POINT_H
+#endif  // MERT_POINT_H_
diff --git a/mert/PointTest.cpp b/mert/PointTest.cpp
new file mode 100644
index 000000000..d7d6b031c
--- /dev/null
+++ b/mert/PointTest.cpp
@@ -0,0 +1,60 @@
+#include "Point.h"
+
+#define BOOST_TEST_MODULE MertPoint
+#include <boost/test/unit_test.hpp>
+
+#include "Optimizer.h"
+#include "Util.h"
+
+using namespace std;
+
+BOOST_AUTO_TEST_CASE(point_operators) {
+  const unsigned int dim = 5;
+  vector<float> init(dim);
+  init[0] = 1.0f;
+  init[1] = 1.0f;
+  init[2] = 0.3f;
+  init[3] = 0.2f;
+  init[4] = 0.3f;
+
+  vector<float> min(dim, 0.0f);  // bounds are constructor arguments only;
+  vector<float> max(dim, 0.0f);  // their values are not checked in this test
+  // The static dimension must be set before any Point is constructed.
+  Point::setdim(dim);
+  BOOST_REQUIRE(dim == Point::getdim());
+
+  // Test operator '+'
+  {
+    Point p1(init, min, max);
+    Point p2(init, min, max);
+    Point p3 = p1 + p2;
+    for (size_t i = 0; i < p3.size(); ++i) {
+      BOOST_CHECK(IsAlmostEqual(init[i] * 2.0f, p3[i]));
+    }
+    BOOST_CHECK_EQUAL(p3.GetScore(), kMaxFloat);  // the sum's score is reset
+  }
+
+  // Test operator '+='
+  {
+    Point p1(init, min, max);
+    Point p2(init, min, max);
+    p1 += p2;
+
+    for (size_t i = 0; i < p1.size(); ++i) {
+      BOOST_CHECK(IsAlmostEqual(init[i] * 2.0f, p1[i]));
+    }
+    BOOST_CHECK_EQUAL(p1.GetScore(), kMaxFloat);  // in-place add resets it too
+  }
+
+  // Test operator '*'
+  {
+    Point p1(init, min, max);
+    const Point p2 = p1 * 2.0;  // scalar multiplication (operator*(float))
+
+    BOOST_REQUIRE(p1.size() == p2.size());
+    for (size_t i = 0; i < p2.size(); ++i) {
+      BOOST_CHECK(IsAlmostEqual(init[i] * 2.0f, p2[i]));
+    }
+    BOOST_CHECK_EQUAL(p2.GetScore(), kMaxFloat);
+  }
+}
diff --git a/mert/PreProcessFilter.cpp b/mert/PreProcessFilter.cpp
new file mode 100644
index 000000000..d72907713
--- /dev/null
+++ b/mert/PreProcessFilter.cpp
@@ -0,0 +1,135 @@
+#include "PreProcessFilter.h"
+
+#include <iostream>
+#include <cstdlib>
+#include <unistd.h>
+#include <csignal>
+
+using namespace std;
+
+#define CHILD_STDIN_READ pipefds_input[0]
+#define CHILD_STDIN_WRITE pipefds_input[1]
+#define CHILD_STDOUT_READ pipefds_output[0]
+#define CHILD_STDOUT_WRITE pipefds_output[1]
+#define CHILD_STDERR_READ pipefds_error[0]
+#define CHILD_STDERR_WRITE pipefds_error[1]
+
+// SIGUSR1 handler: the child process sends SIGUSR1 when its exec call fails.
+void exec_failed (int sig)
+{
+  cerr << "Exec failed. Child process couldn't be launched." << endl;
+  exit (EXIT_FAILURE);  // NOTE(review): cerr/exit are not async-signal-safe
+}
+
+// Launches `filterCommand` through "/bin/bash -c" in a child process and
+// connects to it with two pipes: m_toFilter writes to the child's stdin and
+// m_fromFilter reads from the child's stdout. Also installs exec_failed() as
+// the SIGUSR1 handler, which the child signals when /bin/bash cannot be
+// executed. A failing sigaction(), pipe() or fork() is reported with perror()
+// and terminates the process with EXIT_FAILURE.
+// NOTE(review): the child's pid is not stored, so it is never waited for.
+PreProcessFilter::PreProcessFilter(const string& filterCommand)
+    : m_toFilter(NULL),
+      m_fromFilter(NULL)
+{
+    // Child error signal install
+    // sigaction is the replacement for the traditional signal() method
+    struct sigaction action;
+    action.sa_handler = exec_failed;
+    sigemptyset(&action.sa_mask);
+    action.sa_flags = 0;
+    if (sigaction(SIGUSR1, &action, NULL) < 0)
+    {
+        perror("SIGUSR1 install error");
+        exit(EXIT_FAILURE);
+    }
+
+    // For each pipe, element [0] is the read end and [1] the write end.
+    int pipe_status;
+    int pipefds_input[2];
+    int pipefds_output[2];
+
+    // Create the pipes
+    // We do this before the fork so both processes will know about
+    // the same pipe and they can communicate.
+
+    pipe_status = pipe(pipefds_input);
+    if (pipe_status == -1)
+    {
+        perror("Error creating the pipe");
+        exit(EXIT_FAILURE);
+    }
+
+    pipe_status = pipe(pipefds_output);
+    if (pipe_status == -1)
+    {
+        perror("Error creating the pipe");
+        exit(EXIT_FAILURE);
+    }
+
+    pid_t pid;
+    // Create child process; both processes continue from here
+    pid = fork();
+
+    if (pid == pid_t(0))
+    {
+        // Child process
+
+        // When the child process finishes sends a SIGCHLD signal
+        // to the parent
+
+        // Tie the standard input and output streams to the
+        // appropriate pipe ends
+        // The file descriptor 0 is the standard input
+        // We tie it to the read end of the pipe as we will use
+        // this end of the pipe to read from it
+        // The standard error stream (fd 2) is not redirected.
+        dup2 (CHILD_STDIN_READ,0);
+        dup2 (CHILD_STDOUT_WRITE,1);
+        // Close in the child the unused ends of the pipes
+        close(CHILD_STDIN_WRITE);
+        close(CHILD_STDOUT_READ);
+
+        // Execute the program
+        execl("/bin/bash", "bash", "-c", filterCommand.c_str() , (char*)NULL);
+
+        // execl() only returns on failure: tell the parent the exec failed.
+        // Leave with _exit() rather than exit(): this process is a fork of
+        // the parent, so exit() would run the parent's atexit handlers and
+        // flush its inherited stdio buffers a second time.
+        kill(getppid(), SIGUSR1);
+        _exit(EXIT_FAILURE);
+    }
+    else if (pid > pid_t(0))
+    {
+        // Parent
+
+        // Close in the parent the unused ends of the pipes
+        close(CHILD_STDIN_READ);
+        close(CHILD_STDOUT_WRITE);
+
+        // Wrap the two ends kept by the parent in stream objects.
+        m_toFilter = new ofdstream(CHILD_STDIN_WRITE);
+        m_fromFilter = new ifdstream(CHILD_STDOUT_READ);
+    }
+    else
+    {
+        // fork() returned a negative value: no child was created.
+        perror("Error: fork failed");
+        exit(EXIT_FAILURE);
+    }
+}
+
+string PreProcessFilter::ProcessSentence(const string& sentence)
+{
+    *m_toFilter << sentence << "\n";  // send the sentence as one line to the filter
+    string processedSentence;
+    m_fromFilter->getline(processedSentence);  // read a single line of filter output
+    return processedSentence;
+}
+
+PreProcessFilter::~PreProcessFilter()
+{
+    delete m_toFilter;    // both stream objects are allocated with new
+    delete m_fromFilter;  // in the constructor's parent branch
+}
diff --git a/mert/PreProcessFilter.h b/mert/PreProcessFilter.h
new file mode 100644
index 000000000..c65c060a4
--- /dev/null
+++ b/mert/PreProcessFilter.h
@@ -0,0 +1,24 @@
+#ifndef MERT_PREPROCESSFILTER_H_
+#define MERT_PREPROCESSFILTER_H_
+
+#include <string>
+
+#include "Fdstream.h"
+
+// Runs the filter command in a child process and uses that filter to process
+// given sentences. Not copyable: the destructor deletes both stream pointers.
+class PreProcessFilter
+{
+public:
+    PreProcessFilter(const string& filterCommand);
+    string ProcessSentence(const string& sentence);
+    ~PreProcessFilter();
+
+private:
+    PreProcessFilter(const PreProcessFilter&);  // no copying allowed
+    void operator=(const PreProcessFilter&);
+    ofdstream* m_toFilter;
+    ifdstream* m_fromFilter;
+};
+
+#endif  // MERT_PREPROCESSFILTER_H_
diff --git a/mert/Reference.h b/mert/Reference.h
new file mode 100644
index 000000000..353a3311b
--- /dev/null
+++ b/mert/Reference.h
@@ -0,0 +1,82 @@
+#ifndef MERT_REFERENCE_H_
+#define MERT_REFERENCE_H_
+
+#include <algorithm>
+#include <climits>
+#include <vector>
+
+#include "Ngram.h"
+
+/**
+ * Reference class represents reference translations for an output
+ * translation used in calculating BLEU score.
+ */
+class Reference {
+ public:
+  // for m_length
+  typedef std::vector<size_t>::iterator iterator;
+  typedef std::vector<size_t>::const_iterator const_iterator;
+
+  Reference() : m_counts(new NgramCounts) { }
+  ~Reference() { delete m_counts; }
+
+  NgramCounts* get_counts() { return m_counts; }
+  const NgramCounts* get_counts() const { return m_counts; }
+
+  iterator begin() { return m_length.begin(); }
+  const_iterator begin() const { return m_length.begin(); }
+  iterator end() { return m_length.end(); }
+  const_iterator end() const { return m_length.end(); }
+
+  void push_back(size_t len) { m_length.push_back(len); }
+  size_t num_references() const { return m_length.size(); }
+
+  int CalcAverage() const;
+  int CalcClosest(size_t length) const;
+  int CalcShortest() const;
+
+ private:
+  // no copying allowed: m_counts is owned and deleted by the destructor.
+  Reference(const Reference&);
+  void operator=(const Reference&);
+  NgramCounts* m_counts;
+  std::vector<size_t> m_length;  // multiple reference lengths
+};
+
+// TODO(tetsuok): fix this function and related stuff.
+// "average" reference length should not be calculated at sentence-level unlike "closest".
+// Returns the mean of the stored lengths truncated toward zero; 0 if none.
+inline int Reference::CalcAverage() const {
+  if (m_length.empty()) return 0;  // avoid converting 0/0 (NaN) to int
+  int total = 0;
+  for (size_t i = 0; i < m_length.size(); ++i) total += m_length[i];
+  return static_cast<int>(
+      static_cast<float>(total) / m_length.size());
+}
+
+// Returns the stored reference length nearest to `length`; of two equally
+// near lengths the shorter one is returned. Returns 0 if none is stored.
+inline int Reference::CalcClosest(size_t length) const {
+  if (m_length.empty()) return 0;  // nothing to index below
+  size_t best = m_length[0];
+  size_t best_diff = best > length ? best - length : length - best;
+  for (size_t i = 1; i < m_length.size(); ++i) {
+    const size_t ref_length = m_length[i];
+    // |ref_length - length| computed on unsigned values, so neither abs()
+    // nor a signed/unsigned conversion is involved.
+    const size_t diff =
+        ref_length > length ? ref_length - length : length - ref_length;
+    // Closest reference wins; on equal distance, take the shortest.
+    if (diff < best_diff || (diff == best_diff && ref_length < best)) {
+      best = ref_length;
+      best_diff = diff;
+    }
+  }
+  return static_cast<int>(best);
+}
+
+inline int Reference::CalcShortest() const {  // smallest stored length; 0 if none
+  return m_length.empty() ? 0 : static_cast<int>(*std::min_element(m_length.begin(), m_length.end()));
+}
+
+#endif  // MERT_REFERENCE_H_
diff --git a/mert/ReferenceTest.cpp b/mert/ReferenceTest.cpp
new file mode 100644
index 000000000..454768195
--- /dev/null
+++ b/mert/ReferenceTest.cpp
@@ -0,0 +1,116 @@
+#include "Reference.h"
+
+#define BOOST_TEST_MODULE MertReference
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_CASE(refernece_count) {
+  Reference ref;
+  BOOST_CHECK(ref.get_counts() != NULL);  // the constructor allocates the counts
+}
+
+BOOST_AUTO_TEST_CASE(refernece_length_iterator) {
+  Reference ref;
+  ref.push_back(4);
+  ref.push_back(2);
+  BOOST_REQUIRE(ref.num_references() == 2);
+  // The iterator visits the lengths in insertion order, then reaches end().
+  Reference::iterator it = ref.begin();
+  BOOST_CHECK_EQUAL(*it, 4);
+  ++it;
+  BOOST_CHECK_EQUAL(*it, 2);
+  ++it;
+  BOOST_CHECK(it == ref.end());
+}
+
+BOOST_AUTO_TEST_CASE(refernece_length_average) {
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(1);
+    BOOST_CHECK_EQUAL(2, ref.CalcAverage());  // 5 / 2 = 2.5, truncated to 2
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    BOOST_CHECK_EQUAL(3, ref.CalcAverage());  // 7 / 2 = 3.5, truncated to 3
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    ref.push_back(4);
+    ref.push_back(5);
+    BOOST_CHECK_EQUAL(4, ref.CalcAverage());  // 16 / 4 = 4 exactly
+  }
+}
+
+BOOST_AUTO_TEST_CASE(refernece_length_closest) {
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(1);
+    BOOST_REQUIRE(ref.num_references() == 2);
+
+    BOOST_CHECK_EQUAL(1, ref.CalcClosest(2));  // |1 - 2| = 1 beats |4 - 2| = 2
+    BOOST_CHECK_EQUAL(1, ref.CalcClosest(1));
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(3));  // |4 - 3| = 1 beats |1 - 3| = 2
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(4));
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(5));
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    BOOST_REQUIRE(ref.num_references() == 2);
+
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(1));
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(2));
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(3));
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(4));
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(5));
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    ref.push_back(4);
+    ref.push_back(5);
+    BOOST_REQUIRE(ref.num_references() == 4);
+
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(1));
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(2));
+    BOOST_CHECK_EQUAL(3, ref.CalcClosest(3));
+    BOOST_CHECK_EQUAL(4, ref.CalcClosest(4));  // the repeated length 4 is an exact match
+    BOOST_CHECK_EQUAL(5, ref.CalcClosest(5));
+  }
+}
+
+BOOST_AUTO_TEST_CASE(refernece_length_shortest) {
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(1);
+    BOOST_CHECK_EQUAL(1, ref.CalcShortest());  // minimum of {4, 1}
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    BOOST_CHECK_EQUAL(3, ref.CalcShortest());  // minimum of {4, 3}
+  }
+
+  {
+    Reference ref;
+    ref.push_back(4);
+    ref.push_back(3);
+    ref.push_back(4);
+    ref.push_back(5);
+    BOOST_CHECK_EQUAL(3, ref.CalcShortest());  // minimum of {4, 3, 4, 5}
+  }
+}
diff --git a/mert/ScopedVector.h b/mert/ScopedVector.h
index 1fbce88b7..a2f0e7066 100644
--- a/mert/ScopedVector.h
+++ b/mert/ScopedVector.h
@@ -1,5 +1,5 @@
-#ifndef SCOPEDVECTOR_H_
-#define SCOPEDVECTOR_H_
+#ifndef MERT_SCOPED_VECTOR_H_
+#define MERT_SCOPED_VECTOR_H_
 
 #include <vector>
 
@@ -12,43 +12,43 @@ class ScopedVector {
   ScopedVector() {}
   virtual ~ScopedVector() { reset(); }
 
-  bool empty() const { return vec_.empty(); }
+  bool empty() const { return m_vec.empty(); }
 
-  void push_back(T *e) { vec_.push_back(e); }
+  void push_back(T *e) { m_vec.push_back(e); }
 
   void reset() {
-    for (iterator it = vec_.begin(); it != vec_.end(); ++it) {
+    for (iterator it = m_vec.begin(); it != m_vec.end(); ++it) {
       delete *it;
     }
-    vec_.clear();
+    m_vec.clear();
   }
 
-  void reserve(size_t capacity) { vec_.reserve(capacity); }
-  void resize(size_t size) { vec_.resize(size); }
+  void reserve(size_t capacity) { m_vec.reserve(capacity); }
+  void resize(size_t size) { m_vec.resize(size); }
 
-  size_t size() const {return vec_.size(); }
+  size_t size() const {return m_vec.size(); }
 
-  iterator begin() { return vec_.begin(); }
-  const_iterator begin() const { return vec_.begin(); }
+  iterator begin() { return m_vec.begin(); }
+  const_iterator begin() const { return m_vec.begin(); }
 
-  iterator end() { return vec_.end(); }
-  const_iterator end() const { return vec_.end(); }
+  iterator end() { return m_vec.end(); }
+  const_iterator end() const { return m_vec.end(); }
 
-  std::vector<T*>& get() { return vec_; }
-  const std::vector<T*>& get() const { return vec_; }
+  std::vector<T*>& get() { return m_vec; }
+  const std::vector<T*>& get() const { return m_vec; }
 
-  std::vector<T*>* operator->() { return &vec_; }
-  const std::vector<T*>* operator->() const { return &vec_; }
+  std::vector<T*>* operator->() { return &m_vec; }
+  const std::vector<T*>* operator->() const { return &m_vec; }
 
-  T*& operator[](size_t i) { return vec_[i]; }
-  const T* operator[](size_t i) const { return vec_[i]; }
+  T*& operator[](size_t i) { return m_vec[i]; }
+  const T* operator[](size_t i) const { return m_vec[i]; }
 
  private:
-  std::vector<T*> vec_;
+  std::vector<T*> m_vec;
 
   // no copying allowed.
   ScopedVector<T>(const ScopedVector<T>&);
   void operator=(const ScopedVector<T>&);
 };
 
-#endif // SCOPEDVECTOR_H_
+#endif // MERT_SCOPED_VECTOR_H_
diff --git a/mert/ScoreArray.cpp b/mert/ScoreArray.cpp
index b26b93114..83fa96ef0 100644
--- a/mert/ScoreArray.cpp
+++ b/mert/ScoreArray.cpp
@@ -1,6 +1,6 @@
 /*
  *  ScoreArray.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
@@ -10,76 +10,87 @@
 #include "Util.h"
 #include "FileStream.h"
 
+using namespace std;
 
 ScoreArray::ScoreArray()
-    : number_of_scores(0), idx("") {}
+    : m_num_scores(0), m_index("") {}
 
-void ScoreArray::savetxt(std::ofstream& outFile, const std::string& sctype)
+void ScoreArray::savetxt(ostream* os, const string& sctype)
 {
-  outFile << SCORES_TXT_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_scores << " " << sctype << std::endl;
-  for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->savetxt(outFile);
-    outFile << std::endl;
+  *os << SCORES_TXT_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_scores << " " << sctype << endl;
+  for (scorearray_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
+    i->savetxt(os);
+    *os << endl;
   }
-  outFile << SCORES_TXT_END << std::endl;
+  *os << SCORES_TXT_END << endl;
 }
 
-void ScoreArray::savebin(std::ofstream& outFile, const std::string& sctype)
+void ScoreArray::savebin(ostream* os, const string& score_type)
 {
-  outFile << SCORES_BIN_BEGIN << " " << idx << " " << array_.size()
-          << " " << number_of_scores << " " << sctype << std::endl;
-  for (scorearray_t::iterator i = array_.begin(); i !=array_.end(); i++)
-    i->savebin(outFile);
-
-  outFile << SCORES_BIN_END << std::endl;
+  *os << SCORES_BIN_BEGIN << " " << m_index << " " << m_array.size()
+          << " " << m_num_scores << " " << score_type << endl;
+  for (scorearray_t::iterator i = m_array.begin();
+       i != m_array.end(); i++) {
+    i->savebin(os);
+  }
+  *os << SCORES_BIN_END << endl;
 }
 
-void ScoreArray::save(std::ofstream& inFile, const std::string& sctype, bool bin)
+void ScoreArray::save(ostream* os, const string& score_type, bool bin)
 {
-  if (size()>0)
-    (bin)?savebin(inFile, sctype):savetxt(inFile, sctype);
+  if (size() <= 0) return;
+  if (bin) {
+    savebin(os, score_type);
+  } else {
+    savetxt(os, score_type);
+  }
 }
 
-void ScoreArray::save(const std::string &file, const std::string& sctype, bool bin)
+void ScoreArray::save(const string &file, const string& score_type, bool bin)
 {
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  save(outFile, sctype, bin);
+  ofstream ofs(file.c_str(), ios::out);
+  if (!ofs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  ostream* os = &ofs;
+  save(os, score_type, bin);
+  ofs.close();
+}
 
-  outFile.close();
+void ScoreArray::save(const string& score_type, bool bin) {
+  save(&cout, score_type, bin);
 }
 
-void ScoreArray::loadbin(ifstream& inFile, size_t n)
+void ScoreArray::loadbin(istream* is, size_t n)
 {
-  ScoreStats entry(number_of_scores);
-
-  for (size_t i=0 ; i < n; i++) {
-    entry.loadbin(inFile);
+  ScoreStats entry(m_num_scores);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadbin(is);
     add(entry);
   }
 }
 
-void ScoreArray::loadtxt(ifstream& inFile, size_t n)
+void ScoreArray::loadtxt(istream* is, size_t n)
 {
-  ScoreStats entry(number_of_scores);
-
-  for (size_t i=0 ; i < n; i++) {
-    entry.loadtxt(inFile);
+  ScoreStats entry(m_num_scores);
+  for (size_t i = 0; i < n; i++) {
+    entry.loadtxt(is);
     add(entry);
   }
 }
 
-void ScoreArray::load(ifstream& inFile)
+void ScoreArray::load(istream* is)
 {
-  size_t number_of_entries=0;
-  bool binmode=false;
+  size_t number_of_entries = 0;
+  bool binmode = false;
 
-  std::string substring, stringBuf;
-  std::string::size_type loc;
+  string substring, stringBuf;
+  string::size_type loc;
 
-  std::getline(inFile, stringBuf);
-  if (!inFile.good()) {
+  getline(*is, stringBuf);
+  if (!is->good()) {
     return;
   }
 
@@ -94,35 +105,38 @@ void ScoreArray::load(ifstream& inFile)
     }
     getNextPound(stringBuf, substring);
     getNextPound(stringBuf, substring);
-    idx = substring;
+    m_index = substring;
     getNextPound(stringBuf, substring);
     number_of_entries = atoi(substring.c_str());
     getNextPound(stringBuf, substring);
-    number_of_scores = atoi(substring.c_str());
+    m_num_scores = atoi(substring.c_str());
     getNextPound(stringBuf, substring);
-    score_type = substring;
+    m_score_type = substring;
   }
 
-  (binmode)?loadbin(inFile, number_of_entries):loadtxt(inFile, number_of_entries);
+  if (binmode) {
+    loadbin(is, number_of_entries);
+  } else {
+    loadtxt(is, number_of_entries);
+  }
 
-  std::getline(inFile, stringBuf);
+  getline(*is, stringBuf);
   if (!stringBuf.empty()) {
-    if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 && (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
+    if ((loc = stringBuf.find(SCORES_TXT_END)) != 0 &&
+        (loc = stringBuf.find(SCORES_BIN_END)) != 0) {
       TRACE_ERR("ERROR: ScoreArray::load(): Wrong footer");
       return;
     }
   }
 }
 
-void ScoreArray::load(const std::string &file)
+void ScoreArray::load(const string &file)
 {
-  TRACE_ERR("loading data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  TRACE_ERR("loading data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }
 
 
@@ -139,7 +153,8 @@ bool ScoreArray::check_consistency() const
   if (sz == 0)
     return true;
 
-  for (scorearray_t::const_iterator i = array_.begin(); i != array_.end(); ++i) {
+  for (scorearray_t::const_iterator i = m_array.begin();
+       i != m_array.end(); ++i) {
     if (i->size() != sz)
       return false;
   }
diff --git a/mert/ScoreArray.h b/mert/ScoreArray.h
index 0a0ddbdc0..64d019daf 100644
--- a/mert/ScoreArray.h
+++ b/mert/ScoreArray.h
@@ -1,15 +1,13 @@
 /*
  *  ScoreArray.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef SCORE_ARRAY_H
-#define SCORE_ARRAY_H
-
-using namespace std;
+#ifndef MERT_SCORE_ARRAY_H_
+#define MERT_SCORE_ARRAY_H_
 
 #include <vector>
 #include <iostream>
@@ -24,88 +22,65 @@ const char SCORES_BIN_END[] = "SCORES_BIN_END_0";
 
 class ScoreArray
 {
-protected:
-  scorearray_t array_;
-  std::string score_type;
-  size_t number_of_scores;
+ private:
+  scorearray_t m_array;
+  std::string m_score_type;
+  std::size_t m_num_scores;
 
-private:
-  // idx to identify the utterance.
+  // index to identify the utterance.
   // It can differ from the index inside the vector.
-  std::string  idx;
+  std::string  m_index;
 
 public:
   ScoreArray();
   ~ScoreArray() {}
 
-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }
 
-  inline std::string getIndex() const {
-    return idx;
-  }
-  inline void setIndex(const std::string& value) {
-    idx=value;
-  }
+  std::string getIndex() const { return m_index; }
 
-//	inline ScoreStats get(size_t i){ return array_.at(i); }
+  void setIndex(const std::string& value) { m_index = value; }
 
-  inline ScoreStats&  get(size_t i) {
-    return array_.at(i);
-  }
-  inline const ScoreStats&  get(size_t i)const {
-    return array_.at(i);
-  }
+  ScoreStats& get(std::size_t i) { return m_array.at(i); }
 
-  void add(const ScoreStats& e) {
-    array_.push_back(e);
-  }
+  const ScoreStats& get(std::size_t i) const { return m_array.at(i); }
+
+  void add(const ScoreStats& e) { m_array.push_back(e); }
 
   //ADDED BY TS
-  void swap(size_t i, size_t j) {
-    std::swap(array_[i],array_[j]);
+  void swap(std::size_t i, std::size_t j) {
+    std::swap(m_array[i], m_array[j]);
   }
 
-  void resize(size_t new_size) {
-    array_.resize(std::min(new_size,array_.size()));
+  void resize(std::size_t new_size) {
+    m_array.resize(std::min(new_size, m_array.size()));
   }
   //END_ADDED
 
   void merge(ScoreArray& e);
 
-  inline std::string name() const {
-    return score_type;
-  }
+  std::string name() const { return m_score_type; }
 
-  inline void name(std::string &sctype) {
-    score_type = sctype;
-  }
+  void name(std::string &score_type) { m_score_type = score_type; }
 
-  inline size_t size() const {
-    return array_.size();
-  }
-  inline size_t NumberOfScores() const {
-    return number_of_scores;
-  }
-  inline void NumberOfScores(size_t v) {
-    number_of_scores = v;
-  }
+  std::size_t size() const { return m_array.size(); }
 
-  void savetxt(ofstream& outFile, const std::string& sctype);
-  void savebin(ofstream& outFile, const std::string& sctype);
-  void save(ofstream& outFile, const std::string& sctype, bool bin=false);
-  void save(const std::string &file, const std::string& sctype, bool bin=false);
-  inline void save(const std::string& sctype, bool bin=false) {
-    save("/dev/stdout", sctype, bin);
-  }
+  std::size_t NumberOfScores() const { return m_num_scores; }
+
+  void NumberOfScores(std::size_t v) { m_num_scores = v; }
+
+  void savetxt(std::ostream* os, const std::string& score_type);
+  void savebin(std::ostream* os, const std::string& score_type);
+  void save(std::ostream* os, const std::string& score_type, bool bin=false);
+  void save(const std::string &file, const std::string& score_type, bool bin=false);
+  void save(const std::string& score_type, bool bin=false);
 
-  void loadtxt(ifstream& inFile, size_t n);
-  void loadbin(ifstream& inFile, size_t n);
-  void load(ifstream& inFile);
+  void loadtxt(std::istream* is, std::size_t n);
+  void loadbin(std::istream* is, std::size_t n);
+  void load(std::istream* is);
   void load(const std::string &file);
 
   bool check_consistency() const;
 };
 
-#endif  // SCORE_ARRAY_H
+#endif  // MERT_SCORE_ARRAY_H_
diff --git a/mert/ScoreData.cpp b/mert/ScoreData.cpp
index e79595d06..b4454dc4e 100644
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@@ -1,61 +1,62 @@
 /*
  *  ScoreData.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
 #include "ScoreData.h"
+
+#include <fstream>
 #include "Scorer.h"
 #include "Util.h"
 #include "FileStream.h"
 
-ScoreData::ScoreData(Scorer& ptr):
-  theScorer(&ptr)
+ScoreData::ScoreData(Scorer* scorer) :
+  m_scorer(scorer)
 {
-  score_type = theScorer->getName();
+  m_score_type = m_scorer->getName();
   // This is not dangerous: we don't use the this pointer in SetScoreData.
-  theScorer->setScoreData(this);
-  number_of_scores = theScorer->NumberOfScores();
-  // TRACE_ERR("ScoreData: number_of_scores: " << number_of_scores << std::endl);
+  m_scorer->setScoreData(this);
+  m_num_scores = m_scorer->NumberOfScores();
+  // TRACE_ERR("ScoreData: m_num_scores: " << m_num_scores << std::endl);
 }
 
-void ScoreData::save(std::ofstream& outFile, bool bin)
+void ScoreData::save(ostream* os, bool bin)
 {
-  for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    i->save(outFile, score_type, bin);
+  for (scoredata_t::iterator i = m_array.begin();
+       i != m_array.end(); ++i) {
+    i->save(os, m_score_type, bin);
   }
 }
 
-void ScoreData::save(const std::string &file, bool bin)
+void ScoreData::save(const string &file, bool bin)
 {
   if (file.empty()) return;
-  TRACE_ERR("saving the array into " << file << std::endl);
+  TRACE_ERR("saving the array into " << file << endl);
 
   // matches a stream with a file. Opens the file.
-  std::ofstream outFile(file.c_str(), std::ios::out);
-
-  ScoreStats entry;
-
-  save(outFile, bin);
+  ofstream ofs(file.c_str(), ios::out);
+  ostream* os = &ofs;
+  save(os, bin);
+  ofs.close();
+}
 
-  outFile.close();
+void ScoreData::save(bool bin) {
+  save(&cout, bin);
 }
 
-void ScoreData::load(ifstream& inFile)
+void ScoreData::load(istream* is)
 {
   ScoreArray entry;
 
-  while (!inFile.eof()) {
-
-    if (!inFile.good()) {
-      std::cerr << "ERROR ScoreData::load inFile.good()" << std::endl;
+  while (!is->eof()) {
+    if (!is->good()) {
+      cerr << "ERROR ScoreData::load inFile.good()" << endl;
     }
-
     entry.clear();
-    entry.load(inFile);
-
+    entry.load(is);
     if (entry.size() == 0) {
       break;
     }
@@ -63,60 +64,58 @@ void ScoreData::load(ifstream& inFile)
   }
 }
 
-
-void ScoreData::load(const std::string &file)
+void ScoreData::load(const string &file)
 {
-  TRACE_ERR("loading score data from " << file << std::endl);
-
-  inputfilestream inFile(file); // matches a stream with a file. Opens the file
-
-  if (!inFile) {
+  TRACE_ERR("loading score data from " << file << endl);
+  inputfilestream input_stream(file); // matches a stream with a file. Opens the file
+  if (!input_stream) {
     throw runtime_error("Unable to open score file: " + file);
   }
-
-  load((ifstream&) inFile);
-
-  inFile.close();
+  istream* is = &input_stream;
+  load(is);
+  input_stream.close();
 }
 
-
 void ScoreData::add(ScoreArray& e)
 {
   if (exists(e.getIndex())) { // array at position e.getIndex() already exists
     //enlarge array at position e.getIndex()
     size_t pos = getIndex(e.getIndex());
-    array_.at(pos).merge(e);
+    m_array.at(pos).merge(e);
   } else {
-    array_.push_back(e);
+    m_array.push_back(e);
     setIndex();
   }
 }
 
-void ScoreData::add(const ScoreStats& e, const std::string& sent_idx)
+void ScoreData::add(const ScoreStats& e, const string& sent_idx)
 {
   if (exists(sent_idx)) { // array at position e.getIndex() already exists
     // Enlarge array at position e.getIndex()
     size_t pos = getIndex(sent_idx);
     //          TRACE_ERR("Inserting in array " << sent_idx << std::endl);
-    array_.at(pos).add(e);
+    m_array.at(pos).add(e);
     //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
   } else {
     //          TRACE_ERR("Creating a new entry in the array" << std::endl);
     ScoreArray a;
-    a.NumberOfScores(number_of_scores);
+    a.NumberOfScores(m_num_scores);
     a.add(e);
     a.setIndex(sent_idx);
-    add(a);
+    size_t idx = m_array.size();
+    m_array.push_back(a);
+    m_index_to_array_name[idx] = sent_idx;
+    m_array_name_to_index[sent_idx]=idx;
     //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
   }
 }
 
 bool ScoreData::check_consistency() const
 {
-  if (array_.size() == 0)
+  if (m_array.size() == 0)
     return true;
 
-  for (scoredata_t::const_iterator i = array_.begin(); i != array_.end(); ++i)
+  for (scoredata_t::const_iterator i = m_array.begin(); i != m_array.end(); ++i)
     if (!i->check_consistency()) return false;
 
   return true;
@@ -124,10 +123,10 @@ bool ScoreData::check_consistency() const
 
 void ScoreData::setIndex()
 {
-  size_t j=0;
-  for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++) {
-    idx2arrayname_[j]=i->getIndex();
-    arrayname2idx_[i->getIndex()]=j;
+  size_t j = 0;
+  for (scoredata_t::iterator i = m_array.begin(); i != m_array.end(); ++i) {
+    m_index_to_array_name[j] = i->getIndex();
+    m_array_name_to_index[i->getIndex()]=j;
     j++;
   }
 }
diff --git a/mert/ScoreData.h b/mert/ScoreData.h
index 765f74148..70d7b9ab1 100644
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@@ -1,17 +1,16 @@
 /*
  *  ScoreData.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef SCORE_DATA_H
-#define SCORE_DATA_H
+#ifndef MERT_SCORE_DATA_H_
+#define MERT_SCORE_DATA_H_
 
-#include <fstream>
-#include <vector>
 #include <iostream>
+#include <vector>
 #include <stdexcept>
 #include <string>
 #include "ScoreArray.h"
@@ -23,35 +22,34 @@ class Scorer;
 
 class ScoreData
 {
-protected:
-  scoredata_t array_;
-  idx2name idx2arrayname_; // map from index to name of array
-  name2idx arrayname2idx_; // map from name to index of array
-
 private:
   // Do not allow the user to instanciate without arguments.
   ScoreData() {}
 
-  Scorer* theScorer;
-  std::string score_type;
-  size_t number_of_scores;
+  scoredata_t m_array;
+  idx2name m_index_to_array_name; // map from index to name of array
+  name2idx m_array_name_to_index; // map from name to index of array
+
+  Scorer* m_scorer;
+  std::string m_score_type;
+  size_t m_num_scores;
 
 public:
-  ScoreData(Scorer& sc);
+  ScoreData(Scorer* scorer);
   ~ScoreData() {}
 
-  inline void clear() {
-    array_.clear();
-  }
+  void clear() { m_array.clear(); }
 
   inline ScoreArray get(const std::string& idx) {
-    return array_.at(getIndex(idx));
+    return m_array.at(getIndex(idx));
   }
+
   inline ScoreArray& get(size_t idx) {
-    return array_.at(idx);
+    return m_array.at(idx);
   }
+
   inline const ScoreArray& get(size_t idx) const {
-    return array_.at(idx);
+    return m_array.at(idx);
   }
 
   inline bool exists(const std::string& sent_idx) const {
@@ -59,59 +57,54 @@ public:
   }
 
   inline bool exists(int sent_idx) const {
-    return (sent_idx > -1 && sent_idx < static_cast<int>(array_.size())) ? true : false;
+    return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
   }
 
   inline ScoreStats& get(size_t i, size_t j) {
-    return array_.at(i).get(j);
-  }
-  inline const ScoreStats&  get(size_t i, size_t j) const {
-    return array_.at(i).get(j);
+    return m_array.at(i).get(j);
   }
 
-  inline std::string name() const {
-    return score_type;
+  inline const ScoreStats& get(size_t i, size_t j) const {
+    return m_array.at(i).get(j);
   }
 
-  inline std::string name(const std::string &sctype) {
-    return score_type = sctype;
+  std::string name() const { return m_score_type; }
+
+  std::string name(const std::string &score_type) {
+    return m_score_type = score_type;
   }
 
   void add(ScoreArray& e);
   void add(const ScoreStats& e, const std::string& sent_idx);
 
-  inline size_t NumberOfScores() const {
-    return number_of_scores;
-  }
-  inline size_t size() const {
-    return array_.size();
-  }
+  size_t NumberOfScores() const { return m_num_scores; }
+  size_t size() const { return m_array.size(); }
 
   void save(const std::string &file, bool bin=false);
-  void save(ofstream& outFile, bool bin=false);
-  inline void save(bool bin=false) {
-    save("/dev/stdout", bin);
-  }
+  void save(std::ostream* os, bool bin=false);
+  void save(bool bin=false);
 
-  void load(ifstream& inFile);
+  void load(std::istream* is);
   void load(const std::string &file);
 
   bool check_consistency() const;
+
   void setIndex();
 
   inline int getIndex(const std::string& idx) const {
-    name2idx::const_iterator i = arrayname2idx_.find(idx);
-    if (i != arrayname2idx_.end())
+    name2idx::const_iterator i = m_array_name_to_index.find(idx);
+    if (i != m_array_name_to_index.end())
       return i->second;
     else
       return -1;
   }
+
   inline std::string getIndex(size_t idx) const {
-    idx2name::const_iterator i = idx2arrayname_.find(idx);
-    if (i != idx2arrayname_.end())
+    idx2name::const_iterator i = m_index_to_array_name.find(idx);  // look up the array name stored for position idx
+    if (i == m_index_to_array_name.end())  // not found: throw rather than dereference end()
       throw runtime_error("there is no entry at index " + idx);
     return i->second;
   }
 };
 
-#endif  // SCORE_DATA_H
+#endif  // MERT_SCORE_DATA_H_
diff --git a/mert/ScoreDataIterator.h b/mert/ScoreDataIterator.h
index 4633b8651..910e92165 100644
--- a/mert/ScoreDataIterator.h
+++ b/mert/ScoreDataIterator.h
@@ -17,8 +17,8 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 
-#ifndef _SCORE_DATA_ITERATOR_
-#define _SCORE_DATA_ITERATOR_
+#ifndef MERT_SCORE_DATA_ITERATOR_H_
+#define MERT_SCORE_DATA_ITERATOR_H_
 
 /*
  * For loading from the score data file.
@@ -62,4 +62,4 @@ class ScoreDataIterator :
     std::vector<ScoreDataItem> m_next;
 };
 
-#endif
+#endif  // MERT_SCORE_DATA_ITERATOR_H_
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index 7efea99a9..e6c111d5d 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -1,6 +1,6 @@
 /*
  *  FeatureStats.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
@@ -14,36 +14,30 @@ const int kAvailableSize = 8;
 } // namespace
 
 ScoreStats::ScoreStats()
-    : available_(kAvailableSize), entries_(0),
-      array_(new ScoreStatsType[available_]) {}
+    : m_available_size(kAvailableSize), m_entries(0),
+      m_array(new ScoreStatsType[m_available_size]) {}
 
 ScoreStats::ScoreStats(const size_t size)
-    : available_(size), entries_(size),
-      array_(new ScoreStatsType[available_])
+    : m_available_size(size), m_entries(size),
+      m_array(new ScoreStatsType[m_available_size])
 {
-  memset(array_, 0, GetArraySizeWithBytes());
-}
-
-ScoreStats::ScoreStats(std::string &theString)
-    : available_(0), entries_(0), array_(NULL)
-{
-  set(theString);
+  memset(m_array, 0, GetArraySizeWithBytes());
 }
 
 ScoreStats::~ScoreStats()
 {
-  if (array_) {
-    delete [] array_;
-    array_ = NULL;
+  if (m_array) {
+    delete [] m_array;
+    m_array = NULL;
   }
 }
 
 void ScoreStats::Copy(const ScoreStats &stats)
 {
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new ScoreStatsType[available_];
-  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
+  m_available_size = stats.available();
+  m_entries = stats.size();
+  m_array = new ScoreStatsType[m_available_size];
+  memcpy(m_array, stats.getArray(), GetArraySizeWithBytes());
 }
 
 ScoreStats::ScoreStats(const ScoreStats &stats)
@@ -53,77 +47,82 @@ ScoreStats::ScoreStats(const ScoreStats &stats)
 
 ScoreStats& ScoreStats::operator=(const ScoreStats &stats)
 {
-  delete [] array_;
+  delete [] m_array;
   Copy(stats);
   return *this;
 }
 
 void ScoreStats::expand()
 {
-  available_ *= 2;
-  scorestats_t buf = new ScoreStatsType[available_];
-  memcpy(buf, array_, GetArraySizeWithBytes());
-  delete [] array_;
-  array_ = buf;
+  m_available_size *= 2;
+  scorestats_t buf = new ScoreStatsType[m_available_size];
+  memcpy(buf, m_array, GetArraySizeWithBytes());
+  delete [] m_array;
+  m_array = buf;
 }
 
 void ScoreStats::add(ScoreStatsType v)
 {
   if (isfull()) expand();
-  array_[entries_++]=v;
+  m_array[m_entries++]=v;
 }
 
-void ScoreStats::set(std::string &theString)
+void ScoreStats::set(const string& str)
 {
-  std::string substring, stringBuf;
   reset();
-
-  while (!theString.empty()) {
-    getNextPound(theString, substring);
-    add(ConvertStringToScoreStatsType(substring));
+  vector<string> out;
+  Tokenize(str.c_str(), ' ', &out);
+  for (vector<string>::const_iterator it = out.begin();
+       it != out.end(); ++it) {
+    add(ConvertStringToScoreStatsType(*it));
   }
 }
 
-void ScoreStats::loadbin(std::ifstream& inFile)
+void ScoreStats::loadbin(istream* is)
 {
-  inFile.read((char*)array_, GetArraySizeWithBytes());
+  is->read(reinterpret_cast<char*>(m_array),
+           static_cast<streamsize>(GetArraySizeWithBytes()));
 }
 
-void ScoreStats::loadtxt(std::ifstream& inFile)
+void ScoreStats::loadtxt(istream* is)
 {
-  std::string theString;
-  std::getline(inFile, theString);
-  set(theString);
+  string line;
+  getline(*is, line);
+  set(line);
 }
 
-void ScoreStats::loadtxt(const std::string &file)
+void ScoreStats::loadtxt(const string &file)
 {
-//      TRACE_ERR("loading the stats from " << file << std::endl);
-
-  std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
-
-  loadtxt(inFile);
+  ifstream ifs(file.c_str(), ios::in); // matches a stream with a file. Opens the file
+  if (!ifs) {
+    cerr << "Failed to open " << file << endl;
+    exit(1);
+  }
+  istream* is = &ifs;
+  loadtxt(is);
 }
 
 
-void ScoreStats::savetxt(const std::string &file)
+void ScoreStats::savetxt(const string &file)
 {
-//      TRACE_ERR("saving the stats into " << file << std::endl);
-
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
-
-  savetxt(outFile);
+  ofstream ofs(file.c_str(), ios::out); // matches a stream with a file. Opens the file
+  ostream* os = &ofs;
+  savetxt(os);
 }
 
-
-void ScoreStats::savetxt(std::ofstream& outFile)
+void ScoreStats::savetxt(ostream* os)
 {
-  outFile << *this;
+  *os << *this;
 }
 
-void ScoreStats::savebin(std::ofstream& outFile)
+void ScoreStats::savetxt() {
+  savetxt(&cout);
+}
+
+void ScoreStats::savebin(ostream* os)
 {
-  outFile.write((char*)array_, GetArraySizeWithBytes());
+  os->write(reinterpret_cast<char*>(m_array),
+            static_cast<streamsize>(GetArraySizeWithBytes()));
 }
 
 ostream& operator<<(ostream& o, const ScoreStats& e)
@@ -144,7 +143,7 @@ bool operator==(const ScoreStats& s1, const ScoreStats& s2) {
     if (s1.get(k) != s2.get(k))
       return false;
   }
-  
+
   return true;
 }
 //END_ADDED
diff --git a/mert/ScoreStats.h b/mert/ScoreStats.h
index 68df91195..e8d4543ce 100644
--- a/mert/ScoreStats.h
+++ b/mert/ScoreStats.h
@@ -1,13 +1,13 @@
 /*
  *  ScoreStats.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef SCORE_STATS_H
-#define SCORE_STATS_H
+#ifndef MERT_SCORE_STATS_H_
+#define MERT_SCORE_STATS_H_
 
 #include <vector>
 #include <iostream>
@@ -22,16 +22,16 @@ using namespace std;
 class ScoreStats
 {
 private:
-  size_t available_;
-  size_t entries_;
+  size_t m_available_size;
+  size_t m_entries;
 
   // TODO: Use smart pointer for exceptional-safety.
-  scorestats_t array_;
+  scorestats_t m_array;
 
 public:
   ScoreStats();
   explicit ScoreStats(const size_t size);
-  explicit ScoreStats(std::string &theString);
+
   ~ScoreStats();
 
   // We intentionally allow copying.
@@ -40,59 +40,52 @@ public:
 
   void Copy(const ScoreStats &stats);
 
-  bool isfull() const {
-    return (entries_ < available_) ? 0 : 1;
-  }
+  bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
 
   void expand();
   void add(ScoreStatsType v);
 
   void clear() {
-    memset((void*)array_, 0, GetArraySizeWithBytes());
+    memset((void*)m_array, 0, GetArraySizeWithBytes());
   }
 
   void reset() {
-    entries_ = 0;
+    m_entries = 0;
     clear();
   }
 
-  inline ScoreStatsType get(size_t i) {
-    return array_[i];
-  }
-  inline ScoreStatsType get(size_t i)const {
-    return array_[i];
-  }
-  inline scorestats_t getArray() const {
-    return array_;
-  }
+  ScoreStatsType get(size_t i) { return m_array[i]; }
+  ScoreStatsType get(size_t i) const { return m_array[i]; }
+  scorestats_t getArray() const { return m_array; }
 
-  void set(std::string &theString);
+  void set(const std::string& str);
 
-  inline size_t bytes() const {
-    return GetArraySizeWithBytes();
+  // Much more efficient than the above.
+  void set(const std::vector<ScoreStatsType>& stats) {
+    reset();
+    for (size_t i = 0; i < stats.size(); ++i) {
+      add(stats[i]);
+    }
   }
 
+  size_t bytes() const { return GetArraySizeWithBytes(); }
+
   size_t GetArraySizeWithBytes() const {
-    return entries_ * sizeof(ScoreStatsType);
+    return m_entries * sizeof(ScoreStatsType);
   }
 
-  inline size_t size() const {
-    return entries_;
-  }
-  inline size_t available() const {
-    return available_;
-  }
+  size_t size() const { return m_entries; }
+
+  size_t available() const { return m_available_size; }
 
   void savetxt(const std::string &file);
-  void savetxt(ofstream& outFile);
-  void savebin(ofstream& outFile);
-  inline void savetxt() {
-    savetxt("/dev/stdout");
-  }
+  void savetxt(ostream* os);
+  void savebin(ostream* os);
+  void savetxt();
 
   void loadtxt(const std::string &file);
-  void loadtxt(ifstream& inFile);
-  void loadbin(ifstream& inFile);
+  void loadtxt(istream* is);
+  void loadbin(istream* is);
 
   /**
    * Write the whole object to a stream.
@@ -101,7 +94,7 @@ public:
 };
 
 //ADDED_BY_TS
-bool operator==(const ScoreStats& s1, const ScoreStats& s2); 
+bool operator==(const ScoreStats& s1, const ScoreStats& s2);
 //END_ADDED
 
-#endif  // SCORE_STATS_H
+#endif  // MERT_SCORE_STATS_H_
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index a2bb4720c..70948c47f 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -1,5 +1,9 @@
 #include "Scorer.h"
+
 #include <limits>
+#include "Vocabulary.h"
+#include "Util.h"
+#include "Singleton.h"
 
 namespace {
 
@@ -33,14 +37,16 @@ inline float score_average(const statscores_t& scores, size_t start, size_t end)
 
 Scorer::Scorer(const string& name, const string& config)
     : m_name(name),
-      m_encoder(new Encoder),
+      m_vocab(mert::VocabularyFactory::GetVocabulary()),
       m_score_data(0),
-      m_enable_preserve_case(true) {
+      m_enable_preserve_case(true),
+      m_filter(NULL) {
   InitConfig(config);
 }
 
 Scorer::~Scorer() {
-  delete m_encoder;
+  Singleton<mert::Vocabulary>::Delete();
+  delete m_filter;
 }
 
 void Scorer::InitConfig(const string& config) {
@@ -64,23 +70,6 @@ void Scorer::InitConfig(const string& config) {
   }
 }
 
-Scorer::Encoder::Encoder() {}
-
-Scorer::Encoder::~Encoder() {}
-
-int Scorer::Encoder::Encode(const string& token) {
-  map<string, int>::iterator it = m_vocab.find(token);
-  int encoded_token;
-  if (it == m_vocab.end()) {
-    // Add an new entry to the vocaburary.
-    encoded_token = static_cast<int>(m_vocab.size());
-    m_vocab[token] = encoded_token;
-  } else {
-    encoded_token = it->second;
-  }
-  return encoded_token;
-}
-
 void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
   std::istringstream in(line);
   std::string token;
@@ -91,10 +80,84 @@ void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
         *it = tolower(*it);
       }
     }
-    encoded.push_back(m_encoder->Encode(token));
+    encoded.push_back(m_vocab->Encode(token));
+  }
+}
+
+/**
+ * Set the factors, which should be used for this metric
+ */
+void Scorer::setFactors(const string& factors)
+{
+  if (factors.empty()) return;
+  vector<string> factors_vec;
+  split(factors, '|', factors_vec);
+  for(vector<string>::iterator it = factors_vec.begin(); it != factors_vec.end(); ++it)
+  {
+    int factor = atoi(it->c_str());
+    m_factors.push_back(factor);
+  }
+}
+
+/**
+ * Set unix filter, which will be used to preprocess the sentences
+ */
+void Scorer::setFilter(const string& filterCommand)
+{
+    if (filterCommand.empty()) return;
+    m_filter = new PreProcessFilter(filterCommand);
+}
+
+/**
+ * Take the factored sentence and return the desired factors
+ */
+string Scorer::applyFactors(const string& sentence) const
+{
+  if (m_factors.size() == 0) return sentence;
+
+  vector<string> tokens;
+  split(sentence, ' ', tokens);
+
+  stringstream sstream;
+  for (size_t i = 0; i < tokens.size(); ++i)
+  {
+    if (tokens[i] == "") continue;
+
+    vector<string> factors;
+    split(tokens[i], '|', factors);
+
+    int fsize = factors.size();
+
+    if (i > 0) sstream << " ";
+
+    for (size_t j = 0; j < m_factors.size(); ++j)
+    {
+      int findex = m_factors[j];
+      if (findex < 0 || findex >= fsize) throw runtime_error("Factor index is out of range.");
+
+      if (j > 0) sstream << "|";
+      sstream << factors[findex];
+    }
+  }
+  return sstream.str();
+}
+
+/**
+ * Preprocess the sentence with the filter (if given)
+ */
+string Scorer::applyFilter(const string& sentence) const
+{
+  if (m_filter)
+  {
+    return m_filter->ProcessSentence(sentence);
+  }
+  else
+  {
+    return sentence;
   }
 }
 
+
 StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
     : Scorer(name,config) {
   //configure regularisation
diff --git a/mert/Scorer.h b/mert/Scorer.h
index f2f54670a..c441eca28 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -1,5 +1,5 @@
-#ifndef __SCORER_H__
-#define __SCORER_H__
+#ifndef MERT_SCORER_H_
+#define MERT_SCORER_H_
 
 #include <iostream>
 #include <sstream>
@@ -8,11 +8,18 @@
 #include <vector>
 #include "Types.h"
 #include "ScoreData.h"
+#include "PreProcessFilter.h"
 
 using namespace std;
 
 class ScoreStats;
 
+namespace mert {
+
+class Vocabulary;
+
+} // namespace mert
+
 /**
  * Superclass of all scorers and dummy implementation.
  *
@@ -28,10 +35,7 @@ class Scorer
   /**
    * Return the number of statistics needed for the computation of the score.
    */
-  virtual size_t NumberOfScores() const {
-    cerr << "Scorer: 0" << endl;
-    return 0;
-  }
+  virtual size_t NumberOfScores() const = 0;
 
   /**
    * Set the reference files. This must be called before prepareStats().
@@ -57,7 +61,9 @@ class Scorer
    * applying each in turn, and calculating a new score each time.
    */
   virtual void score(const candidates_t& candidates, const diffs_t& diffs,
-                     statscores_t& scores) const {
+                     statscores_t& scores) const = 0;
+  /*
+  {
     //dummy impl
     if (!m_score_data) {
       throw runtime_error("score data not loaded");
@@ -67,6 +73,7 @@ class Scorer
       scores.push_back(0);
     }
   }
+  */
 
   /**
    * Calculate the score of the sentences corresponding to the list of candidate
@@ -93,27 +100,40 @@ class Scorer
   /**
    * Set the score data, prior to scoring.
    */
-  void setScoreData(ScoreData* data) {
+  virtual void setScoreData(ScoreData* data) {
     m_score_data = data;
   }
 
- private:
-  class Encoder {
-   public:
-    Encoder();
-    virtual ~Encoder();
-    int Encode(const std::string& token);
-    void Clear() { m_vocab.clear(); }
-
-   private:
-    std::map<std::string, int> m_vocab;
-  };
+  /**
+   * Set the factors, which should be used for this metric
+   */
+  virtual void setFactors(const string& factors);
 
+  mert::Vocabulary* GetVocab() const { return m_vocab; }
+
+  /**
+   * Set unix filter, which will be used to preprocess the sentences
+   */
+  virtual void setFilter(const string& filterCommand);
+  
+ private:
   void InitConfig(const string& config);
 
+  /**
+   * Take the factored sentence and return the desired factors
+   */
+  string applyFactors(const string& sentece) const;
+
+  /**
+   * Preprocess the sentence with the filter (if given)
+   */
+  string applyFilter(const string& sentence) const;
+
   string m_name;
-  Encoder* m_encoder;
+  mert::Vocabulary* m_vocab;
   map<string, string> m_config;
+  vector<int> m_factors;
+  PreProcessFilter* m_filter;    
 
  protected:
   ScoreData* m_score_data;
@@ -133,13 +153,19 @@ class Scorer
 
   /**
    * Tokenise line and encode.
-   * Note: We assume that all tokens are separated by single spaces.
+   * Note: We assume that all tokens are separated by whitespace.
    */
   void TokenizeAndEncode(const string& line, vector<int>& encoded);
 
-  void ClearEncoder() { m_encoder->Clear(); }
-};
+  /**
+   * Every inherited scorer should call this function for each sentence
+   */
+  string preprocessSentence(const string& sentence) const
+  {
+    return applyFactors(applyFilter(sentence));
+  }
 
+};
 
 /**
  * Abstract base class for Scorers that work by adding statistics across all
@@ -171,4 +197,4 @@ class StatisticsBasedScorer : public Scorer
   size_t  m_regularization_window;
 };
 
-#endif // __SCORER_H__
+#endif // MERT_SCORER_H_
diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp
index 2f47092ef..5da75273d 100644
--- a/mert/ScorerFactory.cpp
+++ b/mert/ScorerFactory.cpp
@@ -7,6 +7,8 @@
 #include "TerScorer.h"
 #include "CderScorer.h"
 #include "MergeScorer.h"
+#include "InterpolatedScorer.h"
+#include "SemposScorer.h"
 
 using namespace std;
 
@@ -16,22 +18,34 @@ vector<string> ScorerFactory::getTypes() {
   types.push_back(string("PER"));
   types.push_back(string("TER"));
   types.push_back(string("CDER"));
+  types.push_back(string("WER"));
   types.push_back(string("MERGE"));
+  types.push_back(string("SEMPOS"));
   return types;
 }
 
+/**
+ * Create a scorer for the given type name.
+ *
+ * Recognised names are "BLEU", "PER", "TER", "CDER", "WER", "SEMPOS" and
+ * "MERGE". Any other name that contains a comma is handed, together with
+ * config, to InterpolatedScorer. Every remaining name raises runtime_error.
+ *
+ * The scorer is allocated with new and returned as a raw pointer.
+ */
 Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
   if (type == "BLEU") {
-    return (BleuScorer*) new BleuScorer(config);
+    return new BleuScorer(config);
   } else if (type == "PER") {
-    return (PerScorer*) new PerScorer(config);
+    return new PerScorer(config);
   } else if (type == "TER") {
-    return (TerScorer*) new TerScorer(config);
+    return new TerScorer(config);
   } else if (type == "CDER") {
-    return (CderScorer*) new CderScorer(config);
+    return new CderScorer(config, true);
+  } else if (type == "WER") {
+    // CderScorer can compute both CDER and WER metric
+    return new CderScorer(config, false);
+  } else if (type == "SEMPOS") {
+    return new SemposScorer(config);
   } else if (type == "MERGE") {
-    return (MergeScorer*) new MergeScorer(config);
+    return new MergeScorer(config);
   } else {
-    throw runtime_error("Unknown scorer type: " + type);
+    // Not a single known name: a comma marks an interpolated scorer spec.
+    if (type.find(',') != string::npos) {
+      return new InterpolatedScorer(type, config);
+    }
+    else {
+      throw runtime_error("Unknown scorer type: " + type);
+    }
   }
 }
diff --git a/mert/ScorerFactory.h b/mert/ScorerFactory.h
index f6054c770..6752817ef 100644
--- a/mert/ScorerFactory.h
+++ b/mert/ScorerFactory.h
@@ -1,5 +1,5 @@
-#ifndef __SCORER_FACTORY_H
-#define __SCORER_FACTORY_H
+#ifndef MERT_SCORER_FACTORY_H_
+#define MERT_SCORER_FACTORY_H_
 
 #include <vector>
 #include <string>
@@ -18,4 +18,4 @@ private:
   ~ScorerFactory() {}
 };
 
-#endif  // __SCORER_FACTORY_H
+#endif  // MERT_SCORER_FACTORY_H_
diff --git a/mert/SemposOverlapping.cpp b/mert/SemposOverlapping.cpp
new file mode 100644
index 000000000..f27f188f7
--- /dev/null
+++ b/mert/SemposOverlapping.cpp
@@ -0,0 +1,109 @@
+#include "SemposOverlapping.h"
+#include "SemposScorer.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+using namespace std;
+
+namespace {
+
+SemposOverlapping* g_overlapping = NULL;
+
+} // namespace
+
+// Maps an overlapping name to a newly allocated overlapping object:
+//   "cap-micro" -> CapMicroOverlapping
+//   "cap-macro" -> CapMacroOverlapping
+// Any other name raises runtime_error. The sempos pointer is passed on to
+// the constructor of the created object.
+SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, const SemposScorer* sempos) {
+  if (str == "cap-micro") return new CapMicroOverlapping(sempos);
+  if (str == "cap-macro") return new CapMacroOverlapping(sempos);
+  throw runtime_error("Unknown overlapping: " + str);
+}
+
+// Stores ovr in the file-local g_overlapping pointer.
+// NOTE(review): within this file g_overlapping is only ever written here;
+// GetOverlapping() does not read it, so the injected object is never
+// handed out by the factory -- confirm whether that is intended.
+void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr) {
+  g_overlapping = ovr;
+}
+
+// Computes the two cap-micro statistics for one candidate/reference pair:
+//   stats[0] = summed weight of the items shared by cand and ref
+//   stats[1] = summed weight of all reference items
+// Both sums are multiplied by 1000 and truncated to int.
+vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
+{
+  const int scale = 1000;
+
+  // Multiset intersection of the candidate and reference items.
+  sentence_t common;
+  set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
+                   inserter(common, common.begin()));
+
+  float commonWeight = 0;
+  for (sentence_t::const_iterator item = common.begin(); item != common.end(); ++item) {
+    commonWeight += semposScorer->weight(item->first);
+  }
+
+  float refWeight = 0;
+  for (sentence_t::const_iterator item = ref.begin(); item != ref.end(); ++item) {
+    refWeight += semposScorer->weight(item->first);
+  }
+
+  vector<int> stats(2);
+  stats[0] = static_cast<int>(scale * commonWeight);
+  stats[1] = static_cast<int>(scale * refWeight);
+  return stats;
+}
+
+// Returns stats[0] / stats[1] as a float; a zero stats[1] yields 1.
+// Throws runtime_error unless stats has exactly two entries.
+float CapMicroOverlapping::calculateScore(const vector<int>& stats) const
+{
+  if (stats.size() != 2) {
+    throw std::runtime_error("Size of stats vector has to be 2");
+  }
+  const int matched = stats[0];
+  const int total = stats[1];
+  return (total == 0) ? 1.0f : matched / static_cast<float>(total);
+}
+
+// Computes per-class statistics for one candidate/reference pair.
+// For every class index c (the second member of an item):
+//   stats[2 * c]     accumulates the weight of shared items of class c
+//   stats[2 * c + 1] accumulates the weight of reference items of class c
+// Each weight is multiplied by 1000 before it is added to the int slot.
+vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
+{
+  const int scale = 1000;
+
+  // Multiset intersection of the candidate and reference items.
+  sentence_t common;
+  set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
+                   inserter(common, common.begin()));
+
+  // The vector constructor value-initialises every slot to zero.
+  vector<int> stats(2 * kMaxNOC);
+
+  for (sentence_t::const_iterator item = common.begin(); item != common.end(); ++item) {
+    const int klass = item->second;
+    const float w = semposScorer->weight(item->first);
+    stats[2 * klass] += w * scale;
+  }
+
+  for (sentence_t::const_iterator item = ref.begin(); item != ref.end(); ++item) {
+    const int klass = item->second;
+    const float w = semposScorer->weight(item->first);
+    stats[2 * klass + 1] += w * scale;
+  }
+
+  return stats;
+}
+
+// Macro-averaged overlap score.
+//
+// stats must hold 2 * kMaxNOC entries laid out as consecutive pairs
+// (stats[2 * i], stats[2 * i + 1]) = (clipped count, reference size) of
+// class i. The result is the mean of clipped / refsize over all classes
+// whose reference size is positive; when there is no such class, 1 is
+// returned.
+//
+// Throws runtime_error when stats has the wrong number of entries.
+float CapMacroOverlapping::calculateScore(const vector<int>& stats) const
+{
+  // The expected size follows kMaxNOC; the previous message hard-coded a
+  // stale number that did not match it.
+  if (stats.size() != static_cast<vector<int>::size_type>(2 * kMaxNOC)) {
+    throw std::runtime_error("Size of stats vector has to be 2 * kMaxNOC");
+  }
+
+  int n = 0;
+  float sum = 0;
+  for (int i = 0; i < kMaxNOC; ++i) {
+    const int clipped = stats[2 * i];
+    const int refsize = stats[2 * i + 1];
+    // Classes absent from the reference do not take part in the average.
+    if (refsize > 0) {
+      sum += clipped / static_cast<float>(refsize);
+      ++n;
+    }
+  }
+  if (n == 0) return 1;
+  return sum / n;
+}
diff --git a/mert/SemposOverlapping.h b/mert/SemposOverlapping.h
new file mode 100644
index 000000000..e16ffe7bb
--- /dev/null
+++ b/mert/SemposOverlapping.h
@@ -0,0 +1,90 @@
+#ifndef MERT_SEMPOSOVERLAPPING_H_
+#define MERT_SEMPOSOVERLAPPING_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+class SemposScorer;
+
+// Upper bound on the number of distinct sempos classes ("NOC" presumably
+// stands for "number of classes" -- TODO confirm).
+// SemposScorer::encodeSempos() throws when a new class shows up after
+// kMaxNOC classes have already been numbered, and CapMacroOverlapping keeps
+// two statistics per class, i.e. a stats vector of 2 * kMaxNOC entries.
+const int kMaxNOC = 50;
+
+typedef std::pair<std::string, std::string> str_item_t;
+typedef std::vector<str_item_t> str_sentence_t;
+typedef str_sentence_t::const_iterator str_sentence_it;
+
+typedef std::pair<int,int> item_t;
+typedef std::multiset<item_t> sentence_t;
+typedef sentence_t::const_iterator sentence_it;
+
+/**
+ * An interface for classes representing overlapping formulas
+ */
+class SemposOverlapping
+{
+public:
+  virtual ~SemposOverlapping() {}
+
+  // Computes the integer statistics for one candidate/reference pair.
+  virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref) = 0;
+
+  // Turns a statistics vector into a score.
+  virtual float calculateScore(const std::vector<int>& stats) const = 0;
+
+  // Number of entries in the statistics vector used by the formula.
+  virtual std::size_t NumberOfScores() const = 0;
+};
+
+// Static-only helper for obtaining SemposOverlapping objects; it cannot be
+// instantiated from outside because its constructor and destructor are
+// private.
+class SemposOverlappingFactory {
+ public:
+  // Selects an overlapping by the name given in str; sempos is the scorer
+  // the overlapping is created for.
+  static SemposOverlapping* GetOverlapping(const std::string& str, const SemposScorer* sempos);
+
+  // dependency injection for unit testing.
+  static void SetOverlapping(SemposOverlapping* ovr);
+
+ private:
+  SemposOverlappingFactory() {}
+  ~SemposOverlappingFactory() {}
+};
+
+/**
+ * Overlapping proposed by (Bojar and Machacek, WMT 2011)
+ *
+ * Please refer to the paper for details:
+ * http://aclweb.org/anthology-new/W/W11/W11-2108.pdf
+ *
+ * Uses two statistics per sentence (see NumberOfScores()).
+ */
+class CapMicroOverlapping : public SemposOverlapping
+{
+public:
+  // The scorer pointer is stored as-is; the destructor does not delete it.
+  // explicit: a SemposScorer pointer must not silently convert into an
+  // overlapping object.
+  explicit CapMicroOverlapping(const SemposScorer* sempos) : semposScorer(sempos) {}
+  ~CapMicroOverlapping() {}
+
+  virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
+  virtual float calculateScore(const std::vector<int>& stats) const;
+  virtual std::size_t NumberOfScores() const { return 2; }
+
+ private:
+  // no copying allowed.
+  CapMicroOverlapping(const CapMicroOverlapping&);
+  CapMicroOverlapping& operator=(const CapMicroOverlapping&);
+
+  // Scorer that is asked for item weights.
+  const SemposScorer* semposScorer;
+};
+
+/**
+ * Overlapping proposed by (Kos and Bojar, 2009)
+ *
+ * Uses two statistics per class, kMaxNOC * 2 in total (see
+ * NumberOfScores()).
+ */
+class CapMacroOverlapping : public SemposOverlapping
+{
+public:
+  // The scorer pointer is stored as-is; the destructor does not delete it.
+  // explicit: a SemposScorer pointer must not silently convert into an
+  // overlapping object.
+  explicit CapMacroOverlapping(const SemposScorer* sempos) : semposScorer(sempos) {}
+  ~CapMacroOverlapping() {}
+
+  virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
+  virtual float calculateScore(const std::vector<int>& stats) const;
+  virtual std::size_t NumberOfScores() const { return kMaxNOC * 2; }
+
+ private:
+  // no copying allowed.
+  CapMacroOverlapping(const CapMacroOverlapping&);
+  CapMacroOverlapping& operator=(const CapMacroOverlapping&);
+
+  // Scorer that is asked for item weights.
+  const SemposScorer* semposScorer;
+};
+
+#endif  // MERT_SEMPOSOVERLAPPING_H_
diff --git a/mert/SemposScorer.cpp b/mert/SemposScorer.cpp
new file mode 100644
index 000000000..30105c01f
--- /dev/null
+++ b/mert/SemposScorer.cpp
@@ -0,0 +1,179 @@
+#include "SemposScorer.h"
+
+#include <algorithm>
+#include <vector>
+#include <stdexcept>
+#include <fstream>
+
+#include "Util.h"
+#include "SemposOverlapping.h"
+
+using namespace std;
+
+// Builds a SEMPOS scorer from its config string. Config keys read here:
+//   overlapping  - name of the overlapping formula (default "cap-micro")
+//   debug        - "1" switches debugging on (default "0")
+//   weightsfile  - file with item weights; skipped when empty (default)
+SemposScorer::SemposScorer(const string& config)
+  : StatisticsBasedScorer("SEMPOS", config),
+    m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"),this)),
+    m_enable_debug(false)
+{
+  m_enable_debug = (getConfig("debug", "0") == "1");
+
+  m_semposMap.clear();
+
+  const string weightsfile = getConfig("weightsfile", "");
+  if (!weightsfile.empty()) {
+    loadWeights(weightsfile);
+  }
+}
+
+SemposScorer::~SemposScorer() {}
+
+// Loads the reference translations. Every file contributes one list of
+// encoded sentences (one per input line), stored in the order of
+// referenceFiles. Previously loaded references are discarded.
+// Throws runtime_error when a file cannot be opened.
+void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
+{
+  // Start from a clean slate.
+  m_ref_sentences.clear();
+
+  for (size_t rid = 0; rid < referenceFiles.size(); ++rid) {
+    const string& path = referenceFiles[rid];
+    ifstream refin(path.c_str());
+    if (!refin) {
+      throw runtime_error("Unable to open: " + path);
+    }
+
+    // The list just appended is the one belonging to reference rid.
+    m_ref_sentences.push_back(vector<sentence_t>());
+    vector<sentence_t>& encodedRefs = m_ref_sentences.back();
+
+    string line;
+    while (getline(refin, line)) {
+      str_sentence_t tokens;
+      splitSentence(preprocessSentence(line), tokens);
+
+      sentence_t encoded;
+      encodeSentence(tokens, encoded);
+
+      encodedRefs.push_back(encoded);
+    }
+  }
+}
+
+// Computes the statistics of one candidate translation against the
+// reference sentence(s) with index sid and stores them in entry.
+//
+// With a single reference its statistics are used directly. With several
+// references the statistics of the best-scoring reference are kept (the
+// first one wins on ties).
+//
+// Throws runtime_error when sid is not a valid sentence index for one of
+// the loaded references.
+void SemposScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+{
+  vector<ScoreStatsType> stats;
+
+  const string& sentence = preprocessSentence(text);
+  str_sentence_t splitCandSentence;
+  splitSentence(sentence, splitCandSentence);
+
+  sentence_t encodedCandSentence;
+  encodeSentence(splitCandSentence, encodedCandSentence);
+
+  // Guard the m_ref_sentences[rid][sid] accesses below.
+  for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
+    if (sid >= m_ref_sentences[rid].size()) {
+      throw runtime_error("Sentence id out of range in SemposScorer::prepareStats");
+    }
+  }
+
+  if (m_ref_sentences.size() == 1) {
+    stats = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[0][sid]);
+  } else {
+    float max = -1.0f;
+    for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
+      const vector<ScoreStatsType>& tmp = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[rid][sid]);
+      const float score = m_ovr->calculateScore(tmp);
+      if (score > max) {
+        // Remember the best score so far; without this update every
+        // reference would beat the initial -1 and the last one would win.
+        max = score;
+        stats = tmp;
+      }
+    }
+  }
+  entry.set(stats);
+}
+
+// Splits a sentence on ' ' into tokens and every token on '|' into an
+// (item, class) pair. The output vector is cleared first.
+// Throws runtime_error for a token that does not have exactly two factors.
+void SemposScorer::splitSentence(const string& sentence, str_sentence_t& splitSentence)
+{
+  splitSentence.clear();
+
+  vector<string> tokens;
+  split(sentence, ' ', tokens);
+
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    vector<string> factors;
+    split(tokens[i], '|', factors);
+    if (factors.size() != 2) {
+      throw runtime_error("Sempos scorer accepts two factors (item|class)");
+    }
+    splitSentence.push_back(str_item_t(factors[0], factors[1]));
+  }
+}
+
+// Converts (item, class) string pairs into integer pairs and adds them to
+// encodedSentence. Pairs whose class encodes to a negative number are left
+// out; their item string is still passed through encodeString().
+void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence)
+{
+  for (str_sentence_it word = sentence.begin(); word != sentence.end(); ++word) {
+    const int tlemma = encodeString(word->first);
+    const int sempos = encodeSempos(word->second);
+    if (sempos < 0) continue;
+    encodedSentence.insert(item_t(tlemma, sempos));
+  }
+}
+
+// Returns the integer id of str. An unseen string gets the next free id,
+// i.e. the number of strings known so far, and is remembered.
+int SemposScorer::encodeString(const string& str)
+{
+  const encoding_it known = m_stringMap.find(str);
+  if (known != m_stringMap.end()) {
+    return known->second;
+  }
+
+  const int fresh = static_cast<int>(m_stringMap.size());
+  m_stringMap[str] = fresh;
+  return fresh;
+}
+
+// Returns the class number of a sempos tag, or -1 for the tag "-".
+// An unseen tag gets the next free number and is remembered.
+// Throws runtime_error when kMaxNOC classes are already in use and a new
+// one would be needed.
+int SemposScorer::encodeSempos(const string& sempos)
+{
+  if (sempos == "-") return -1;
+
+  const encoding_it known = m_semposMap.find(sempos);
+  if (known != m_semposMap.end()) {
+    return known->second;
+  }
+
+  const int classNumber = static_cast<int>(m_semposMap.size());
+  if (classNumber == kMaxNOC) {
+    throw std::runtime_error("Number of classes is greater than kMaxNOC");
+  }
+  m_semposMap[sempos] = classNumber;
+  return classNumber;
+}
+
+// Weight of an encoded item; items without an entry in weightsMap weigh 1.
+float SemposScorer::weight(int item) const
+{
+  const std::map<int,float>::const_iterator entry = weightsMap.find(item);
+  return (entry != weightsMap.end()) ? entry->second : 1.0f;
+}
+
+// Reads item weights from a file with one "item<TAB>weight" row per line.
+// Empty lines are skipped. Every item is encoded with encodeString() and
+// its weight stored in weightsMap.
+// Throws runtime_error for a row that does not have exactly two fields.
+// If the file cannot be opened, a message is written to cerr and the
+// program exits with status 1.
+void SemposScorer::loadWeights(const string& weightsfile)
+{
+  ifstream in;
+  in.open(weightsfile.c_str(), ifstream::in);
+  if (!in.is_open()) {
+    cerr << "Unable to open file "<< weightsfile << endl;
+    exit(1);
+  }
+
+  string row;
+  while (getline(in, row)) {
+    if (row == "") continue;
+
+    vector<string> fields;
+    split(row, '\t', fields);
+    if (fields.size() != 2) throw std::runtime_error("Bad format of a row in weights file.");
+
+    const int encoded = encodeString(fields[0]);
+    const float itemWeight = atof(fields[1].c_str());
+    weightsMap[encoded] = itemWeight;
+  }
+  in.close();
+}
diff --git a/mert/SemposScorer.h b/mert/SemposScorer.h
new file mode 100644
index 000000000..e0ab84768
--- /dev/null
+++ b/mert/SemposScorer.h
@@ -0,0 +1,64 @@
+#ifndef MERT_SEMPOSSCORER_H_
+#define MERT_SEMPOSSCORER_H_
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+#include <boost/scoped_ptr.hpp>
+
+#include "Scorer.h"
+
+// NOTE: This header should be included in .cpp file
+// because SemposScorer wants to know what actual SemposOverlapping type is
+// when we implement the scorer in .cpp file.
+// However, currently SemposScorer uses a bunch of typedefs, which are
+// used in SemposScorer as well as inherited SemposOverlapping classes.
+#include "SemposOverlapping.h"
+
+/**
+ * This class represents sempos based metrics.
+ *
+ * Sentences are sequences of item|class tokens. The statistics and the
+ * score themselves are computed by a SemposOverlapping object (m_ovr).
+ */
+class SemposScorer: public StatisticsBasedScorer
+{
+public:
+  explicit SemposScorer(const std::string& config);
+  ~SemposScorer();
+
+  virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
+  virtual void prepareStats(std::size_t sindex, const std::string& text, ScoreStats& entry);
+
+  // Both of these simply forward to the overlapping object.
+  virtual std::size_t NumberOfScores() const { return m_ovr->NumberOfScores(); }
+  virtual float calculateScore(const std::vector<int>& comps) const {
+    return m_ovr->calculateScore(comps);
+  }
+
+  bool EnableDebug() const { return m_enable_debug; }
+
+  // Weight of an encoded item.
+  float weight(int item) const;
+
+private:
+  // Overlapping formula in use; owned by this scorer.
+  boost::scoped_ptr<SemposOverlapping> m_ovr;
+  // Encoded reference sentences, indexed as [reference][sentence].
+  std::vector<std::vector<sentence_t> > m_ref_sentences;
+
+  typedef std::map<std::string, int> encoding_t;
+  typedef encoding_t::iterator encoding_it;
+
+  // String-to-integer encodings for class tags and for items.
+  encoding_t m_semposMap;
+  encoding_t m_stringMap;
+  bool m_enable_debug;
+
+  void splitSentence(const std::string& sentence, str_sentence_t& splitSentence);
+  void encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence);
+  int encodeString(const std::string& str);
+  int encodeSempos(const std::string& sempos);
+
+  // Encoded item -> weight, filled by loadWeights().
+  std::map<int, float> weightsMap;
+
+  // std:: qualified like every other declaration in this header, so the
+  // header does not rely on a using-directive pulled in from elsewhere.
+  void loadWeights(const std::string& weightsfile);
+
+  // no copying allowed.
+  SemposScorer(const SemposScorer&);
+  SemposScorer& operator=(const SemposScorer&);
+};
+
+#endif  // MERT_SEMPOSSCORER_H_
diff --git a/mert/Singleton.h b/mert/Singleton.h
new file mode 100644
index 000000000..9fef3e639
--- /dev/null
+++ b/mert/Singleton.h
@@ -0,0 +1,33 @@
+#ifndef MERT_SINGLETON_H_
+#define MERT_SINGLETON_H_
+
+#include <cstdlib>
+
+// Holder of a single, lazily created instance of T.
+// thread *un*safe singleton.
+// TODO: replace this with thread-safe singleton.
+template <typename T>
+class Singleton {
+ public:
+  // Creates the instance with "new T" on first use; later calls return the
+  // same pointer.
+  static T* GetInstance() {
+    if (!m_instance) m_instance = new T;
+    return m_instance;
+  }
+
+  // Destroys the instance, if any; the next GetInstance() creates a new one.
+  static void Delete() {
+    delete m_instance;  // deleting a null pointer is a no-op
+    m_instance = NULL;
+  }
+
+ private:
+  Singleton();
+  static T* m_instance;
+};
+
+template <typename T>
+T* Singleton<T>::m_instance = NULL;
+
+#endif  // MERT_SINGLETON_H_
diff --git a/mert/SingletonTest.cpp b/mert/SingletonTest.cpp
new file mode 100644
index 000000000..2c44bdc1f
--- /dev/null
+++ b/mert/SingletonTest.cpp
@@ -0,0 +1,27 @@
+#include "Singleton.h"
+
+#define BOOST_TEST_MODULE MertSingleton
+#include <boost/test/unit_test.hpp>
+
+namespace {
+
+// Number of Instance objects constructed so far.
+static int g_count = 0;
+
+// Minimal payload type for Singleton<T>: its constructor increments g_count
+// so the test can see how many objects were created.
+class Instance {
+ public:
+  Instance() { ++g_count; }
+  ~Instance() {}
+};
+
+} // namespace
+
+// GetInstance() must hand out the same object on every call and construct
+// it exactly once.
+BOOST_AUTO_TEST_CASE(singleton_basic) {
+  Instance* instance1 = Singleton<Instance>::GetInstance();
+  Instance* instance2 = Singleton<Instance>::GetInstance();
+  Instance* instance3 = Singleton<Instance>::GetInstance();
+  BOOST_REQUIRE(instance1 == instance2);
+  BOOST_REQUIRE(instance2 == instance3);
+  // Three calls, one construction.
+  BOOST_CHECK_EQUAL(1, g_count);
+
+  // Release the instance created above.
+  Singleton<Instance>::Delete();
+}
diff --git a/mert/TERsrc/alignmentStruct.cpp b/mert/TER/alignmentStruct.cpp
index 15b4a8032..15b4a8032 100644
--- a/mert/TERsrc/alignmentStruct.cpp
+++ b/mert/TER/alignmentStruct.cpp
diff --git a/mert/TERsrc/alignmentStruct.h b/mert/TER/alignmentStruct.h
index 27e8c35d3..9e9a75468 100644
--- a/mert/TERsrc/alignmentStruct.h
+++ b/mert/TER/alignmentStruct.h
@@ -1,6 +1,5 @@
-#ifndef __TERCPPALIGNMENTSTRUCT_H__
-#define __TERCPPALIGNMENTSTRUCT_H__
-
+#ifndef MERT_TER_ALIGNMENT_STRUCT_H_
+#define MERT_TER_ALIGNMENT_STRUCT_H_
 
 #include <vector>
 #include <stdio.h>
@@ -8,7 +7,6 @@
 #include <sstream>
 #include "tools.h"
 
-
 using namespace std;
 using namespace Tools;
 
@@ -44,4 +42,4 @@ public:
 
 }
 
-#endif  // __TERCPPALIGNMENTSTRUCT_H__
+#endif  // MERT_TER_ALIGNMENT_STRUCT_H_
diff --git a/mert/TERsrc/bestShiftStruct.h b/mert/TER/bestShiftStruct.h
index 141ebdeb8..bfebe3b1e 100644
--- a/mert/TERsrc/bestShiftStruct.h
+++ b/mert/TER/bestShiftStruct.h
@@ -1,5 +1,5 @@
-#ifndef __BESTSHIFTSTRUCT_H__
-#define __BESTSHIFTSTRUCT_H__
+#ifndef MERT_TER_BEST_SHIFT_STRUCT_H_
+#define MERT_TER_BEST_SHIFT_STRUCT_H_
 
 #include <vector>
 #include <stdio.h>
@@ -47,4 +47,4 @@ public:
 
 }
 
-#endif  // __BESTSHIFTSTRUCT_H__
+#endif  // MERT_TER_BEST_SHIFT_STRUCT_H_
diff --git a/mert/TERsrc/hashMap.cpp b/mert/TER/hashMap.cpp
index 469167aaa..469167aaa 100644
--- a/mert/TERsrc/hashMap.cpp
+++ b/mert/TER/hashMap.cpp
diff --git a/mert/TERsrc/hashMap.h b/mert/TER/hashMap.h
index c3e4578e5..85020d041 100644
--- a/mert/TERsrc/hashMap.h
+++ b/mert/TER/hashMap.h
@@ -2,8 +2,8 @@
  * Generic hashmap manipulation functions
  */
 
-#ifndef __HASHMAP_H__
-#define __HASHMAP_H__
+#ifndef MERT_TER_HASHMAP_H_
+#define MERT_TER_HASHMAP_H_
 
 #include "stringHasher.h"
 #include <vector>
@@ -40,4 +40,4 @@ public:
 
 }
 
-#endif  // __HASHMAP_H__
+#endif  // MERT_TER_HASHMAP_H_
diff --git a/mert/TERsrc/hashMapInfos.cpp b/mert/TER/hashMapInfos.cpp
index 9cd431196..9cd431196 100644
--- a/mert/TERsrc/hashMapInfos.cpp
+++ b/mert/TER/hashMapInfos.cpp
diff --git a/mert/TERsrc/hashMapInfos.h b/mert/TER/hashMapInfos.h
index f4a46acf8..8b56e9d02 100644
--- a/mert/TERsrc/hashMapInfos.h
+++ b/mert/TER/hashMapInfos.h
@@ -1,8 +1,8 @@
 /*
  * Generic hashmap manipulation functions
  */
-#ifndef __HASHMAPINFOS_H__
-#define __HASHMAPINFOS_H__
+#ifndef MERT_TER_HASHMAP_INFOS_H_
+#define MERT_TER_HASHMAP_INFOS_H_
 
 #include "infosHasher.h"
 #include <vector>
@@ -39,4 +39,4 @@ public:
 
 }
 
-#endif  // __HASHMAPINFOS_H__
+#endif  // MERT_TER_HASHMAP_INFOS_H_
diff --git a/mert/TERsrc/hashMapStringInfos.cpp b/mert/TER/hashMapStringInfos.cpp
index 0fbb0a98a..0fbb0a98a 100644
--- a/mert/TERsrc/hashMapStringInfos.cpp
+++ b/mert/TER/hashMapStringInfos.cpp
diff --git a/mert/TERsrc/hashMapStringInfos.h b/mert/TER/hashMapStringInfos.h
index 7912be0a2..870274f3d 100644
--- a/mert/TERsrc/hashMapStringInfos.h
+++ b/mert/TER/hashMapStringInfos.h
@@ -1,8 +1,8 @@
 /*
  * Generic hashmap manipulation functions
  */
-#ifndef __HASHMAPSTRINGINFOS_H__
-#define __HASHMAPSTRINGINFOS_H__
+#ifndef MERT_TER_HASHMAP_STRING_INFOS_H_
+#define MERT_TER_HASHMAP_STRING_INFOS_H_
 
 #include "stringInfosHasher.h"
 #include <vector>
@@ -39,4 +39,4 @@ public:
 
 }
 
-#endif  // __HASHMAPSTRINGINFOS_H__
+#endif  // MERT_TER_HASHMAP_STRING_INFOS_H_
diff --git a/mert/TERsrc/infosHasher.cpp b/mert/TER/infosHasher.cpp
index 654b0b26f..654b0b26f 100644
--- a/mert/TERsrc/infosHasher.cpp
+++ b/mert/TER/infosHasher.cpp
diff --git a/mert/TERsrc/infosHasher.h b/mert/TER/infosHasher.h
index 8bc2ccd00..02a32280b 100644
--- a/mert/TERsrc/infosHasher.h
+++ b/mert/TER/infosHasher.h
@@ -1,5 +1,5 @@
-#ifndef __INFOSHASHER_H__
-#define __INFOSHASHER_H__
+#ifndef MERT_TER_INFO_SHASHER_H_
+#define MERT_TER_INFO_SHASHER_H_
 
 #include <string>
 #include <stdio.h>
@@ -28,4 +28,4 @@ public:
 
 }
 
-#endif  // __INFOSHASHER_H__
+#endif  // MERT_TER_INFO_SHASHER_H_
diff --git a/mert/TERsrc/stringHasher.cpp b/mert/TER/stringHasher.cpp
index 24fde0e32..24fde0e32 100644
--- a/mert/TERsrc/stringHasher.cpp
+++ b/mert/TER/stringHasher.cpp
diff --git a/mert/TERsrc/stringHasher.h b/mert/TER/stringHasher.h
index 0894812f0..897bd9ff5 100644
--- a/mert/TERsrc/stringHasher.h
+++ b/mert/TER/stringHasher.h
@@ -1,5 +1,5 @@
-#ifndef __STRINGHASHER_H__
-#define __STRINGHASHER_H__
+#ifndef MERT_TER_STRING_HASHER_H_
+#define MERT_TER_STRING_HASHER_H_
 
 #include <string>
 #include <iostream>
@@ -25,4 +25,4 @@ public:
 
 }
 
-#endif  // __STRINGHASHER_H__
+#endif  // MERT_TER_STRING_HASHER_H_
diff --git a/mert/TERsrc/stringInfosHasher.cpp b/mert/TER/stringInfosHasher.cpp
index 3e02e7a20..3e02e7a20 100644
--- a/mert/TERsrc/stringInfosHasher.cpp
+++ b/mert/TER/stringInfosHasher.cpp
diff --git a/mert/TERsrc/stringInfosHasher.h b/mert/TER/stringInfosHasher.h
index e9324cc47..c1b891662 100644
--- a/mert/TERsrc/stringInfosHasher.h
+++ b/mert/TER/stringInfosHasher.h
@@ -1,5 +1,5 @@
-#ifndef __STRINGINFOSHASHER_H__
-#define __STRINGINFOSHASHER_H__
+#ifndef MERT_TER_STRING_INFOS_HASHER_H_
+#define MERT_TER_STRING_INFOS_HASHER_H_
 
 #include <string>
 #include <iostream>
@@ -25,4 +25,4 @@ public:
 
 }
 
-#endif  // __STRINGINFOSHASHER_H__
+#endif  // MERT_TER_STRING_INFOS_HASHER_H_
diff --git a/mert/TERsrc/terAlignment.cpp b/mert/TER/terAlignment.cpp
index 87be53b11..87be53b11 100644
--- a/mert/TERsrc/terAlignment.cpp
+++ b/mert/TER/terAlignment.cpp
diff --git a/mert/TERsrc/terAlignment.h b/mert/TER/terAlignment.h
index bca00ead3..c8c82eac8 100644
--- a/mert/TERsrc/terAlignment.h
+++ b/mert/TER/terAlignment.h
@@ -1,5 +1,5 @@
-#ifndef __TERCPPTERALIGNMENT_H__
-#define __TERCPPTERALIGNMENT_H__
+#ifndef MERT_TER_TER_ALIGNMENT_H_
+#define MERT_TER_TER_ALIGNMENT_H_
 
 #include <vector>
 #include <stdio.h>
@@ -48,4 +48,4 @@ public:
 
 }
 
-#endif  // __TERCPPTERALIGNMENT_H__
+#endif  // MERT_TER_TER_ALIGNMENT_H_
diff --git a/mert/TERsrc/terShift.cpp b/mert/TER/terShift.cpp
index 428803849..428803849 100644
--- a/mert/TERsrc/terShift.cpp
+++ b/mert/TER/terShift.cpp
diff --git a/mert/TERsrc/terShift.h b/mert/TER/terShift.h
index a54ba633d..679a7c8bb 100644
--- a/mert/TERsrc/terShift.h
+++ b/mert/TER/terShift.h
@@ -1,5 +1,5 @@
-#ifndef __TERCPPTERSHIFT_H__
-#define __TERCPPTERSHIFT_H__
+#ifndef MERT_TER_TER_SHIFT_H_
+#define MERT_TER_TER_SHIFT_H_
 
 #include <vector>
 #include <stdio.h>
@@ -41,4 +41,4 @@ public:
 
 }
 
-#endif  // __TERCPPTERSHIFT_H__
+#endif  // MERT_TER_TER_SHIFT_H_
diff --git a/mert/TERsrc/tercalc.cpp b/mert/TER/tercalc.cpp
index e16f692e8..e16f692e8 100644
--- a/mert/TERsrc/tercalc.cpp
+++ b/mert/TER/tercalc.cpp
diff --git a/mert/TERsrc/tercalc.h b/mert/TER/tercalc.h
index cf205ccbb..9e1a01f65 100644
--- a/mert/TERsrc/tercalc.h
+++ b/mert/TER/tercalc.h
@@ -1,5 +1,5 @@
-#ifndef _TERCPPTERCALC_H___
-#define _TERCPPTERCALC_H___
+#ifndef MERT_TER_TER_CALC_H_
+#define MERT_TER_TER_CALC_H_
 
 #include <vector>
 #include <stdio.h>
@@ -79,4 +79,4 @@ public:
 
 }
 
-#endif  // _TERCPPTERCALC_H___
+#endif  // MERT_TER_TER_CALC_H_
diff --git a/mert/TERsrc/tools.cpp b/mert/TER/tools.cpp
index 2d910ec05..2d910ec05 100644
--- a/mert/TERsrc/tools.cpp
+++ b/mert/TER/tools.cpp
diff --git a/mert/TERsrc/tools.h b/mert/TER/tools.h
index df681a2b2..6f78b9a6a 100644
--- a/mert/TERsrc/tools.h
+++ b/mert/TER/tools.h
@@ -1,5 +1,5 @@
-#ifndef __TERCPPTOOLS_H__
-#define __TERCPPTOOLS_H__
+#ifndef MERT_TER_TOOLS_H_
+#define MERT_TER_TOOLS_H_
 
 #include <vector>
 #include <iostream>
@@ -62,4 +62,4 @@ param copyParam(param p);
 
 }
 
-#endif  // __TERCPPTOOLS_H__
+#endif  // MERT_TER_TOOLS_H_
diff --git a/mert/TODO b/mert/TODO
index 2559e78b5..21b4ce04e 100644
--- a/mert/TODO
+++ b/mert/TODO
@@ -4,3 +4,21 @@
     - this may make use of 'evaluator', soon to be added by Matous Machacek
 
 - check that --pairwise-ranked is compatible with all optimization metrics
+
+- Replace the standard rand() currently used in MERT and PRO with better
+  random generators such as Boost's random generators (e.g., boost::mt19937).
+  - create a Random class to hide the details, i.e., how to generate
+    random numbers, which allows us to use custom random generators more
+    easily.
+
+  Pros:
+  - In MERT, you might want to use the random restarting technique to avoid
+    local optima.
+  - PRO uses a sampling technique to choose candidate translation pairs
+    from N-best lists, which means the choice of random generators seems to
+    be important.
+
+  Cons:
+  - This change will require us to re-create the truth results for regression
+    testing related to MERT and PRO because the new random generator will
+    generate different numbers than the current generator does.
diff --git a/mert/TerScorer.cpp b/mert/TerScorer.cpp
index ac029b027..2cfb19275 100644
--- a/mert/TerScorer.cpp
+++ b/mert/TerScorer.cpp
@@ -5,8 +5,8 @@
 #include <stdexcept>
 
 #include "ScoreStats.h"
-#include "TERsrc/tercalc.h"
-#include "TERsrc/terAlignment.h"
+#include "TER/tercalc.h"
+#include "TER/terAlignment.h"
 #include "Util.h"
 
 using namespace TERCpp;
@@ -33,6 +33,7 @@ void TerScorer::setReferenceFiles ( const vector<string>& referenceFiles )
     string line;
     int sid = 0;
     while ( getline ( in, line ) ) {
+      line = this->preprocessSentence(line);
       vector<int> tokens;
       TokenizeAndEncode(line, tokens);
       m_references.push_back ( tokens );
@@ -48,6 +49,7 @@ void TerScorer::setReferenceFiles ( const vector<string>& referenceFiles )
 
 void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry )
 {
+  string sentence = this->preprocessSentence(text);
 
   terAlignment result;
   result.numEdits = 0.0 ;
@@ -74,7 +76,7 @@ void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry
       averageLength+=(double)m_multi_references.at ( incRefsBis ).at ( sid ).size();
     }
     averageLength=averageLength/( double ) m_multi_references.size();
-    TokenizeAndEncode(text, testtokens);
+    TokenizeAndEncode(sentence, testtokens);
     terCalc * evaluation=new terCalc();
     evaluation->setDebugMode ( false );
     terAlignment tmp_result = evaluation->TER ( reftokens, testtokens );
diff --git a/mert/TerScorer.h b/mert/TerScorer.h
index 7ffb4c741..46b02924e 100644
--- a/mert/TerScorer.h
+++ b/mert/TerScorer.h
@@ -1,5 +1,5 @@
-#ifndef __TERSCORER_H__
-#define __TERSCORER_H__
+#ifndef MERT_TER_SCORER_H_
+#define MERT_TER_SCORER_H_
 
 #include <iostream>
 #include <set>
@@ -54,4 +54,4 @@ private:
   TerScorer& operator=(const TerScorer&);
 };
 
-#endif // __TERSCORER_H__
+#endif // MERT_TER_SCORER_H_
diff --git a/mert/Timer.cpp b/mert/Timer.cpp
index 373eb4a2e..5235edb04 100644
--- a/mert/Timer.cpp
+++ b/mert/Timer.cpp
@@ -1,73 +1,104 @@
 #include "Timer.h"
 #include "Util.h"
 
-double Timer::elapsed_time()
-{
-  time_t now;
-  time(&now);
-  return difftime(now, start_time);
-}
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/resource.h>
+#include <sys/time.h>
+#endif
 
-double Timer::get_elapsed_time()
-{
-  return elapsed_time();
+namespace {
+
+#if !defined(_WIN32) && !defined(_WIN64)
+uint64_t GetMicroSeconds(const struct timeval& tv) {
+  return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 }
 
-void Timer::start(const char* msg)
-{
-  // Print an optional message, something like "Starting timer t";
-  if (msg) TRACE_ERR( msg << std::endl);
+// Current time of day in microseconds, read with gettimeofday().
+uint64_t GetTimeOfDayMicroSeconds() {
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+}
+#else  // Windows
+// Not implemented yet: always reports 0. The definition exists so that the
+// unconditional callers in this file (start(), restart() and
+// get_elapsed_wall_time_microseconds()) still compile on Windows.
+// TODO: implement the Windows version using native APIs.
+uint64_t GetTimeOfDayMicroSeconds() {
+  return 0;
+}
+#endif
 
-  // Return immediately if the timer is already running
-  if (running) return;
+} // namespace
 
-  // Change timer status to running
-  running = true;
+// Fills *cpu_time with the user and system CPU time reported by
+// getrusage(RUSAGE_SELF), converted to microseconds.
+// If getrusage() fails, an error is traced and the program exits with
+// status 1. On Windows the body is empty, so *cpu_time is left unchanged.
+void Timer::GetCPUTimeMicroSeconds(Timer::CPUTime* cpu_time) const {
+#if !defined(_WIN32) && !defined(_WIN64)
+  struct rusage usage;
+  // getrusage() returns non-zero on failure.
+  if (getrusage(RUSAGE_SELF, &usage)) {
+    TRACE_ERR("Error occurred: getrusage().\n");
+    exit(1);
+  }
+  cpu_time->user_time = GetMicroSeconds(usage.ru_utime);
+  cpu_time->sys_time = GetMicroSeconds(usage.ru_stime);
+#else  // Windows
+  // Not implemented yet.
+  // TODO: implement the Windows version using native APIs.
+#endif
+}
 
-  // Set the start time;
-  time(&start_time);
+// CPU time (user + system) used since the recorded start, in seconds.
+double Timer::get_elapsed_cpu_time() const {
+  return static_cast<double>(get_elapsed_cpu_time_microseconds()) * 1e-6;
 }
 
-/***
- * Turn the timer off and start it again from 0.  Print an optional message.
- */
-/*
-inline void Timer::restart(const char* msg)
-{
-  // Print an optional message, something like "Restarting timer t";
-  if (msg) TRACE_ERR( msg << std::endl;
+// CPU time used since the recorded start, in microseconds: the sum of the
+// user-time and system-time differences between now and m_start_time.
+uint64_t Timer::get_elapsed_cpu_time_microseconds() const {
+  CPUTime now;
+  GetCPUTimeMicroSeconds(&now);
+  const uint64_t user_delta = now.user_time - m_start_time.user_time;
+  const uint64_t sys_delta = now.sys_time - m_start_time.sys_time;
+  return user_delta + sys_delta;
+}
 
-  // Set the timer status to running
-  running = true;
+// Wall-clock time elapsed since the recorded start, in seconds.
+double Timer::get_elapsed_wall_time() const {
+  return static_cast<double>(get_elapsed_wall_time_microseconds()) * 1e-6;
+}
 
-  // Set the accumulated time to 0 and the start time to now
-  acc_time = 0;
-  start_clock = clock();
-  start_time = time(0);
+// Microseconds of wall-clock time between the recorded start (m_wall) and
+// now.
+uint64_t Timer::get_elapsed_wall_time_microseconds() const {
+  return GetTimeOfDayMicroSeconds() - m_wall;
 }
-*/
 
-/***
- * Stop the timer and print an optional message.
- */
-/*
-inline void Timer::stop(const char* msg)
+void Timer::start(const char* msg)
 {
-  // Print an optional message, something like "Stopping timer t";
-  check(msg);
-
-  // Recalculate and store the total accumulated time up until now
-  if (running) acc_time += elapsed_time();
+  // Print an optional message, something like "Starting timer t";
+  if (msg) TRACE_ERR( msg << std::endl);
+  if (m_is_running) return;
+  m_is_running = true;
+  m_wall = GetTimeOfDayMicroSeconds();
+  GetCPUTimeMicroSeconds(&m_start_time);
+}
 
-  running = false;
+// Traces msg (if given) and resets the recorded wall-clock and CPU start
+// times to the current values.
+// NOTE(review): unlike start(), this does not set m_is_running, so check()
+// on a timer that was only restart()ed still reports that the timer is not
+// running -- confirm whether that is intended.
+void Timer::restart(const char* msg)
+{
+  if (msg) {
+    TRACE_ERR(msg << std::endl);
+  }
+  m_wall = GetTimeOfDayMicroSeconds();
+  GetCPUTimeMicroSeconds(&m_start_time);
 }
-*/
 
 void Timer::check(const char* msg)
 {
   // Print an optional message, something like "Checking timer t";
   if (msg) TRACE_ERR( msg << " : ");
 
-//  TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
-  TRACE_ERR( "[" << (running ? elapsed_time() : 0) << "] seconds\n");
+  if (m_is_running) {
+    TRACE_ERR("[Wall " << get_elapsed_wall_time()
+              << " CPU " << get_elapsed_cpu_time() << "] seconds.\n");
+  } else {
+    TRACE_ERR("WARNING: the timer is not running.\n");
+  }
+}
+
+// Formats the time elapsed since the recorded start as one line holding
+// the wall-clock, user CPU, system CPU and total (user + system) CPU time
+// in seconds.
+std::string Timer::ToString() const {
+  const double wall_sec = get_elapsed_wall_time();
+
+  CPUTime now;
+  GetCPUTimeMicroSeconds(&now);
+  const double user_sec = (now.user_time - m_start_time.user_time) * 1e-6;
+  const double sys_sec = (now.sys_time - m_start_time.sys_time) * 1e-6;
+
+  std::stringstream out;
+  out << "wall " << wall_sec;
+  out << " sec. user " << user_sec;
+  out << " sec. sys " << sys_sec;
+  out << " sec. total " << user_sec + sys_sec << " sec.";
+  return out.str();
 }
diff --git a/mert/Timer.h b/mert/Timer.h
index 403547620..7b1101b50 100644
--- a/mert/Timer.h
+++ b/mert/Timer.h
@@ -1,46 +1,54 @@
-#ifndef TIMER_H
-#define TIMER_H
+#ifndef MERT_TIMER_H_
+#define MERT_TIMER_H_
 
-#include <ctime>
-#include <iostream>
-#include <iomanip>
+#include <ostream>
+#include <string>
+#include <stdint.h>
 
 class Timer
 {
-  /**
-   * Allow timers to be printed to ostreams using the syntax 'os << t'
-   * for an ostream 'os' and a timer 't'.  For example, "cout << t" will
-   * print out the total amount of time 't' has been "running".
-   */
-  friend std::ostream& operator<<(std::ostream& os, Timer& t);
+ private:
+  // Time values are stored in microseconds.
+  struct CPUTime {
+    uint64_t user_time;                 // user CPU time
+    uint64_t sys_time;                  // system CPU time
 
-private:
-  bool running;
-  time_t start_time;
+    CPUTime() : user_time(0), sys_time(0) { }
+  };
 
-  /**
-   * Return the total time that the timer has been in the "running"
-   * state since it was first "started" or last "restarted".  For
-   * "short" time periods (less than an hour), the actual cpu time
-   * used is reported instead of the elapsed time.
-   * TODO in seconds?
-   */
-  double elapsed_time();
+  void GetCPUTimeMicroSeconds(CPUTime* cpu_time) const;
+
+  bool m_is_running;
+  uint64_t m_wall;                      // wall-clock time in microseconds
+  CPUTime m_start_time;
+
+  // No copying allowed
+  Timer(const Timer&);
+  void operator=(const Timer&);
 
-public:
+ public:
   /**
-   * 'running' is initially false. A timer needs to be explicitly started
-   * using 'start' or 'restart'.
+   * 'm_is_running' is initially false. A timer needs to be explicitly started
+   * using 'start'.
    */
-  Timer() : running(false), start_time(0) { }
+  Timer()
+      : m_is_running(false),
+        m_wall(0),
+        m_start_time() {}
+
+  ~Timer() {}
 
   /**
    * Start a timer.  If it is already running, let it continue running.
    * Print an optional message.
    */
   void start(const char* msg = 0);
-//  void restart(const char* msg = 0);
-//  void stop(const char* msg = 0);
+
+  /**
+   * Restart the timer if it is already running.
+   * If the timer is not running, just start the timer.
+   */
+  void restart(const char* msg = 0);
 
   /**
    * Print out an optional message followed by the current timer timing.
@@ -48,20 +56,50 @@ public:
   void check(const char* msg = 0);
 
   /**
-   * Return the total time that the timer has been in the "running"
-   * state since it was first "started" or last "restarted".  For
-   * "short" time periods (less than an hour), the actual cpu time
+   * Return whether the timer is currently flagged as running. */
+  bool is_running() const { return m_is_running; }
+
+  /**
+   * Return the total time in seconds that the timer has been in the
+   * "running" state since it was first "started" or last "restarted".
+   * For "short" time periods (less than an hour), the actual cpu time
    * used is reported instead of the elapsed time.
-   * This function is the public version of elapsed_time()
    */
-  double get_elapsed_time();
+  double get_elapsed_cpu_time() const;
+
+  /**
+   * Return the total time in microseconds.
+   */
+  uint64_t get_elapsed_cpu_time_microseconds() const;
+
+  /**
+   * Get elapsed wall-clock time in seconds.
+   */
+  double get_elapsed_wall_time() const;
+
+  /**
+   * Get elapsed wall-clock time in microseconds.
+   */
+  uint64_t get_elapsed_wall_time_microseconds() const;
+
+  /**
+   * Return a string that has the user CPU time, system time, and total time.
+   */
+  std::string ToString() const;
 };
 
-inline std::ostream& operator<<(std::ostream& os, Timer& t)
-{
-  //os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << (t.running ? t.elapsed_time() : 0);
-  os << (t.running ? t.elapsed_time() : 0);
+/**
+ * Allow timers to be printed to ostreams using the syntax 'os << t'
+ * for an ostream 'os' and a timer 't'.  For example, "cout << t" will
+ * print out the total amount of time 't' has been "running".
+ */
+inline std::ostream& operator<<(std::ostream& os, const Timer& t) {
+  // A timer that is not running has no timings to report; print a notice.
+  if (!t.is_running()) {
+    return os << "timer is not running.";
+  }
+  os << t.ToString();
   return os;
 }
 
-#endif  // TIMER_H
+#endif  // MERT_TIMER_H_
diff --git a/mert/TimerTest.cpp b/mert/TimerTest.cpp
new file mode 100644
index 000000000..d9562a3df
--- /dev/null
+++ b/mert/TimerTest.cpp
@@ -0,0 +1,27 @@
+#include "Timer.h"
+
+#define BOOST_TEST_MODULE TimerTest
+#include <boost/test/unit_test.hpp>
+
+#include <string>
+#include <unistd.h>
+
+BOOST_AUTO_TEST_CASE(timer_basic_test) {
+  Timer timer;
+  const int sleep_time_microsec = 40; // ad-hoc microseconds to pass unit tests.
+
+  timer.start();
+  BOOST_REQUIRE(timer.is_running());
+  BOOST_REQUIRE(usleep(sleep_time_microsec) == 0);
+  BOOST_CHECK(timer.get_elapsed_wall_time() > 0.0);
+  BOOST_CHECK(timer.get_elapsed_wall_time_microseconds() > 0);
+
+  timer.restart();
+  BOOST_REQUIRE(timer.is_running());
+  BOOST_REQUIRE(usleep(sleep_time_microsec) == 0);
+  BOOST_CHECK(timer.get_elapsed_wall_time() > 0.0);
+  BOOST_CHECK(timer.get_elapsed_wall_time_microseconds() > 0);
+
+  const std::string s = timer.ToString();
+  BOOST_CHECK(!s.empty());
+}
diff --git a/mert/Types.h b/mert/Types.h
index 1d0fd0dd0..c65c6ffc2 100644
--- a/mert/Types.h
+++ b/mert/Types.h
@@ -1,5 +1,5 @@
-#ifndef TYPE_H
-#define TYPE_H
+#ifndef MERT_TYPE_H_
+#define MERT_TYPE_H_
 
 #include <vector>
 #include <map>
@@ -40,4 +40,4 @@ typedef vector<ScoreArray> scoredata_t;
 typedef map<size_t, std::string> idx2name;
 typedef map<std::string, size_t> name2idx;
 
-#endif  // TYPE_H
+#endif  // MERT_TYPE_H_
diff --git a/mert/Util.cpp b/mert/Util.cpp
index 3769c71e7..952aaf9aa 100644
--- a/mert/Util.cpp
+++ b/mert/Util.cpp
@@ -1,6 +1,6 @@
 /*
  *  Util.cpp
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
@@ -11,29 +11,28 @@
 
 using namespace std;
 
-// global variables
-Timer g_timer;
-
-int verbose = 0;
-
 namespace {
 
+Timer g_timer;
+int g_verbose = 0;
+
 bool FindDelimiter(const std::string &str, const std::string &delim, size_t *pos)
 {
   *pos = str.find(delim);
   return *pos != std::string::npos ? true : false;
 }
+
 } // namespace
 
 int verboselevel()
 {
-  return verbose;
+  return g_verbose;
 }
 
 int setverboselevel(int v)
 {
-  verbose = v;
-  return verbose;
+  g_verbose = v;
+  return g_verbose;
 }
 
 size_t getNextPound(std::string &str, std::string &substr,
@@ -67,27 +66,12 @@ void Tokenize(const char *str, const char delim,
   while (1) {
     const char *begin = str;
     while (*str != delim && *str) str++;
-    res->push_back(std::string(begin, str));
+    if (begin != str)            // Don't create empty string objects.
+      res->push_back(std::string(begin, str));
     if (*str++ == 0) break;
   }
 }
 
-int swapbytes(char *p, int sz, int n)
-{
-  char c, *l, *h;
-
-  if((n < 1) || (sz < 2)) return 0;
-  for (; n--; p += sz) {
-    for (h = (l = p) + sz; --h > l; l++) {
-      c = *h;
-      *h = *l;
-      *l = c;
-    }
-  }
-  return 0;
-
-}
-
 void ResetUserTime()
 {
   g_timer.start();
@@ -100,5 +84,5 @@ void PrintUserTime(const std::string &message)
 
 double GetUserTime()
 {
-  return g_timer.get_elapsed_time();
+  return g_timer.get_elapsed_cpu_time();
 }
diff --git a/mert/Util.h b/mert/Util.h
index da68685c3..cf99cdf6e 100644
--- a/mert/Util.h
+++ b/mert/Util.h
@@ -1,14 +1,15 @@
 /*
  *  Util.h
- *  met - Minimum Error Training
+ *  mert - Minimum Error Rate Training
  *
  *  Created by Nicola Bertoldi on 13/05/08.
  *
  */
 
-#ifndef UTIL_H
-#define UTIL_H
+#ifndef MERT_UTIL_H_
+#define MERT_UTIL_H_
 
+#include <cmath>
 #include <cstdlib>
 #include <stdexcept>
 #include <limits>
@@ -23,9 +24,6 @@
 
 using namespace std;
 
-#define US_NOSET (numeric_limits<unsigned short>::max())
-#define MAX_LINE  1024
-
 #ifdef TRACE_ENABLE
 #define TRACE_ERR(str) { std::cerr << str; }
 #else
@@ -37,6 +35,20 @@ const char kDefaultDelimiterSymbol[] = " ";
 int verboselevel();
 int setverboselevel(int v);
 
+
+const float kEPS = 0.0001f;
+
+// Returns true iff |expected - actual| < round; otherwise logs both to stderr.
+template <typename T>
+bool IsAlmostEqual(T expected, T actual, float round=kEPS) {
+  const bool is_close = abs(expected - actual) < round;
+  if (!is_close) {
+    cerr << "Fail: expected = " << expected
+         << " (actual = " << actual << ")" << endl;
+  }
+  return is_close;
+}
+
 /**
  * Find the specified delimiter for the string 'str', and 'str' is assigned
  * to a substring object that starts at the position of first occurrence of
@@ -52,6 +64,12 @@ size_t getNextPound(std::string &str, std::string &substr,
 
 void split(const std::string &s, char delim, std::vector<std::string> &elems);
 
+/**
+ * Split the string 'str' with the specified delimiter 'delim' into tokens.
+ * The resulting tokens are set to 'res'.
+ *
+ * ex. "a,b,c" => {"a", "b", "c"}.
+ */
 void Tokenize(const char *str, const char delim, std::vector<std::string> *res);
 
 template<typename T>
@@ -63,6 +81,14 @@ inline T Scan(const std::string &input)
   return ret;
 }
 
+/**
+ * Returns true iff "str" ends with the whole of "suffix", e.g. ("abc:", ":").
+ */
+inline bool EndsWith(const std::string& str, const char* suffix) {
+  const std::string::size_type n = std::char_traits<char>::length(suffix);
+  return str.size() >= n && str.compare(str.size() - n, n, suffix, n) == 0;
+}
+
 template<typename T>
 inline std::string stringify(T x)
 {
@@ -97,4 +123,4 @@ void ResetUserTime();
 void PrintUserTime(const std::string &message);
 double GetUserTime();
 
-#endif  // UTIL_H
+#endif  // MERT_UTIL_H_
diff --git a/mert/UtilTest.cpp b/mert/UtilTest.cpp
new file mode 100644
index 000000000..2101f7c8d
--- /dev/null
+++ b/mert/UtilTest.cpp
@@ -0,0 +1,76 @@
+#include "Util.h"
+
+#define BOOST_TEST_MODULE UtilTest
+#include <boost/test/unit_test.hpp>
+
+BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
+  {
+    std::string str("9 9 7 ");
+    std::string substr;
+    std::vector<std::string> res;
+
+    while (!str.empty()) {
+      getNextPound(str, substr);
+      res.push_back(substr);
+    }
+    BOOST_REQUIRE(res.size() == 3);
+    BOOST_CHECK_EQUAL("9", res[0]);
+    BOOST_CHECK_EQUAL("9", res[1]);
+    BOOST_CHECK_EQUAL("7", res[2]);
+  }
+
+  {
+    std::string str("ref.0,ref.1,ref.2");
+    std::string substr;
+    std::vector<std::string> res;
+    const std::string delim(",");
+
+    while (!str.empty()) {
+      getNextPound(str, substr, delim);
+      res.push_back(substr);
+    }
+    BOOST_REQUIRE(res.size() == 3);
+    BOOST_CHECK_EQUAL("ref.0", res[0]);
+    BOOST_CHECK_EQUAL("ref.1", res[1]);
+    BOOST_CHECK_EQUAL("ref.2", res[2]);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(util_tokenize_test) {
+  {
+    std::vector<std::string> res;
+    Tokenize("9 9 7", ' ', &res);
+    BOOST_REQUIRE(res.size() == 3);
+    BOOST_CHECK_EQUAL("9", res[0]);
+    BOOST_CHECK_EQUAL("9", res[1]);
+    BOOST_CHECK_EQUAL("7", res[2]);
+  }
+
+  {
+    std::vector<std::string> res;
+    Tokenize("9 8 7 ", ' ', &res);
+    BOOST_REQUIRE(res.size() == 3);
+    BOOST_CHECK_EQUAL("9", res[0]);
+    BOOST_CHECK_EQUAL("8", res[1]);
+    BOOST_CHECK_EQUAL("7", res[2]);
+  }
+
+  {
+    std::vector<std::string> res;
+    Tokenize("ref.0,ref.1,", ',', &res);
+    BOOST_REQUIRE(res.size() == 2);
+    BOOST_CHECK_EQUAL("ref.0", res[0]);
+    BOOST_CHECK_EQUAL("ref.1", res[1]);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(util_ends_with_test) {
+  BOOST_CHECK(EndsWith("abc:", ":"));
+  BOOST_CHECK(EndsWith("a b c:", ":"));
+  BOOST_CHECK(!EndsWith("a", ":"));
+  BOOST_CHECK(!EndsWith("a:b", ":"));
+
+  BOOST_CHECK(EndsWith("ab ", " "));
+  BOOST_CHECK(!EndsWith("ab", " "));
+  BOOST_CHECK(!EndsWith("a b", " "));
+}
diff --git a/mert/Vocabulary.cpp b/mert/Vocabulary.cpp
new file mode 100644
index 000000000..40b04bf99
--- /dev/null
+++ b/mert/Vocabulary.cpp
@@ -0,0 +1,21 @@
+#include "Vocabulary.h"
+#include "Singleton.h"
+
+namespace mert {
+namespace {
+Vocabulary* g_vocab = NULL;
+} // namespace
+
+Vocabulary* VocabularyFactory::GetVocabulary() {
+  // A vocabulary injected via SetVocabulary() wins over the shared singleton.
+  if (g_vocab != NULL) {
+    return g_vocab;
+  }
+  return Singleton<Vocabulary>::GetInstance();
+}
+
+void VocabularyFactory::SetVocabulary(Vocabulary* vocab) {
+  g_vocab = vocab;
+}
+
+} // namespace mert
diff --git a/mert/Vocabulary.h b/mert/Vocabulary.h
new file mode 100644
index 000000000..12c8c1727
--- /dev/null
+++ b/mert/Vocabulary.h
@@ -0,0 +1,79 @@
+#ifndef MERT_VOCABULARY_H_
+#define MERT_VOCABULARY_H_
+
+#include <map>
+#include <string>
+
+namespace mert {
+
+/**
+ * An embarrassingly simple map for handling vocabularies when calculating
+ * various scores such as BLEU.
+ *
+ * TODO: replace this with a more efficient data structure.
+ */
+class Vocabulary {
+ public:
+  typedef std::map<std::string, int>::iterator iterator;
+  typedef std::map<std::string, int>::const_iterator const_iterator;
+
+  Vocabulary() {}
+  virtual ~Vocabulary() {}
+
+  /** Returns the id assigned to "token", adding the token if it is not yet known. */
+  int Encode(const std::string& token) {
+    iterator it = m_vocab.find(token);
+    int encoded_token;
+    if (it == m_vocab.end()) {
+      // Add a new entry to the vocabulary; its id is the current map size.
+      encoded_token = static_cast<int>(m_vocab.size());
+      m_vocab[token] = encoded_token;
+    } else {
+      encoded_token = it->second;
+    }
+    return encoded_token;
+  }
+
+  /**
+   * Return true iff "str" is found in the container; its id is then written to *v.
+   */
+  bool Lookup(const std::string&str , int* v) const {
+    const_iterator it = m_vocab.find(str);
+    if (it == m_vocab.end()) return false;
+    *v = it->second;
+    return true;
+  }
+
+  void clear() { m_vocab.clear(); }
+
+  bool empty() const { return m_vocab.empty(); }
+
+  size_t size() const { return m_vocab.size(); }
+
+  iterator find(const std::string& str) { return m_vocab.find(str); }
+  const_iterator find(const std::string& str) const { return m_vocab.find(str); }
+
+  int& operator[](const std::string& str) { return m_vocab[str]; }
+
+  iterator begin() { return m_vocab.begin(); }
+  const_iterator begin() const { return m_vocab.begin(); }
+  iterator end() { return m_vocab.end(); }
+  const_iterator end() const { return m_vocab.end(); }
+
+ private:
+  std::map<std::string, int> m_vocab;  // token -> id
+};
+
+class VocabularyFactory {
+ public:
+  static Vocabulary* GetVocabulary();
+  static void SetVocabulary(Vocabulary* vocab);
+
+ private:
+  VocabularyFactory() {}
+  virtual ~VocabularyFactory() {}
+};
+
+} // namespace mert
+
+#endif  // MERT_VOCABULARY_H_
diff --git a/mert/VocabularyTest.cpp b/mert/VocabularyTest.cpp
new file mode 100644
index 000000000..0e67ba62a
--- /dev/null
+++ b/mert/VocabularyTest.cpp
@@ -0,0 +1,52 @@
+#include "Vocabulary.h"
+
+#define BOOST_TEST_MODULE MertVocabulary
+#include <boost/test/unit_test.hpp>
+
+#include "Singleton.h"
+
+namespace mert {
+namespace {
+
+void TearDown() {
+  Singleton<Vocabulary>::Delete();
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(vocab_basic) {
+  Vocabulary vocab;
+  BOOST_REQUIRE(vocab.empty());
+  vocab.clear();
+
+  BOOST_CHECK_EQUAL(0, vocab.Encode("hello"));
+  BOOST_CHECK_EQUAL(0, vocab.Encode("hello"));
+  BOOST_CHECK_EQUAL(1, vocab.Encode("world"));
+
+  BOOST_CHECK_EQUAL(2, vocab.size());
+
+  int v;
+  BOOST_CHECK(vocab.Lookup("hello", &v));
+  BOOST_CHECK_EQUAL(0, v);
+  BOOST_CHECK(vocab.Lookup("world", &v));
+  BOOST_CHECK_EQUAL(1, v);
+
+  BOOST_CHECK(!vocab.Lookup("java", &v));
+
+  vocab.clear();
+  BOOST_CHECK(!vocab.Lookup("hello", &v));
+  BOOST_CHECK(!vocab.Lookup("world", &v));
+}
+
+BOOST_AUTO_TEST_CASE(vocab_factory_test) {
+  Vocabulary* vocab1 = VocabularyFactory::GetVocabulary();
+  Vocabulary* vocab2 = VocabularyFactory::GetVocabulary();
+  Vocabulary* vocab3 = VocabularyFactory::GetVocabulary();
+
+  BOOST_REQUIRE(vocab1 != NULL);
+  BOOST_CHECK(vocab1 == vocab2);
+  BOOST_CHECK(vocab2 == vocab3);
+
+  TearDown();
+}
+} // namespace mert
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index 2fcda0140..a95cdfa1b 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -55,7 +55,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
     for (int i = 0; i < bootstrap; ++i)
     {
       // TODO: Use smart pointer for exceptional-safety.
-      ScoreData* scoredata = new ScoreData(*g_scorer);
+      ScoreData* scoredata = new ScoreData(g_scorer);
       for (int j = 0; j < n; ++j)
       {
         int randomIndex = random() % n;
@@ -89,7 +89,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
   else
   {
     // TODO: Use smart pointer for exceptional-safety.
-    ScoreData* scoredata = new ScoreData(*g_scorer);
+    ScoreData* scoredata = new ScoreData(g_scorer);
     for (int sid = 0; sid < n; ++sid)
     {
       string str_sid = int2string(sid);
@@ -133,15 +133,26 @@ void usage()
   cerr << "\tThis is of the form NAME1:VAL1,NAME2:VAL2 etc " << endl;
   cerr << "[--reference|-R] comma separated list of reference files" << endl;
   cerr << "[--candidate|-C] comma separated list of candidate files" << endl;
+  cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
+  cerr << "[--filter|-l] filter command which will be used to preprocess the sentences" << endl;
   cerr << "[--bootstrap|-b] number of booststraped samples (default 0 - no bootstraping)" << endl;
   cerr << "[--rseed|-r] the random seed for bootstraping (defaults to system clock)" << endl;
   cerr << "[--help|-h] print this message and exit" << endl;
   cerr << endl;
   cerr << "Evaluator is able to compute more metrics at once. To do this," << endl;
-  cerr << "separate scorers with semicolon (note that comma is used to separate" << endl;
-  cerr << "scorers in the interpolated scorer)." << endl;
+  cerr << "specify more --sctype arguments. You can also specify more --scconfig strings." << endl;
   cerr << endl;
-  cerr << "If you specify only one metric and one candidate file, only the final score" << endl;
+  cerr << "The example below prints BLEU score, PER score and interpolated" << endl;
+  cerr << "score of CDER and PER with the given weights." << endl;
+  cerr << endl;
+  cerr << "./evaluator \\" << endl;
+  cerr << "\t--sctype BLEU --scconfig reflen:closest \\" << endl;
+  cerr << "\t--sctype PER \\" << endl;
+  cerr << "\t--sctype CDER,PER --scconfig weights:0.25+0.75 \\" << endl;
+  cerr << "\t--candidate CANDIDATE \\" << endl;
+  cerr << "\t--reference REFERENCE" << endl;
+  cerr << endl;
+  cerr << "If you specify only one scorer and one candidate file, only the final score" << endl;
   cerr << "will be printed to stdout. Otherwise each line will contain metric name" << endl;
   cerr << "and/or filename and the final score. Since most of the metrics prints some" << endl;
   cerr << "debuging info, consider redirecting stderr to /dev/null." << endl;
@@ -155,24 +166,26 @@ static struct option long_options[] = {
   {"candidate", required_argument, 0, 'C'},
   {"bootstrap", required_argument, 0, 'b'},
   {"rseed", required_argument, 0, 'r'},
+  {"factors", required_argument, 0, 'f'},
+  {"filter", required_argument, 0, 'l'},
   {"help", no_argument, 0, 'h'},
   {0, 0, 0, 0}
 };
 
 // Options used in evaluator.
 struct ProgramOption {
-  string scorer_type;
-  string scorer_config;
+  vector<string> scorer_types;
+  vector<string> scorer_configs;
   string reference;
   string candidate;
+  vector<string> scorer_factors;
+  vector<string> scorer_filter;
   int bootstrap;
   int seed;
   bool has_seed;
 
   ProgramOption()
-      : scorer_type("BLEU"),
-        scorer_config(""),
-        reference(""),
+      : reference(""),
         candidate(""),
         bootstrap(0),
         seed(0),
@@ -182,13 +195,18 @@ struct ProgramOption {
 void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
   int c;
   int option_index;
-  while ((c = getopt_long(argc, argv, "s:c:R:C:b:r:h", long_options, &option_index)) != -1) {
+  int last_scorer_index = -1;
+  while ((c = getopt_long(argc, argv, "s:c:R:C:b:r:f:l:h", long_options, &option_index)) != -1) {
     switch(c) {
       case 's':
-        opt->scorer_type = string(optarg);
+        opt->scorer_types.push_back(string(optarg));
+        opt->scorer_configs.push_back(string(""));
+        opt->scorer_factors.push_back(string(""));
+        opt->scorer_filter.push_back(string(""));
+        last_scorer_index++;
         break;
       case 'c':
-        opt->scorer_config = string(optarg);
+        opt->scorer_configs[last_scorer_index] = string(optarg);
         break;
       case 'R':
         opt->reference = string(optarg);
@@ -203,10 +221,25 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
         opt->seed = strtol(optarg, NULL, 10);
         opt->has_seed = true;
         break;
+      case 'f':
+        opt->scorer_factors[last_scorer_index] = string(optarg);
+        break;
+      case 'l':
+        opt->scorer_filter[last_scorer_index] = string(optarg);
+        break;
       default:
         usage();
     }
   }
+
+  // Add default scorer if no scorer provided
+  if (opt->scorer_types.size() == 0)
+  {
+    opt->scorer_types.push_back(string("BLEU"));
+    opt->scorer_configs.push_back(string(""));
+    opt->scorer_factors.push_back(string(""));
+    opt->scorer_filter.push_back(string(""));
+  }
 }
 
 void InitSeed(const ProgramOption *opt) {
@@ -236,7 +269,6 @@ int main(int argc, char** argv)
   try {
     vector<string> refFiles;
     vector<string> candFiles;
-    vector<string> scorerTypes;
 
     if (option.reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
     split(option.reference, ',', refFiles);
@@ -244,17 +276,16 @@ int main(int argc, char** argv)
     if (option.candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file.");
     split(option.candidate, ',', candFiles);
 
-    if (option.scorer_type.length() == 0) throw runtime_error("You have to specify at least one scorer.");
-    split(option.scorer_type, ';', scorerTypes);
-
     if (candFiles.size() > 1) g_has_more_files = true;
-    if (scorerTypes.size() > 1) g_has_more_scorers = true;
+    if (option.scorer_types.size() > 1) g_has_more_scorers = true;
 
     for (vector<string>::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt)
     {
-        for (vector<string>::const_iterator scorerIt = scorerTypes.begin(); scorerIt != scorerTypes.end(); ++scorerIt)
+        for (size_t i = 0; i < option.scorer_types.size(); i++)
         {
-            g_scorer = ScorerFactory::getScorer(*scorerIt, option.scorer_config);
+            g_scorer = ScorerFactory::getScorer(option.scorer_types[i], option.scorer_configs[i]);
+            g_scorer->setFactors(option.scorer_factors[i]);
+            g_scorer->setFilter(option.scorer_filter[i]);
             g_scorer->setReferenceFiles(refFiles);
             EvaluatorUtil::evaluate(*fileIt, option.bootstrap);
             delete g_scorer;
diff --git a/mert/example/README b/mert/example/README
deleted file mode 100644
index 7ece55a53..000000000
--- a/mert/example/README
+++ /dev/null
@@ -1,26 +0,0 @@
-extractor=../extractor
-#extractor="../extractor --binary"
-mert=../mert
-size=15
-
-#to read an nbest file; output is in text format
-$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 --ffile FEATSTAT --scfile SCORESTAT --sctype BLEU
-$extractor --ffile FEATSTAT.2 --scfile SCORESTAT.2 --sctype BLEU --prev-ffile FEATSTAT --prev-scfile SCORESTAT
-$extractor --binary --ffile FEATSTAT.3 --scfile SCORESTAT.3 --sctype BLEU --prev-ffile FEATSTAT,FEATSTAT.2 --prev-scfile SCORESTAT,SCORESTAT.2
-$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 --ffile FEATSTAT.4 --scfile SCORESTAT.4 --sctype BLEU --prev-ffile FEATSTAT,FEATSTAT.3 --prev-scfile SCORESTAT,SCORESTAT.3
-
-
-$mert -r 1234 --ifile init.opt --scfile SCORESTAT --ffile FEATSTAT -d $size --verbose 4 -n 5 
-
-exit
-
-
-#to read a gzipped nbest file; output is in text format
-$extractor --nbest NBEST.gz --reference REF.0,REF.1,REF.2 --ffile FEATSTATgz --scfile SCORESTATgz --sctype BLEU
-gzip FEATSTATgz SCORESTATgz
-
-$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 --prev-ffile FEATSTAT --prev-scfile SCORESTAT --ffile FEATSTAT2 --scfile SCORESTAT2 --sctype BLEU
-
-$extractor --nbest NBEST.gz --reference REF.0,REF.1,REF.2 --prev-ffile FEATSTATgz.gz --prev-scfile SCORESTATgz.gz --ffile FEATSTAT2gz --scfile SCORESTAT2gz --sctype BLEU
-
-exit
diff --git a/mert/example/gzipped_test.sh b/mert/example/gzipped_test.sh
new file mode 100755
index 000000000..f52613da1
--- /dev/null
+++ b/mert/example/gzipped_test.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+extractor=$1
+mert=$2
+size=$3
+
+if [ $# -ne 3 ]; then
+    echo "Usage: ./gzipped_test.sh extractor mert size"
+    exit 1
+fi
+
+if ! [ -f NBEST.gz ]; then
+    gzip NBEST
+fi
+
+$extractor --nbest NBEST.gz --reference REF.0,REF.1,REF.2 \
+    --ffile FEATSTAT_gz --scfile SCORESTAT_gz \
+    --sctype BLEU 2> extractor_gz1.log
+
+gzip -d NBEST.gz
+
+$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 \
+    --prev-ffile FEATSTAT --prev-scfile SCORESTAT \
+    --ffile FEATSTAT2 --scfile SCORESTAT2 \
+    --sctype BLEU 2> extractor_gz2.log
+
+# Now we want to test reading gzipped files.
+# First compress the output written by the first extractor run above.
+
+for f in FEATSTAT_gz SCORESTAT_gz; do
+    printf "Compressing %s " "$f"
+    gzip "$f"
+    echo "done."
+done
+
+$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 \
+    --prev-ffile FEATSTAT_gz.gz --prev-scfile SCORESTAT_gz.gz \
+    --ffile FEATSTAT2_gz --scfile SCORESTAT2_gz \
+    --sctype BLEU 2> extractor_gz3.log
+
+gzip -d FEATSTAT_gz.gz SCORESTAT_gz.gz
+echo "Done."
diff --git a/mert/example/normal_test.sh b/mert/example/normal_test.sh
new file mode 100755
index 000000000..8b5bf3eb9
--- /dev/null
+++ b/mert/example/normal_test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+extractor=$1
+mert=$2
+size=$3
+
+if [ $# -ne 3 ]; then
+    echo "Usage: ./normal_test.sh extractor mert size"
+    exit 1
+fi
+
+echo "Running extractor ..."
+$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 --ffile FEATSTAT \
+    --scfile SCORESTAT --sctype BLEU 2> extractor1.log
+
+$extractor --ffile FEATSTAT.2 --scfile SCORESTAT.2 --sctype BLEU \
+    --prev-ffile FEATSTAT --prev-scfile SCORESTAT 2> extractor2.log
+
+$extractor --binary --ffile FEATSTAT.3 --scfile SCORESTAT.3 --sctype BLEU \
+    --prev-ffile FEATSTAT,FEATSTAT.2 \
+    --prev-scfile SCORESTAT,SCORESTAT.2 2> extractor3.log
+
+$extractor --nbest NBEST --reference REF.0,REF.1,REF.2 --ffile FEATSTAT.4 \
+    --scfile SCORESTAT.4 --sctype BLEU --prev-ffile FEATSTAT,FEATSTAT.3 \
+    --prev-scfile SCORESTAT,SCORESTAT.3 2> extractor4.log
+
+echo "Running mert ..."
+$mert -r 1234 --ifile init.opt --scfile SCORESTAT --ffile FEATSTAT \
+    -d $size --verbose 4 -n 5 2>mert.log
+
+echo "Done."
diff --git a/mert/example/smoke_test.sh b/mert/example/smoke_test.sh
new file mode 100755
index 000000000..193d481ae
--- /dev/null
+++ b/mert/example/smoke_test.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# A sample script for smoke testing.
+# This is not a tuning script.
+# Please see: mosesdecoder/scripts/training/mert-moses.pl
+
+extractor=../extractor
+mert=../mert
+
+# Default dimension used in mert.
+size=15
+
+# Make sure you have already compiled mert related stuff.
+for f in $extractor $mert; do
+    if ! [ -f "$f" ]; then
+        echo "Error: no such file or directory: $f"
+        echo "You should run \`bjam\` first!"
+        exit 1
+    fi
+done
+
+# Make sure you have the sample data and the init file used in these tests.
+for f in NBEST REF.0 REF.1 REF.2 init.opt; do
+    if ! [ -f "$f" ]; then
+        echo "Error: no such file or directory: $f"
+        exit 1
+    fi
+done
+
+# Read an nbest file and print output in text format.
+# We will save stderr to disk. Please see each log file.
+echo "Running tests for reading text files ..."
+./normal_test.sh "$extractor" "$mert" "$size"
+
+# Run reading gzipped file tests.
+# We will save stderr to disk. Please see each log file.
+echo "Running tests for reading gzipped files ..."
+./gzipped_test.sh "$extractor" "$mert" "$size"
+
+echo "Smoke tests done."
diff --git a/mert/extractor.cpp b/mert/extractor.cpp
index cb3e4c8ef..1119dfa57 100644
--- a/mert/extractor.cpp
+++ b/mert/extractor.cpp
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include <getopt.h>
+#include <boost/scoped_ptr.hpp>
 
 #include "Data.h"
 #include "Scorer.h"
@@ -33,6 +34,8 @@ void usage()
   cerr << "[--ffile|-F] the feature data output file" << endl;
   cerr << "[--prev-ffile|-E] comma separated list of previous feature data" << endl;
   cerr << "[--prev-scfile|-R] comma separated list of previous scorer data" << endl;
+  cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
+  cerr << "[--filter|-l] filter command used to preprocess the sentences" << endl;
   cerr << "[-v] verbose level" << endl;
   cerr << "[--help|-h] print this message and exit" << endl;
   exit(1);
@@ -41,6 +44,8 @@ void usage()
 static struct option long_options[] = {
   {"sctype", required_argument, 0, 's'},
   {"scconfig", required_argument,0, 'c'},
+  {"factors", required_argument,0, 'f'},
+  {"filter", required_argument,0, 'l'},
   {"reference", required_argument, 0, 'r'},
   {"binary", no_argument, 0, 'b'},
   {"nbest", required_argument, 0, 'n'},
@@ -57,6 +62,8 @@ static struct option long_options[] = {
 struct ProgramOption {
   string scorerType;
   string scorerConfig;
+  string scorerFactors;
+  string scorerFilter;
   string referenceFile;
   string nbestFile;
   string scoreDataFile;
@@ -69,6 +76,8 @@ struct ProgramOption {
   ProgramOption()
       : scorerType("BLEU"),
         scorerConfig(""),
+        scorerFactors(""),
+        scorerFilter(""),
         referenceFile(""),
         nbestFile(""),
         scoreDataFile("statscore.data"),
@@ -83,7 +92,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
   int c;
   int option_index;
 
-  while ((c = getopt_long(argc, argv, "s:r:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
+  while ((c = getopt_long(argc, argv, "s:r:f:l:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
     switch (c) {
       case 's':
         opt->scorerType = string(optarg);
@@ -91,6 +100,12 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
       case 'c':
         opt->scorerConfig = string(optarg);
         break;
+      case 'f':
+        opt->scorerFactors = string(optarg);
+        break;
+      case 'l':
+        opt->scorerFilter = string(optarg);
+        break;
       case 'r':
         opt->referenceFile = string(optarg);
         break;
@@ -178,7 +193,12 @@ int main(int argc, char** argv)
 
     TRACE_ERR("Scorer type: " << option.scorerType << endl);
 
-    Scorer* scorer = ScorerFactory::getScorer(option.scorerType, option.scorerConfig);
+    boost::scoped_ptr<Scorer> scorer(
+        ScorerFactory::getScorer(option.scorerType, option.scorerConfig));
+
+    // set Factors and Filter used to preprocess the sentences
+    scorer->setFactors(option.scorerFactors);
+    scorer->setFilter(option.scorerFilter);
 
     // load references
     if (referenceFiles.size() > 0)
@@ -186,7 +206,7 @@ int main(int argc, char** argv)
 
     PrintUserTime("References loaded");
 
-    Data data(*scorer);
+    Data data(scorer.get());
 
     // load old data
     for (size_t i = 0; i < prevScoreDataFiles.size(); i++) {
@@ -197,27 +217,18 @@ int main(int argc, char** argv)
 
     // computing score statistics of each nbest file
     for (size_t i = 0; i < nbestFiles.size(); i++) {
-      data.loadnbest(nbestFiles.at(i));
+      data.loadNBest(nbestFiles.at(i));
     }
 
     PrintUserTime("Nbest entries loaded and scored");
 
     //ADDED_BY_TS
-    data.remove_duplicates();
+    data.removeDuplicates();
     //END_ADDED
 
-    if (option.binmode)
-      cerr << "Binary write mode is selected" << endl;
-    else
-      cerr << "Binary write mode is NOT selected" << endl;
-
     data.save(option.featureDataFile, option.scoreDataFile, option.binmode);
     PrintUserTime("Stopping...");
 
-    // timer.stop("Stopping...");
-
-    delete scorer;
-
     return EXIT_SUCCESS;
   } catch (const exception& e) {
     cerr << "Exception: " << e.what() << endl;
diff --git a/mert/gzfilebuf.h b/mert/gzfilebuf.h
deleted file mode 100644
index f9cd8a446..000000000
--- a/mert/gzfilebuf.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef _GZFILEBUF_H_
-#define _GZFILEBUF_H_
-
-#include <streambuf>
-#include <zlib.h>
-#include <cstring>
-
-class gzfilebuf : public std::streambuf
-{
-public:
-  explicit gzfilebuf(const char *filename) {
-    _gzf = gzopen(filename, "rb");
-    setg (_buff+sizeof(int),     // beginning of putback area
-          _buff+sizeof(int),     // read position
-          _buff+sizeof(int));    // end position
-  }
-  ~gzfilebuf() {
-    gzclose(_gzf);
-  }
-protected:
-  virtual int_type overflow (int_type c) {
-    throw;
-  }
-
-  // write multiple characters
-  virtual
-  std::streamsize xsputn (const char* s,
-                          std::streamsize num) {
-    throw;
-  }
-
-  virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ) {
-    throw;
-  }
-
-  // read one character
-  virtual int_type underflow () {
-    // is read position before end of _buff?
-    if (gptr() < egptr()) {
-      return traits_type::to_int_type(*gptr());
-    }
-
-    /* process size of putback area
-     * - use number of characters read
-     * - but at most four
-     */
-    unsigned int numPutback = gptr() - eback();
-    if (numPutback > sizeof(int)) {
-      numPutback = sizeof(int);
-    }
-
-    /* copy up to four characters previously read into
-     * the putback _buff (area of first four characters)
-     */
-    std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
-                  numPutback);
-
-    // read new characters
-    int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
-    if (num <= 0) {
-      // ERROR or EOF
-      return EOF;
-    }
-
-    // reset _buff pointers
-    setg (_buff+(sizeof(int)-numPutback),   // beginning of putback area
-          _buff+sizeof(int),                // read position
-          _buff+sizeof(int)+num);           // end of buffer
-
-    // return next character
-    return traits_type::to_int_type(*gptr());
-  }
-
-  std::streamsize xsgetn (char* s,
-                          std::streamsize num) {
-    return gzread(_gzf,s,num);
-  }
-
-private:
-  gzFile _gzf;
-  static const unsigned int _buffsize = 1024;
-  char _buff[_buffsize];
-};
-
-#endif  // _GZFILEBUF_H_
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 58214f30b..bbad8fe38 100755..100644
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -11,6 +11,7 @@
 #include <ctime>
 
 #include <getopt.h>
+#include <boost/scoped_ptr.hpp>
 
 #include "Data.h"
 #include "Point.h"
@@ -19,6 +20,7 @@
 #include "ScoreData.h"
 #include "FeatureData.h"
 #include "Optimizer.h"
+#include "OptimizerFactory.h"
 #include "Types.h"
 #include "Timer.h"
 #include "Util.h"
@@ -34,6 +36,7 @@ const char kDefaultScorer[] = "BLEU";
 const char kDefaultScorerFile[] = "statscore.data";
 const char kDefaultFeatureFile[] = "features.data";
 const char kDefaultInitFile[] = "init.opt";
+const char kDefaultPositiveString[] = "";
 
 // Used when saving optimized weights.
 const char kOutputFile[] = "weights.txt";
@@ -106,6 +109,7 @@ void usage(int ret)
   cerr << "[--scfile|-S] comma separated list of scorer data files (default " << kDefaultScorerFile << ")" << endl;
   cerr << "[--ffile|-F] comma separated list of feature data files (default " << kDefaultFeatureFile << ")" << endl;
   cerr << "[--ifile|-i] the starting point data file (default " << kDefaultInitFile << ")" << endl;
+  cerr << "[--positive|-P] indexes with positive weights (default none)"<<endl;
 #ifdef WITH_THREADS
   cerr << "[--threads|-T] use multiple threads (default 1)" << endl;
 #endif
@@ -123,6 +127,7 @@ static struct option long_options[] = {
   {"rseed", required_argument, 0, 'r'},
   {"optimize", 1, 0, 'o'},
   {"pro", required_argument, 0, 'p'},
+  {"positive",1,0,'P'},
   {"type", 1, 0, 't'},
   {"sctype", 1, 0, 's'},
   {"scconfig", required_argument, 0, 'c'},
@@ -152,6 +157,7 @@ struct ProgramOption {
   string scorer_file;
   string feature_file;
   string init_file;
+  string positive_string;
   size_t num_threads;
   float shard_size;
   size_t shard_count;
@@ -169,6 +175,7 @@ struct ProgramOption {
         scorer_file(kDefaultScorerFile),
         feature_file(kDefaultFeatureFile),
         init_file(kDefaultInitFile),
+        positive_string(kDefaultPositiveString),
         num_threads(1),
         shard_size(0),
         shard_count(0) { }
@@ -178,7 +185,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
   int c;
   int option_index;
 
-  while ((c = getopt_long(argc, argv, "o:r:d:n:m:t:s:S:F:v:p:", long_options, &option_index)) != -1) {
+  while ((c = getopt_long(argc, argv, "o:r:d:n:m:t:s:S:F:v:p:P:", long_options, &option_index)) != -1) {
     switch (c) {
       case 'o':
         opt->to_optimize_str = string(optarg);
@@ -232,6 +239,9 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
       case 'h':
         usage(0);
         break;
+      case 'P':
+        opt->positive_string = string(optarg);
+        break;
       default:
         usage(1);
     }
@@ -251,6 +261,7 @@ int main(int argc, char **argv)
   vector<vector<parameter_t> > start_list;
   vector<parameter_t> min;
   vector<parameter_t> max;
+  vector<bool> positive;
   // NOTE: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result!
 
   if (option.pdim < 0)
@@ -333,19 +344,20 @@ int main(int argc, char **argv)
   }
 
   // it make sense to know what parameter set were used to generate the nbest
-  Scorer *TheScorer = ScorerFactory::getScorer(option.scorer_type, option.scorer_config);
+  boost::scoped_ptr<Scorer> scorer(
+      ScorerFactory::getScorer(option.scorer_type, option.scorer_config));
 
   //load data
-  Data data(*TheScorer);
+  Data data(scorer.get());
 
   for (size_t i = 0; i < ScoreDataFiles.size(); i++) {
     cerr<<"Loading Data from: "<< ScoreDataFiles.at(i) << " and " << FeatureDataFiles.at(i) << endl;
     data.load(FeatureDataFiles.at(i), ScoreDataFiles.at(i));
   }
 
-  //ADDED_BY_TS
-  data.remove_duplicates();
-  //END_ADDED
+  scorer->setScoreData(data.getScoreData().get());
+
+  data.removeDuplicates();
 
   PrintUserTime("Data loaded");
 
@@ -358,19 +370,24 @@ int main(int argc, char **argv)
   if (option.to_optimize_str.length() > 0) {
     cerr << "Weights to optimize: " << option.to_optimize_str << endl;
 
-    // Parse string to get weights to optimize, and set them as active
-    string substring;
-    int index;
-    while (!option.to_optimize_str.empty()) {
-      getNextPound(option.to_optimize_str, substring, ",");
-      index = data.getFeatureIndex(substring);
-      cerr << "FeatNameIndex:" << index << " to insert" << endl;
-      //index = strtol(substring.c_str(), NULL, 10);
-      if (index >= 0 && index < option.pdim) {
-        to_optimize.push_back(index);
-      } else {
-        cerr << "Index " << index << " is out of bounds. Allowed indexes are [0," << option.pdim - 1 << "]." << endl;
+    // Parse the string to get weights to optimize, and set them as active.
+    vector<string> features;
+    Tokenize(option.to_optimize_str.c_str(), ',', &features);
+
+    for (vector<string>::const_iterator it = features.begin();
+         it != features.end(); ++it) {
+      const int feature_index = data.getFeatureIndex(*it);
+
+      // Note: previous implementation checked whether
+      // feature_index is less than option.pdim.
+      // However, it does not make sense when we optimize 'discrete' features,
+      // given by '-o' option like -o "d_0,lm_0,tm_2,tm_3,tm_4,w_0".
+      if (feature_index < 0) {
+        cerr << "Error: invalid feature index = " << feature_index << endl;
+        exit(1);
       }
+      cerr << "FeatNameIndex: " << feature_index << " to insert" << endl;
+      to_optimize.push_back(feature_index);
     }
   } else {
     //set all weights as active
@@ -380,6 +397,27 @@ int main(int argc, char **argv)
     }
   }
 
+  positive.resize(option.pdim);
+  for (int i = 0; i < option.pdim; i++)
+    positive[i] = false;
+  if (option.positive_string.length() > 0) {
+    // Parse string to get weights that need to be positive
+    std::string substring;
+    int index;
+    while (!option.positive_string.empty()) {
+      getNextPound(option.positive_string, substring, ",");
+      index = data.getFeatureIndex(substring);
+      //index = strtol(substring.c_str(), NULL, 10);
+      if (index >= 0 && index < option.pdim) {
+        positive[index] = true;
+      } else {
+        cerr << "Index " << index
+             << " is out of bounds in positivity list. Allowed indexes are [0,"
+             << (option.pdim-1) << "]." << endl;
+      }
+    }
+  }
+
   // treat sparse features just like regular features
   if (data.hasSparseFeatures()) {
     data.mergeSparseFeatures();
@@ -393,6 +431,7 @@ int main(int argc, char **argv)
 
   Point::setpdim(option.pdim);
   Point::setdim(to_optimize.size());
+  Point::set_optindices(to_optimize);
 
   //starting points consist of specified points and random restarts
   vector<Point> startingPoints;
@@ -422,9 +461,9 @@ int main(int argc, char **argv)
       data_ref = shards[i]; //use the sharded data if it exists
 
     vector<OptimizationTask*>& tasks = allTasks[i];
-    Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, start_list[0], option.optimize_type, option.nrandom);
+    Optimizer *optimizer = OptimizerFactory::BuildOptimizer(option.pdim, to_optimize, positive, start_list[0], option.optimize_type, option.nrandom);
     optimizer->SetScorer(data_ref.getScorer());
-    optimizer->SetFData(data_ref.getFeatureData());
+    optimizer->SetFeatureData(data_ref.getFeatureData());
     // A task for each start point
     for (size_t j = 0; j < startingPoints.size(); ++j) {
       OptimizationTask* task = new OptimizationTask(optimizer, startingPoints[j]);
@@ -498,7 +537,6 @@ int main(int argc, char **argv)
     }
   }
 
-  delete TheScorer;
   PrintUserTime("Stopping...");
 
   return 0;
diff --git a/mert/pro.cpp b/mert/pro.cpp
index a18e7a117..e1d2ebcfd 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -21,8 +21,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 
 
-/** 
-  * This is part of the PRO implementation. It converts the features and scores 
+/**
+  * This is part of the PRO implementation. It converts the features and scores
   * files into a form suitable for input into the megam maxent trainer.
   *
   *   For details of PRO, refer to Hopkins & May (EMNLP 2011)
@@ -34,9 +34,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <iostream>
 #include <string>
 #include <vector>
+#include <utility>
 
 #include <boost/program_options.hpp>
 
+#include "BleuScorer.h"
 #include "FeatureDataIterator.h"
 #include "ScoreDataIterator.h"
 
@@ -46,49 +48,33 @@ namespace po = boost::program_options;
 
 class SampledPair {
 private:
-	pair<size_t,size_t> translation1;
-	pair<size_t,size_t> translation2;
-	float scoreDiff;
-public:
-	SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
-		if (diff > 0) {
-			translation1 = t1;
-			translation2 = t2;
-			scoreDiff = diff;
-		}
-		else {
-			translation1 = t2;
-			translation2 = t1;
-			scoreDiff = -diff;
-		}			
-	}
-	float getDiff() const { return scoreDiff; }
-	const pair<size_t,size_t>& getTranslation1() const { return translation1; }
-	const pair<size_t,size_t>& getTranslation2() const { return translation2; }
-};
+  pair<size_t,size_t> m_translation1;
+  pair<size_t,size_t> m_translation2;
+  float m_score_diff;
 
+public:
+  SampledPair(const pair<size_t,size_t>& t1, const pair<size_t,size_t>& t2, float diff ) {
+    if (diff > 0) {
+      m_translation1 = t1;
+      m_translation2 = t2;
+      m_score_diff = diff;
+    } else {
+      m_translation1 = t2;
+      m_translation2 = t1;
+      m_score_diff = -diff;
+    }
+  }
 
-static float sentenceLevelBleuPlusOne(const vector<float>& stats) {
-	float logbleu = 0.0;
-	const unsigned int bleu_order = 4;
-	for (unsigned int j=0; j<bleu_order; j++) {
-		//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
-		logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
-	}
-	logbleu /= bleu_order;
-	const float brevity = 1.0 - static_cast<float>(stats[(bleu_order*2)]) / stats[1];
-	if (brevity < 0.0) {
-		logbleu += brevity;
-	}
-	//cerr << brevity << " -> " << exp(logbleu) << endl;
-	return exp(logbleu);
-}
+  float getDiff() const { return m_score_diff; }
+  const pair<size_t,size_t>& getTranslation1() const { return m_translation1; }
+  const pair<size_t,size_t>& getTranslation2() const { return m_translation2; }
+};
 
 static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureDataItem& f2) {
   // difference in score in regular features
-	for(unsigned int j=0; j<f1.dense.size(); j++)
-		if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
-			out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);
+  for(unsigned int j=0; j<f1.dense.size(); j++)
+    if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)
+      out << " F" << j << " " << (f1.dense[j]-f2.dense[j]);
 
   if (f1.sparse.size() || f2.sparse.size()) {
     out << " ";
@@ -101,27 +87,27 @@ static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureD
   }
 }
 
-	
-int main(int argc, char** argv) 
+
+int main(int argc, char** argv)
 {
   bool help;
   vector<string> scoreFiles;
   vector<string> featureFiles;
   int seed;
   string outputFile;
-  //TODO: options
-	const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
-	const unsigned int n_samples = 50; // Xi, in Hopkins & May
-	const float min_diff = 0.05;
+  // TODO: Add these constants to options
+  const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
+  const unsigned int n_samples = 50; // Xi, in Hopkins & May
+  const float min_diff = 0.05;
 
   po::options_description desc("Allowed options");
   desc.add_options()
-    ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
-    ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
-    ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
-    ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
-    ("output-file,o", po::value<string>(&outputFile), "Output file")
-    ;
+      ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+      ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
+      ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+      ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
+      ("output-file,o", po::value<string>(&outputFile), "Output file")
+      ;
 
   po::options_description cmdline_options;
   cmdline_options.add(desc);
@@ -134,7 +120,7 @@ int main(int argc, char** argv)
       cout << desc << endl;
       exit(0);
   }
-  
+
   if (vm.count("random-seed")) {
     cerr << "Initialising random seed to " << seed << endl;
     srand(seed);
@@ -167,7 +153,7 @@ int main(int argc, char** argv)
     out = &cout;
   }
 
-  
+
   vector<FeatureDataIterator> featureDataIters;
   vector<ScoreDataIterator> scoreDataIters;
   for (size_t i = 0; i < featureFiles.size(); ++i) {
@@ -179,7 +165,7 @@ int main(int argc, char** argv)
   size_t sentenceId = 0;
   while(1) {
     vector<pair<size_t,size_t> > hypotheses;
-    //TODO: de-deuping. Collect hashes of score,feature pairs and 
+    //TODO: de-duping. Collect hashes of score,feature pairs and
     //only add index if it's unique.
     if (featureDataIters[0] == FeatureDataIterator::end()) {
       break;
@@ -214,7 +200,7 @@ int main(int argc, char** argv)
       size_t rand2 = rand() % n_translations;
       pair<size_t,size_t> translation2 = hypotheses[rand2];
       float bleu2 = sentenceLevelBleuPlusOne(scoreDataIters[translation2.first]->operator[](translation2.second));
-      
+
       /*
       cerr << "t(" << translation1.first << "," << translation1.second << ") = " << bleu1 <<
         " t(" << translation2.first << "," << translation2.second << ") = " <<
@@ -222,7 +208,7 @@ int main(int argc, char** argv)
       */
       if (abs(bleu1-bleu2) < min_diff)
         continue;
-      
+
       samples.push_back(SampledPair(translation1, translation2, bleu1-bleu2));
       scores.push_back(1.0-abs(bleu1-bleu2));
     }
@@ -261,4 +247,3 @@ int main(int argc, char** argv)
   outFile.close();
 
 }
-
author	Christian Federmann <cfedermann@gmail.com>	2012-05-09 23:10:52 +0400
committer	Christian Federmann <cfedermann@gmail.com>	2012-05-09 23:10:52 +0400
commit	25f43d13b8f3cc6cc0be19028605efe15eaa416b (patch)
tree	5e2a7e22a17f88e0dc53032a473bb20ff368375a /mert
parent	d9e77ed5b1676956978e221427c8c39f372e7612 (diff)
parent	440650bd6e03cdea9aa2d3f11b32697bb9340ca0 (diff)