Merge remote branch 'origin/master' into pro

Conflicts: kenlm mert/Data.cpp mert/Data.h mert/Makefile.am mert/mert.cpp
author: Barry Haddow <barry.haddow@gmail.com> 2011-11-17 19:58:56 +0400
committer: Barry Haddow <barry.haddow@gmail.com> 2011-11-17 19:58:56 +0400
commit: ba5633c7b57b8843f9befc836db78989f3d15652 (patch)
tree: 4342cdcbf03d6d0ae6e42a519ebb1a67ebee8746 /mert
parent: 71c777f01df14c9523e20f118c90396d51678c02 (diff)
parent: 492fe6d97af84065fa0b4e0afa9b77a26761a11f (diff)
59 files changed, 2072 insertions, 1860 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 3c5a89789..67692eef7 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -1,11 +1,38 @@
 #include "BleuScorer.h"
 
-const int BleuScorer::LENGTH = 4;
+#include <algorithm>
+#include <cmath>
+#include <climits>
+#include <fstream>
+#include <iterator>
+#include <stdexcept>
+#include "Util.h"
 
+BleuScorer::BleuScorer(const string& config)
+    : StatisticsBasedScorer("BLEU",config),
+      kLENGTH(4),
+      _refLengthStrategy(BLEU_CLOSEST) {
+  // Map the "reflen" config value (fetched via getConfig) to a strategy; unknown values throw.
+  static string KEY_REFLEN = "reflen";
+  static string REFLEN_AVERAGE = "average";
+  static string REFLEN_SHORTEST = "shortest";
+  static string REFLEN_CLOSEST = "closest";
+
+  string reflen = getConfig(KEY_REFLEN,REFLEN_CLOSEST);
+  if (reflen == REFLEN_AVERAGE) {
+    _refLengthStrategy = BLEU_AVERAGE;
+  } else if (reflen == REFLEN_SHORTEST) {
+    _refLengthStrategy = BLEU_SHORTEST;
+  } else if (reflen == REFLEN_CLOSEST) {
+    _refLengthStrategy = BLEU_CLOSEST;
+  } else {
+    throw runtime_error("Unknown reference length strategy: " + reflen);
+  }
+  //    cerr << "Using reference length strategy: " << reflen << endl;
+}
+
+BleuScorer::~BleuScorer() {}
 
-/**
-  * count the ngrams of each type, up to the given length in the input line.
- **/
 size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned int n)
 {
   vector<int> encoded_tokens;
@@ -24,7 +51,7 @@ size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned in
         ngram.push_back(encoded_tokens[j]);
       }
       int count = 1;
-      counts_it oldcount = counts.find(ngram);
+      counts_iterator oldcount = counts.find(ngram);
       if (oldcount != counts.end()) {
         count = (oldcount->second) + 1;
       }
@@ -41,7 +68,7 @@ size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned in
 void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
   //make sure reference data is clear
-  _refcounts.clear();
+  _refcounts.reset();
   _reflengths.clear();
   _encodings.clear();
 
@@ -57,7 +84,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
     while (getline(refin,line)) {
       //cerr << line << endl;
       if (i == 0) {
-        counts_t* counts = new counts_t(); //these get leaked
+        counts_t *counts = new counts_t; //these get leaked
         _refcounts.push_back(counts);
         vector<size_t> lengths;
         _reflengths.push_back(lengths);
@@ -66,10 +93,10 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
         throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
       }
       counts_t counts;
-      size_t length = countNgrams(line,counts,LENGTH);
+      size_t length = countNgrams(line,counts,kLENGTH);
       //for any counts larger than those already there, merge them in
-      for (counts_it ci = counts.begin(); ci != counts.end(); ++ci) {
-        counts_it oldcount_it = _refcounts[sid]->find(ci->first);
+      for (counts_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
+        counts_iterator oldcount_it = _refcounts[sid]->find(ci->first);
         int oldcount = 0;
         if (oldcount_it != _refcounts[sid]->end()) {
           oldcount = oldcount_it->second;
@@ -93,8 +120,8 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
 
 void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
-//	cerr << text << endl;
-//	cerr << sid << endl;
+//      cerr << text << endl;
+//      cerr << sid << endl;
   //dump_counts(*_refcounts[sid]);
   if (sid >= _refcounts.size()) {
     stringstream msg;
@@ -103,8 +130,8 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
   }
   counts_t testcounts;
   //stats for this line
-  vector<float> stats(LENGTH*2);;
-  size_t length = countNgrams(text,testcounts,LENGTH);
+  vector<float> stats(kLENGTH*2);;
+  size_t length = countNgrams(text,testcounts,kLENGTH);
   //dump_counts(testcounts);
   if (_refLengthStrategy == BLEU_SHORTEST) {
     //cerr << reflengths.size() << " " << sid << endl;
@@ -137,9 +164,9 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
   }
   //cerr << "computed length" << endl;
   //precision on each ngram type
-  for (counts_it testcounts_it = testcounts.begin();
+  for (counts_iterator testcounts_it = testcounts.begin();
        testcounts_it != testcounts.end(); ++testcounts_it) {
-    counts_it refcounts_it = _refcounts[sid]->find(testcounts_it->first);
+    counts_iterator refcounts_it = _refcounts[sid]->find(testcounts_it->first);
     int correct = 0;
     int guess = testcounts_it->second;
     if (refcounts_it != _refcounts[sid]->end()) {
@@ -156,20 +183,20 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
   entry.set(stats_str);
 }
 
-float BleuScorer::calculateScore(const vector<int>& comps)
+float BleuScorer::calculateScore(const vector<int>& comps) const
 {
   //cerr << "BLEU: ";
   //copy(comps.begin(),comps.end(), ostream_iterator<int>(cerr," "));
   float logbleu = 0.0;
-  for (int i = 0; i < LENGTH; ++i) {
+  for (int i = 0; i < kLENGTH; ++i) {
     if (comps[2*i] == 0) {
       return 0.0;
     }
     logbleu += log(comps[2*i]) - log(comps[2*i+1]);
 
   }
-  logbleu /= LENGTH;
-  float brevity = 1.0 - (float)comps[LENGTH*2]/comps[1];//reflength divided by test length
+  logbleu /= kLENGTH;
+  float brevity = 1.0 - (float)comps[kLENGTH*2]/comps[1];//reflength divided by test length
   if (brevity < 0.0) {
     logbleu += brevity;
   }
@@ -177,3 +204,11 @@ float BleuScorer::calculateScore(const vector<int>& comps)
   return exp(logbleu);
 }
 
+void BleuScorer::dump_counts(counts_t& counts) const {
+  for (counts_const_iterator i = counts.begin(); i != counts.end(); ++i) {
+    cerr << "(";
+    copy(i->first.begin(), i->first.end(), ostream_iterator<int>(cerr," "));
+    cerr << ") " << i->second << ", ";
+  }
+  cerr << endl;
+}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 6481ce709..7e0e18b53 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -1,19 +1,14 @@
 #ifndef __BLEUSCORER_H__
 #define __BLEUSCORER_H__
 
-#include <algorithm>
-#include <cmath>
 #include <iostream>
-#include <iterator>
-#include <set>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #include <vector>
-#include <limits.h>
+
 #include "Types.h"
 #include "ScoreData.h"
 #include "Scorer.h"
+#include "ScopedVector.h"
 
 using namespace std;
 
@@ -21,55 +16,29 @@ enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST };
 
 
 /**
-  * Bleu scoring
- **/
+ * Bleu scoring
+ */
 class BleuScorer: public StatisticsBasedScorer
 {
 public:
-  BleuScorer(const string& config = "") : StatisticsBasedScorer("BLEU",config),_refLengthStrategy(BLEU_CLOSEST) {
-    //configure regularisation
-    static string KEY_REFLEN = "reflen";
-    static string REFLEN_AVERAGE = "average";
-    static string REFLEN_SHORTEST = "shortest";
-    static string REFLEN_CLOSEST = "closest";
-
-
-    string reflen = getConfig(KEY_REFLEN,REFLEN_CLOSEST);
-    if (reflen == REFLEN_AVERAGE) {
-      _refLengthStrategy = BLEU_AVERAGE;
-    } else if (reflen == REFLEN_SHORTEST) {
-      _refLengthStrategy = BLEU_SHORTEST;
-    } else if (reflen == REFLEN_CLOSEST) {
-      _refLengthStrategy = BLEU_CLOSEST;
-    } else {
-      throw runtime_error("Unknown reference length strategy: " + reflen);
-    }
-//    cerr << "Using reference length strategy: " << reflen << endl;
-  }
+  explicit BleuScorer(const string& config = "");
+  ~BleuScorer();
+
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-  static const int LENGTH;
+  virtual float calculateScore(const vector<int>& comps) const;
 
-  size_t NumberOfScores() {
-    // cerr << "BleuScorer: " << (2 * LENGTH + 1) << endl;
-    return (2 * LENGTH + 1);
-  };
-
-
-//protected:
-  float calculateScore(const vector<int>& comps);
+  virtual size_t NumberOfScores() const {
+    return 2 * kLENGTH + 1;
+  }
 
 private:
-  //no copy
-  BleuScorer(const BleuScorer&);
-  ~BleuScorer() {};
-  BleuScorer& operator=(const BleuScorer&);
   //Used to construct the ngram map
   struct CompareNgrams {
-    int operator() (const vector<int>& a, const vector<int>& b) {
+    bool operator()(const vector<int>& a, const vector<int>& b) const {
       size_t i;
-      size_t as = a.size();
-      size_t bs = b.size();
+      const size_t as = a.size();
+      const size_t bs = b.size();
       for (i = 0; i < as && i < bs; ++i) {
         if (a[i] < b[i]) {
           //cerr << "true" << endl;
@@ -86,26 +55,27 @@ private:
   };
 
   typedef map<vector<int>,int,CompareNgrams> counts_t;
-  typedef map<vector<int>,int,CompareNgrams>::iterator counts_it;
-
-  typedef vector<counts_t*> refcounts_t;
+  typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator;
+  typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator;
+  typedef ScopedVector<counts_t> refcounts_t;
 
+  /**
+   * Count the ngrams of each type, up to the given length in the input line.
+   */
   size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
 
-  void dump_counts(counts_t& counts) {
-    for (counts_it i = counts.begin(); i != counts.end(); ++i) {
-      cerr << "(";
-      copy(i->first.begin(), i->first.end(), ostream_iterator<int>(cerr," "));
-      cerr << ") " << i->second << ", ";
-    }
-    cerr << endl;
-  }
+  void dump_counts(counts_t& counts) const;
+
+  const int kLENGTH;
   BleuReferenceLengthStrategy _refLengthStrategy;
 
   // data extracted from reference files
   refcounts_t _refcounts;
   vector<vector<size_t> > _reflengths;
-};
 
+  // no copying allowed
+  BleuScorer(const BleuScorer&);
+  BleuScorer& operator=(const BleuScorer&);
+};
 
-#endif //__BLEUSCORER_H
+#endif  // __BLEUSCORER_H__
diff --git a/mert/CderScorer.cpp b/mert/CderScorer.cpp
index 7b85da94d..fc8d69f5f 100644
--- a/mert/CderScorer.cpp
+++ b/mert/CderScorer.cpp
@@ -1,19 +1,17 @@
 #include "CderScorer.h"
-
+#include <fstream>
 #include <stdexcept>
-#include <iostream>
-#include <algorithm>
 
-CderScorer::CderScorer(const string& config) 
-    : StatisticsBasedScorer("CDER",config)
-{
-}
+CderScorer::CderScorer(const string& config)
+    : StatisticsBasedScorer("CDER",config) {}
+
+CderScorer::~CderScorer() {}
 
 void CderScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
   //make sure reference data is clear
   ref_sentences.clear();
-  
+
   //load reference data
   for (size_t rid = 0; rid < referenceFiles.size(); ++rid) {
     ifstream refin(referenceFiles[rid].c_str());
@@ -23,85 +21,81 @@ void CderScorer::setReferenceFiles(const vector<string>& referenceFiles)
     ref_sentences.push_back(vector<sent_t>());
     string line;
     while (getline(refin,line)) {
-		sent_t encoded;
-		encode(line, encoded);
-		ref_sentences[rid].push_back(encoded);
+      sent_t encoded;
+      encode(line, encoded);
+      ref_sentences[rid].push_back(encoded);
     }
-  }  
+  }
 }
 
 void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>& stats)
 {
-	sent_t cand;
-	encode(text, cand);
-  
-    float max = -2;
-    for (size_t rid = 0; rid < ref_sentences.size(); ++rid)
-    {
-		sent_t& ref = ref_sentences[rid][sid];
-        vector<int> tmp = computeCD(cand, ref);
-		if (calculateScore(tmp) > max)
-        {
-            stats = tmp;
-        }
+  sent_t cand;
+  encode(text, cand);
+  float max = -2;
+  for (size_t rid = 0; rid < ref_sentences.size(); ++rid) {
+    const vector<int> tmp = computeCD(cand, ref_sentences[rid][sid]);
+    const float score = calculateScore(tmp);
+    if (rid == 0 || score > max) {  // keep the best-scoring reference's stats
+      max = score;
+      stats = tmp;
     }
+  }
 }
 
-float CderScorer::calculateScore(const vector<int>& comps)
+float CderScorer::calculateScore(const vector<int>& comps) const
 {
-	if (comps.size() != 2)
-	{
-		throw runtime_error("Size of stat vector for CDER is not 2");
-	}
+  if (comps.size() != 2)
+  {
+    throw runtime_error("Size of stat vector for CDER is not 2");
+  }
 
-	return 1 - (comps[0] / (float) comps[1]);
+  return 1 - (comps[0] / (float) comps[1]);
 }
 
-vector<int> CderScorer::computeCD(const sent_t& cand, const sent_t& ref)
+vector<int> CderScorer::computeCD(const sent_t& cand, const sent_t& ref) const
 {
-	int I = cand.size() + 1;	// Number of inter-words positions in candidate sentence
-	int L = ref.size() + 1;		// Number of inter-words positions in reference sentence
-
-	
-	int l = 0;
-	vector<int>* row = new vector<int>(I);	// row[i] stores cost of cheapest path from (0,0) to (i,l) in CDER aligment grid. 
-
-	// Initialization of first row
-	(*row)[0] = 0;
-	for (int i = 1; i < I; ++i) (*row)[i] = 1;
-	
-	// Calculating costs for next row using costs from the previous row.
-	while (++l < L)
-	{
-		vector<int>* nextRow = new vector<int>(I);
-		for (int i = 0; i < I; ++i)
-		{
-			vector<int> possibleCosts;
-			if (i > 0)
-			{
-				possibleCosts.push_back((*nextRow)[i-1] + 1);							// Deletion
-				possibleCosts.push_back((*row)[i-1] + distance(ref[l-1], cand[i-1]));	// Substitution/Identity
-			}
-			possibleCosts.push_back((*row)[i] + 1);										// Insertion
-			(*nextRow)[i] = *min_element(possibleCosts.begin(), possibleCosts.end());
-		}
-
-		int LJ = 1 + *min_element(nextRow->begin(), nextRow->end());					// Cost of LongJumps is the same for all in the row
-
-		for (int i = 0; i < I; ++i)
-		{
-			(*nextRow)[i] = min((*nextRow)[i], LJ);										// LongJumps
-		}
-
-		delete row;
-		row = nextRow;
-	}
-
-
-	vector<int> stats(2);
-	stats[0] = *(row->rbegin());  // CD distance is the cost of path from (0,0) to (I,L)
-	stats[1] = ref.size();
-
-	delete row;
-	return stats;
+  // Computes the CDER cost of aligning cand against ref by filling the
+  // alignment grid one reference word (one row) at a time. Returns
+  // {cost of the cheapest path from (0,0) to (I,L), ref.size()}.
+
+  // I: number of inter-word positions in the candidate sentence, i.e. the
+  // width of every grid row. (L, the reference equivalent, is ref.size() + 1.)
+  const size_t I = cand.size() + 1;
+
+  // row[i] stores the cost of the cheapest path from (0,0) to (i,l) in the CDER
+  // alignment grid. First row: the origin is free, every other cell costs 1.
+  vector<int> row(I, 1);
+  row[0] = 0;
+
+  // The two rows are plain vectors that get swapped, so there is no manual
+  // new/delete and nothing leaks if an allocation throws part-way through.
+  vector<int> nextRow(I);
+
+  // Calculating costs for next row using costs from the previous row.
+  for (size_t l = 1; l <= ref.size(); ++l) {
+    for (size_t i = 0; i < I; ++i) {
+      int best = row[i] + 1; // Insertion
+      if (i > 0) {
+        best = min(best, nextRow[i-1] + 1); // Deletion
+        // Substitution/Identity
+        best = min(best, row[i-1] + distance(ref[l-1], cand[i-1]));
+      }
+      nextRow[i] = best;
+    }
+
+    // Cost of LongJumps is the same for all in the row
+    const int LJ = 1 + *min_element(nextRow.begin(), nextRow.end());
+
+    for (size_t i = 0; i < I; ++i) {
+      nextRow[i] = min(nextRow[i], LJ); // LongJumps
+    }
+
+    row.swap(nextRow); // nextRow is fully overwritten on the next pass
+  }
+
+  vector<int> stats(2);
+  stats[0] = row.back();  // CD distance is the cost of path from (0,0) to (I,L)
+  stats[1] = ref.size();
+  return stats;
 }
diff --git a/mert/CderScorer.h b/mert/CderScorer.h
index 1a2989b9e..359ac31e9 100644
--- a/mert/CderScorer.h
+++ b/mert/CderScorer.h
@@ -3,20 +3,20 @@
 
 #include <algorithm>
 #include <iostream>
+#include <iterator>
 #include <string>
 #include <vector>
 #include "Types.h"
-#include "ScoreData.h"
 #include "Scorer.h"
 
-
 using namespace std;
 
-
 class CderScorer: public StatisticsBasedScorer
 {
 public:
-  CderScorer(const string& config);
+  explicit CderScorer(const string& config);
+  ~CderScorer();
+
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry)
   {
@@ -30,30 +30,25 @@ public:
   }
   virtual void prepareStatsVector(size_t sid, const string& text, vector<int>& stats);
 
-  size_t NumberOfScores() {
+  virtual size_t NumberOfScores() const {
     return 2;
-  };
+  }
 
-  float calculateScore(const vector<int>& comps);
+  virtual float calculateScore(const vector<int>& comps) const;
 
 private:
   typedef vector<int> sent_t;
   vector<vector<sent_t> > ref_sentences;
 
-  vector<int> computeCD(const sent_t& cand, const sent_t& ref);
-  int distance(int word1, int word2) 
+  vector<int> computeCD(const sent_t& cand, const sent_t& ref) const;
+  int distance(int word1, int word2) const
   {
-	  if (word1 == word2) 
-		  return 0;
-	  else 
-		  return 1;
+    return word1 == word2 ? 0 : 1;
   }
 
-  //no copy
+  // no copying allowed
   CderScorer(const CderScorer&);
-  ~CderScorer() {};
   CderScorer& operator=(const CderScorer&);
 };
 
-
-#endif
+#endif  // __CDERSCORER_H__
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 806308cb1..ed1100106 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -6,26 +6,46 @@
  *
  */
 
+#include <algorithm>
 #include <cassert>
+#include <cmath>
 #include <fstream>
 
+#include "Data.h"
+#include "FileStream.h"
 #include "Scorer.h"
 #include "ScorerFactory.h"
-#include "Data.h"
 #include "Util.h"
 
-
-Data::Data(Scorer& ptr):
-  theScorer(&ptr),
-  _sparse_flag(false)
+Data::Data()
+  : theScorer(NULL),
+    number_of_scores(0),
+    _sparse_flag(false),
+    scoredata(NULL),
+    featdata(NULL) {}
+
+Data::Data(Scorer& ptr)
+    : theScorer(&ptr),
+      score_type(theScorer->getName()),
+      number_of_scores(0),
+      _sparse_flag(false),
+      scoredata(new ScoreData(*theScorer)),
+      featdata(new FeatureData)
 {
-  score_type = (*theScorer).getName();
   TRACE_ERR("Data::score_type " << score_type << std::endl);
-
   TRACE_ERR("Data::Scorer type from Scorer: " << theScorer->getName() << endl);
-  featdata=new FeatureData;
-  scoredata=new ScoreData(*theScorer);
-};
+}
+
+// Releases the feature and score containers held through raw pointers.
+// Applying delete to a NULL pointer is a no-op, so no guards are needed;
+// both members end up NULL afterwards, exactly as with the guarded form.
+Data::~Data() {
+  delete featdata;
+  featdata = NULL;
+
+  delete scoredata;
+  scoredata = NULL;
+}
 
 void Data::loadnbest(const std::string &file)
 {
@@ -47,7 +67,7 @@ void Data::loadnbest(const std::string &file)
   while (getline(inp,stringBuf,'\n')) {
     if (stringBuf.empty()) continue;
 
-//		TRACE_ERR("stringBuf: " << stringBuf << std::endl);
+//              TRACE_ERR("stringBuf: " << stringBuf << std::endl);
 
     getNextPound(stringBuf, substring, "|||"); //first field
     sentence_index = substring;
@@ -55,7 +75,7 @@ void Data::loadnbest(const std::string &file)
     getNextPound(stringBuf, substring, "|||"); //second field
     theSentence = substring;
 
-// adding statistics for error measures
+    // adding statistics for error measures
     featentry.reset();
     scoreentry.clear();
 
@@ -73,7 +93,7 @@ void Data::loadnbest(const std::string &file)
 
       size_t tmpidx=0;
       while (!stringsupport.empty()) {
-        //			TRACE_ERR("Decompounding: " << substring << std::endl);
+        //                      TRACE_ERR("Decompounding: " << substring << std::endl);
         getNextPound(stringsupport, subsubstring);
 
         // string ending with ":" are skipped, because they are the names of the features
@@ -98,12 +118,12 @@ void Data::loadnbest(const std::string &file)
 
     // adding features
     while (!substring.empty()) {
-//			TRACE_ERR("Decompounding: " << substring << std::endl);
+//                      TRACE_ERR("Decompounding: " << substring << std::endl);
       getNextPound(substring, subsubstring);
 
       // no ':' -> feature value that needs to be stored
       if ((loc = subsubstring.find_last_of(":")) != subsubstring.length()-1) {
-        featentry.add(ATOFST(subsubstring.c_str()));
+        featentry.add(ConvertStringToFeatureStatsType(subsubstring));
       }
       // sparse feature name? store as well
       else if (subsubstring.find("_") != string::npos) {
@@ -121,22 +141,22 @@ void Data::loadnbest(const std::string &file)
 }
 
 // TODO
-void Data::mergeSparseFeatures() { 
+void Data::mergeSparseFeatures() {
   std::cerr << "ERROR: sparse features can only be trained with pairwise ranked optimizer (PRO), not traditional MERT\n";
   exit(1);
 }
 
 void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
-      std::vector<Data>& shards) 
+                        std::vector<Data>& shards)
 {
   assert(shard_count);
-  assert(shard_size >=0);
+  assert(shard_size >= 0);
   assert(shard_size <= 1);
 
   size_t data_size = scoredata->size();
   assert(data_size == featdata->size());
 
-  shard_size *=  data_size;
+  shard_size *= data_size;
 
   for (size_t shard_id = 0; shard_id < shard_count; ++shard_id) {
     vector<size_t> shard_contents;
@@ -153,9 +173,8 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
         shard_contents.push_back(rand() % data_size);
       }
     }
-    
-    ScorerFactory SF;
-    Scorer* scorer = SF.getScorer(score_type, scorerconfig);
+
+    Scorer* scorer = ScorerFactory::getScorer(score_type, scorerconfig);
 
     shards.push_back(Data(*scorer));
     shards.back().score_type = score_type;
@@ -166,7 +185,5 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
       shards.back().scoredata->add(scoredata->get(shard_contents[i]));
     }
     //cerr << endl;
-    
   }
 }
-
diff --git a/mert/Data.h b/mert/Data.h
index 84dcb884e..db858fc18 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -23,19 +23,21 @@ class Scorer;
 
 class Data
 {
-protected:
-  ScoreData* scoredata;
-  FeatureData* featdata;
-
 private:
   Scorer* theScorer;
   std::string score_type;
-  size_t number_of_scores; //number of scores
+  size_t number_of_scores;
   bool _sparse_flag;
 
+protected:
+  // TODO: Use smart pointers for exception safety.
+  ScoreData* scoredata;
+  FeatureData* featdata;
+
 public:
-  Data(Scorer& sc);
-  ~Data() {};
+  explicit Data(Scorer& sc);
+  Data();
+  ~Data();
 
   inline void clear() {
     scoredata->clear();
@@ -44,10 +46,11 @@ public:
 
   ScoreData* getScoreData() {
     return scoredata;
-  };
+  }
+
   FeatureData* getFeatureData() {
     return featdata;
-  };
+  }
 
   Scorer* getScorer() {
     return theScorer;
@@ -62,7 +65,7 @@ public:
   inline std::string Features() const {
     return featdata->Features();
   }
-  inline void Features(const std::string f) {
+  inline void Features(const std::string &f) {
     featdata->Features(f);
   }
 
@@ -87,26 +90,26 @@ public:
     scoredata->save(scorefile, bin);
   }
 
-  inline bool existsFeatureNames() {
+  inline bool existsFeatureNames() const {
     return featdata->existsFeatureNames();
-  };
+  }
 
-  inline std::string getFeatureName(size_t idx) {
+  inline std::string getFeatureName(size_t idx) const {
     return featdata->getFeatureName(idx);
-  };
+  }
 
-  inline size_t getFeatureIndex(const std::string& name) {
+  inline size_t getFeatureIndex(const std::string& name) const {
     return featdata->getFeatureIndex(name);
-  };
+  }
 
   /**
-   *  Create shard_count shards. If shard_size == 0, then the shards are non-overlapping
-   *  and exhaust the data. If 0 < shard_size <= 1, then shards are chosen by sampling 
-   *  the data (with replacement) and shard_size is interpreted as the proportion
+   * Create shard_count shards. If shard_size == 0, then the shards are non-overlapping
+   * and exhaust the data. If 0 < shard_size <= 1, then shards are chosen by sampling
+   * the data (with replacement) and shard_size is interpreted as the proportion
    * of the total size.
    */
-  void createShards(size_t shard_count, float shard_size, const std::string& scorerconfig, 
-       std::vector<Data>& shards);
+  void createShards(size_t shard_count, float shard_size, const std::string& scorerconfig,
+                    std::vector<Data>& shards);
 };
 
-#endif
+#endif  // DATA_H
diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp
index 91785ec91..854bcef79 100644
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@@ -6,13 +6,15 @@
  *
  */
 
-#include <fstream>
 #include "FeatureArray.h"
+#include "FileStream.h"
 #include "Util.h"
 
 
-FeatureArray::FeatureArray(): idx(""), _sparse_flag(false)
-{};
+FeatureArray::FeatureArray()
+    : idx(""), number_of_features(0), _sparse_flag(false) {}
+
+FeatureArray::~FeatureArray() {}
 
 void FeatureArray::savetxt(std::ofstream& outFile)
 {
@@ -136,19 +138,16 @@ void FeatureArray::merge(FeatureArray& e)
     add(e.get(i));
 }
 
-
-
-bool FeatureArray::check_consistency()
+bool FeatureArray::check_consistency() const
 {
-  size_t sz = NumberOfFeatures();
-
+  const size_t sz = NumberOfFeatures();
   if (sz == 0)
     return true;
 
-  for (featarray_t::iterator i=array_.begin(); i!=array_.end(); i++)
-    if (i->size()!=sz)
+  for (featarray_t::const_iterator i = array_.begin(); i != array_.end(); i++) {
+    if (i->size() != sz)
       return false;
-
+  }
   return true;
 }
 
diff --git a/mert/FeatureArray.h b/mert/FeatureArray.h
index 0f5f60376..1fa3c4151 100644
--- a/mert/FeatureArray.h
+++ b/mert/FeatureArray.h
@@ -9,36 +9,34 @@
 #ifndef FEATURE_ARRAY_H
 #define FEATURE_ARRAY_H
 
-#define FEATURES_TXT_BEGIN "FEATURES_TXT_BEGIN_0"
-#define FEATURES_TXT_END "FEATURES_TXT_END_0"
-#define FEATURES_BIN_BEGIN "FEATURES_BIN_BEGIN_0"
-#define FEATURES_BIN_END "FEATURES_BIN_END_0"
-
-using namespace std;
-
-#include <limits>
 #include <vector>
 #include <iostream>
 #include <fstream>
-
-#include "Util.h"
 #include "FeatureStats.h"
 
+using namespace std;
+
+const char FEATURES_TXT_BEGIN[] = "FEATURES_TXT_BEGIN_0";
+const char FEATURES_TXT_END[] = "FEATURES_TXT_END_0";
+const char FEATURES_BIN_BEGIN[] = "FEATURES_BIN_BEGIN_0";
+const char FEATURES_BIN_END[] = "FEATURES_BIN_END_0";
+
 class FeatureArray
 {
+private:
+  // idx to identify the utterance. It can differ from
+  // the index inside the vector.
+  std::string idx;
+
 protected:
   featarray_t array_;
   size_t number_of_features;
   std::string features;
   bool _sparse_flag;
 
-private:
-  std::string idx; // idx to identify the utterance, it can differ from the index inside the vector
-
 public:
   FeatureArray();
-
-  ~FeatureArray() {};
+  ~FeatureArray();
 
   inline void clear() {
     array_.clear();
@@ -48,26 +46,26 @@ public:
     return _sparse_flag;
   }
 
-  inline std::string getIndex() {
+  inline std::string getIndex() const {
     return idx;
   }
-  inline void setIndex(const std::string & value) {
-    idx=value;
+  inline void setIndex(const std::string& value) {
+    idx = value;
   }
 
-  inline FeatureStats&  get(size_t i) {
+  inline FeatureStats& get(size_t i) {
     return array_.at(i);
   }
-  inline const FeatureStats&  get(size_t i)const {
+  inline const FeatureStats& get(size_t i)const {
     return array_.at(i);
   }
-  void add(FeatureStats e) {
+  void add(const FeatureStats& e) {  // e is only copied, so const objects and temporaries are accepted
     array_.push_back(e);
   }
 
   void merge(FeatureArray& e);
 
-  inline size_t size() {
+  inline size_t size() const {
     return array_.size();
   }
   inline size_t NumberOfFeatures() const {
@@ -79,7 +77,7 @@ public:
   inline std::string Features() const {
     return features;
   }
-  inline void Features(const std::string f) {
+  inline void Features(const std::string& f) {
     features = f;
   }
 
@@ -96,8 +94,7 @@ public:
   void load(ifstream& inFile);
   void load(const std::string &file);
 
-  bool check_consistency();
+  bool check_consistency() const;
 };
 
-
-#endif
+#endif  // FEATURE_ARRAY_H
diff --git a/mert/FeatureData.cpp b/mert/FeatureData.cpp
index 7e8d53990..ed76bca3b 100644
--- a/mert/FeatureData.cpp
+++ b/mert/FeatureData.cpp
@@ -6,15 +6,18 @@
  *
  */
 
-#include <fstream>
 #include "FeatureData.h"
-#include "Util.h"
 
+#include <limits>
+#include "FileStream.h"
+#include "Util.h"
 
 static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
 static const float MAX_FLOAT=numeric_limits<float>::max();
 
-FeatureData::FeatureData() {};
+FeatureData::FeatureData()
+    : number_of_features(0),
+      _sparse_flag(false) {}
 
 void FeatureData::save(std::ofstream& outFile, bool bin)
 {
@@ -89,15 +92,15 @@ void FeatureData::add(FeatureArray& e)
   }
 }
 
-void FeatureData::add(FeatureStats& e, const std::string & sent_idx)
+void FeatureData::add(FeatureStats& e, const std::string& sent_idx)
 {
   if (exists(sent_idx)) { // array at position e.getIndex() already exists
     //enlarge array at position e.getIndex()
     size_t pos = getIndex(sent_idx);
-//		TRACE_ERR("Inserting " << e << " in array " << sent_idx << std::endl);
+//              TRACE_ERR("Inserting " << e << " in array " << sent_idx << std::endl);
     array_.at(pos).add(e);
   } else {
-//		TRACE_ERR("Creating a new entry in the array and inserting " << e << std::endl);
+//              TRACE_ERR("Creating a new entry in the array and inserting " << e << std::endl);
     FeatureArray a;
     a.NumberOfFeatures(number_of_features);
     a.Features(features);
@@ -107,12 +110,12 @@ void FeatureData::add(FeatureStats& e, const std::string & sent_idx)
   }
 }
 
-bool FeatureData::check_consistency()
+bool FeatureData::check_consistency() const
 {
   if (array_.size() == 0)
     return true;
 
-  for (featdata_t::iterator i = array_.begin(); i !=array_.end(); i++)
+  for (featdata_t::const_iterator i = array_.begin(); i != array_.end(); i++)
     if (!i->check_consistency()) return false;
 
   return true;
@@ -128,20 +131,18 @@ void FeatureData::setIndex()
   }
 }
 
-
-void FeatureData::setFeatureMap(const std::string feat)
+void FeatureData::setFeatureMap(const std::string& feat)
 {
   number_of_features = 0;
-  features=feat;
+  features = feat;
 
   std::string substring, stringBuf;
-  stringBuf=features;
+  stringBuf = features;
   while (!stringBuf.empty()) {
     getNextPound(stringBuf, substring);
 
-    featname2idx_[substring]=idx2featname_.size();
-    idx2featname_[idx2featname_.size()]=substring;
+    featname2idx_[substring] = idx2featname_.size();
+    idx2featname_[idx2featname_.size()] = substring;
     number_of_features++;
   }
 }
-
diff --git a/mert/FeatureData.h b/mert/FeatureData.h
index 0fd69608e..6934cdcba 100644
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@@ -11,40 +11,36 @@
 
 using namespace std;
 
-#include <limits>
 #include <vector>
 #include <iostream>
-
-#include "Util.h"
+#include <stdexcept>
 #include "FeatureArray.h"
 
 class FeatureData
 {
-
-protected:
-  featdata_t array_;
-  idx2name idx2arrayname_; //map from index to name of array
-  name2idx arrayname2idx_; //map from name to index of array
-
 private:
   size_t number_of_features;
   std::string features;
   bool _sparse_flag;
 
-  map<std::string, size_t> featname2idx_; //map from name to index of features
-  map<size_t, std::string> idx2featname_; //map from index to name of features
+  map<std::string, size_t> featname2idx_; // map from name to index of features
+  map<size_t, std::string> idx2featname_; // map from index to name of features
+
+protected:
+  featdata_t array_;
+  idx2name idx2arrayname_; // map from index to name of array
+  name2idx arrayname2idx_; // map from name to index of array
 
 public:
   FeatureData();
-
-  ~FeatureData() {};
+  ~FeatureData() {}
 
   inline void clear() {
     array_.clear();
   }
 
-  inline bool hasSparseFeatures() const { 
-    return _sparse_flag; 
+  inline bool hasSparseFeatures() const {
+    return _sparse_flag;
   }
   inline FeatureArray get(const std::string& idx) {
     return array_.at(getIndex(idx));
@@ -56,7 +52,7 @@ public:
     return array_.at(idx);
   }
 
-  inline bool exists(const std::string & sent_idx) {
+  inline bool exists(const std::string& sent_idx) {
     return exists(getIndex(sent_idx));
   }
   inline bool exists(int sent_idx) {
@@ -73,7 +69,7 @@ public:
   void add(FeatureArray& e);
   void add(FeatureStats& e, const std::string& sent_idx);
 
-  inline size_t size() {
+  inline size_t size() const {
     return array_.size();
   }
   inline size_t NumberOfFeatures() const {
@@ -85,7 +81,7 @@ public:
   inline std::string Features() const {
     return features;
   }
-  inline void Features(const std::string f) {
+  inline void Features(const std::string& f) {
     features = f;
   }
 
@@ -98,44 +94,47 @@ public:
   void load(ifstream& inFile);
   void load(const std::string &file);
 
-  bool check_consistency();
+  bool check_consistency() const;
   void setIndex();
 
-  inline int getIndex(const std::string& idx) {
-    name2idx::iterator i = arrayname2idx_.find(idx);
-    if (i!=arrayname2idx_.end())
+  inline int getIndex(const std::string& idx) const {
+    name2idx::const_iterator i = arrayname2idx_.find(idx);
+    if (i != arrayname2idx_.end())
       return i->second;
     else
       return -1;
   }
 
-  inline std::string getIndex(size_t idx) {
-    idx2name::iterator i = idx2arrayname_.find(idx);
-    if (i!=idx2arrayname_.end())
+  inline std::string getIndex(size_t idx) const {  // name of the array stored at idx; throws runtime_error if there is none
+    idx2name::const_iterator i = idx2arrayname_.find(idx);
+    if (i == idx2arrayname_.end())  // "!=" threw on every hit and dereferenced end() on every miss
-      throw runtime_error("there is no entry at index " + idx);
+      throw runtime_error("there is no entry at the requested index");  // "literal + idx" offsets the char pointer instead of appending the number
     return i->second;
   }
 
+  bool existsFeatureNames() const {  // true when idx2featname_ holds at least one feature name
+    return !idx2featname_.empty();
+  }
 
-  bool existsFeatureNames() {
-    return (idx2featname_.size() > 0)?true:false;
-  };
-
-  std::string getFeatureName(size_t idx) {
+  std::string getFeatureName(size_t idx) const {  // name stored for feature idx; throws runtime_error if idx is out of range or unmapped
     if (idx >= idx2featname_.size())
       throw runtime_error("Error: you required an too big index");
-    return idx2featname_[idx];
-  };
+    map<size_t, std::string>::const_iterator it = idx2featname_.find(idx);
+    if (it == idx2featname_.end()) {
+      throw runtime_error("Error: specified id is unknown");  // "literal + idx" offsets the char pointer instead of appending the number
+    } else {
+      return it->second;
+    }
+  }
 
-  size_t getFeatureIndex(const std::string& name) {
-    if (featname2idx_.find(name)==featname2idx_.end())
-      throw runtime_error("Error: feature " + name +" is unknown");
-    return featname2idx_[name];
-  };
+  size_t getFeatureIndex(const std::string& name) const {  // index stored for feature `name`; throws runtime_error if unknown
+    const map<std::string, size_t>::const_iterator pos = featname2idx_.find(name);
+    if (pos != featname2idx_.end())
+      return pos->second;
+    throw runtime_error("Error: feature " + name + " is unknown");
+  }
 
-  void setFeatureMap(const std::string feat);
+  void setFeatureMap(const std::string& feat);
 };
 
-
-#endif
-
+#endif  // FEATURE_DATA_H
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 3d2b1d1d0..e7682518c 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -6,21 +6,23 @@
  *
  */
 
-#include <cmath>
-#include <fstream>
 #include "FeatureStats.h"
 
-#define AVAILABLE_ 8;
+#include <cmath>
+#include "Util.h"
+
+namespace {
+const int kAvailableSize = 8;
+} // namespace
 
 SparseVector::name2id_t SparseVector::name2id_;
 SparseVector::id2name_t SparseVector::id2name_;
 
-FeatureStatsType SparseVector::get(string name) const {
+FeatureStatsType SparseVector::get(const string& name) const {
   name2id_t::const_iterator name2id_iter = name2id_.find(name);
   if (name2id_iter == name2id_.end()) return 0;
   size_t id = name2id_iter->second;
   return get(id);
-
 }
 
 FeatureStatsType SparseVector::get(size_t id) const {
@@ -29,7 +31,7 @@ FeatureStatsType SparseVector::get(size_t id) const {
   return fvector_iter->second;
 }
 
-void SparseVector::set(string name, FeatureStatsType value) {
+void SparseVector::set(const string& name, FeatureStatsType value) {
   name2id_t::const_iterator name2id_iter = name2id_.find(name);
   size_t id = 0;
   if (name2id_iter == name2id_.end()) {
@@ -54,10 +56,6 @@ void SparseVector::clear() {
   fvector_.clear();
 }
 
-size_t SparseVector::size() const {
-  return fvector_.size();
-}
-
 SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
   //All the elements that have values in *this
   for (fvector_t::iterator i = fvector_.begin(); i != fvector_.end(); ++i) {
@@ -65,7 +63,7 @@ SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
   }
 
   //Any elements in rhs, that have no value in *this
-  for (fvector_t::const_iterator i = rhs.fvector_.begin(); 
+  for (fvector_t::const_iterator i = rhs.fvector_.begin();
       i != rhs.fvector_.end(); ++i) {
     if (fvector_.find(i->first) == fvector_.end()) {
       fvector_[i->first] = -(i->second);
@@ -80,49 +78,59 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
   return res;
 }
 
-
 FeatureStats::FeatureStats()
+    : available_(kAvailableSize), entries_(0),
+      array_(new FeatureStatsType[available_]) {}
+
+FeatureStats::FeatureStats(const size_t size)
+    : available_(size), entries_(size),
+      array_(new FeatureStatsType[available_])
 {
-  available_ = AVAILABLE_;
-  entries_ = 0;
-  array_ = new FeatureStatsType[available_];
-};
+  memset(array_, 0, GetArraySizeWithBytes());
+}
+
+FeatureStats::FeatureStats(std::string &theString)
+    : available_(0), entries_(0), array_(NULL)
+{
+  set(theString);
+}
 
 FeatureStats::~FeatureStats()
 {
-  delete[] array_;
-};
+  if (array_) {
+    delete [] array_;
+    array_ = NULL;
+  }
+}
 
-FeatureStats::FeatureStats(const FeatureStats &stats)
+void FeatureStats::Copy(const FeatureStats &stats)
 {
   available_ = stats.available();
   entries_ = stats.size();
   array_ = new FeatureStatsType[available_];
-  memcpy(array_,stats.getArray(),featbytes_);
+  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
   map_ = stats.getSparse();
-};
+}
 
-FeatureStats::FeatureStats(const size_t size)
+FeatureStats::FeatureStats(const FeatureStats &stats)
 {
-  available_ = size;
-  entries_ = size;
-  array_ = new FeatureStatsType[available_];
-  memset(array_,0,featbytes_);
-};
-
+  Copy(stats);
+}
 
-FeatureStats::FeatureStats(std::string &theString)
+FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
 {
-  set(theString);
+  // Self-assignment guard: freeing array_ first would make Copy() read the freed buffer.
+  if (this != &stats) { delete [] array_; Copy(stats); }
+  return *this;
 }
 
 void FeatureStats::expand()
 {
-  available_*=2;
+  available_ = (available_ == 0) ? kAvailableSize : available_ * 2;  // the string constructor starts at capacity 0, which doubling never grows
   featstats_t t_ = new FeatureStatsType[available_];
-  memcpy(t_,array_,featbytes_);
-  delete array_;
-  array_=t_;
+  if (array_) memcpy(t_, array_, GetArraySizeWithBytes());  // array_ is still NULL when expanding from that empty state
+  delete [] array_;
+  array_ = t_;
 }
 
 void FeatureStats::add(FeatureStatsType v)
@@ -131,7 +139,7 @@ void FeatureStats::add(FeatureStatsType v)
   array_[entries_++]=v;
 }
 
-void FeatureStats::addSparse(string name, FeatureStatsType v)
+void FeatureStats::addSparse(const string& name, FeatureStatsType v)
 {
   map_.set(name,v);
 }
@@ -145,7 +153,7 @@ void FeatureStats::set(std::string &theString)
     getNextPound(theString, substring);
     // regular feature
     if (substring.find(":") == string::npos) {
-      add(ATOFST(substring.c_str()));
+      add(ConvertStringToFeatureStatsType(substring));
     }
     // sparse feature
     else {
@@ -158,7 +166,7 @@ void FeatureStats::set(std::string &theString)
 
 void FeatureStats::loadbin(std::ifstream& inFile)
 {
-  inFile.read((char*) array_, featbytes_);
+  inFile.read((char*) array_, GetArraySizeWithBytes());
 }
 
 void FeatureStats::loadtxt(std::ifstream& inFile)
@@ -170,7 +178,7 @@ void FeatureStats::loadtxt(std::ifstream& inFile)
 
 void FeatureStats::loadtxt(const std::string &file)
 {
-  //	TRACE_ERR("loading the stats from " << file << std::endl);
+  //    TRACE_ERR("loading the stats from " << file << std::endl);
 
   std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
 
@@ -180,7 +188,7 @@ void FeatureStats::loadtxt(const std::string &file)
 
 void FeatureStats::savetxt(const std::string &file)
 {
-//	TRACE_ERR("saving the stats into " << file << std::endl);
+//      TRACE_ERR("saving the stats into " << file << std::endl);
 
   std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
 
@@ -190,29 +198,15 @@ void FeatureStats::savetxt(const std::string &file)
 
 void FeatureStats::savetxt(std::ofstream& outFile)
 {
-//	TRACE_ERR("saving the stats" << std::endl);
+//      TRACE_ERR("saving the stats" << std::endl);
   outFile << *this;
 }
 
 void FeatureStats::savebin(std::ofstream& outFile)
 {
-  outFile.write((char*) array_, featbytes_);
-}
-
-FeatureStats& FeatureStats::operator=(const FeatureStats &stats)
-{
-  delete array_;
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new FeatureStatsType[available_];
-  memcpy(array_,stats.getArray(),featbytes_);
-  map_ = stats.getSparse();
-
-  return *this;
+  outFile.write((char*) array_, GetArraySizeWithBytes());
 }
 
-
-/**write the whole object to a stream*/
 ostream& operator<<(ostream& o, const FeatureStats& e)
 {
   // print regular features
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index 6b729e4e5..44858a5a3 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -9,41 +9,39 @@
 #ifndef FEATURE_STATS_H
 #define FEATURE_STATS_H
 
-using namespace std;
-
-#include <limits>
-#include <vector>
+#include <cstring>
+#include <fstream>
 #include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include "Types.h"
 
-#include "Util.h"
-
-#define FEATURE_STATS_MIN (numeric_limits<FeatureStatsType>::min())
-#define ATOFST(str) ((FeatureStatsType) atof(str))
-
-#define featbytes_ (entries_*sizeof(FeatureStatsType))
+using namespace std;
 
-//Minimal sparse vector
+// Minimal sparse vector
 class SparseVector {
+public:
+  typedef std::map<size_t,FeatureStatsType> fvector_t;
+  typedef std::map<std::string, size_t> name2id_t;
+  typedef std::vector<std::string> id2name_t;
+
+  FeatureStatsType get(const std::string& name) const;
+  FeatureStatsType get(size_t id) const;
+  void set(const std::string& name, FeatureStatsType value);
+  void clear();
+  size_t size() const {
+    return fvector_.size();
+  }
 
-  public:
-    typedef std::map<size_t,FeatureStatsType> fvector_t;
-    typedef std::map<std::string, size_t> name2id_t;
-    typedef std::vector<std::string> id2name_t;
-
-    FeatureStatsType get(std::string name) const;
-    FeatureStatsType get(size_t id) const;
-    void set(std::string name, FeatureStatsType value);
-    void clear();
-    size_t size() const;
-
-    void write(std::ostream& out, const std::string& sep = " ") const;
+  void write(std::ostream& out, const std::string& sep = " ") const;
 
-    SparseVector& operator-=(const SparseVector& rhs);
+  SparseVector& operator-=(const SparseVector& rhs);
 
-  private:
-    static name2id_t name2id_;
-    static id2name_t id2name_;
-    fvector_t fvector_;
+private:
+  static name2id_t name2id_;
+  static id2name_t id2name_;
+  fvector_t fvector_;
 };
 
 SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
@@ -51,32 +49,43 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
 class FeatureStats
 {
 private:
+  size_t available_;
+  size_t entries_;
+
+  // TODO: Use smart pointer for exceptional-safety.
   featstats_t array_;
   SparseVector map_;
-  size_t entries_;
-  size_t available_;
 
 public:
   FeatureStats();
-  FeatureStats(const size_t size);
+  explicit FeatureStats(const size_t size);
+  explicit FeatureStats(std::string &theString);
+
+  ~FeatureStats();
+
+  // We intentionally allow copying.
   FeatureStats(const FeatureStats &stats);
-  FeatureStats(std::string &theString);
   FeatureStats& operator=(const FeatureStats &stats);
 
-  ~FeatureStats();
+  void Copy(const FeatureStats &stats);
 
-  bool isfull() {
-    return (entries_ < available_)?0:1;
+  bool isfull() const {  // true when entries_ has reached available_
+    return entries_ >= available_;
   }
   void expand();
   void add(FeatureStatsType v);
-  void addSparse(string name, FeatureStatsType v);
+  void addSparse(const string& name, FeatureStatsType v);
 
-  inline void clear() {
-    memset((void*) array_,0,featbytes_);
+  void clear() {
+    memset((void*)array_, 0, GetArraySizeWithBytes());
     map_.clear();
   }
 
+  void reset() {
+    entries_ = 0;
+    clear();
+  }
+
   inline FeatureStatsType get(size_t i) {
     return array_[i];
   }
@@ -93,11 +102,17 @@ public:
   void set(std::string &theString);
 
   inline size_t bytes() const {
-    return featbytes_;
+    return GetArraySizeWithBytes();
+  }
+
+  size_t GetArraySizeWithBytes() const {
+    return entries_ * sizeof(FeatureStatsType);
   }
+
   inline size_t size() const {
     return entries_;
   }
+
   inline size_t available() const {
     return available_;
   }
@@ -113,15 +128,10 @@ public:
   void loadtxt(ifstream& inFile);
   void loadbin(ifstream& inFile);
 
-  inline void reset() {
-    entries_ = 0;
-    clear();
-  }
-
-  /**write the whole object to a stream*/
+  /**
+   * Write the whole object to a stream.
+   */
   friend ostream& operator<<(ostream& o, const FeatureStats& e);
 };
 
-#endif
-
-
+#endif  // FEATURE_STATS_H
diff --git a/mert/FileStream.cpp b/mert/FileStream.cpp
new file mode 100644
index 000000000..11fd58e26
--- /dev/null
+++ b/mert/FileStream.cpp
@@ -0,0 +1,65 @@
+#include "FileStream.h"
+
+#include <stdexcept>
+#include "gzfilebuf.h"
+
+using namespace std;
+
+namespace {
+bool IsGzipFile(const std::string &filename) {  // true for names longer than three characters that end in ".gz"
+  const std::string::size_type suffixLen = 3;
+  return filename.size() > suffixLen && filename.compare(filename.size() - suffixLen, suffixLen, ".gz") == 0;
+}
+} // namespace
+
+inputfilestream::inputfilestream(const std::string &filePath)
+    : std::istream(0), m_streambuf(0), is_good(false)
+{
+  // Probe with a plain filebuf first; is_good records whether that open worked.
+  std::filebuf* probe = new std::filebuf();
+  is_good = (probe->open(filePath.c_str(), std::ios::in) != NULL);
+
+  if (!IsGzipFile(filePath)) {
+    m_streambuf = probe;  // plain file: the probe doubles as the stream buffer
+  } else {
+    probe->close();  // gzip input: drop the probe and read through gzfilebuf instead
+    delete probe;
+    m_streambuf = new gzfilebuf(filePath.c_str());
+  }
+  this->init(m_streambuf);
+}
+
+inputfilestream::~inputfilestream()
+{
+  delete m_streambuf;
+  m_streambuf = 0;
+}
+
+void inputfilestream::close()
+{
+}
+
+outputfilestream::outputfilestream(const std::string &filePath)
+    : std::ostream(0), m_streambuf(0), is_good(false)
+{
+  // Reject gzip targets before allocating or opening anything: throwing after
+  // the open would leak the filebuf and leave a created/truncated file behind.
+  if (IsGzipFile(filePath)) {
+    throw runtime_error("Output to a zipped file not supported!");
+  }
+
+  std::filebuf* fb = new std::filebuf();
+  is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);  // records whether the file could be opened for writing
+  m_streambuf = fb;
+  this->init(m_streambuf);
+}
+
+outputfilestream::~outputfilestream()
+{
+  delete m_streambuf;
+  m_streambuf = 0;
+}
+
+void outputfilestream::close()
+{
+}
diff --git a/mert/FileStream.h b/mert/FileStream.h
new file mode 100644
index 000000000..afa8d9a29
--- /dev/null
+++ b/mert/FileStream.h
@@ -0,0 +1,34 @@
+#ifndef FILESTREAM_H_
+#define FILESTREAM_H_
+
+#include <fstream>
+#include <streambuf>
+#include <string>
+
+class inputfilestream : public std::istream
+{
+protected:
+  std::streambuf *m_streambuf;  // buffer handed to init(); deleted by the destructor
+  bool is_good;  // result of the filebuf::open() probe made in the constructor
+
+public:
+  explicit inputfilestream(const std::string &filePath);
+  ~inputfilestream();
+  bool good() const { return is_good; }  // reports only the constructor-time open result, not the current stream state
+  void close();  // defined with an empty body in FileStream.cpp
+};
+
+class outputfilestream : public std::ostream
+{
+protected:
+  std::streambuf *m_streambuf;  // buffer handed to init(); deleted by the destructor
+  bool is_good;  // result of the filebuf::open() call made in the constructor
+
+public:
+  explicit outputfilestream(const std::string &filePath);
+  ~outputfilestream();
+  bool good() const { return is_good; }  // reports only the constructor-time open result, not the current stream state
+  void close();  // defined with an empty body in FileStream.cpp
+};
+
+#endif // FILESTREAM_H_
diff --git a/mert/Makefile.am b/mert/Makefile.am
index 58baa5053..0b16a2f4f 100644
--- a/mert/Makefile.am
+++ b/mert/Makefile.am
@@ -4,6 +4,7 @@ AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
 
 libmert_la_SOURCES =  \
 Util.cpp \
+FileStream.cpp \
 Timer.cpp \
 ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
 ScoreDataIterator.cpp \
@@ -14,6 +15,7 @@ BleuScorer.cpp \
 Point.cpp \
 PerScorer.cpp \
 Scorer.cpp \
+ScorerFactory.cpp \
 Optimizer.cpp \
 TERsrc/alignmentStruct.cpp \
 TERsrc/hashMap.cpp \
@@ -38,6 +40,6 @@ pro_SOURCES = pro.cpp
 extractor_LDADD = libmert.la -lm -lz
 mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
 evaluator_LDADD = libmert.la -lm -lz
-pro_LDADD = libmert.la @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
+pro_LDADD = libmert.la $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
 pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la libmert.la
 
diff --git a/mert/MergeScorer.cpp b/mert/MergeScorer.cpp
index e60e5c9f1..0b9de73f1 100644
--- a/mert/MergeScorer.cpp
+++ b/mert/MergeScorer.cpp
@@ -1,109 +1,118 @@
 #include "MergeScorer.h"
+
+#include <cmath>
+#include <stdexcept>
+#include "ScoreStats.h"
+#include "TerScorer.h"
+#include "BleuScorer.h"
+#include "PerScorer.h"
+#include "CderScorer.h"
+
 #include "TERsrc/tercalc.h"
 #include "TERsrc/terAlignment.h"
 
-const int MergeScorer::LENGTH = 4;
 using namespace TERCpp;
-using namespace std;
 
+MergeScorer::MergeScorer(const string& config)
+    : StatisticsBasedScorer("MERGE",config), kLENGTH(4) {}
+MergeScorer::~MergeScorer() {}
 
 void MergeScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
-        throw runtime_error("MERGE Scorer can be used only in mert execution");
-	exit(0);
+  throw runtime_error("MERGE Scorer can be used only in mert execution");
+  exit(0);
 }
 
 void MergeScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
 {
-        throw runtime_error("MergeScorer::prepareStats : MERGE Scorer can be used only in mert execution");
-	exit(0);
+  throw runtime_error("MergeScorer::prepareStats : MERGE Scorer can be used only in mert execution");
+  exit(0);
 }
+
 /*
 float MergeScorer::calculateScore(const vector<int>& comps)
 {
-	throw runtime_error("MergeScorer::calculateScore : MERGE Scorer can be used only in mert execution");
+        throw runtime_error("MergeScorer::calculateScore : MERGE Scorer can be used only in mert execution");
     exit(0);
 }
 */
-float MergeScorer::calculateScore(const std::vector< int >& comps)
+
+float MergeScorer::calculateScore(const std::vector< int >& comps) const
 {
-        float result=0.0;
-	float weight=1.0;
-	float resultTmp=0.0;
-	vector<int> vecLine;
-	vector<string> vecScorerType;
-	vector<float> weightsModifier;
-	int pos=0;
-	int weightIncrement=0;
-	string initfile="merge.init";
-	string line;
-	ifstream opt(initfile.c_str());
-	float denom=0.0;
-	if (opt.fail()) {
-	    cerr<<"MergeScorer::calculateScore : could not open initfile: " << initfile << endl;
-	    exit(3);
-	}
-	while (getline ( opt, line ))
-	{
-	    vector<string> vecLine=stringToVector(line, " ");
-	    if ((int)vecLine.size()!=4)
-	    {
-		cerr<<"MergeScorer::calculateScore : Error in initfile: " << initfile << endl;
-		exit(4);
-	    }
-	    vecScorerType.push_back(vecLine.at(0));
-	    weightsModifier.push_back(atof(vecLine.at(1).c_str()));
-	    denom+=abs(atof(vecLine.at(1).c_str()));
-	}
-	for (weightIncrement = 0; weightIncrement < (int) weightsModifier.size(); weightIncrement++)
-	{
-	    if (vecScorerType.at(weightIncrement).compare("BLEU")==0)
-	    {
-	      BleuScorer* scorer01= new BleuScorer("");
-	      weight=weightsModifier.at(weightIncrement) / denom;
-	      vecLine.clear();
-	      vecLine=subVector(comps, pos,pos+(int)(scorer01->NumberOfScores()));
-	      pos=pos+(int)(scorer01->NumberOfScores());
- 	      resultTmp=(scorer01->calculateScore(vecLine));
-	      result+=(weight * resultTmp);
-	      
-	    }
-	    else if (vecScorerType.at(weightIncrement).compare("TER")==0)
-	    {
-	      TerScorer* scorer02= new TerScorer(""); 
-	      weight=weightsModifier.at(weightIncrement) / denom;
-	      vecLine.clear();
-	      vecLine=subVector(comps, pos,pos+(int)(scorer02->NumberOfScores()));
-	      pos=pos+(int)(scorer02->NumberOfScores());
- 	      resultTmp=(scorer02->calculateScore(vecLine));
-	      result+=(weight * resultTmp);
-	    }
-            else if (vecScorerType.at(weightIncrement).compare("PER")==0)
-            {
-              PerScorer* scorer03= new PerScorer("");
-              weight=weightsModifier.at(weightIncrement) / denom;
-              vecLine.clear();
-              vecLine=subVector(comps, pos,pos+(int)(scorer03->NumberOfScores()));
-              pos=pos+(int)(scorer03->NumberOfScores());
-              resultTmp=(scorer03->calculateScore(vecLine));
-              result+=(weight * result);
-            }
-            else if (vecScorerType.at(weightIncrement).compare("CER")==0)
-            {
-              CderScorer* scorer04= new CderScorer("");
-              weight=weightsModifier.at(weightIncrement) / denom;
-              vecLine.clear();
-              vecLine=subVector(comps, pos,pos+(int)(scorer04->NumberOfScores()));
-              pos=pos+(int)(scorer04->NumberOfScores());
-              resultTmp=(scorer04->calculateScore(vecLine));
-              result+=(weight * resultTmp);
-            }
+  float result=0.0;
+  float weight=1.0;
+  float resultTmp=0.0;
+  vector<int> vecLine;
+  vector<string> vecScorerType;
+  vector<float> weightsModifier;
+  int pos=0;
+  int weightIncrement=0;
+  string initfile="merge.init";
+  string line;
+  ifstream opt(initfile.c_str());
+  float denom=0.0;
+  if (opt.fail()) {
+    cerr<<"MergeScorer::calculateScore : could not open initfile: " << initfile << endl;
+    exit(3);
+  }
+  while (getline (opt, line)) {
+    vector<string> vecLine=stringToVector(line, " ");
+    if ((int)vecLine.size() != 4) {
+      cerr<<"MergeScorer::calculateScore : Error in initfile: " << initfile << endl;
+      exit(4);
+    }
+    vecScorerType.push_back(vecLine.at(0));
+    weightsModifier.push_back(atof(vecLine.at(1).c_str()));
+    denom+=abs(atof(vecLine.at(1).c_str()));
+  }
+  for (weightIncrement = 0; weightIncrement < (int) weightsModifier.size(); weightIncrement++)
+  {
+    if (vecScorerType.at(weightIncrement).compare("BLEU")==0)
+    {
+      BleuScorer* scorer01= new BleuScorer("");
+      weight=weightsModifier.at(weightIncrement) / denom;
+      vecLine.clear();
+      vecLine=subVector(comps, pos,pos+(int)(scorer01->NumberOfScores()));
+      pos=pos+(int)(scorer01->NumberOfScores());
+      resultTmp=(scorer01->calculateScore(vecLine));
+      result+=(weight * resultTmp);
 
-	    else
-	    {
-	      throw runtime_error("MergeScorer::calculateScore : Scorer unknown");
-	      exit(0);
-	    }
-	}
-       return result;
+    }
+    else if (vecScorerType.at(weightIncrement).compare("TER")==0)
+    {
+      TerScorer* scorer02= new TerScorer("");
+      weight=weightsModifier.at(weightIncrement) / denom;
+      vecLine.clear();
+      vecLine=subVector(comps, pos,pos+(int)(scorer02->NumberOfScores()));
+      pos=pos+(int)(scorer02->NumberOfScores());
+      resultTmp=(scorer02->calculateScore(vecLine));
+      result+=(weight * resultTmp);
+    }
+    else if (vecScorerType.at(weightIncrement).compare("PER")==0)
+    {
+      PerScorer* scorer03= new PerScorer("");
+      weight=weightsModifier.at(weightIncrement) / denom;
+      vecLine.clear();
+      vecLine=subVector(comps, pos,pos+(int)(scorer03->NumberOfScores()));
+      pos=pos+(int)(scorer03->NumberOfScores());
+      resultTmp=(scorer03->calculateScore(vecLine));
+      result+=(weight * resultTmp);  // was "weight * result", which dropped the PER score and rescaled the running total
+    }
+    else if (vecScorerType.at(weightIncrement).compare("CER")==0)
+    {
+      CderScorer* scorer04= new CderScorer("");
+      weight=weightsModifier.at(weightIncrement) / denom;
+      vecLine.clear();
+      vecLine=subVector(comps, pos,pos+(int)(scorer04->NumberOfScores()));
+      pos=pos+(int)(scorer04->NumberOfScores());
+      resultTmp=(scorer04->calculateScore(vecLine));
+      result+=(weight * resultTmp);
+    }
+    else
+    {
+      throw runtime_error("MergeScorer::calculateScore : Scorer unknown");
+      exit(0);
+    }
+  }
+  return result;
 }
diff --git a/mert/MergeScorer.h b/mert/MergeScorer.h
index 6b31a1119..10e034ac6 100644
--- a/mert/MergeScorer.h
+++ b/mert/MergeScorer.h
@@ -1,73 +1,53 @@
 #ifndef __MERGESCORER_H__
 #define __MERGESCORER_H__
 
-// #include <stdio.h>
-#include <algorithm>
-#include <cmath>
 #include <iostream>
-#include <iterator>
 #include <set>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #include <vector>
-#include <limits.h>
-#include "Types.h"
-#include "ScoreData.h"
+
 #include "Scorer.h"
-#include "TerScorer.h"
-#include "BleuScorer.h"
-#include "PerScorer.h"
-#include "CderScorer.h"
-//#include "TERsrc/tercalc.h"
-//#include "TERsrc/terAlignment.h"
 
 using namespace std;
-using namespace TERCpp;
 
 // enum MergeReferenceLengthStrategy { MERGE_AVERAGE, MERGE_SHORTEST, MERGE_CLOSEST };
 
+class PerScorer;
+class ScoreStats;
 
 /**
-  * Bleu scoring
- **/
+ * Merge scoring.
+ */
 class MergeScorer: public StatisticsBasedScorer {
-
-	public:
-		MergeScorer(const string& config = "") : StatisticsBasedScorer("MERGE",config){}
-		virtual void setReferenceFiles(const vector<string>& referenceFiles);
-		virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-		static const int LENGTH;	
-		virtual void whoami() 
-		{
-			cerr << "I AM MergeScorer" << std::endl;
-		}
-// 		size_t NumberOfScores(){ cerr << "MergeScorer: " << (2 * LENGTH + 1) << endl; return (2 * LENGTH + 1); };
-		
-		
-       protected:
-/*        friend float PerScorer::calculateScore(const vector<int>& comps);
-        friend float BleuScorer::calculateScore(const vector<int>& comps);
-        friend float TerScorer::calculateScore(const vector<int>& comps);
-        friend float CderScorer::calculateScore(const vector<int>& comps);
-*/	friend class PerScorer;
-        float calculateScore(const vector<int>& comps);
-//        float calculateScore(const vector<float>& comps);
-		
-	private:
-		string javaEnv;
-		string tercomEnv;
-		//no copy
-		MergeScorer(const MergeScorer&);
-		~MergeScorer(){};
-		MergeScorer& operator=(const MergeScorer&);
-		// data extracted from reference files
-		vector<size_t> _reflengths;
-		vector<multiset<int> > _reftokens;
-		vector<vector<int> > m_references;
-		string m_pid;
-  
+public:
+  explicit MergeScorer(const string& config = "");
+  ~MergeScorer();
+
+  virtual void setReferenceFiles(const vector<string>& referenceFiles);
+  virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
+  virtual void whoami() const {
+    cerr << "I AM MergeScorer" << std::endl;
+  }
+
+protected:
+  friend class PerScorer;
+  virtual float calculateScore(const vector<int>& comps) const;
+
+ private:
+  const int kLENGTH;
+
+  string javaEnv;
+  string tercomEnv;
+
+  // data extracted from reference files
+  vector<size_t> _reflengths;
+  vector<multiset<int> > _reftokens;
+  vector<vector<int> > m_references;
+  string m_pid;
+
+  // no copying allowed
+  MergeScorer(const MergeScorer&);
+  MergeScorer& operator=(const MergeScorer&);
 };
 
-
-#endif //__TERSCORER_H
+#endif  // __MERGESCORER_H__
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 040d60e60..cd9703ddd 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -1,226 +1,236 @@
+#include "Optimizer.h"
+
+#include <cmath>
 #include <cassert>
 #include <vector>
 #include <limits>
-#include <list>
+#include <map>
 #include <cfloat>
 #include <iostream>
 
-#include "Optimizer.h"
+#include "Point.h"
+#include "Util.h"
 
 using namespace std;
 
-static const float MIN_FLOAT=-1.0*numeric_limits<float>::max();
-static const float MAX_FLOAT=numeric_limits<float>::max();
+static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
+static const float MAX_FLOAT = numeric_limits<float>::max();
 
+namespace {
 
-
-void Optimizer::SetScorer(Scorer *S)
+/**
+ * Compute the intersection of 2 lines.
+ */
+inline float intersect(float m1, float b1, float m2, float b2)
 {
-  if(scorer)
-    delete scorer;
-  scorer=S;
+  float isect = (b2 - b1) / (m1 - m2);
+  if (!isfinite(isect)) {
+    isect = MAX_FLOAT;
+  }
+  return isect;
 }
 
-void Optimizer::SetFData(FeatureData *F)
-{
-  if(FData)
-    delete FData;
-  FData=F;
-};
+} // namespace
 
-Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom):scorer(NULL),FData(NULL),number_of_random_directions(nrandom)
+
+void Optimizer::SetScorer(Scorer *_scorer)
 {
-  //warning: the init vector is a full set of parameters, of dimension pdim!
+  scorer = _scorer;
+}
 
-  Point::pdim=Pd;
+void Optimizer::SetFData(FeatureData *_FData)
+{
+  FData = _FData;
+}
 
-  assert(start.size()==Pd);
-  Point::dim=i2O.size();
-  Point::optindices=i2O;
-  if (Point::pdim>Point::dim) {
-    for (unsigned int i=0; i<Point::pdim; i++) {
+Optimizer::Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+    : scorer(NULL), FData(NULL), number_of_random_directions(nrandom)
+{
+  // Warning: the init vector is a full set of parameters, of dimension pdim!
+  Point::pdim = Pd;
+
+  assert(start.size() == Pd);
+  Point::dim = i2O.size();
+  Point::optindices = i2O;
+  if (Point::pdim > Point::dim) {
+    for (unsigned int i = 0; i < Point::pdim; i++) {
       unsigned int j = 0;
-      while (j<Point::dim && i!=i2O[j])
+      while (j < Point::dim && i != i2O[j])
         j++;
 
-      if (j==Point::dim)//the index i wasnt found on optindices, it is a fixed index, we use the value of the start vector
-        Point::fixedweights[i]=start[i];
+      // The index i wasn't found in optindices, so it is a fixed index:
+      // we use the value of the start vector.
+      if (j == Point::dim)
+        Point::fixedweights[i] = start[i];
     }
   }
-};
-
-Optimizer::~Optimizer()
-{
-  delete scorer;
-  delete FData;
 }
 
-statscore_t Optimizer::GetStatScore(const Point& param)const
+Optimizer::~Optimizer() {}
+
+statscore_t Optimizer::GetStatScore(const Point& param) const
 {
   vector<unsigned> bests;
-  Get1bests(param,bests);
+  Get1bests(param, bests);
   //copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
   statscore_t score = GetStatScore(bests);
   return score;
-};
-
-/**compute the intersection of 2 lines*/
-float intersect (float m1, float b1,float m2,float b2)
-{
-  float isect = ((b2-b1)/(m1-m2));
-  if (!isfinite(isect)) {
-    isect = MAX_FLOAT;
-  }
-  return isect;
 }
 
-map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap,float newt,pair<unsigned,unsigned> newdiff)
+map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, pair<unsigned,unsigned> newdiff)
 {
-  map<float,diff_t>::iterator it=thresholdmap.find(newt);
-  if(it!=thresholdmap.end()) {
-    //the threshold already exists!! this is very unlikely
-    if(it->second.back().first==newdiff.first)
-      it->second.back().second=newdiff.second;//there was already a diff for this sentence, we change the 1 best;
+  map<float,diff_t>::iterator it = thresholdmap.find(newt);
+  if (it != thresholdmap.end()) {
+    // the threshold already exists!! this is very unlikely
+    if (it->second.back().first == newdiff.first)
+      // there was already a diff for this sentence, we change the 1 best;
+      it->second.back().second = newdiff.second;
     else
       it->second.push_back(newdiff);
   } else {
-    //normal case
-    pair< map<float,diff_t >::iterator,bool > ins=thresholdmap.insert(threshold(newt,diff_t(1,newdiff)));
-    assert(ins.second);//we really inserted something
-    it=ins.first;
+    // normal case
+    pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
+    assert(ins.second);                // we really inserted something
+    it = ins.first;
   }
   return it;
-};
-
+}
 
-statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,Point& bestpoint)const
+statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
 {
-
-// we are looking for the best Point on the line y=Origin+x*direction
-  float min_int=0.0001;
+  // We are looking for the best Point on the line y=Origin+x*direction
+  float min_int = 0.0001;
   //typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence
   //list<threshold> thresholdlist;
 
   map<float,diff_t> thresholdmap;
-  thresholdmap[MIN_FLOAT]=diff_t();
-  vector<unsigned> first1best;//the vector of nbests for x=-inf
-  for(unsigned int S=0; S<size(); S++) {
-    map<float,diff_t >::iterator previnserted=thresholdmap.begin();
-    //first we determine the translation with the best feature score for each sentence and each value of x
+  thresholdmap[MIN_FLOAT] = diff_t();
+  vector<unsigned> first1best;       // the vector of nbests for x=-inf
+  for (unsigned int S = 0; S < size(); S++) {
+    map<float,diff_t >::iterator previnserted = thresholdmap.begin();
+    // First, we determine the translation with the best feature score
+    // for each sentence and each value of x.
     //cerr << "Sentence " << S << endl;
-    multimap<float,unsigned> gradient;
+    multimap<float, unsigned> gradient;
     vector<float> f0;
     f0.resize(FData->get(S).size());
-    for(unsigned j=0; j<FData->get(S).size(); j++) {
-      gradient.insert(pair<float,unsigned>(direction*(FData->get(S,j)),j));//gradient of the feature function for this particular target sentence
-      f0[j]=origin*FData->get(S,j);//compute the feature function at the origin point
+    for (unsigned j = 0; j < FData->get(S).size(); j++) {
+      // gradient of the feature function for this particular target sentence
+      gradient.insert(pair<float, unsigned>(direction * (FData->get(S,j)), j));
+      // compute the feature function at the origin point
+      f0[j] = origin * FData->get(S, j);
     }
-    //now lets compute the 1best for each value of x
+    // Now let's compute the 1best for each value of x.
 
     //    vector<pair<float,unsigned> > onebest;
 
 
-    multimap<float,unsigned>::iterator gradientit=gradient.begin();
-    multimap<float,unsigned>::iterator highest_f0=gradient.begin();
+    multimap<float,unsigned>::iterator gradientit = gradient.begin();
+    multimap<float,unsigned>::iterator highest_f0 = gradient.begin();
 
-    float smallest=gradientit->first;//smallest gradient
-    //several candidates can have the lowest slope (eg for word penalty where the gradient is an integer )
+    float smallest = gradientit->first;//smallest gradient
+    // Several candidates can have the lowest slope (e.g., for word penalty where the gradient is an integer).
 
     gradientit++;
-    while(gradientit!=gradient.end()&&gradientit->first==smallest) {
+    while (gradientit != gradient.end() && gradientit->first == smallest) {
       //   cerr<<"ni"<<gradientit->second<<endl;;
       //cerr<<"fos"<<f0[gradientit->second]<<" "<<f0[index]<<" "<<index<<endl;
-      if(f0[gradientit->second]>f0[highest_f0->second])
-        highest_f0=gradientit;//the highest line is the one with he highest f0
+      if (f0[gradientit->second] > f0[highest_f0->second])
+        highest_f0 = gradientit;//the highest line is the one with the highest f0
       gradientit++;
     }
 
     gradientit = highest_f0;
     first1best.push_back(highest_f0->second);
 
-    //now we look for the intersections points indicating a change of 1 best
-    //we use the fact that the function is convex, which means that the gradient can only go up
-    while(gradientit!=gradient.end()) {
-      map<float,unsigned>::iterator leftmost=gradientit;
-      float m=gradientit->first;
-      float b=f0[gradientit->second];
-      multimap<float,unsigned>::iterator gradientit2=gradientit;
+    // Now we look for the intersection points indicating a change of 1 best.
+    // We use the fact that the function is convex, which means that the gradient can only go up.
+    while (gradientit != gradient.end()) {
+      map<float,unsigned>::iterator leftmost = gradientit;
+      float m = gradientit->first;
+      float b = f0[gradientit->second];
+      multimap<float,unsigned>::iterator gradientit2 = gradientit;
       gradientit2++;
-      float leftmostx=MAX_FLOAT;
-      for(; gradientit2!=gradient.end(); gradientit2++) {
+      float leftmostx = MAX_FLOAT;
+      for (; gradientit2 != gradient.end(); gradientit2++) {
         //cerr<<"--"<<d++<<' '<<gradientit2->first<<' '<<gradientit2->second<<endl;
-        //look for all candidate with a gradient bigger than the current one and find the one with the leftmost intersection
+        // Look for all candidates with a gradient bigger than the current one, and
+        // find the one with the leftmost intersection.
         float curintersect;
-        if(m!=gradientit2->first) {
-          curintersect=intersect(m,b,gradientit2->first,f0[gradientit2->second]);
+        if (m != gradientit2->first) {
+          curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
           //cerr << "curintersect: " << curintersect << " leftmostx: " << leftmostx << endl;
-          if(curintersect<=leftmostx) {
-            //we have found an intersection to the left of the leftmost we had so far.
-            //we might have curintersect==leftmostx for example is 2 candidates are the same
-            //in that case its better its better to update leftmost to gradientit2 to avoid some recomputing later
-            leftmostx=curintersect;
-            leftmost=gradientit2;//this is the new reference
+          if (curintersect<=leftmostx) {
+            // We have found an intersection to the left of the leftmost we had so far.
+            // We might have curintersect==leftmostx, for example if 2 candidates are the same;
+            // in that case it's better to update leftmost to gradientit2 to avoid some recomputing later.
+            leftmostx = curintersect;
+            leftmost = gradientit2; // this is the new reference
           }
         }
       }
       if (leftmost == gradientit) {
-        //we didn't find any more intersections
-        //the rightmost bestindex is the one with the highest slope.
-        assert(abs(leftmost->first-gradient.rbegin()->first)<0.0001);//they should be egal but there might be
-        //a small difference due to rounding error
+        // We didn't find any more intersections.
+        // The rightmost bestindex is the one with the highest slope.
+
+        // They should be equal, but there might be a small difference
+        // due to rounding error.
+        assert(abs(leftmost->first-gradient.rbegin()->first) < 0.0001);
         break;
       }
-      //we have found the next intersection!
-
-      pair<unsigned,unsigned> newd(S,leftmost->second);//new onebest for Sentence S is leftmost->second
-
-      if(leftmostx-previnserted->first<min_int) {
-        /* Require that the intersection Point be at least min_int
-               to the right of the previous one(for this sentence). If not, we replace the
-               previous intersection Point with this one. Yes, it can even
-               happen that the new intersection Point is slightly to the
-               left of the old one, because of numerical imprecision.
-         we do not check that we are to the right of the penultimate point also. it this happen the 1best the inteval will be wrong
-          we are going to replace previnsert by the new one because we do not want to keep
-          2 very close threshold: if the minima is there it could be an artifact
-        */
-        map<float,diff_t>::iterator tit=thresholdmap.find(leftmostx);
-        if(tit==previnserted) {
-          //the threshold is the same as before can happen if 2 candidates are the same for example
-          assert(previnserted->second.back().first==newd.first);
-          previnserted->second.back()=newd;//just replace the 1 best fors sentence S
-          //previnsert doesnt change
+      // We have found the next intersection!
+
+      pair<unsigned,unsigned> newd(S, leftmost->second);//new onebest for Sentence S is leftmost->second
+
+      if (leftmostx-previnserted->first < min_int) {
+        // Require that the intersection Point be at least min_int to the right of the previous
+        // one (for this sentence). If not, we replace the previous intersection Point with
+        // this one.
+        // Yes, it can even happen that the new intersection Point is slightly to the left of
+        // the old one, because of numerical imprecision. We do not check that we are also to
+        // the right of the penultimate point. If this happens, the 1best for the interval will
+        // be wrong. We replace previnserted by the new one because we do not want to keep
+        // 2 very close thresholds: if the minimum is there, it could be an artifact.
+
+        map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
+        if (tit == previnserted) {
+          // The threshold is the same as before; this can happen if 2 candidates are the same, for example.
+          assert(previnserted->second.back().first == newd.first);
+          previnserted->second.back()=newd; // just replace the 1 best for sentence S
+          // previnsert doesn't change
         } else {
 
-          if(tit==thresholdmap.end()) {
-            thresholdmap[leftmostx]=previnserted->second;//We keep the diffs at previnsert
-            thresholdmap.erase(previnserted);//erase old previnsert
-            previnserted=thresholdmap.find(leftmostx);//point previnsert to the new threshold
-            previnserted->second.back()=newd;//we update the diff for sentence S
-          } else { //threshold already exists but is not the previous one.
-            //we append the diffs in previnsert to tit before destroying previnsert
+          if (tit == thresholdmap.end()) {
+            thresholdmap[leftmostx]=previnserted->second; // We keep the diffs at previnsert
+            thresholdmap.erase(previnserted); // erase old previnsert
+            previnserted = thresholdmap.find(leftmostx); // point previnsert to the new threshold
+            previnserted->second.back()=newd; // We update the diff for sentence S
+          // Threshold already exists but is not the previous one.
+          } else {
+            // We append the diffs in previnsert to tit before destroying previnsert.
             tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
-            assert(tit->second.back().first==newd.first);
-            tit->second.back()=newd;//change diff for sentence S
-            thresholdmap.erase(previnserted);//erase old previnsert
-            previnserted=tit;//point previnsert to the new threshold
+            assert(tit->second.back().first == newd.first);
+            tit->second.back()=newd;    // change diff for sentence S
+            thresholdmap.erase(previnserted); // erase old previnsert
+            previnserted = tit;  // point previnsert to the new threshold
           }
         }
 
         assert(previnserted != thresholdmap.end());
       } else { //normal insertion process
-        previnserted=AddThreshold(thresholdmap,leftmostx,newd);
+        previnserted = AddThreshold(thresholdmap, leftmostx, newd);
       }
-      gradientit=leftmost;
-    }   //while(gradientit!=gradient.end()){
-  }  //loop on S
-  //now the thresholdlist is up to date:
-  //it contains a list of all the parameter_ts where the function changed its value, along with the nbest list for the interval after each threshold
+      gradientit = leftmost;
+    } // while (gradientit!=gradient.end()){
+  }   // loop on S
+
+  // Now the thresholdlist is up to date: it contains a list of all the parameter_ts where
+  // the function changed its value, along with the nbest list for the interval after each threshold.
 
   map<float,diff_t >::iterator thrit;
-  if(verboselevel()>6) {
-    cerr << "Thresholds:(" <<thresholdmap.size()<<")"<< endl;
-    for (thrit = thresholdmap.begin(); thrit!=thresholdmap.end(); thrit++) {
+  if (verboselevel() > 6) {
+    cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
+    for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
       cerr << "x: " << thrit->first << " diffs";
       for (size_t j = 0; j < thrit->second.size(); ++j) {
         cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second;
@@ -229,31 +239,33 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
     }
   }
 
-  //last thing to do is compute the Stat score (ie BLEU) and find the minimum
-  thrit=thresholdmap.begin();
-  ++thrit;//first diff corrrespond to MIN_FLOAT and first1best
+  // Last thing to do is compute the Stat score (i.e., BLEU) and find the minimum.
+  thrit = thresholdmap.begin();
+  ++thrit;       // the first diff corresponds to MIN_FLOAT and first1best
   diffs_t diffs;
-  for(; thrit!=thresholdmap.end(); thrit++)
+  for (; thrit != thresholdmap.end(); thrit++)
     diffs.push_back(thrit->second);
-  vector<statscore_t> scores=GetIncStatScore(first1best,diffs);
+  vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
+
+  thrit = thresholdmap.begin();
+  statscore_t bestscore = MIN_FLOAT;
+  float bestx = MIN_FLOAT;
 
-  thrit=thresholdmap.begin();
-  statscore_t bestscore=MIN_FLOAT;
-  float bestx=MIN_FLOAT;
-  assert(scores.size()==thresholdmap.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best
-  for(unsigned int sc=0; sc!=scores.size(); sc++) {
+  // We skipped the first element of thresholdmap, but GetIncStatScore returns 1 more for first1best.
+  assert(scores.size() == thresholdmap.size());
+  for (unsigned int sc = 0; sc != scores.size(); sc++) {
     //cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
     if (scores[sc] > bestscore) {
-      //This is the score for the interval [lit2->first, (lit2+1)->first]
-      //unless we're at the last score, when it's the score
-      //for the interval [lit2->first,+inf]
+      // This is the score for the interval [lit2->first, (lit2+1)->first]
+      // unless we're at the last score, when it's the score
+      // for the interval [lit2->first,+inf].
       bestscore = scores[sc];
 
-      //if we're not in [-inf,x1] or [xn,+inf] then just take the value
-      //if x which splits the interval in half. For the rightmost interval,
-      //take x to be the last interval boundary + 0.1, and for the leftmost
-      //interval, take x to be the first interval boundary - 1000.
-      //These values are taken from cmert.
+      // If we're not in [-inf,x1] or [xn,+inf], then just take the value
+      // of x which splits the interval in half. For the rightmost interval,
+      // take x to be the last interval boundary + 0.1, and for the leftmost
+      // interval, take x to be the first interval boundary - 1000.
+      // These values are taken from cmert.
       float leftx = thrit->first;
       if (thrit == thresholdmap.begin()) {
         leftx = MIN_FLOAT;
@@ -268,7 +280,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
       if (leftx == MIN_FLOAT) {
         bestx = rightx-1000;
       } else if (rightx == MAX_FLOAT) {
-        bestx = leftx+0.1;
+        bestx = leftx + 0.1;
       } else {
         bestx = 0.5 * (rightx + leftx);
       }
@@ -277,37 +289,39 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
     ++thrit;
   }
 
-  if(abs(bestx)<0.00015) {
-    bestx=0.0;//the origin of the line is the best point!we put it back at 0 so we do not propagate rounding erros
-    //finally! we manage to extract the best score;
-    //now we convert bestx  (position on the line) to a point!
-    if(verboselevel()>4)
-      cerr<<"best point on line at origin"<<endl;
+  if (abs(bestx) < 0.00015) {
+    // The origin of the line is the best point! We put it back at 0
+    // so we do not propagate rounding errors.
+    bestx = 0.0;
+
+    // Finally, we manage to extract the best score;
+    // now we convert bestx (position on the line) to a point.
+    if (verboselevel() > 4)
+      cerr << "best point on line at origin" << endl;
   }
-  if(verboselevel()>3) {
+  if (verboselevel() > 3) {
 //    cerr<<"end Lineopt, bestx="<<bestx<<endl;
   }
-  bestpoint=direction*bestx+origin;
-  bestpoint.score=bestscore;
+  bestpoint = direction * bestx + origin;
+  bestpoint.SetScore(bestscore);
   return bestscore;
-};
-
+}
 
-void  Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const
+void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
 {
   assert(FData);
   bests.clear();
   bests.resize(size());
 
-  for(unsigned i=0; i<size(); i++) {
-    float bestfs=MIN_FLOAT;
-    unsigned idx=0;
+  for (unsigned i = 0; i < size(); i++) {
+    float bestfs = MIN_FLOAT;
+    unsigned idx = 0;
     unsigned j;
-    for(j=0; j<FData->get(i).size(); j++) {
-      float curfs=P*FData->get(i,j);
-      if(curfs>bestfs) {
-        bestfs=curfs;
-        idx=j;
+    for (j = 0; j < FData->get(i).size(); j++) {
+      float curfs = P * FData->get(i, j);
+      if (curfs > bestfs) {
+        bestfs = curfs;
+        idx = j;
       }
     }
     bests[i]=idx;
@@ -315,173 +329,169 @@ void  Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const
 
 }
 
-statscore_t Optimizer::Run(Point& P)const
+statscore_t Optimizer::Run(Point& P) const
 {
-  if(!FData) {
-    cerr<<"error trying to optimize without Features loaded"<<endl;
+  if (!FData) {
+    cerr << "error trying to optimize without Features loaded" << endl;
     exit(2);
   }
-  if(!scorer) {
-    cerr<<"error trying to optimize without a Scorer loaded"<<endl;
+  if (!scorer) {
+    cerr << "error trying to optimize without a Scorer loaded" << endl;
     exit(2);
   }
-  if (scorer->getReferenceSize()!=FData->size()) {
-    cerr<<"error length mismatch between feature file and score file"<<endl;
+  if (scorer->getReferenceSize() != FData->size()) {
+    cerr << "error length mismatch between feature file and score file" << endl;
     exit(2);
   }
 
-  statscore_t score=GetStatScore(P);
-  P.score=score;
+  P.SetScore(GetStatScore(P));
+
+  if (verboselevel () > 2) {
+    cerr << "Starting point: " << P << " => " << P.GetScore() << endl;
+  }
+  statscore_t score = TrueRun(P);
 
-  if(verboselevel()>2)
-    cerr<<"Starting point: "<< P << " => "<< P.score << endl;
-  statscore_t s=TrueRun(P);
-  P.score=s;//just in case its not done in TrueRun
-  if (verboselevel()>2)
-    cerr<<"Ending point: "<< P <<" => "<< s << endl;
-  return s;
+  // just in case it's not done in TrueRun
+  P.SetScore(score);
+  if (verboselevel() > 2) {
+    cerr << "Ending point: " << P << " => " << score << endl;
+  }
+  return score;
 }
 
 
-vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<vector <pair<unsigned,unsigned> > > thediffs)const
+vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst, vector<vector <pair<unsigned,unsigned> > > thediffs) const
 {
   assert(scorer);
 
   vector<statscore_t> theres;
 
-  scorer->score(thefirst,thediffs,theres);
+  scorer->score(thefirst, thediffs, theres);
   return theres;
-};
-
-
+}
 
 
-//---------------- code for the powell optimizer
-float SimpleOptimizer::eps=0.0001;
-statscore_t SimpleOptimizer::TrueRun(Point& P)const
+statscore_t SimpleOptimizer::TrueRun(Point& P) const
 {
-
-  statscore_t prevscore=0;
-  statscore_t bestscore=MIN_FLOAT;
-  Point  best;
-
-  //If P is already defined and provides a score
-  //we must improve over this score
-  if(P.score>bestscore) {
-    bestscore=P.score;
-    best=P;
+  statscore_t prevscore = 0;
+  statscore_t bestscore = MIN_FLOAT;
+  Point best;
+
+  // If P is already defined and provides a score,
+  // we must improve over this score.
+  if (P.GetScore() > bestscore) {
+    bestscore = P.GetScore();
+    best = P;
   }
 
-  int nrun=0;
+  int nrun = 0;
   do {
     ++nrun;
-    if(verboselevel()>2&&nrun>1)
-      cerr<<"last diff="<<bestscore-prevscore<<" nrun "<<nrun<<endl;
-    prevscore=bestscore;
+    if (verboselevel() > 2 && nrun > 1)
+      cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
+    prevscore = bestscore;
 
     Point  linebest;
 
-    for(unsigned int d=0; d<Point::getdim()+number_of_random_directions; d++) {
-      if(verboselevel()>4) {
+    for (unsigned int d = 0; d < Point::getdim()+number_of_random_directions; d++) {
+      if (verboselevel() > 4) {
         //	cerr<<"minimizing along direction "<<d<<endl;
-        cerr<<"starting point: " << P << " => " << prevscore << endl;
+        cerr << "starting point: " << P << " => " << prevscore << endl;
       }
       Point direction;
-      if (d<Point::getdim()) { // regular updates along one dimension
-        for(unsigned int i=0; i<Point::getdim(); i++)
+      if (d < Point::getdim()) { // regular updates along one dimension
+        for (unsigned int i = 0; i < Point::getdim(); i++)
           direction[i]=0.0;
         direction[d]=1.0;
       }
       else { // random direction update
         direction.Randomize();
       }
-      statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line
-      if(verboselevel()>5) {
-        cerr<<"direction: "<< d << " => " << curscore << endl;
-        cerr<<"\tending point: "<< linebest << " => " << curscore << endl;
+      statscore_t curscore = LineOptimize(P, direction, linebest);//find the minimum on the line
+      if (verboselevel() > 5) {
+        cerr << "direction: " << d << " => " << curscore << endl;
+        cerr << "\tending point: "<< linebest << " => " << curscore << endl;
       }
-      if(curscore>bestscore) {
-        bestscore=curscore;
-        best=linebest;
-        if(verboselevel()>3) {
-          cerr<<"new best dir:"<<d<<" ("<<nrun<<")"<<endl;
-          cerr<<"new best Point "<<best<< " => " <<curscore<<endl;
+      if (curscore > bestscore) {
+        bestscore = curscore;
+        best = linebest;
+        if (verboselevel() > 3) {
+          cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
+          cerr << "new best Point " << best << " => "  << curscore << endl;
         }
       }
     }
-    P=best;//update the current vector with the best point on all line tested
-    if(verboselevel()>3)
-      cerr<<nrun<<"\t"<<P<<endl;
-  } while(bestscore-prevscore>eps);
-
-  if(verboselevel()>2) {
-    cerr<<"end Powell Algo, nrun="<<nrun<<endl;
-    cerr<<"last diff="<<bestscore-prevscore<<endl;
-    cerr<<"\t"<<P<<endl;
+    P = best; //update the current vector with the best point over all lines tested
+    if (verboselevel() > 3)
+      cerr << nrun << "\t" << P << endl;
+  } while (bestscore-prevscore > kEPS);
+
+  if (verboselevel() > 2) {
+    cerr << "end Powell Algo, nrun=" << nrun << endl;
+    cerr << "last diff=" << bestscore-prevscore << endl;
+    cerr << "\t" << P << endl;
   }
   return bestscore;
 }
 
-//---------------- code for the optimizer with random directions
-float RandomDirectionOptimizer::eps=0.0001;
-statscore_t RandomDirectionOptimizer::TrueRun(Point& P)const
+statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
 {
-  statscore_t prevscore=P.score;
+  statscore_t prevscore = P.GetScore();
 
   // do specified number of random direction optimizations
   unsigned int nrun = 0;
   unsigned int nrun_no_change = 0;
-  for(; nrun_no_change<number_of_random_directions; nrun++, nrun_no_change++)
+  for (; nrun_no_change < number_of_random_directions; nrun++, nrun_no_change++)
   {
     // choose a random direction in which to optimize
     Point direction;
     direction.Randomize();
 
     //find the minimum on the line
-    statscore_t score=LineOptimize(P,direction,P);
-    if(verboselevel()>4) {
-      cerr<<"direction: "<< direction << " => " << score;
-      cerr<<" ("<< (score-prevscore) << ")" << endl;
-      cerr<<"\tending point: "<< P << " => " << score << endl;
+    statscore_t score = LineOptimize(P, direction, P);
+    if (verboselevel() > 4) {
+      cerr << "direction: " << direction << " => " << score;
+      cerr << " (" <<  (score-prevscore) << ")" << endl;
+      cerr << "\tending point: " <<  P << " => " << score << endl;
     }
 
-    if (score-prevscore > eps)
-      nrun_no_change=0;
+    if (score-prevscore > kEPS)
+      nrun_no_change = 0;
     prevscore = score;
   }
 
-  if(verboselevel()>2) {
-    cerr<<"end Powell Algo, nrun="<<nrun<<endl;
+  if (verboselevel() > 2) {
+    cerr << "end Powell Algo, nrun=" << nrun << endl;
   }
   return prevscore;
 }
 
-/**RandomOptimizer to use as beaseline and test.\n
-Just return a random point*/
 
-statscore_t RandomOptimizer::TrueRun(Point& P)const
+statscore_t RandomOptimizer::TrueRun(Point& P) const
 {
   P.Randomize();
-  statscore_t score=GetStatScore(P);
-  P.score=score;
+  statscore_t score = GetStatScore(P);
+  P.SetScore(score);
   return score;
 }
+
 //--------------------------------------
+
 vector<string> OptimizerFactory::typenames;
 
 void OptimizerFactory::SetTypeNames()
 {
-  if(typenames.empty()) {
+  if (typenames.empty()) {
     typenames.resize(NOPTIMIZER);
     typenames[POWELL]="powell";
     typenames[RANDOM_DIRECTION]="random-direction";
     typenames[RANDOM]="random";
-    //add new type there
+    // Add new type there
   }
 }
 vector<string> OptimizerFactory::GetTypeNames()
 {
-  if(typenames.empty())
+  if (typenames.empty())
     SetTypeNames();
   return typenames;
 }
@@ -489,38 +499,38 @@ vector<string> OptimizerFactory::GetTypeNames()
 OptimizerFactory::OptType OptimizerFactory::GetOType(string type)
 {
   unsigned int thetype;
-  if(typenames.empty())
+  if (typenames.empty())
     SetTypeNames();
-  for(thetype=0; thetype<typenames.size(); thetype++)
-    if(typenames[thetype]==type)
+  for (thetype = 0; thetype < typenames.size(); thetype++)
+    if (typenames[thetype] == type)
       break;
   return((OptType)thetype);
-};
+}
 
-Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,vector<parameter_t> start,string type, unsigned int nrandom)
+Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim, vector<unsigned> i2o, vector<parameter_t> start, string type, unsigned int nrandom)
 {
-  OptType T=GetOType(type);
-  if(T==NOPTIMIZER) {
-    cerr<<"Error: unknown Optimizer type "<<type<<endl;
-    cerr<<"Known Algorithm are:"<<endl;
+  OptType T = GetOType(type);
+  if (T == NOPTIMIZER) {
+    cerr << "Error: unknown Optimizer type " << type << endl;
+    cerr << "Known Algorithm are:" << endl;
     unsigned int thetype;
-    for(thetype=0; thetype<typenames.size(); thetype++)
-      cerr<<typenames[thetype]<<endl;
+    for (thetype = 0; thetype < typenames.size(); thetype++)
+      cerr << typenames[thetype] << endl;
     throw ("unknown Optimizer Type");
   }
 
-  switch((OptType)T) {
-  case POWELL:
-    return new SimpleOptimizer(dim,i2o,start,nrandom);
-    break;
-  case RANDOM_DIRECTION:
-    return new RandomDirectionOptimizer(dim,i2o,start,nrandom);
-    break;
-  case RANDOM:
-    return new RandomOptimizer(dim,i2o,start,nrandom);
-    break;
-  default:
-    cerr<<"Error: unknown optimizer"<<type<<endl;
-    return NULL;
+  switch ((OptType)T) {
+    case POWELL:
+      return new SimpleOptimizer(dim, i2o, start, nrandom);
+      break;
+    case RANDOM_DIRECTION:
+      return new RandomDirectionOptimizer(dim, i2o, start, nrandom);
+      break;
+    case RANDOM:
+      return new RandomOptimizer(dim, i2o, start, nrandom);
+      break;
+    default:
+      cerr << "Error: unknown optimizer" << type << endl;
+      return NULL;
   }
 }
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index 9920be77f..aa588994d 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -1,93 +1,130 @@
 #ifndef OPTIMIZER_H
 #define OPTIMIZER_H
+
 #include <vector>
-#include "FeatureStats.h"
+#include <string>
 #include "FeatureData.h"
-#include "FeatureArray.h"
 #include "Scorer.h"
-#include "Point.h"
 #include "Types.h"
 
+using namespace std;
 
 typedef float featurescore;
 
-using namespace std;
-/**abstract virtual class*/
+class Point;
+
+/**
+ * Abstract optimizer class.
+ */
 class Optimizer
 {
 protected:
-  Scorer * scorer; //no accessor for them only child can use them
-  FeatureData * FData;//no accessor for them only child can use them
+  Scorer *scorer;      // no accessor for them only child can use them
+  FeatureData *FData;  // no accessor for them only child can use them
   unsigned int number_of_random_directions;
+
 public:
-  Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom);
-  void SetScorer(Scorer *S);
-  void SetFData(FeatureData *F);
+  Optimizer(unsigned Pd, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom);
+  void SetScorer(Scorer *_scorer);
+  void SetFData(FeatureData *_FData);
   virtual ~Optimizer();
 
-  unsigned size()const {
-    return (FData?FData->size():0);
+  unsigned size() const {
+    return FData ? FData->size() : 0;
   }
-  /**Generic wrapper around TrueRun to check a few things. Non virtual*/
-  statscore_t  Run(Point&)const;
-  /**main function that perform an optimization*/
-  virtual  statscore_t  TrueRun(Point&)const=0;
-  /**given a set of lambdas, get the nbest for each sentence*/
-  void Get1bests(const Point& param,vector<unsigned>& bests)const;
-  /**given a set of nbests, get the Statistical score*/
-  statscore_t  GetStatScore(const vector<unsigned>& nbests)const {
+
+  /**
+   * Generic wrapper around TrueRun to check a few things. Non virtual.
+   */
+  statscore_t Run(Point&) const;
+
+  /**
+   * Main function that performs an optimization.
+   */
+  virtual statscore_t TrueRun(Point&) const = 0;
+
+  /**
+   * Given a set of lambdas, get the nbest for each sentence.
+   */
+  void Get1bests(const Point& param,vector<unsigned>& bests) const;
+
+  /**
+   * Given a set of nbests, get the Statistical score.
+   */
+  statscore_t GetStatScore(const vector<unsigned>& nbests) const {
     return scorer->score(nbests);
-  };
-  /**given a set of lambdas, get the total statistical score*/
-  statscore_t  GetStatScore(const Point& param)const;
-  vector<statscore_t > GetIncStatScore(vector<unsigned> ref,vector<vector <pair<unsigned,unsigned> > >)const;
-  statscore_t  LineOptimize(const Point& start,const Point& direction,Point& best)const;//Get the optimal Lambda and the best score in a particular direction from a given Point
+  }
+
+  statscore_t GetStatScore(const Point& param) const;
+
+  vector<statscore_t> GetIncStatScore(vector<unsigned> ref, vector<vector<pair<unsigned,unsigned> > >) const;
+
+  /**
+   * Get the optimal Lambda and the best score in a particular direction from a given Point.
+   */
+  statscore_t LineOptimize(const Point& start, const Point& direction, Point& best) const;
 };
 
 
-/**default basic optimizer*/
-class SimpleOptimizer: public Optimizer
+/**
+ * Default basic optimizer.
+ * This class implements Powell's method.
+ */
+class SimpleOptimizer : public Optimizer
 {
 private:
-  static float eps;
+  const float kEPS;
 public:
-  SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom):Optimizer(dim,i2O,start,nrandom) {};
-  virtual statscore_t  TrueRun(Point&)const;
+  SimpleOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+      : Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
-/**optimizer with random directions*/
-class RandomDirectionOptimizer: public Optimizer
+/**
+ * An optimizer with random directions.
+ */
+class RandomDirectionOptimizer : public Optimizer
 {
 private:
-  static float eps;
+  const float kEPS;
 public:
-  RandomDirectionOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom):Optimizer(dim,i2O,start,nrandom) {};
-  virtual statscore_t  TrueRun(Point&)const;
+  RandomDirectionOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+      : Optimizer(dim, i2O, start, nrandom), kEPS(0.0001) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
-/**dumb baseline optimizer: just picks a random point and quits*/
-class RandomOptimizer: public Optimizer
+/**
+ * Dumb baseline optimizer: just picks a random point and quits.
+ */
+class RandomOptimizer : public Optimizer
 {
 public:
-  RandomOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start, unsigned int nrandom):Optimizer(dim,i2O,start,nrandom) {};
-  virtual statscore_t  TrueRun(Point&)const;
+  RandomOptimizer(unsigned dim, vector<unsigned> i2O, vector<parameter_t> start, unsigned int nrandom)
+      : Optimizer(dim, i2O, start, nrandom) {}
+  virtual statscore_t TrueRun(Point&) const;
 };
 
 class OptimizerFactory
 {
 public:
-  // unsigned dim;
-  //Point Start;
   static vector<string> GetTypeNames();
-  static Optimizer* BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,vector<parameter_t> start,string type,unsigned int nrandom);
+  static Optimizer* BuildOptimizer(unsigned dim, vector<unsigned> tooptimize, vector<parameter_t> start, string type, unsigned int nrandom);
+
 private:
-  enum OptType {POWELL=0,RANDOM_DIRECTION=1,RANDOM,NOPTIMIZER}; //Add new optimizer here BEFORE NOPTIMZER
+  OptimizerFactory() {}
+  ~OptimizerFactory() {}
+
+  // Add new optimizers here, BEFORE NOPTIMIZER.
+  enum OptType {
+    POWELL = 0,
+    RANDOM_DIRECTION = 1,
+    RANDOM,
+    NOPTIMIZER
+  };
+
   static OptType GetOType(string);
   static vector<string> typenames;
   static void SetTypeNames();
-
 };
 
-
-#endif
-
+#endif  // OPTIMIZER_H
diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp
index fe3f6c135..765cb0876 100644
--- a/mert/PerScorer.cpp
+++ b/mert/PerScorer.cpp
@@ -1,10 +1,22 @@
 #include "PerScorer.h"
 
+#include <fstream>
+#include <stdexcept>
+
+#include "ScoreStats.h"
+#include "Util.h"
+
+using namespace std;
+
+PerScorer::PerScorer(const string& config)
+  : StatisticsBasedScorer("PER",config) {}
+
+PerScorer::~PerScorer() {}
 
 void PerScorer::setReferenceFiles(const vector<string>& referenceFiles)
 {
-  // for each line in the reference file, create a multiset of the
-  // word ids
+  // For each line in the reference file, create a multiset of
+  // the word ids.
   if (referenceFiles.size() != 1) {
     throw runtime_error("PER only supports a single reference");
   }
@@ -40,8 +52,8 @@ void PerScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
     msg << "Sentence id (" << sid << ") not found in reference set";
     throw runtime_error(msg.str());
   }
-  //calculate correct, output_length and ref_length for
-  //the line and store it in entry
+  // Calculate correct, output_length and ref_length for
+  // the line and store it in entry
   vector<int> testtokens;
   encode(text,testtokens);
   multiset<int> testtokens_all(testtokens.begin(),testtokens.end());
@@ -59,12 +71,12 @@ void PerScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
   entry.set(stats_str);
 }
 
-float PerScorer::calculateScore(const vector<int>& comps)
+float PerScorer::calculateScore(const vector<int>& comps) const
 {
   float denom = comps[2];
   float num = comps[0] - max(0,comps[1]-comps[2]);
   if (denom == 0) {
-    //shouldn't happen!
+    // This shouldn't happen!
     return 0.0;
   } else {
     return num/denom;
diff --git a/mert/PerScorer.h b/mert/PerScorer.h
index 8f6e6e7a4..800a3caf1 100644
--- a/mert/PerScorer.h
+++ b/mert/PerScorer.h
@@ -1,52 +1,47 @@
 #ifndef __PERSCORER_H__
 #define __PERSCORER_H__
 
-#include <algorithm>
-#include <cmath>
 #include <iostream>
-#include <iterator>
 #include <set>
 #include <sstream>
-#include <stdexcept>
 #include <string>
 #include <vector>
 #include "Types.h"
-#include "ScoreData.h"
 #include "Scorer.h"
 
-
 using namespace std;
 
+class ScoreStats;
+
 /**
-  * Implementation of position-independent word error rate. This is defined
-  * as 1 - (correct - max(0,output_length - ref_length)) / ref_length
-  * In fact, we ignore the " 1 - " so that it can be maximised.
- **/
+ * An implementation of position-independent word error rate.
+ * This is defined as
+ *   1 - (correct - max(0,output_length - ref_length)) / ref_length
+ * In fact, we ignore the " 1 - " so that it can be maximised.
+ */
 class PerScorer: public StatisticsBasedScorer
 {
 public:
-  PerScorer(const string& config = "") : StatisticsBasedScorer("PER",config) {}
+  explicit PerScorer(const string& config = "");
+  ~PerScorer();
+
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
 
-  virtual void whoami() {
+  virtual void whoami() const {
     cerr << "I AM PerScorer" << std::endl;
   }
 
-  size_t NumberOfScores() {
+  virtual size_t NumberOfScores() const {
     // cerr << "PerScorer: 3" << endl;
     return 3;
-  };
-
-//protected:
+  }
 
-  virtual float calculateScore(const vector<int>& comps) ;
+  virtual float calculateScore(const vector<int>& comps) const;
 
 private:
-
-  //no copy
+  // no copying allowed
   PerScorer(const PerScorer&);
-  ~PerScorer() {};
   PerScorer& operator=(const PerScorer&);
 
   // data extracted from reference files
@@ -54,4 +49,4 @@ private:
   vector<multiset<int> > _reftokens;
 };
 
-#endif //__PERSCORER_H
+#endif  // __PERSCORER_H__
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 109ca954a..71338966a 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -1,60 +1,32 @@
 #include "Point.h"
-#include<cmath>
-#include<cstdlib>
+
+#include <cmath>
+#include <cstdlib>
 #include <cassert>
-using namespace std;
+#include <limits>
+#include "FeatureStats.h"
 
+using namespace std;
 
 vector<unsigned> Point::optindices;
 
-unsigned Point::dim=0;
+unsigned Point::dim = 0;
 
 map<unsigned,statscore_t> Point::fixedweights;
 
-unsigned Point::pdim=0;
-unsigned Point::ncall=0;
+unsigned Point::pdim = 0;
+unsigned Point::ncall = 0;
 
 vector<parameter_t> Point::m_min;
 vector<parameter_t> Point::m_max;
 
-void Point::Randomize()
-{
-  assert(m_min.size()==Point::dim);
-  assert(m_max.size()==Point::dim);
-  for (unsigned int i=0; i<size(); i++)
-    operator[](i)= m_min[i]
-      + (float)random()/(float)RAND_MAX * (float)(m_max[i]-m_min[i]);
-}
-
-void Point::NormalizeL2()
-{
-  parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= operator[](i)*operator[](i);
-  if(norm!=0.0) {
-    norm=sqrt(norm);
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
-  }
-}
-
-
-void Point::NormalizeL1()
-{
-  parameter_t norm=0.0;
-  for (unsigned int i=0; i<size(); i++)
-    norm+= abs(operator[](i));
-  if(norm!=0.0) {
-    for (unsigned int i=0; i<size(); i++)
-      operator[](i)/=norm;
-  }
-}
+Point::Point() : vector<parameter_t>(dim), score_(0.0) {}
 
 //Can initialize from a vector of dim or pdim
 Point::Point(const vector<parameter_t>& init,
-    const vector<parameter_t>& min,
-    const vector<parameter_t>& max
-  ):vector<parameter_t>(Point::dim)
+             const vector<parameter_t>& min,
+             const vector<parameter_t>& max)
+    : vector<parameter_t>(Point::dim), score_(0.0)
 {
   m_min.resize(Point::dim);
   m_max.resize(Point::dim);
@@ -72,12 +44,23 @@ Point::Point(const vector<parameter_t>& init,
       m_max[i] = max[optindices[i]];
     }
   }
-};
+}
 
+Point::~Point() {}
+
+void Point::Randomize()
+{
+  assert(m_min.size()==Point::dim);
+  assert(m_max.size()==Point::dim);
+  for (unsigned int i=0; i<size(); i++) {
+    operator[](i) = m_min[i] +
+                   (float)random()/(float)RAND_MAX * (float)(m_max[i]-m_min[i]);
+  }
+}
 
-double Point::operator*(const FeatureStats& F)const
+double Point::operator*(const FeatureStats& F) const
 {
-  ncall++;//to track performance
+  ncall++; // to track performance
   double prod=0.0;
   if(OptimizeAll())
     for (unsigned i=0; i<size(); i++)
@@ -90,43 +73,71 @@ double Point::operator*(const FeatureStats& F)const
   }
   return prod;
 }
-Point Point::operator+(const Point& p2)const
+
+Point Point::operator+(const Point& p2) const
 {
-  assert(p2.size()==size());
+  assert(p2.size() == size());
   Point Res(*this);
-  for(unsigned i=0; i<size(); i++)
-    Res[i]+=p2[i];
-  Res.score=numeric_limits<statscore_t>::max();
+  for (unsigned i = 0; i < size(); i++) {
+    Res[i] += p2[i];
+  }
+
+  Res.score_ = numeric_limits<statscore_t>::max();
   return Res;
-};
+}
 
 void Point::operator+=(const Point& p2)
 {
-  assert(p2.size()==size());
-  for(unsigned i=0; i<size(); i++)
-    operator[](i)+=p2[i];
-  score=numeric_limits<statscore_t>::max();
-};
-
+  assert(p2.size() == size());
+  for (unsigned i = 0; i < size(); i++) {
+    operator[](i) += p2[i];
+  }
+  score_ = numeric_limits<statscore_t>::max();
+}
 
-Point Point::operator*(float l)const
+Point Point::operator*(float l) const
 {
   Point Res(*this);
-  for(unsigned i=0; i<size(); i++)
-    Res[i]*=l;
-  Res.score=numeric_limits<statscore_t>::max();
+  for (unsigned i = 0; i < size(); i++) {
+    Res[i] *= l;
+  }
+  Res.score_ = numeric_limits<statscore_t>::max();
   return Res;
-};
+}
 
-ostream& operator<<(ostream& o,const Point& P)
+ostream& operator<<(ostream& o, const Point& P)
 {
-  vector<parameter_t> w=P.GetAllWeights();
-//	 o << "[" << Point::pdim << "] ";
-  for(unsigned int i=0; i<Point::pdim; i++)
+  vector<parameter_t> w = P.GetAllWeights();
+  for (unsigned int i = 0; i < Point::pdim; i++) {
     o << w[i] << " ";
-//	 o << "=> " << P.GetScore();
+  }
   return o;
-};
+}
+
+void Point::NormalizeL2()
+{
+  parameter_t norm=0.0;
+  for (unsigned int i=0; i<size(); i++)
+    norm+= operator[](i)*operator[](i);
+  if(norm!=0.0) {
+    norm=sqrt(norm);
+    for (unsigned int i=0; i<size(); i++)
+      operator[](i)/=norm;
+  }
+}
+
+
+void Point::NormalizeL1()
+{
+  parameter_t norm=0.0;
+  for (unsigned int i=0; i<size(); i++)
+    norm+= abs(operator[](i));
+  if(norm!=0.0) {
+    for (unsigned int i=0; i<size(); i++)
+      operator[](i)/=norm;
+  }
+}
+
 
 vector<parameter_t> Point::GetAllWeights()const
 {
@@ -141,7 +152,4 @@ vector<parameter_t> Point::GetAllWeights()const
       w[it->first]=it->second;
   }
   return w;
-};
-
-
-
+}
diff --git a/mert/Point.h b/mert/Point.h
index da2da56b1..55d173215 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -1,31 +1,52 @@
 #ifndef POINT_H
 #define POINT_H
+
+#include <fstream>
+#include <map>
 #include <vector>
 #include "Types.h"
-#include "FeatureStats.h"
-#include <cassert>
-
 
+class FeatureStats;
 class Optimizer;
 
-/**class that handle the subset of the Feature weight on which we run the optimization*/
-
-class Point:public vector<parameter_t>
+/**
+ * A class that handles the subset of the Feature weight on which
+ * we run the optimization.
+ */
+class Point : public vector<parameter_t>
 {
   friend class Optimizer;
 private:
-  /**The indices over which we optimize*/
+  /**
+   * The indices over which we optimize.
+   */
   static vector<unsigned int> optindices;
-  /**dimension of optindices and of the parent vector*/
+
+  /**
+   * Dimension of optindices and of the parent vector.
+   */
   static unsigned int dim;
-  /**fixed weights in case of partial optimzation*/
+
+  /**
+   * Fixed weights in case of partial optimization.
+   */
   static map<unsigned int,parameter_t> fixedweights;
-  /**total size of the parameter space; we have pdim=FixedWeight.size()+optinidices.size()*/
+
+  /**
+   * Total size of the parameter space; we have
+   * pdim = fixedweights.size() + optindices.size().
+   */
   static unsigned int pdim;
   static unsigned int ncall;
-  /**The limits for randomization, both vectors are of full length, pdim*/
+
+  /**
+   * The limits for randomization; both vectors have length dim.
+   */
   static vector<parameter_t> m_min;
   static vector<parameter_t> m_max;
+
+  statscore_t score_;
+
 public:
   static unsigned int getdim() {
     return dim;
@@ -41,32 +62,42 @@ public:
   }
   static bool OptimizeAll() {
     return fixedweights.empty();
-  };
-  statscore_t score;
-  Point():vector<parameter_t>(dim) {};
+  }
+
+  Point();
   Point(const vector<parameter_t>& init,
-    const vector<parameter_t>& min,
-    const vector<parameter_t>& max
-  );
+        const vector<parameter_t>& min,
+        const vector<parameter_t>& max);
+  ~Point();
+
   void Randomize();
 
-  double operator*(const FeatureStats&)const;//compute the feature function
-  Point operator+(const Point&)const;
+  // Compute the feature function
+  double operator*(const FeatureStats&) const;
+  Point operator+(const Point&) const;
   void operator+=(const Point&);
-  Point operator*(float)const;
-  /**write the Whole featureweight to a stream (ie pdim float)*/
+  Point operator*(float) const;
+
+  /**
+   * Write the Whole featureweight to a stream (ie pdim float).
+   */
   friend ostream& operator<<(ostream& o,const Point& P);
-  void Normalize() {
-    NormalizeL2();
-  };
+
+  void Normalize() { NormalizeL2(); }
   void NormalizeL2();
   void NormalizeL1();
-  /**return a vector of size pdim where all weights have been put(including fixed ones)*/
-  vector<parameter_t> GetAllWeights()const;
-  statscore_t GetScore()const {
-    return score;
-  };
-};
 
-#endif
+  /**
+   * Return a vector of size pdim where all weights have been
+   * put (including fixed ones).
+   */
+  vector<parameter_t> GetAllWeights() const;
+
+  statscore_t GetScore() const {
+    return score_;
+  }
+
+  void SetScore(statscore_t score) { score_ = score; }
+};
 
+#endif  // POINT_H
diff --git a/mert/ScopedVector.h b/mert/ScopedVector.h
new file mode 100644
index 000000000..1fbce88b7
--- /dev/null
+++ b/mert/ScopedVector.h
@@ -0,0 +1,69 @@
+#ifndef SCOPEDVECTOR_H_
+#define SCOPEDVECTOR_H_
+
+#include <cstddef>
+#include <vector>
+
+/**
+ * A thin wrapper around std::vector<T*> that deletes every stored
+ * pointer in reset() and in its destructor.
+ */
+template <class T>
+class ScopedVector {
+ public:
+  typedef typename std::vector<T*>::iterator iterator;
+  typedef typename std::vector<T*>::const_iterator const_iterator;
+
+  ScopedVector() {}
+
+  // Deletes all stored pointers (see reset()).
+  virtual ~ScopedVector() { reset(); }
+
+  bool empty() const { return vec_.empty(); }
+
+  // Appends e; it will be deleted by reset() or the destructor.
+  void push_back(T *e) { vec_.push_back(e); }
+
+  // Deletes every stored pointer and leaves the vector empty.
+  void reset() {
+    for (iterator it = vec_.begin(); it != vec_.end(); ++it) {
+      delete *it;
+    }
+    vec_.clear();
+  }
+
+  void reserve(size_t capacity) { vec_.reserve(capacity); }
+
+  // Forwards to std::vector::resize. Note that elements dropped by
+  // shrinking are NOT deleted here.
+  void resize(size_t size) { vec_.resize(size); }
+
+  size_t size() const { return vec_.size(); }
+
+  iterator begin() { return vec_.begin(); }
+  const_iterator begin() const { return vec_.begin(); }
+
+  iterator end() { return vec_.end(); }
+  const_iterator end() const { return vec_.end(); }
+
+  // Direct access to the underlying vector of pointers.
+  std::vector<T*>& get() { return vec_; }
+  const std::vector<T*>& get() const { return vec_; }
+
+  std::vector<T*>* operator->() { return &vec_; }
+  const std::vector<T*>* operator->() const { return &vec_; }
+
+  T*& operator[](size_t i) { return vec_[i]; }
+  const T* operator[](size_t i) const { return vec_[i]; }
+
+ private:
+  std::vector<T*> vec_;
+
+  // No copying allowed: a copy would delete the same pointers twice.
+  // Declared with the injected class name (not ScopedVector<T>) because
+  // C++20 rejects a template-id as a constructor's declarator name.
+  ScopedVector(const ScopedVector&);
+  void operator=(const ScopedVector&);
+};
+
+#endif // SCOPEDVECTOR_H_
diff --git a/mert/ScoreArray.cpp b/mert/ScoreArray.cpp
index 92824c818..b26b93114 100644
--- a/mert/ScoreArray.cpp
+++ b/mert/ScoreArray.cpp
@@ -6,12 +6,13 @@
  *
  */
 
-#include <fstream>
 #include "ScoreArray.h"
 #include "Util.h"
+#include "FileStream.h"
 
-ScoreArray::ScoreArray(): idx("")
-{};
+
+ScoreArray::ScoreArray()
+    : number_of_scores(0), idx("") {}
 
 void ScoreArray::savetxt(std::ofstream& outFile, const std::string& sctype)
 {
@@ -132,17 +133,15 @@ void ScoreArray::merge(ScoreArray& e)
     add(e.get(i));
 }
 
-bool ScoreArray::check_consistency()
+bool ScoreArray::check_consistency() const
 {
-  size_t sz = NumberOfScores();
-
+  const size_t sz = NumberOfScores();
   if (sz == 0)
     return true;
 
-  for (scorearray_t::iterator i=array_.begin(); i!=array_.end(); i++)
-    if (i->size()!=sz)
+  for (scorearray_t::const_iterator i = array_.begin(); i != array_.end(); ++i) {
+    if (i->size() != sz)
       return false;
+  }
   return true;
 }
-
-
diff --git a/mert/ScoreArray.h b/mert/ScoreArray.h
index 71ea2b51a..1240a704a 100644
--- a/mert/ScoreArray.h
+++ b/mert/ScoreArray.h
@@ -9,21 +9,19 @@
 #ifndef SCORE_ARRAY_H
 #define SCORE_ARRAY_H
 
-#define SCORES_TXT_BEGIN "SCORES_TXT_BEGIN_0"
-#define SCORES_TXT_END "SCORES_TXT_END_0"
-#define SCORES_BIN_BEGIN "SCORES_BIN_BEGIN_0"
-#define SCORES_BIN_END "SCORES_BIN_END_0"
-
 using namespace std;
 
-#include <limits>
 #include <vector>
 #include <iostream>
-#include <fstream>
+#include <string>
 
-#include "Util.h"
 #include "ScoreStats.h"
 
+const char SCORES_TXT_BEGIN[] = "SCORES_TXT_BEGIN_0";
+const char SCORES_TXT_END[] = "SCORES_TXT_END_0";
+const char SCORES_BIN_BEGIN[] = "SCORES_BIN_BEGIN_0";
+const char SCORES_BIN_END[] = "SCORES_BIN_END_0";
+
 class ScoreArray
 {
 protected:
@@ -32,19 +30,19 @@ protected:
   size_t number_of_scores;
 
 private:
-  std::string  idx; // idx to identify the utterance, it can differ from the index inside the vector
-
+  // idx to identify the utterance.
+  // It can differ from the index inside the vector.
+  std::string  idx;
 
 public:
   ScoreArray();
-
-  ~ScoreArray() {};
+  ~ScoreArray() {}
 
   inline void clear() {
     array_.clear();
   }
 
-  inline std::string getIndex() {
+  inline std::string getIndex() const {
     return idx;
   }
   inline void setIndex(const std::string& value) {
@@ -68,12 +66,13 @@ public:
 
   inline std::string name() const {
     return score_type;
-  };
+  }
+
   inline void name(std::string &sctype) {
     score_type = sctype;
-  };
+  }
 
-  inline size_t size() {
+  inline size_t size() const {
     return array_.size();
   }
   inline size_t NumberOfScores() const {
@@ -96,9 +95,7 @@ public:
   void load(ifstream& inFile);
   void load(const std::string &file);
 
-  bool check_consistency();
+  bool check_consistency() const;
 };
 
-
-#endif
-
+#endif  // SCORE_ARRAY_H
diff --git a/mert/ScoreData.cpp b/mert/ScoreData.cpp
index bdbfd18bf..e79595d06 100644
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@@ -6,20 +6,20 @@
  *
  */
 
-#include <fstream>
 #include "ScoreData.h"
 #include "Scorer.h"
 #include "Util.h"
-
+#include "FileStream.h"
 
 ScoreData::ScoreData(Scorer& ptr):
   theScorer(&ptr)
 {
   score_type = theScorer->getName();
-  theScorer->setScoreData(this);//this is not dangerous: we dont use the this pointer in SetScoreData
+  // This is not dangerous: we don't use the this pointer in SetScoreData.
+  theScorer->setScoreData(this);
   number_of_scores = theScorer->NumberOfScores();
   // TRACE_ERR("ScoreData: number_of_scores: " << number_of_scores << std::endl);
-};
+}
 
 void ScoreData::save(std::ofstream& outFile, bool bin)
 {
@@ -33,7 +33,8 @@ void ScoreData::save(const std::string &file, bool bin)
   if (file.empty()) return;
   TRACE_ERR("saving the array into " << file << std::endl);
 
-  std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
+  // matches a stream with a file. Opens the file.
+  std::ofstream outFile(file.c_str(), std::ios::out);
 
   ScoreStats entry;
 
@@ -94,29 +95,28 @@ void ScoreData::add(ScoreArray& e)
 void ScoreData::add(const ScoreStats& e, const std::string& sent_idx)
 {
   if (exists(sent_idx)) { // array at position e.getIndex() already exists
-    //enlarge array at position e.getIndex()
+    // Enlarge array at position e.getIndex()
     size_t pos = getIndex(sent_idx);
-    //		TRACE_ERR("Inserting in array " << sent_idx << std::endl);
+    //          TRACE_ERR("Inserting in array " << sent_idx << std::endl);
     array_.at(pos).add(e);
-    //		TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
+    //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
   } else {
-    //		TRACE_ERR("Creating a new entry in the array" << std::endl);
+    //          TRACE_ERR("Creating a new entry in the array" << std::endl);
     ScoreArray a;
     a.NumberOfScores(number_of_scores);
     a.add(e);
     a.setIndex(sent_idx);
     add(a);
-    //		TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
+    //          TRACE_ERR("size: " << size() << " -> " << a.size() << std::endl);
   }
 }
 
-
-bool ScoreData::check_consistency()
+bool ScoreData::check_consistency() const
 {
   if (array_.size() == 0)
     return true;
 
-  for (scoredata_t::iterator i = array_.begin(); i !=array_.end(); i++)
+  for (scoredata_t::const_iterator i = array_.begin(); i != array_.end(); ++i)
     if (!i->check_consistency()) return false;
 
   return true;
diff --git a/mert/ScoreData.h b/mert/ScoreData.h
index 500cd5c9b..cb4fb30f9 100644
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@@ -9,14 +9,15 @@
 #ifndef SCORE_DATA_H
 #define SCORE_DATA_H
 
-using namespace std;
-
-#include <limits>
+#include <fstream>
 #include <vector>
 #include <iostream>
-
-#include "Util.h"
+#include <stdexcept>
+#include <string>
 #include "ScoreArray.h"
+#include "ScoreStats.h"
+
+using namespace std;
 
 class Scorer;
 
@@ -24,18 +25,20 @@ class ScoreData
 {
 protected:
   scoredata_t array_;
-  idx2name idx2arrayname_; //map from index to name of array
-  name2idx arrayname2idx_; //map from name to index of array
+  idx2name idx2arrayname_; // map from index to name of array
+  name2idx arrayname2idx_; // map from name to index of array
 
 private:
+  // Do not allow the user to instantiate without arguments.
+  ScoreData() {}
+
   Scorer* theScorer;
   std::string score_type;
   size_t number_of_scores;
 
 public:
   ScoreData(Scorer& sc);
-
-  ~ScoreData() {};
+  ~ScoreData() {}
 
   inline void clear() {
     array_.clear();
@@ -51,11 +54,11 @@ public:
     return array_.at(idx);
   }
 
-  inline bool exists(const std::string & sent_idx) {
+  inline bool exists(const std::string& sent_idx) const {
     return exists(getIndex(sent_idx));
   }
-  inline bool exists(int sent_idx) {
-    return (sent_idx>-1 && sent_idx<(int)array_.size())?true:false;
+  inline bool exists(int sent_idx) const {
+    return (sent_idx > -1 && sent_idx < (int)array_.size()) ? true : false;
   }
 
   inline ScoreStats& get(size_t i, size_t j) {
@@ -65,20 +68,21 @@ public:
     return array_.at(i).get(j);
   }
 
-  inline std::string name() {
+  inline std::string name() const {
     return score_type;
-  };
-  inline std::string name(std::string &sctype) {
+  }
+
+  inline std::string name(const std::string &sctype) {
     return score_type = sctype;
-  };
+  }
 
   void add(ScoreArray& e);
   void add(const ScoreStats& e, const std::string& sent_idx);
 
-  inline size_t NumberOfScores() {
+  inline size_t NumberOfScores() const {
     return number_of_scores;
   }
-  inline size_t size() {
+  inline size_t size() const {
     return array_.size();
   }
 
@@ -91,23 +95,22 @@ public:
   void load(ifstream& inFile);
   void load(const std::string &file);
 
-  bool check_consistency();
+  bool check_consistency() const;
   void setIndex();
 
-  inline int getIndex(const std::string& idx) {
-    name2idx::iterator i = arrayname2idx_.find(idx);
-    if (i!=arrayname2idx_.end())
+  inline int getIndex(const std::string& idx) const {
+    name2idx::const_iterator i = arrayname2idx_.find(idx);
+    if (i != arrayname2idx_.end())
       return i->second;
     else
       return -1;
   }
-  inline std::string getIndex(size_t idx) {
-    idx2name::iterator i = idx2arrayname_.find(idx);
-    if (i!=idx2arrayname_.end())
+  inline std::string getIndex(size_t idx) const {  // array position -> array name
+    idx2name::const_iterator i = idx2arrayname_.find(idx);
+    if (i == idx2arrayname_.end())  // lookup failed: i must not be dereferenced
-      throw runtime_error("there is no entry at index " + idx);
+      throw runtime_error("there is no entry at the requested index");  // no `+ idx`: that offset the literal's pointer
     return i->second;
   }
 };
 
-
-#endif
+#endif  // SCORE_DATA_H
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index d3b28de36..eb1750983 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -6,53 +6,65 @@
  *
  */
 
-#include <fstream>
+#include "Util.h"
 #include "ScoreStats.h"
 
-#define AVAILABLE_ 8;
-
+namespace {
+const int kAvailableSize = 8;
+} // namespace
 
 ScoreStats::ScoreStats()
+    : available_(kAvailableSize), entries_(0),
+      array_(new ScoreStatsType[available_]) {}
+
+ScoreStats::ScoreStats(const size_t size)
+    : available_(size), entries_(size),
+      array_(new ScoreStatsType[available_])
 {
-  available_ = AVAILABLE_;
-  entries_ = 0;
-  array_ = new ScoreStatsType[available_];
-};
+  memset(array_, 0, GetArraySizeWithBytes());
+}
+
+ScoreStats::ScoreStats(std::string &theString)
+    : available_(0), entries_(0), array_(NULL)
+{
+  set(theString);
+}
 
 ScoreStats::~ScoreStats()
 {
-  delete[] array_;
-};
+  if (array_) {
+    delete [] array_;
+    array_ = NULL;
+  }
+}
 
-ScoreStats::ScoreStats(const ScoreStats &stats)
+void ScoreStats::Copy(const ScoreStats &stats)
 {
   available_ = stats.available();
   entries_ = stats.size();
   array_ = new ScoreStatsType[available_];
-  memcpy(array_,stats.getArray(),scorebytes_);
-};
-
+  memcpy(array_, stats.getArray(), GetArraySizeWithBytes());
+}
 
-ScoreStats::ScoreStats(const size_t size)
+ScoreStats::ScoreStats(const ScoreStats &stats)
 {
-  available_ = size;
-  entries_ = size;
-  array_ = new ScoreStatsType[available_];
-  memset(array_,0,scorebytes_);
-};
+  Copy(stats);
+}
 
-ScoreStats::ScoreStats(std::string &theString)
+ScoreStats& ScoreStats::operator=(const ScoreStats &stats)
 {
-  set(theString);
+  if (this == &stats) return *this;  // self-assignment: array_ would be freed before being copied from
+  delete [] array_; Copy(stats);     // release our buffer, then let Copy() allocate and fill a new one
+  return *this;
 }
 
 void ScoreStats::expand()
 {
-  available_*=2;
-  scorestats_t t_ = new ScoreStatsType[available_];
-  memcpy(t_,array_,scorebytes_);
-  delete array_;
-  array_=t_;
+  available_ = (available_ > 0) ? available_ * 2 : kAvailableSize;  // doubling a capacity of 0 would stay 0
+  scorestats_t buf = new ScoreStatsType[available_];
+  if (array_) memcpy(buf, array_, GetArraySizeWithBytes());  // the string ctor starts with array_ == NULL
+  delete [] array_;
+  array_ = buf;
 }
 
 void ScoreStats::add(ScoreStatsType v)
@@ -68,13 +80,13 @@ void ScoreStats::set(std::string &theString)
 
   while (!theString.empty()) {
     getNextPound(theString, substring);
-    add(ATOSST(substring.c_str()));
+    add(ConvertStringToScoreStatsType(substring));
   }
 }
 
 void ScoreStats::loadbin(std::ifstream& inFile)
 {
-  inFile.read((char*) array_, scorebytes_);
+  inFile.read((char*)array_, GetArraySizeWithBytes());
 }
 
 void ScoreStats::loadtxt(std::ifstream& inFile)
@@ -86,7 +98,7 @@ void ScoreStats::loadtxt(std::ifstream& inFile)
 
 void ScoreStats::loadtxt(const std::string &file)
 {
-//	TRACE_ERR("loading the stats from " << file << std::endl);
+//      TRACE_ERR("loading the stats from " << file << std::endl);
 
   std::ifstream inFile(file.c_str(), std::ios::in); // matches a stream with a file. Opens the file
 
@@ -96,7 +108,7 @@ void ScoreStats::loadtxt(const std::string &file)
 
 void ScoreStats::savetxt(const std::string &file)
 {
-//	TRACE_ERR("saving the stats into " << file << std::endl);
+//      TRACE_ERR("saving the stats into " << file << std::endl);
 
   std::ofstream outFile(file.c_str(), std::ios::out); // matches a stream with a file. Opens the file
 
@@ -111,27 +123,12 @@ void ScoreStats::savetxt(std::ofstream& outFile)
 
 void ScoreStats::savebin(std::ofstream& outFile)
 {
-  outFile.write((char*) array_, scorebytes_);
+  outFile.write((char*)array_, GetArraySizeWithBytes());
 }
 
-
-ScoreStats& ScoreStats::operator=(const ScoreStats &stats)
-{
-  delete array_;
-  available_ = stats.available();
-  entries_ = stats.size();
-  array_ = new ScoreStatsType[available_];
-  memcpy(array_,stats.getArray(),scorebytes_);
-
-  return *this;
-}
-
-
-/**write the whole object to a stream*/
 ostream& operator<<(ostream& o, const ScoreStats& e)
 {
   for (size_t i=0; i< e.size(); i++)
     o << e.get(i) << " ";
   return o;
 }
-
diff --git a/mert/ScoreStats.h b/mert/ScoreStats.h
index 9ceee4c6b..43a6f1f23 100644
--- a/mert/ScoreStats.h
+++ b/mert/ScoreStats.h
@@ -9,44 +9,51 @@
 #ifndef SCORE_STATS_H
 #define SCORE_STATS_H
 
-using namespace std;
-
-#include <limits>
 #include <vector>
 #include <iostream>
+#include <fstream>
 #include <cstdlib>
+#include <cstring>
 
-#include "Util.h"
+#include "Types.h"
 
-#define SCORE_STATS_MIN (numeric_limits<ScoreStatsType>::min())
-#define ATOSST(str) ((ScoreStatsType) atoi(str))
-
-#define scorebytes_ (entries_*sizeof(ScoreStatsType))
+using namespace std;
 
 class ScoreStats
 {
 private:
-  scorestats_t array_;
-  size_t entries_;
   size_t available_;
+  size_t entries_;
+
+  // TODO: Use a smart pointer for exception safety.
+  scorestats_t array_;
 
 public:
   ScoreStats();
-  ScoreStats(const size_t size);
+  explicit ScoreStats(const size_t size);
+  explicit ScoreStats(std::string &theString);
+  ~ScoreStats();
+
+  // We intentionally allow copying.
   ScoreStats(const ScoreStats &stats);
-  ScoreStats(std::string &theString);
   ScoreStats& operator=(const ScoreStats &stats);
 
-  ~ScoreStats();
+  void Copy(const ScoreStats &stats);
 
-  bool isfull() {
-    return (entries_ < available_)?0:1;
+  bool isfull() const {
+    return (entries_ < available_) ? 0 : 1;
   }
+
   void expand();
   void add(ScoreStatsType v);
 
-  inline void clear() {
-    memset((void*) array_,0,scorebytes_);
+  void clear() {
+    memset((void*)array_, 0, GetArraySizeWithBytes());
+  }
+
+  void reset() {
+    entries_ = 0;
+    clear();
   }
 
   inline ScoreStatsType get(size_t i) {
@@ -62,8 +69,13 @@ public:
   void set(std::string &theString);
 
   inline size_t bytes() const {
-    return scorebytes_;
+    return GetArraySizeWithBytes();
+  }
+
+  size_t GetArraySizeWithBytes() const {
+    return entries_ * sizeof(ScoreStatsType);
   }
+
   inline size_t size() const {
     return entries_;
   }
@@ -78,22 +90,14 @@ public:
     savetxt("/dev/stdout");
   }
 
-
-
   void loadtxt(const std::string &file);
   void loadtxt(ifstream& inFile);
   void loadbin(ifstream& inFile);
 
-
-  inline void reset() {
-    entries_ = 0;
-    clear();
-  }
-
-  /**write the whole object to a stream*/
+  /**
+   * Write the whole object to a stream.
+   */
   friend ostream& operator<<(ostream& o, const ScoreStats& e);
 };
 
-
-#endif
-
+#endif  // SCORE_STATS_H
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index 82bf6e680..55e1cd748 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -1,4 +1,27 @@
 #include "Scorer.h"
+#include <limits>
+
+// Parse `config`, a comma-separated list of "name:value" pairs, into _config.
+// Throws runtime_error when a pair contains no colon.
+Scorer::Scorer(const string& name, const string& config)
+    : _name(name), _scoreData(0), _preserveCase(true) {
+  const size_t length = config.size();
+  for (size_t begin = 0; begin < length; ) {
+    // The current pair runs up to the next comma or the end of the string.
+    size_t stop = config.find(',', begin);
+    if (stop == string::npos) stop = length;
+    const string entry = config.substr(begin, stop - begin);
+    const size_t colon = entry.find(':');
+    if (colon == string::npos) {
+      throw runtime_error("Missing colon when processing scorer config: " + config);
+    }
+    const string key = entry.substr(0, colon);
+    const string val = entry.substr(colon + 1);
+    cerr << "name: " << key << " value: " << val << endl;
+    _config[key] = val;
+    begin = stop + 1;
+  }
+}
 
 //regularisation strategies
 static float score_min(const statscores_t& scores, size_t start, size_t end)
@@ -15,7 +38,7 @@ static float score_min(const statscores_t& scores, size_t start, size_t end)
 static float score_average(const statscores_t& scores, size_t start, size_t end)
 {
   if ((end - start) < 1) {
-    //shouldn't happen
+    // this shouldn't happen
     return 0;
   }
   float total = 0;
@@ -26,13 +49,50 @@ static float score_average(const statscores_t& scores, size_t start, size_t end)
   return total / (end - start);
 }
 
+// Configure regularisation and case handling from the scorer config:
+//   regtype - "none" (default), "average" or "min"; other values throw
+//   regwin  - regularisation window, converted with atoi (default "0")
+//   case    - "true" (default) or "false"
+StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
+    : Scorer(name,config) {
+  static const string kKeyType = "regtype";
+  static const string kKeyWindow = "regwin";
+  static const string kKeyCase = "case";
+
+  const string regType = getConfig(kKeyType, "none");
+  if (regType == "average") {
+    _regularisationStrategy = REG_AVERAGE;
+  } else if (regType == "min") {
+    _regularisationStrategy = REG_MINIMUM;
+  } else if (regType == "none") {
+    _regularisationStrategy = REG_NONE;
+  } else {
+    throw runtime_error("Unknown scorer regularisation strategy: " + regType);
+  }
+  //    cerr << "Using scorer regularisation strategy: " << regType << endl;
+
+  const string regWindow = getConfig(kKeyWindow, "0");
+  _regularisationWindow = atoi(regWindow.c_str());
+  //    cerr << "Using scorer regularisation window: " << _regularisationWindow << endl;
+
+  // Any value other than "true"/"false" leaves _preserveCase as the
+  // Scorer constructor initialised it.
+  const string caseSetting = getConfig(kKeyCase, "true");
+  if (caseSetting == "false") {
+    _preserveCase = false;
+  } else if (caseSetting == "true") {
+    _preserveCase = true;
+  }
+  //    cerr << "Using case preservation: " << _preserveCase << endl;
+}
+
 void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
-                                   statscores_t& scores)
+                                   statscores_t& scores) const
 {
   if (!_scoreData) {
     throw runtime_error("Score data not loaded");
   }
-  //calculate the score for the candidates
+  // calculate the score for the candidates
   if (_scoreData->size() == 0) {
     throw runtime_error("Score data is empty");
   }
@@ -57,7 +117,7 @@ void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t
   scores.push_back(calculateScore(totals));
 
   candidates_t last_candidates(candidates);
-  //apply each of the diffs, and get new scores
+  // apply each of the diffs, and get new scores
   for (size_t i = 0; i < diffs.size(); ++i) {
     for (size_t j = 0; j < diffs[i].size(); ++j) {
       size_t sid = diffs[i][j].first;
@@ -73,15 +133,15 @@ void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t
     scores.push_back(calculateScore(totals));
   }
 
-  //regularisation. This can either be none, or the min or average as described in
-  //Cer, Jurafsky and Manning at WMT08
+  // Regularisation. This can either be none, or the min or average as described in
+  // Cer, Jurafsky and Manning at WMT08.
   if (_regularisationStrategy == REG_NONE || _regularisationWindow <= 0) {
-    //no regularisation
+    // no regularisation
     return;
   }
 
-  //window size specifies the +/- in each direction
-  statscores_t raw_scores(scores);//copy scores
+  // window size specifies the +/- in each direction
+  statscores_t raw_scores(scores);      // copy scores
   for (size_t i = 0; i < scores.size(); ++i) {
     size_t start = 0;
     if (i >= _regularisationWindow) {
@@ -95,6 +155,3 @@ void  StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t
     }
   }
 }
-
-
-
diff --git a/mert/Scorer.h b/mert/Scorer.h
index 8d3a4c15c..c36f2e03a 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -1,12 +1,7 @@
 #ifndef __SCORER_H__
 #define __SCORER_H__
 
-#include <algorithm>
-#include <cmath>
 #include <iostream>
-#include <iterator>
-#include <limits>
-#include <set>
 #include <sstream>
 #include <stdexcept>
 #include <string>
@@ -21,52 +16,31 @@ enum ScorerRegularisationStrategy {REG_NONE, REG_AVERAGE, REG_MINIMUM};
 class ScoreStats;
 
 /**
-  * Superclass of all scorers and dummy implementation. In order to add a new
-  * scorer it should be sufficient to override prepareStats(), setReferenceFiles()
-  * and score() (or calculateScore()).
-**/
+ * Superclass of all scorers and dummy implementation.
+ *
+ * In order to add a new scorer it should be sufficient to override the members
+ * prepareStats(), setReferenceFiles() and score() (or calculateScore()).
+ */
 class Scorer
 {
 private:
   string _name;
 
 public:
-
-  Scorer(const string& name, const string& config): _name(name), _scoreData(0), _preserveCase(true) {
-//    cerr << "Scorer config string: " << config << endl;
-    size_t start = 0;
-    while (start < config.size()) {
-      size_t end = config.find(",",start);
-      if (end == string::npos) {
-        end = config.size();
-      }
-      string nv = config.substr(start,end-start);
-      size_t split = nv.find(":");
-      if (split == string::npos) {
-        throw runtime_error("Missing colon when processing scorer config: " + config);
-      }
-      string name = nv.substr(0,split);
-      string value = nv.substr(split+1,nv.size()-split-1);
-      cerr << "name: " << name << " value: " << value << endl;
-      _config[name] = value;
-      start = end+1;
-    }
-
-  };
-  virtual ~Scorer() {};
-
+  Scorer(const string& name, const string& config);
+  virtual ~Scorer() {}
 
   /**
-      * returns the number of statistics needed for the computation of the score
-      **/
-  virtual size_t NumberOfScores() {
+   * Return the number of statistics needed for the computation of the score.
+   */
+  virtual size_t NumberOfScores() const {
     cerr << "Scorer: 0" << endl;
     return 0;
-  };
+  }
 
   /**
-    * set the reference files. This must be called before prepareStats.
-    **/
+   * Set the reference files. This must be called before prepareStats().
+   */
   virtual void setReferenceFiles(const vector<string>& referenceFiles) {
     //do nothing
   }
@@ -74,7 +48,7 @@ public:
   /**
    * Process the given guessed text, corresponding to the given reference sindex
    * and add the appropriate statistics to the entry.
-  **/
+   */
   virtual void prepareStats(size_t sindex, const string& text, ScoreStats& entry)
   {}
 
@@ -86,11 +60,11 @@ public:
   }
 
   /**
-    * Score using each of the candidate index, then go through the diffs
-    * applying each in turn, and calculating a new score each time.
-    **/
+   * Score using each of the candidate indices, then go through the diffs
+   * applying each in turn, and calculating a new score each time.
+   */
   virtual void score(const candidates_t& candidates, const diffs_t& diffs,
-                     statscores_t& scores) {
+                     statscores_t& scores) const {
     //dummy impl
     if (!_scoreData) {
       throw runtime_error("score data not loaded");
@@ -101,12 +75,11 @@ public:
     }
   }
 
-
   /**
-    * Calculate the score of the sentences corresponding to the list of candidate
-    * indices. Each index indicates the 1-best choice from the n-best list.
-    **/
-  float score(const candidates_t& candidates) {
+   * Calculate the score of the sentences corresponding to the list of candidate
+   * indices. Each index indicates the 1-best choice from the n-best list.
+   */
+  float score(const candidates_t& candidates) const {
     diffs_t diffs;
     statscores_t scores;
     score(candidates, diffs, scores);
@@ -117,17 +90,16 @@ public:
     return _name;
   }
 
-  size_t getReferenceSize() {
+  size_t getReferenceSize() const {
     if (_scoreData) {
       return _scoreData->size();
     }
     return 0;
   }
 
-
   /**
-    * Set the score data, prior to scoring.
-    **/
+   * Set the score data, prior to scoring.
+   */
   void setScoreData(ScoreData* data) {
     _scoreData = data;
   }
@@ -142,11 +114,11 @@ protected:
   bool _preserveCase;
 
   /**
-    * Value of config variable. If not provided, return default.
-    **/
-  string getConfig(const string& key, const string& def="") {
-    map<string,string>::iterator i = _config.find(key);
-    if (i  == _config.end()) {
+   * Get value of config variable. If not provided, return default.
+   */
+  string getConfig(const string& key, const string& def="") const {
+    map<string,string>::const_iterator i = _config.find(key);
+    if (i == _config.end()) {
       return def;
     } else {
       return i->second;
@@ -156,8 +128,8 @@ protected:
 
   /**
    * Tokenise line and encode.
-   *     Note: We assume that all tokens are separated by single spaces
-   **/
+   * Note: We assume that all tokens are separated by single spaces.
+   */
   void encode(const string& line, vector<int>& encoded) {
     //cerr << line << endl;
     istringstream in (line);
@@ -185,70 +157,30 @@ protected:
 
 private:
   map<string,string> _config;
-
-
 };
 
 
-
 /**
-  * Abstract base class for scorers that work by adding statistics across all
-  * outout sentences, then apply some formula, e.g. bleu, per. **/
+ * Abstract base class for Scorers that work by adding statistics across all
+ * output sentences, then apply some formula, e.g., BLEU, PER.
+ */
 class StatisticsBasedScorer : public Scorer
 {
-
 public:
-  StatisticsBasedScorer(const string& name, const string& config): Scorer(name,config) {
-    //configure regularisation
-    static string KEY_TYPE = "regtype";
-    static string KEY_WINDOW = "regwin";
-    static string KEY_CASE = "case";
-    static string TYPE_NONE = "none";
-    static string TYPE_AVERAGE = "average";
-    static string TYPE_MINIMUM = "min";
-    static string TRUE = "true";
-    static string FALSE = "false";
-
-
-    string type = getConfig(KEY_TYPE,TYPE_NONE);
-    if (type == TYPE_NONE) {
-      _regularisationStrategy = REG_NONE;
-    } else if (type == TYPE_AVERAGE) {
-      _regularisationStrategy = REG_AVERAGE;
-    } else if (type == TYPE_MINIMUM) {
-      _regularisationStrategy = REG_MINIMUM;
-    } else {
-      throw runtime_error("Unknown scorer regularisation strategy: " + type);
-    }
-//    cerr << "Using scorer regularisation strategy: " << type << endl;
-
-    string window = getConfig(KEY_WINDOW,"0");
-    _regularisationWindow = atoi(window.c_str());
-//    cerr << "Using scorer regularisation window: " << _regularisationWindow << endl;
-
-    string preservecase = getConfig(KEY_CASE,TRUE);
-    if (preservecase == TRUE) {
-      _preserveCase = true;
-    } else if (preservecase == FALSE) {
-      _preserveCase = false;
-    }
-//    cerr << "Using case preservation: " << _preserveCase << endl;
-
-
-  }
-  ~StatisticsBasedScorer() {};
+  StatisticsBasedScorer(const string& name, const string& config);
+  virtual ~StatisticsBasedScorer() {}
   virtual void score(const candidates_t& candidates, const diffs_t& diffs,
-                     statscores_t& scores);
+                     statscores_t& scores) const;
 
 protected:
-  //calculate the actual score
-  virtual statscore_t calculateScore(const vector<int>& totals) = 0;
+  /**
+   * Calculate the actual score.
+   */
+  virtual statscore_t calculateScore(const vector<int>& totals) const = 0;
 
-  //regularisation
+  // regularisation
   ScorerRegularisationStrategy _regularisationStrategy;
   size_t  _regularisationWindow;
-
 };
 
-
-#endif //__SCORER_H
+#endif // __SCORER_H__
diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp
new file mode 100644
index 000000000..2f47092ef
--- /dev/null
+++ b/mert/ScorerFactory.cpp
@@ -0,0 +1,37 @@
+#include "ScorerFactory.h"
+
+#include <stdexcept>
+#include "Scorer.h"
+#include "BleuScorer.h"
+#include "PerScorer.h"
+#include "TerScorer.h"
+#include "CderScorer.h"
+#include "MergeScorer.h"
+
+using namespace std;
+
+// Return the names of the scorer types that getScorer() recognises,
+// in the same order as the branches there.
+vector<string> ScorerFactory::getTypes() {
+  static const char* const kTypeNames[] = {
+    "BLEU", "PER", "TER", "CDER", "MERGE"
+  };
+  const size_t count = sizeof(kTypeNames) / sizeof(kTypeNames[0]);
+  return vector<string>(kTypeNames, kTypeNames + count);
+}
+
+// Create a scorer of the requested type, passing `config` to its
+// constructor. The object is allocated with new and returned as a raw
+// pointer. Throws runtime_error when `type` is not a known scorer name.
+Scorer* ScorerFactory::getScorer(const string& type,
+                                 const string& config) {
+  // Each branch returns directly; the derived-class pointers convert to
+  // Scorer* implicitly, so no cast is needed.
+  if (type == "BLEU") return new BleuScorer(config);
+  if (type == "PER") return new PerScorer(config);
+  if (type == "TER") return new TerScorer(config);
+  if (type == "CDER") return new CderScorer(config);
+  if (type == "MERGE") return new MergeScorer(config);
+
+  throw runtime_error("Unknown scorer type: " + type);
+}
diff --git a/mert/ScorerFactory.h b/mert/ScorerFactory.h
index 9bab8d933..f6054c770 100644
--- a/mert/ScorerFactory.h
+++ b/mert/ScorerFactory.h
@@ -1,54 +1,21 @@
 #ifndef __SCORER_FACTORY_H
 #define __SCORER_FACTORY_H
 
-#include <algorithm>
-#include <cmath>
-#include <iostream>
-#include <iterator>
-#include <set>
-#include <sstream>
-#include <stdexcept>
-#include <string>
 #include <vector>
-#include "Types.h"
-#include "Scorer.h"
-#include "BleuScorer.h"
-#include "PerScorer.h"
-#include "TerScorer.h"
-#include "CderScorer.h"
-#include "MergeScorer.h"
+#include <string>
 
-using namespace std;
+class Scorer;
 
 class ScorerFactory
 {
-
 public:
-  vector<string> getTypes() {
-    vector<string> types;
-    types.push_back(string("BLEU"));
-    types.push_back(string("PER"));
-    types.push_back(string("TER"));
-    types.push_back(string("CDER"));
-    types.push_back(string("MERGE"));
-    return types;
-  }
+  static std::vector<std::string> getTypes();
+
+  static Scorer* getScorer(const std::string& type, const std::string& config = "");
 
-  Scorer* getScorer(const string& type, const string& config = "") {
-    if (type == "BLEU") {
-      return (BleuScorer*) new BleuScorer(config);
-    } else if (type == "PER") {
-      return (PerScorer*) new PerScorer(config);
-    } else if (type == "TER") {
-      return (TerScorer*) new TerScorer(config);
-    } else if (type == "CDER") {
-      return (CderScorer*) new CderScorer(config);
-    } else if (type == "MERGE") {
-      return (MergeScorer*) new MergeScorer(config);
-    } else {
-      throw runtime_error("Unknown scorer type: " + type);
-    }
-  }
+private:
+  ScorerFactory() {}
+  ~ScorerFactory() {}
 };
 
-#endif //__SCORER_FACTORY_H
+#endif  // __SCORER_FACTORY_H
diff --git a/mert/TERsrc/alignmentStruct.h b/mert/TERsrc/alignmentStruct.h
index c635d624c..27e8c35d3 100644
--- a/mert/TERsrc/alignmentStruct.h
+++ b/mert/TERsrc/alignmentStruct.h
@@ -43,4 +43,5 @@ public:
 };
 
 }
-#endif
-\ No newline at end of file
+
+#endif  // __TERCPPALIGNMENTSTRUCT_H__
diff --git a/mert/TERsrc/bestShiftStruct.h b/mert/TERsrc/bestShiftStruct.h
index 550d2f532..141ebdeb8 100644
--- a/mert/TERsrc/bestShiftStruct.h
+++ b/mert/TERsrc/bestShiftStruct.h
@@ -1,7 +1,6 @@
 #ifndef __BESTSHIFTSTRUCT_H__
 #define __BESTSHIFTSTRUCT_H__
 
-
 #include <vector>
 #include <stdio.h>
 #include <string>
@@ -47,4 +46,5 @@ public:
 };
 
 }
-#endif
+
+#endif  // __BESTSHIFTSTRUCT_H__
diff --git a/mert/TERsrc/hashMap.h b/mert/TERsrc/hashMap.h
index 1892c5591..c3e4578e5 100644
--- a/mert/TERsrc/hashMap.h
+++ b/mert/TERsrc/hashMap.h
@@ -1,8 +1,10 @@
 /*
  * Generic hashmap manipulation functions
  */
+
 #ifndef __HASHMAP_H__
 #define __HASHMAP_H__
+
 #include "stringHasher.h"
 #include <vector>
 #include <string>
@@ -36,8 +38,6 @@ public:
   string printStringHashForLexicon();
 };
 
-
 }
 
-
-#endif
+#endif  // __HASHMAP_H__
diff --git a/mert/TERsrc/hashMapInfos.h b/mert/TERsrc/hashMapInfos.h
index c18a2ae82..f4a46acf8 100644
--- a/mert/TERsrc/hashMapInfos.h
+++ b/mert/TERsrc/hashMapInfos.h
@@ -3,6 +3,7 @@
  */
 #ifndef __HASHMAPINFOS_H__
 #define __HASHMAPINFOS_H__
+
 #include "infosHasher.h"
 #include <vector>
 #include <string>
@@ -36,8 +37,6 @@ public:
   string printStringHashForLexicon();
 };
 
-
 }
 
-
-#endif
+#endif  // __HASHMAPINFOS_H__
diff --git a/mert/TERsrc/hashMapStringInfos.h b/mert/TERsrc/hashMapStringInfos.h
index 918e8ccb5..7912be0a2 100644
--- a/mert/TERsrc/hashMapStringInfos.h
+++ b/mert/TERsrc/hashMapStringInfos.h
@@ -3,6 +3,7 @@
  */
 #ifndef __HASHMAPSTRINGINFOS_H__
 #define __HASHMAPSTRINGINFOS_H__
+
 #include "stringInfosHasher.h"
 #include <vector>
 #include <string>
@@ -36,8 +37,6 @@ public:
   string printStringHashForLexicon();
 };
 
-
 }
 
-
-#endif
+#endif  // __HASHMAPSTRINGINFOS_H__
diff --git a/mert/TERsrc/infosHasher.h b/mert/TERsrc/infosHasher.h
index d03a9bdc6..8bc2ccd00 100644
--- a/mert/TERsrc/infosHasher.h
+++ b/mert/TERsrc/infosHasher.h
@@ -1,7 +1,7 @@
 #ifndef __INFOSHASHER_H__
 #define __INFOSHASHER_H__
+
 #include <string>
-// #include <ext/hash_map>
 #include <stdio.h>
 #include <iostream>
 #include <vector>
@@ -26,6 +26,6 @@ public:
 
 };
 
-
 }
-#endif
-\ No newline at end of file
+
+#endif  // __INFOSHASHER_H__
diff --git a/mert/TERsrc/stringHasher.h b/mert/TERsrc/stringHasher.h
index a995c0cc7..0894812f0 100644
--- a/mert/TERsrc/stringHasher.h
+++ b/mert/TERsrc/stringHasher.h
@@ -1,7 +1,7 @@
 #ifndef __STRINGHASHER_H__
 #define __STRINGHASHER_H__
+
 #include <string>
-//#include <ext/hash_map>
 #include <iostream>
 
 using namespace std;
@@ -21,10 +21,8 @@ public:
   string getKey();
   string getValue();
   void setValue ( string value );
-
-
 };
 
-
 }
-#endif
+
+#endif  // __STRINGHASHER_H__
diff --git a/mert/TERsrc/stringInfosHasher.h b/mert/TERsrc/stringInfosHasher.h
index c7a705659..e9324cc47 100644
--- a/mert/TERsrc/stringInfosHasher.h
+++ b/mert/TERsrc/stringInfosHasher.h
@@ -1,7 +1,7 @@
 #ifndef __STRINGINFOSHASHER_H__
 #define __STRINGINFOSHASHER_H__
+
 #include <string>
-// #include <ext/hash_map>
 #include <iostream>
 #include <vector>
 
@@ -21,10 +21,8 @@ public:
   string getKey();
   vector<string> getValue();
   void setValue ( vector<string> value );
-
-
 };
 
-
 }
-#endif
-\ No newline at end of file
+
+#endif  // __STRINGINFOSHASHER_H__
diff --git a/mert/TERsrc/terAlignment.h b/mert/TERsrc/terAlignment.h
index 7c91e3aae..bca00ead3 100644
--- a/mert/TERsrc/terAlignment.h
+++ b/mert/TERsrc/terAlignment.h
@@ -1,7 +1,6 @@
 #ifndef __TERCPPTERALIGNMENT_H__
 #define __TERCPPTERALIGNMENT_H__
 
-
 #include <vector>
 #include <stdio.h>
 #include <string.h>
@@ -48,4 +47,5 @@ public:
 };
 
 }
-#endif
-\ No newline at end of file
+
+#endif  // __TERCPPTERALIGNMENT_H__
diff --git a/mert/TERsrc/terShift.h b/mert/TERsrc/terShift.h
index e4ce27f19..a54ba633d 100644
--- a/mert/TERsrc/terShift.h
+++ b/mert/TERsrc/terShift.h
@@ -1,14 +1,12 @@
 #ifndef __TERCPPTERSHIFT_H__
 #define __TERCPPTERSHIFT_H__
 
-
 #include <vector>
 #include <stdio.h>
 #include <string>
 #include <sstream>
 #include "tools.h"
 
-
 using namespace std;
 using namespace Tools;
 
@@ -42,4 +40,5 @@ public:
 };
 
 }
-#endif
-\ No newline at end of file
+
+#endif  // __TERCPPTERSHIFT_H__
diff --git a/mert/TERsrc/tercalc.h b/mert/TERsrc/tercalc.h
index e66d75a98..cf205ccbb 100644
--- a/mert/TERsrc/tercalc.h
+++ b/mert/TERsrc/tercalc.h
@@ -79,4 +79,4 @@ public:
 
 }
 
-#endif
+#endif  // _TERCPPTERCALC_H___
diff --git a/mert/TERsrc/tools.h b/mert/TERsrc/tools.h
index af2204771..df681a2b2 100644
--- a/mert/TERsrc/tools.h
+++ b/mert/TERsrc/tools.h
@@ -1,7 +1,6 @@
 #ifndef __TERCPPTOOLS_H__
 #define __TERCPPTOOLS_H__
 
-
 #include <vector>
 #include <iostream>
 #include <stdio.h>
@@ -9,7 +8,6 @@
 #include <string>
 #include <sstream>
 
-
 using namespace std;
 
 namespace Tools
@@ -61,5 +59,7 @@ string normalizeStd(string str);
 string printParams(param p);
 // };
 param copyParam(param p);
+
 }
-#endif
+
+#endif  // __TERCPPTOOLS_H__
diff --git a/mert/TerScorer.cpp b/mert/TerScorer.cpp
index 8e8eb9b63..3f4ca65e8 100644
--- a/mert/TerScorer.cpp
+++ b/mert/TerScorer.cpp
@@ -1,16 +1,25 @@
 #include "TerScorer.h"
+
+#include <cmath>
+#include <sstream>
+#include <stdexcept>
+
+#include "ScoreStats.h"
 #include "TERsrc/tercalc.h"
 #include "TERsrc/terAlignment.h"
+#include "Util.h"
 
-const int TerScorer::LENGTH = 2;
 using namespace TERCpp;
-using namespace std;
 
+// Construct the scorer, passing "TER" as its name; kLENGTH is set to 2.
+TerScorer::TerScorer(const string& config)
+    : StatisticsBasedScorer("TER",config), kLENGTH(2) {}
+TerScorer::~TerScorer() {}
 
 void TerScorer::setReferenceFiles ( const vector<string>& referenceFiles )
 {
   // for each line in the reference file, create a multiset of the
-  // word ids
+  // word ids.
   for ( int incRefs = 0; incRefs < ( int ) referenceFiles.size(); incRefs++ ) {
     stringstream convert;
     m_references.clear();
@@ -78,26 +87,14 @@ void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry
 
   }
   ostringstream stats;
-//		multiplication by 100 in order to keep the average precision in the TER calculation
+  // multiplication by 100 in order to keep the average precision
+  // in the TER calculation.
   stats << result.numEdits*100.0 << " " << result.averageWords*100.0 << " " << result.scoreAv()*100.0 << " " ;
   string stats_str = stats.str();
   entry.set ( stats_str );
 }
 
-float TerScorer::calculateScore ( const vector<int>& comps )
-{
-  float denom = 1.0 * comps[1];
-  float num =  -1.0 * comps[0];
-  if ( denom == 0 ) {
-//         shouldn't happen!
-    return 1.0;
-  } else {
-    return (1.0+(num / denom));
-  }
-}
-
-/*
-float TerScorer::calculateScore ( const vector<float>& comps )
+float TerScorer::calculateScore(const vector<int>& comps) const
 {
   float denom = 1.0 * comps[1];
   float num =  -1.0 * comps[0];
@@ -108,4 +105,3 @@ float TerScorer::calculateScore ( const vector<float>& comps )
     return (1.0+(num / denom));
   }
 }
-*/
diff --git a/mert/TerScorer.h b/mert/TerScorer.h
index 566aa99b0..0e9996a99 100644
--- a/mert/TerScorer.h
+++ b/mert/TerScorer.h
@@ -1,59 +1,49 @@
 #ifndef __TERSCORER_H__
 #define __TERSCORER_H__
 
-// #include <stdio.h>
-#include <algorithm>
-#include <cmath>
 #include <iostream>
-#include <iterator>
 #include <set>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #include <vector>
-#include <limits.h>
+
 #include "Types.h"
-#include "ScoreData.h"
 #include "Scorer.h"
-#include "TERsrc/tercalc.h"
-#include "TERsrc/terAlignment.h"
 
 using namespace std;
-using namespace TERCpp;
 
 // enum TerReferenceLengthStrategy { TER_AVERAGE, TER_SHORTEST, TER_CLOSEST };
 
+class ScoreStats;
 
 /**
-  * Bleu scoring
- **/
+ * TER scoring
+ */
 class TerScorer: public StatisticsBasedScorer
 {
 public:
-  TerScorer(const string& config = "") : StatisticsBasedScorer("TER",config) {}
+  explicit TerScorer(const string& config = "");
+  ~TerScorer();
+
   virtual void setReferenceFiles(const vector<string>& referenceFiles);
   virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
-  static const int LENGTH;
-  virtual void whoami() {
+
+  virtual void whoami() const {
     cerr << "I AM TerScorer" << std::endl;
   }
-  size_t NumberOfScores() {
-    // cerr << "TerScorer: " << (LENGTH + 1) << endl;
-    return (LENGTH + 1);
-  };
 
+  virtual size_t NumberOfScores() const {
+    // cerr << "TerScorer: " << (kLENGTH + 1) << endl;
+    return kLENGTH + 1;
+  }
 
-//protected:
-  float calculateScore(const vector<int>& comps);
-//  float calculateScore(const vector<float>& comps);
+  virtual float calculateScore(const vector<int>& comps) const;
 
 private:
+  const int kLENGTH;
+
   string javaEnv;
   string tercomEnv;
-  //no copy
-  TerScorer(const TerScorer&);
-  ~TerScorer() {};
-  TerScorer& operator=(const TerScorer&);
+
   // data extracted from reference files
   vector<size_t> _reflengths;
   vector<multiset<int> > _reftokens;
@@ -61,7 +51,9 @@ private:
   vector<vector<vector<int> > > m_multi_references;
   string m_pid;
 
+  // no copying allowed
+  TerScorer(const TerScorer&);
+  TerScorer& operator=(const TerScorer&);
 };
 
-
-#endif //__TERSCORER_H
+#endif // __TERSCORER_H__
diff --git a/mert/Timer.cpp b/mert/Timer.cpp
index 74db1b1e2..373eb4a2e 100644
--- a/mert/Timer.cpp
+++ b/mert/Timer.cpp
@@ -1,15 +1,6 @@
-#include <ctime>
-#include <iostream>
-#include <iomanip>
-#include "Util.h"
 #include "Timer.h"
+#include "Util.h"
 
-/***
- * Return the total time that the timer has been in the "running"
- * state since it was first "started" or last "restarted".  For
- * "short" time periods (less than an hour), the actual cpu time
- * used is reported instead of the elapsed time.
- */
 double Timer::elapsed_time()
 {
   time_t now;
@@ -17,22 +8,11 @@ double Timer::elapsed_time()
   return difftime(now, start_time);
 }
 
-/***
- * Return the total time that the timer has been in the "running"
- * state since it was first "started" or last "restarted".  For
- * "short" time periods (less than an hour), the actual cpu time
- * used is reported instead of the elapsed time.
- * This function is the public version of elapsed_time()
- */
 double Timer::get_elapsed_time()
 {
   return elapsed_time();
 }
 
-/***
- * Start a timer.  If it is already running, let it continue running.
- * Print an optional message.
- */
 void Timer::start(const char* msg)
 {
   // Print an optional message, something like "Starting timer t";
@@ -82,9 +62,7 @@ inline void Timer::stop(const char* msg)
   running = false;
 }
 */
-/***
- * Print out an optional message followed by the current timer timing.
- */
+
 void Timer::check(const char* msg)
 {
   // Print an optional message, something like "Checking timer t";
@@ -93,15 +71,3 @@ void Timer::check(const char* msg)
 //  TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
   TRACE_ERR( "[" << (running ? elapsed_time() : 0) << "] seconds\n");
 }
-
-/***
- * Allow timers to be printed to ostreams using the syntax 'os << t'
- * for an ostream 'os' and a timer 't'.  For example, "cout << t" will
- * print out the total amount of time 't' has been "running".
- */
-std::ostream& operator<<(std::ostream& os, Timer& t)
-{
-  //os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << (t.running ? t.elapsed_time() : 0);
-  os << (t.running ? t.elapsed_time() : 0);
-  return os;
-}
diff --git a/mert/Timer.h b/mert/Timer.h
index a9ec890c5..403547620 100644
--- a/mert/Timer.h
+++ b/mert/Timer.h
@@ -4,32 +4,64 @@
 #include <ctime>
 #include <iostream>
 #include <iomanip>
-#include "Util.h"
 
 class Timer
 {
+  /**
+   * Allow timers to be printed to ostreams using the syntax 'os << t'
+   * for an ostream 'os' and a timer 't'.  For example, "cout << t" will
+   * print out the total amount of time 't' has been "running".
+   */
   friend std::ostream& operator<<(std::ostream& os, Timer& t);
 
 private:
   bool running;
   time_t start_time;
 
-  //TODO in seconds?
+  /**
+   * Return the total time, in seconds, that the timer has been in the
+   * "running" state since it was first "started" or last "restarted".
+   * NOTE(review): Timer.cpp computes this with difftime() on time_t
+   * values, i.e. elapsed wall-clock time; the earlier claim that CPU
+   * time is reported for periods under an hour does not match that.
+   */
   double elapsed_time();
 
 public:
-  /***
-   * 'running' is initially false.  A timer needs to be explicitly started
-   * using 'start' or 'restart'
+  /**
+   * 'running' is initially false. A timer needs to be explicitly started
+   * using 'start' or 'restart'.
    */
   Timer() : running(false), start_time(0) { }
 
+  /**
+   * Start a timer.  If it is already running, let it continue running.
+   * Print an optional message.
+   */
   void start(const char* msg = 0);
 //  void restart(const char* msg = 0);
 //  void stop(const char* msg = 0);
+
+  /**
+   * Print out an optional message followed by the current timer timing.
+   */
   void check(const char* msg = 0);
-  double get_elapsed_time();
 
+  /**
+   * Return the total time that the timer has been in the "running"
+   * state since it was first "started" or last "restarted".
+   * NOTE(review): the value comes from difftime(), so it is elapsed
+   * wall-clock seconds, not CPU time as previously stated here.
+   * This function is the public version of elapsed_time().
+   */
+  double get_elapsed_time();
 };
 
-#endif // TIMER_H
+// Stream the timer's elapsed time, or 0 when it is not running.
+inline std::ostream& operator<<(std::ostream& os, Timer& t)
+{
+  const double elapsed = t.running ? t.elapsed_time() : 0;
+  return os << elapsed;
+}
+
+#endif  // TIMER_H
diff --git a/mert/Types.h b/mert/Types.h
index 659da2334..1d0fd0dd0 100644
--- a/mert/Types.h
+++ b/mert/Types.h
@@ -1,5 +1,6 @@
 #ifndef TYPE_H
 #define TYPE_H
+
 #include <vector>
 #include <map>
 #include <string>
@@ -39,4 +40,4 @@ typedef vector<ScoreArray> scoredata_t;
 typedef map<size_t, std::string> idx2name;
 typedef map<std::string, size_t> name2idx;
 
-#endif
+#endif  // TYPE_H
diff --git a/mert/Util.cpp b/mert/Util.cpp
index ffc8a4c7e..3769c71e7 100644
--- a/mert/Util.cpp
+++ b/mert/Util.cpp
@@ -6,17 +6,24 @@
  *
  */
 
-#include <stdexcept>
 #include "Util.h"
-
 #include "Timer.h"
 
 using namespace std;
 
-//global variable
+// global variables
 Timer g_timer;
 
-int verbose=0;
+int verbose = 0;
+
+namespace {
+
+bool FindDelimiter(const std::string &str, const std::string &delim, size_t *pos)
+{
+  *pos = str.find(delim);
+  return *pos != std::string::npos ? true : false;
+}
+} // namespace
 
 int verboselevel()
 {
@@ -25,26 +32,27 @@ int verboselevel()
 
 int setverboselevel(int v)
 {
-  verbose=v;
+  verbose = v;
   return verbose;
 }
 
-int getNextPound(std::string &theString, std::string &substring, const std::string delimiter)
+size_t getNextPound(std::string &str, std::string &substr,
+                    const std::string &delimiter)
 {
-  unsigned int pos = 0;
+  size_t pos = 0;
 
-  //skip all occurrences of delimiter
-  while ( pos == 0 ) {
-    if ((pos = theString.find(delimiter)) != std::string::npos) {
-      substring.assign(theString, 0, pos);
-      theString.erase(0,pos + delimiter.size());
+  // skip all occurrences of delimiter
+  while (pos == 0) {
+    if (FindDelimiter(str, delimiter, &pos)) {
+      substr.assign(str, 0, pos);
+      str.erase(0, pos + delimiter.size());
     } else {
-      substring.assign(theString);
-      theString.assign("");
+      substr.assign(str);
+      str.assign("");
     }
   }
-  return (pos);
-};
+  return pos;
+}
 
 void split(const std::string &s, char delim, std::vector<std::string> &elems) {
   std::stringstream ss(s);
@@ -54,79 +62,36 @@ void split(const std::string &s, char delim, std::vector<std::string> &elems) {
   }
 }
 
-inputfilestream::inputfilestream(const std::string &filePath)
-  : std::istream(0),
-    m_streambuf(0)
-{
-  //check if file is readable
-  std::filebuf* fb = new std::filebuf();
-  _good=(fb->open(filePath.c_str(), std::ios::in)!=NULL);
-
-  if (filePath.size() > 3 &&
-      filePath.substr(filePath.size() - 3, 3) == ".gz") {
-    fb->close();
-    delete fb;
-    m_streambuf = new gzfilebuf(filePath.c_str());
-  } else {
-    m_streambuf = fb;
+void Tokenize(const char *str, const char delim,
+              std::vector<std::string> *res) {
+  while (1) {
+    const char *begin = str;
+    while (*str != delim && *str) str++;
+    res->push_back(std::string(begin, str));
+    if (*str++ == 0) break;
   }
-  this->init(m_streambuf);
-}
-
-inputfilestream::~inputfilestream()
-{
-  delete m_streambuf;
-  m_streambuf = 0;
-}
-
-void inputfilestream::close()
-{
-}
-
-outputfilestream::outputfilestream(const std::string &filePath)
-  : std::ostream(0),
-    m_streambuf(0)
-{
-  //check if file is readable
-  std::filebuf* fb = new std::filebuf();
-  _good=(fb->open(filePath.c_str(), std::ios::out)!=NULL);
-
-  if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
-    throw runtime_error("Output to a zipped file not supported!");
-  } else {
-    m_streambuf = fb;
-  }
-  this->init(m_streambuf);
-}
-
-outputfilestream::~outputfilestream()
-{
-  delete m_streambuf;
-  m_streambuf = 0;
-}
-
-void outputfilestream::close()
-{
 }
 
 int swapbytes(char *p, int sz, int n)
 {
   char c, *l, *h;
 
-  if((n<1) || (sz<2)) return 0;
-  for(; n--; p+=sz) for(h=(l=p)+sz; --h>l; l++) {
-      c=*h;
-      *h=*l;
-      *l=c;
+  if((n < 1) || (sz < 2)) return 0;
+  for (; n--; p += sz) {
+    for (h = (l = p) + sz; --h > l; l++) {
+      c = *h;
+      *h = *l;
+      *l = c;
     }
+  }
   return 0;
 
-};
+}
 
 void ResetUserTime()
 {
   g_timer.start();
-};
+}
 
 void PrintUserTime(const std::string &message)
 {
@@ -137,4 +102,3 @@ double GetUserTime()
 {
   return g_timer.get_elapsed_time();
 }
-
diff --git a/mert/Util.h b/mert/Util.h
index 592344ddb..da68685c3 100644
--- a/mert/Util.h
+++ b/mert/Util.h
@@ -9,15 +9,9 @@
 #ifndef UTIL_H
 #define UTIL_H
 
-using namespace std;
-
+#include <cstdlib>
 #include <stdexcept>
 #include <limits>
-
-#define US_NOSET (numeric_limits<unsigned short>::max())
-
-#define MAX_LINE  1024
-
 #include <vector>
 #include <map>
 #include <iostream>
@@ -25,15 +19,12 @@ using namespace std;
 #include <string>
 #include <cstring>
 
-#include <fstream>
-#include "gzfilebuf.h"
-
 #include "Types.h"
-#include "ScoreStats.h"
-#include "FeatureStats.h"
 
-class ScoreStats;
-class FeatureStats;
+using namespace std;
+
+#define US_NOSET (numeric_limits<unsigned short>::max())
+#define MAX_LINE  1024
 
 #ifdef TRACE_ENABLE
 #define TRACE_ERR(str) { std::cerr << str; }
@@ -41,14 +32,28 @@ class FeatureStats;
 #define TRACE_ERR(str) { }
 #endif
 
-#define DELIMITER_SYMBOL " "
+const char kDefaultDelimiterSymbol[] = " ";
 
 int verboselevel();
 int setverboselevel(int v);
 
-int getNextPound(std::string &theString, std::string &substring, const std::string delimiter=DELIMITER_SYMBOL);
+/**
+ * Split 'str' at the first occurrence of 'delimiter'. The text before the
+ * delimiter is copied into 'substr', and that text together with the
+ * delimiter is erased from the front of 'str'. Delimiters at the very
+ * start of 'str' are skipped. If no delimiter is found, 'substr' receives
+ * all of 'str' and 'str' is cleared.
+ *
+ * It returns the position at which the delimiter was found in 'str'.
+ * If the delimiter is not found, std::string::npos is returned.
+ */
+size_t getNextPound(std::string &str, std::string &substr,
+                    const std::string &delimiter = kDefaultDelimiterSymbol);
+
 void split(const std::string &s, char delim, std::vector<std::string> &elems);
 
+void Tokenize(const char *str, const char delim, std::vector<std::string> *res);
+
 template<typename T>
 inline T Scan(const std::string &input)
 {
@@ -56,37 +61,7 @@ inline T Scan(const std::string &input)
   T ret;
   stream >> ret;
   return ret;
-};
-
-class inputfilestream : public std::istream
-{
-protected:
-  std::streambuf *m_streambuf;
-  bool _good;
-public:
-
-  inputfilestream(const std::string &filePath);
-  ~inputfilestream();
-  bool good() {
-    return _good;
-  }
-  void close();
-};
-
-class outputfilestream : public std::ostream
-{
-protected:
-  std::streambuf *m_streambuf;
-  bool _good;
-public:
-
-  outputfilestream(const std::string &filePath);
-  ~outputfilestream();
-  bool good() {
-    return _good;
-  }
-  void close();
-};
+}
 
 template<typename T>
 inline std::string stringify(T x)
@@ -97,10 +72,29 @@ inline std::string stringify(T x)
   return o.str();
 }
 
+inline ScoreStatsType ConvertCharToScoreStatsType(const char *str)
+{
+  return std::atoi(str);
+}
+
+inline ScoreStatsType ConvertStringToScoreStatsType(const std::string& str)
+{
+  return ConvertCharToScoreStatsType(str.c_str());
+}
+
+inline FeatureStatsType ConvertCharToFeatureStatsType(const char *str)
+{
+  return static_cast<FeatureStatsType>(std::atof(str));
+}
+
+inline FeatureStatsType ConvertStringToFeatureStatsType(const std::string &str)
+{
+  return ConvertCharToFeatureStatsType(str.c_str());
+}
+
 // Utilities to measure decoding time
 void ResetUserTime();
 void PrintUserTime(const std::string &message);
 double GetUserTime();
 
-#endif
-
+#endif  // UTIL_H
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index f766fb19f..c1d6039da 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -1,3 +1,4 @@
+#include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -33,10 +34,8 @@ void usage()
   cerr<<"[--rseed|-r] the random seed for bootstraping (defaults to system clock)"<<endl;
   cerr<<"[--help|-h] print this message and exit"<<endl;
   exit(1);
-  
 }
 
-
 static struct option long_options[] = {
   {"sctype",required_argument,0,'s'},
   {"scconfig",required_argument,0,'c'},
@@ -65,27 +64,27 @@ int main(int argc, char** argv)
   int c;
   while ((c=getopt_long (argc,argv, "s:c:R:C:b:r:h", long_options, &option_index)) != -1) {
     switch(c) {
-    case 's':
-      scorerType = string(optarg);
-      break;
-    case 'c':
-      scorerConfig = string(optarg);
-      break;
-    case 'R':
-      reference = string(optarg);
-      break;
-    case 'C':
-      candidate = string(optarg);
-      break;
-    case 'b':
-	  bootstrap = atoi(optarg);
-      break;
-    case 'r':
-      seed = strtol(optarg, NULL, 10);
-      hasSeed = true;
-      break;
-    default:
-      usage();
+      case 's':
+        scorerType = string(optarg);
+        break;
+      case 'c':
+        scorerConfig = string(optarg);
+        break;
+      case 'R':
+        reference = string(optarg);
+        break;
+      case 'C':
+        candidate = string(optarg);
+        break;
+      case 'b':
+        bootstrap = atoi(optarg);
+        break;
+      case 'r':
+        seed = strtol(optarg, NULL, 10);
+        hasSeed = true;
+        break;
+      default:
+        usage();
     }
   }
 
@@ -101,29 +100,30 @@ int main(int argc, char** argv)
   }
 
   try {
-	vector<string> refFiles;
-	vector<string> candFiles;
+    vector<string> refFiles;
+    vector<string> candFiles;
+
+    if (reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
+    split(reference,',',refFiles);
 
-	if (reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
-	split(reference,',',refFiles);
+    if (candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file.");
+    split(candidate,',',candFiles);
 
-	if (candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file.");
-	split(candidate,',',candFiles);
+    scorer = ScorerFactory::getScorer(scorerType,scorerConfig);
+    cerr << "Using scorer: " << scorer->getName() << endl;
 
-	ScorerFactory sfactory;
-	scorer = sfactory.getScorer(scorerType,scorerConfig);
-	cerr << "Using scorer: " << scorer->getName() << endl;
+    scorer->setReferenceFiles(refFiles);
+    PrintUserTime("Reference files loaded");
 
-	scorer->setReferenceFiles(refFiles);
-	PrintUserTime("Reference files loaded");
-	
 
-	for (vector<string>::const_iterator it = candFiles.begin(); it != candFiles.end(); ++it)
-	{
-		evaluate(*it);
-	}
+    for (vector<string>::const_iterator it = candFiles.begin(); it != candFiles.end(); ++it)
+    {
+      evaluate(*it);
+    }
 
-	PrintUserTime("Evaluation done");
+    PrintUserTime("Evaluation done");
+
+    delete scorer;
 
     return EXIT_SUCCESS;
   } catch (const exception& e) {
@@ -135,89 +135,91 @@ int main(int argc, char** argv)
 
 void evaluate(const string& candFile)
 {
-	ifstream cand(candFile.c_str());
-	if (!cand.good()) throw runtime_error("Error opening candidate file");
-
-	vector<ScoreStats> entries;
-
-	// Loading sentences and preparing statistics
-	ScoreStats scoreentry;
-	string line;
-	while (getline(cand, line))
-	{
-		scorer->prepareStats(entries.size(), line, scoreentry);
-		entries.push_back(scoreentry);
-	}
-
-	PrintUserTime("Candidate file " + candFile + " loaded and stats prepared");
-
-	int n = entries.size();
-	if (bootstrap)
-	{
-		vector<float> scores;
-		for (int i = 0; i < bootstrap; ++i)
-		{
-			ScoreData* scoredata = new ScoreData(*scorer);
-			for (int j = 0; j < n; ++j)
-			{
-				int randomIndex = random() % n;
-				string str_j = int2string(j);
-				scoredata->add(entries[randomIndex], str_j);
-			}
-			scorer->setScoreData(scoredata);
-			candidates_t candidates(n, 0);
-			float score = scorer->score(candidates);
-			scores.push_back(score);
-			delete scoredata;
-		}
-		
-		float avg = average(scores);
-		float dev = stdDeviation(scores, avg);
-
-		cout.setf(ios::fixed,ios::floatfield);
-		cout.precision(4);
-		cout << "File: " << candFile << "\t" << scorer->getName() << " Average score: " << avg << "\tStandard deviation: " << dev << endl;
-	}
-	else
-	{
-		ScoreData* scoredata = new ScoreData(*scorer);
-		for (int sid = 0; sid < n; ++sid)
-		{
-			string str_sid = int2string(sid);
-			scoredata->add(entries[sid], str_sid);
-		}
-		scorer->setScoreData(scoredata);
-		candidates_t candidates(n, 0);
-		float score = scorer->score(candidates);
-		delete scoredata;
-
-		cout.setf(ios::fixed,ios::floatfield);
-		cout.precision(4);
-		cout << "File: " << candFile << "\t" << scorer->getName() << " Score: " << score << endl;
-	}
+  ifstream cand(candFile.c_str());
+  if (!cand.good()) throw runtime_error("Error opening candidate file");
+
+  vector<ScoreStats> entries;
+
+  // Loading sentences and preparing statistics
+  ScoreStats scoreentry;
+  string line;
+  while (getline(cand, line))
+  {
+    scorer->prepareStats(entries.size(), line, scoreentry);
+    entries.push_back(scoreentry);
+  }
+
+  PrintUserTime("Candidate file " + candFile + " loaded and stats prepared");
+
+  int n = entries.size();
+  if (bootstrap)
+  {
+    vector<float> scores;
+    for (int i = 0; i < bootstrap; ++i)
+    {
+      // TODO: Use smart pointer for exception safety.
+      ScoreData* scoredata = new ScoreData(*scorer);
+      for (int j = 0; j < n; ++j)
+      {
+        int randomIndex = random() % n;
+        string str_j = int2string(j);
+        scoredata->add(entries[randomIndex], str_j);
+      }
+      scorer->setScoreData(scoredata);
+      candidates_t candidates(n, 0);
+      float score = scorer->score(candidates);
+      scores.push_back(score);
+      delete scoredata;
+    }
+
+    float avg = average(scores);
+    float dev = stdDeviation(scores, avg);
+
+    cout.setf(ios::fixed,ios::floatfield);
+    cout.precision(4);
+    cout << "File: " << candFile << "\t" << scorer->getName() << " Average score: " << avg << "\tStandard deviation: " << dev << endl;
+  }
+  else
+  {
+    // TODO: Use smart pointer for exception safety.
+    ScoreData* scoredata = new ScoreData(*scorer);
+    for (int sid = 0; sid < n; ++sid)
+    {
+      string str_sid = int2string(sid);
+      scoredata->add(entries[sid], str_sid);
+    }
+    scorer->setScoreData(scoredata);
+    candidates_t candidates(n, 0);
+    float score = scorer->score(candidates);
+    delete scoredata;
+
+    cout.setf(ios::fixed,ios::floatfield);
+    cout.precision(4);
+    cout << "File: " << candFile << "\t" << scorer->getName() << " Score: " << score << endl;
+  }
 }
 
 string int2string(int n)
 {
-	stringstream ss;
-	ss << n;
-	return ss.str();
+  stringstream ss;
+  ss << n;
+  return ss.str();
 }
 
 float average(const vector<float>& list)
 {
-	float sum = 0;
-	for (vector<float>::const_iterator it = list.begin(); it != list.end(); ++it)
-		sum += *it;
-	
-	return sum / list.size();	
+  float sum = 0;
+  for (vector<float>::const_iterator it = list.begin(); it != list.end(); ++it)
+    sum += *it;
+
+  return sum / list.size();
 }
 
 float stdDeviation(const vector<float>& list, float avg)
 {
-	vector<float> tmp;
-	for (vector<float>::const_iterator it = list.begin(); it != list.end(); ++it)
-		tmp.push_back(pow(*it - avg, 2));
+  vector<float> tmp;
+  for (vector<float>::const_iterator it = list.begin(); it != list.end(); ++it)
+    tmp.push_back(pow(*it - avg, 2));
 
-	return sqrt(average(tmp));
+  return sqrt(average(tmp));
 }
diff --git a/mert/example/memcheck_extractor.sh b/mert/example/memcheck_extractor.sh
new file mode 100755
index 000000000..ac613da25
--- /dev/null
+++ b/mert/example/memcheck_extractor.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+extractor=../extractor
+
+VALGRIND_OPTS="--leak-check=full --leak-resolution=high \
+--show-reachable=yes --track-origins=yes"
+VALGRIND="valgrind $VALGRIND_OPTS"
+
+for datafile in NBEST REF.0 REF.1 REF.2; do
+    if ! [ -f $datafile ]; then
+        echo "Error: $datafile does not exist."
+        exit 1
+    fi
+done
+
+$VALGRIND $extractor --nbest NBEST --reference REF.0,REF.1,REF.2 \
+    --ffile FEATSTAT --scfile SCORESTAT --sctype BLEU
diff --git a/mert/example/memcheck_mert.sh b/mert/example/memcheck_mert.sh
new file mode 100755
index 000000000..d0b7655be
--- /dev/null
+++ b/mert/example/memcheck_mert.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+VALGRIND_OPTS="--leak-check=full --leak-resolution=high \
+--show-reachable=yes --track-origins=yes"
+VALGRIND="valgrind $VALGRIND_OPTS"
+
+size=15
+extractor=../extractor
+mert=../mert
+
+# Run extractor first if its output files are missing.
+for f in SCORESTAT FEATSTAT; do
+    if ! [ -f "$f" ]; then
+        echo "Error: $f does not exist. Now we are running extractor."
+        $extractor --nbest NBEST --reference REF.0,REF.1,REF.2 \
+            --ffile FEATSTAT --scfile SCORESTAT --sctype BLEU
+        break
+    fi
+done
+
+$VALGRIND $mert -r 1234 --ifile init.opt --scfile SCORESTAT \
+    --ffile FEATSTAT -d $size --verbose 4 -n 5
diff --git a/mert/extractor.cpp b/mert/extractor.cpp
index ef17b9d16..37c46d2dd 100644
--- a/mert/extractor.cpp
+++ b/mert/extractor.cpp
@@ -78,92 +78,74 @@ int main(int argc, char** argv)
   int c;
   while ((c=getopt_long (argc,argv, "s:r:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
     switch(c) {
-    case 's':
-      scorerType = string(optarg);
-      break;
-    case 'c':
-      scorerConfig = string(optarg);
-      break;
-    case 'r':
-      referenceFile = string(optarg);
-      break;
-    case 'b':
-      binmode = true;
-      break;
-    case 'n':
-      nbestFile = string(optarg);
-      break;
-    case 'S':
-      scoreDataFile = string(optarg);
-      break;
-    case 'F':
-      featureDataFile = string(optarg);
-      break;
-    case 'E':
-      prevFeatureDataFile = string(optarg);
-      break;
-    case 'R':
-      prevScoreDataFile = string(optarg);
-      break;
-    case 'v':
-      verbosity = atoi(optarg);
-      break;
-    default:
-      usage();
+      case 's':
+        scorerType = string(optarg);
+        break;
+      case 'c':
+        scorerConfig = string(optarg);
+        break;
+      case 'r':
+        referenceFile = string(optarg);
+        break;
+      case 'b':
+        binmode = true;
+        break;
+      case 'n':
+        nbestFile = string(optarg);
+        break;
+      case 'S':
+        scoreDataFile = string(optarg);
+        break;
+      case 'F':
+        featureDataFile = string(optarg);
+        break;
+      case 'E':
+        prevFeatureDataFile = string(optarg);
+        break;
+      case 'R':
+        prevScoreDataFile = string(optarg);
+        break;
+      case 'v':
+        verbosity = atoi(optarg);
+        break;
+      default:
+        usage();
     }
   }
   try {
-
-//check whether score statistics file is specified
+    // check whether score statistics file is specified
     if (scoreDataFile.length() == 0) {
       throw runtime_error("Error: output score statistics file is not specified");
     }
 
-//check wheter feature file is specified
+    // check whether feature file is specified
     if (featureDataFile.length() == 0) {
       throw runtime_error("Error: output feature file is not specified");
     }
 
-//check whether reference file is specified when nbest is specified
+    // check whether reference file is specified when nbest is specified
     if ((nbestFile.length() > 0 && referenceFile.length() == 0)) {
       throw runtime_error("Error: reference file is not specified; you can not score the nbest");
     }
 
-
     vector<string> nbestFiles;
     if (nbestFile.length() > 0) {
-      std::string substring;
-      while (!nbestFile.empty()) {
-        getNextPound(nbestFile, substring, ",");
-        nbestFiles.push_back(substring);
-      }
+      Tokenize(nbestFile.c_str(), ',', &nbestFiles);
     }
 
     vector<string> referenceFiles;
     if (referenceFile.length() > 0) {
-      std::string substring;
-      while (!referenceFile.empty()) {
-        getNextPound(referenceFile, substring, ",");
-        referenceFiles.push_back(substring);
-      }
+      Tokenize(referenceFile.c_str(), ',', &referenceFiles);
     }
 
     vector<string> prevScoreDataFiles;
     if (prevScoreDataFile.length() > 0) {
-      std::string substring;
-      while (!prevScoreDataFile.empty()) {
-        getNextPound(prevScoreDataFile, substring, ",");
-        prevScoreDataFiles.push_back(substring);
-      }
+      Tokenize(prevScoreDataFile.c_str(), ',', &prevScoreDataFiles);
     }
 
     vector<string> prevFeatureDataFiles;
     if (prevFeatureDataFile.length() > 0) {
-      std::string substring;
-      while (!prevFeatureDataFile.empty()) {
-        getNextPound(prevFeatureDataFile, substring, ",");
-        prevFeatureDataFiles.push_back(substring);
-      }
+      Tokenize(prevFeatureDataFile.c_str(), ',', &prevFeatureDataFiles);
     }
 
     if (prevScoreDataFiles.size() != prevFeatureDataFiles.size()) {
@@ -175,10 +157,10 @@ int main(int argc, char** argv)
     else cerr << "Binary write mode is NOT selected" << endl;
 
     TRACE_ERR("Scorer type: " << scorerType << endl);
-    ScorerFactory sfactory;
-    Scorer* scorer = sfactory.getScorer(scorerType,scorerConfig);
+    // ScorerFactory sfactory;
+    Scorer* scorer = ScorerFactory::getScorer(scorerType,scorerConfig);
 
-    //load references
+    // load references
     if (referenceFiles.size() > 0)
       scorer->setReferenceFiles(referenceFiles);
 
@@ -186,14 +168,14 @@ int main(int argc, char** argv)
 
     Data data(*scorer);
 
-    //load old data
+    // load old data
     for (size_t i=0; i < prevScoreDataFiles.size(); i++) {
       data.load(prevFeatureDataFiles.at(i), prevScoreDataFiles.at(i));
     }
 
     PrintUserTime("Previous data loaded");
 
-    //computing score statistics of each nbest file
+    // computing score statistics of each nbest file
     for (size_t i=0; i < nbestFiles.size(); i++) {
       data.loadnbest(nbestFiles.at(i));
     }
@@ -207,9 +189,10 @@ int main(int argc, char** argv)
 
     data.save(featureDataFile, scoreDataFile, binmode);
     PrintUserTime("Stopping...");
-    /*
-     timer.stop("Stopping...");
-    		*/
+
+    // timer.stop("Stopping...");
+
+    delete scorer;
 
     return EXIT_SUCCESS;
   } catch (const exception& e) {
diff --git a/mert/gzfilebuf.h b/mert/gzfilebuf.h
index 48b0ab036..f9cd8a446 100644
--- a/mert/gzfilebuf.h
+++ b/mert/gzfilebuf.h
@@ -3,11 +3,12 @@
 
 #include <streambuf>
 #include <zlib.h>
+#include <cstring>
 
 class gzfilebuf : public std::streambuf
 {
 public:
-  gzfilebuf(const char *filename) {
+  explicit gzfilebuf(const char *filename) {
     _gzf = gzopen(filename, "rb");
     setg (_buff+sizeof(int),     // beginning of putback area
           _buff+sizeof(int),     // read position
@@ -32,7 +33,7 @@ protected:
     throw;
   }
 
-  //read one character
+  // read one character
   virtual int_type underflow () {
     // is read position before end of _buff?
     if (gptr() < egptr()) {
@@ -81,4 +82,4 @@ private:
   char _buff[_buffsize];
 };
 
-#endif
+#endif  // _GZFILEBUF_H_
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 91fa71e7b..98f2e986c 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -1,5 +1,5 @@
 /**
-\description The is the main for the new version of the mert algorithm developed during the 2nd MT marathon
+ * \description This is the main for the new version of the mert algorithm developed during the 2nd MT marathon
 */
 
 #include <limits>
@@ -59,7 +59,7 @@ static struct option long_options[] = {
   {"nrandom",1,0,'m'},
   {"rseed",required_argument,0,'r'},
   {"optimize",1,0,'o'},
-	{"pro",required_argument,0,'p'},
+  {"pro",required_argument,0,'p'},
   {"type",1,0,'t'},
   {"sctype",1,0,'s'},
   {"scconfig",required_argument,0,'c'},
@@ -78,40 +78,50 @@ static struct option long_options[] = {
 int option_index;
 
 /**
-  * Runs an optimisation, or a random restart.
-**/
-class OptimizationTask : public Moses::Task 
+ * Runs an optimisation, or a random restart.
+ */
+class OptimizationTask : public Moses::Task
 {
-  public:
-    OptimizationTask(Optimizer* optimizer, const Point& point) :
-       m_optimizer(optimizer), m_point(point) {}
+ public:
+  OptimizationTask(Optimizer* optimizer, const Point& point) :
+      m_optimizer(optimizer), m_point(point) {}
 
-    bool DeleteAfterExecution() {
-      return false;
-    }
+  ~OptimizationTask() {}
 
-    void Run() {
-      m_score = m_optimizer->Run(m_point);
+  void resetOptimizer() {
+    if (m_optimizer) {
+      delete m_optimizer;
+      m_optimizer = NULL;
     }
+  }
 
-    statscore_t getScore() const {
-      return m_score;
-    }
+  bool DeleteAfterExecution() {
+    return false;
+  }
 
-    const Point& getPoint() const  {
-      return m_point;
-    }
+  void Run() {
+    m_score = m_optimizer->Run(m_point);
+  }
 
-  private:
-    Optimizer* m_optimizer;
-    Point m_point;
-    statscore_t m_score;
+  statscore_t getScore() const {
+    return m_score;
+  }
+
+  const Point& getPoint() const  {
+    return m_point;
+  }
+
+ private:
+  // Do not allow the user to instantiate without arguments.
+  OptimizationTask() {}
+
+  Optimizer* m_optimizer;
+  Point m_point;
+  statscore_t m_score;
 };
 
 int main (int argc, char **argv)
 {
-
-
   ResetUserTime();
 
   /*
@@ -142,64 +152,64 @@ int main (int argc, char **argv)
   vector<vector<parameter_t> > start_list;
   vector<parameter_t> min;
   vector<parameter_t> max;
-  //note: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result!
+  // NOTE: these min and max values are bounds for the starting points of the algorithm, not strict bounds on the result!
 
   while ((c=getopt_long (argc, argv, "o:r:d:n:m:t:s:S:F:v:p:", long_options, &option_index)) != -1) {
     switch (c) {
-    case 'o':
-      tooptimizestr = string(optarg);
-      break;
-    case 'd':
-      pdim = strtol(optarg, NULL, 10);
-      break;
-    case 'n':
-      ntry=strtol(optarg, NULL, 10);
-      break;
-    case 'm':
-      nrandom=strtol(optarg, NULL, 10);
-      break;
-    case 'r':
-      seed=strtol(optarg, NULL, 10);
-      hasSeed = true;
-      break;
-    case 't':
-      type=string(optarg);
-      break;
-    case's':
-      scorertype=string(optarg);
-      break;
-    case 'c':
-      scorerconfig = string(optarg);
-      break;
-    case 'S':
-      scorerfile=string(optarg);
-      break;
-    case 'F':
-      featurefile=string(optarg);
-      break;
-    case 'i':
-      initfile=string(optarg);
-      break;
-    case 'v':
-      setverboselevel(strtol(optarg,NULL,10));
-      break;
+      case 'o':
+        tooptimizestr = string(optarg);
+        break;
+      case 'd':
+        pdim = strtol(optarg, NULL, 10);
+        break;
+      case 'n':
+        ntry=strtol(optarg, NULL, 10);
+        break;
+      case 'm':
+        nrandom=strtol(optarg, NULL, 10);
+        break;
+      case 'r':
+        seed=strtol(optarg, NULL, 10);
+        hasSeed = true;
+        break;
+      case 't':
+        type=string(optarg);
+        break;
+      case's':
+        scorertype=string(optarg);
+        break;
+      case 'c':
+        scorerconfig = string(optarg);
+        break;
+      case 'S':
+        scorerfile=string(optarg);
+        break;
+      case 'F':
+        featurefile=string(optarg);
+        break;
+      case 'i':
+        initfile=string(optarg);
+        break;
+      case 'v':
+        setverboselevel(strtol(optarg,NULL,10));
+        break;
 #ifdef WITH_THREADS
-    case 'T':
-      threads = strtol(optarg, NULL, 10);
-      if (threads < 1) threads = 1;
-      break;
+      case 'T':
+        threads = strtol(optarg, NULL, 10);
+        if (threads < 1) threads = 1;
+        break;
 #endif
-    case 'a':
-      shard_count = strtof(optarg,NULL);
-      break;
-    case 'b':
-      shard_size = strtof(optarg,NULL);
-      break;
-    case 'h':
-      usage(0);
-      break;
-    default:
-      usage(1);
+      case 'a':
+        shard_count = strtof(optarg,NULL);
+        break;
+      case 'b':
+        shard_size = strtof(optarg,NULL);
+        break;
+      case 'h':
+        usage(0);
+        break;
+      default:
+        usage(1);
     }
   }
   if (pdim < 0)
@@ -265,29 +275,20 @@ int main (int argc, char **argv)
 
   vector<string> ScoreDataFiles;
   if (scorerfile.length() > 0) {
-    std::string substring;
-    while (!scorerfile.empty()) {
-      getNextPound(scorerfile, substring, ",");
-      ScoreDataFiles.push_back(substring);
-    }
+    Tokenize(scorerfile.c_str(), ',', &ScoreDataFiles);
   }
 
   vector<string> FeatureDataFiles;
   if (featurefile.length() > 0) {
-    std::string substring;
-    while (!featurefile.empty()) {
-      getNextPound(featurefile, substring, ",");
-      FeatureDataFiles.push_back(substring);
-    }
+    Tokenize(featurefile.c_str(), ',', &FeatureDataFiles);
   }
 
   if (ScoreDataFiles.size() != FeatureDataFiles.size()) {
     throw runtime_error("Error: there is a different number of previous score and feature files");
   }
 
-  //it make sense to know what parameter set were used to generate the nbest
-  ScorerFactory SF;
-  Scorer *TheScorer=SF.getScorer(scorertype,scorerconfig);
+  // it make sense to know what parameter set were used to generate the nbest
+  Scorer *TheScorer = ScorerFactory::getScorer(scorertype,scorerconfig);
 
   //load data
   Data D(*TheScorer);
@@ -307,8 +308,7 @@ int main (int argc, char **argv)
   if (tooptimizestr.length() > 0) {
     cerr << "Weights to optimize: " << tooptimizestr << endl;
 
-    //parse string to get weights to optimize
-    //and set them as active
+    // Parse string to get weights to optimize, and set them as active
     std::string substring;
     int index;
     while (!tooptimizestr.empty()) {
@@ -365,17 +365,17 @@ int main (int argc, char **argv)
     allTasks.resize(shard_count);
   }
 
-  //launch tasks
+  // launch tasks
   for (size_t i = 0 ; i < allTasks.size(); ++i) {
     Data& data = D;
     if (shard_count) data = shards[i]; //use the sharded data if it exists
     vector<OptimizationTask*>& tasks = allTasks[i];
-    Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start_list[0],type,nrandom);
+    Optimizer *O = OptimizerFactory::BuildOptimizer(pdim,tooptimize,start_list[0],type,nrandom);
     O->SetScorer(data.getScorer());
     O->SetFData(data.getFeatureData());
     //A task for each start point
     for (size_t j = 0; j < startingPoints.size(); ++j) {
-      OptimizationTask* task = new OptimizationTask(O,startingPoints[j]);
+      OptimizationTask* task = new OptimizationTask(O, startingPoints[j]);
       tasks.push_back(task);
 #ifdef WITH_THREADS
       pool.Submit(task);
@@ -385,8 +385,7 @@ int main (int argc, char **argv)
     }
   }
 
-    
-  //wait for all threads to finish
+  // wait for all threads to finish
 #ifdef WITH_THREADS
   pool.Stop(true);
 #endif
@@ -394,7 +393,7 @@ int main (int argc, char **argv)
   statscore_t total = 0;
   Point totalP;
 
-  //collect results
+  // collect results
   for (size_t i = 0; i < allTasks.size(); ++i) {
     statscore_t best=0, mean=0, var=0;
     Point bestP;
@@ -406,7 +405,6 @@ int main (int argc, char **argv)
         bestP = allTasks[i][j]->getPoint();
         best = score;
       }
-      delete allTasks[i][j];
     }
 
     mean/=(float)ntry;
@@ -436,5 +434,13 @@ int main (int argc, char **argv)
   ofstream res("weights.txt");
   res<<finalP<<endl;
 
+  for (size_t i = 0; i < allTasks.size(); ++i) {
+    allTasks[i][0]->resetOptimizer();
+    for (size_t j = 0; j < allTasks[i].size(); ++j) {
+      delete allTasks[i][j];
+    }
+  }
+
+  delete TheScorer;
   PrintUserTime("Stopping...");
 }
author	Barry Haddow <barry.haddow@gmail.com>	2011-11-17 19:58:56 +0400
committer	Barry Haddow <barry.haddow@gmail.com>	2011-11-17 19:58:56 +0400
commit	ba5633c7b57b8843f9befc836db78989f3d15652 (patch)
tree	4342cdcbf03d6d0ae6e42a519ebb1a67ebee8746 /mert
parent	71c777f01df14c9523e20f118c90396d51678c02 (diff)
parent	492fe6d97af84065fa0b4e0afa9b77a26761a11f (diff)