Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  mert/BleuScorer.cpp             28
-rw-r--r--  mert/BleuScorer.h                4
-rw-r--r--  mert/BleuScorerTest.cpp          6
-rw-r--r--  mert/pro.cpp                     7
-rw-r--r--  mert/sentence-bleu.cpp           2
-rwxr-xr-x  scripts/training/mert-moses.pl   8
6 files changed, 27 insertions, 28 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 1adbd0276..26723d36b 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -237,15 +237,19 @@ void BleuScorer::DumpCounts(ostream* os,
*os << endl;
}
-float sentenceLevelBleuPlusOne(const vector<float>& stats) {
+float smoothedSentenceBleu
+ (const std::vector<float>& stats, float smoothing, bool smoothBP) {
+
CHECK(stats.size() == kBleuNgramOrder * 2 + 1);
float logbleu = 0.0;
for (int j = 0; j < kBleuNgramOrder; j++) {
- logbleu += log(stats[2 * j] + 1.0) - log(stats[2 * j + 1] + 1.0);
+ logbleu += log(stats[2 * j] + smoothing) - log(stats[2 * j + 1] + smoothing);
}
logbleu /= kBleuNgramOrder;
- const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
+ const float reflength = stats[(kBleuNgramOrder * 2)] +
+ (smoothBP ? smoothing : 0.0f);
+ const float brevity = 1.0 - reflength / stats[1];
if (brevity < 0.0) {
logbleu += brevity;
@@ -334,26 +338,12 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
vector<float> bleuScores;
for (size_t i=0; i < hypotheses.size(); ++i) {
pair<size_t,size_t> translation = hypotheses[i];
- float bleu = sentenceLevelBleuPlusOne(scoreDataIters[translation.first]->operator[](translation.second));
+ float bleu = smoothedSentenceBleu(scoreDataIters[translation.first]->operator[](translation.second));
bleuScores.push_back(bleu);
}
return bleuScores;
}
-float BleuScorer::sentenceLevelBleuPlusOne(const vector<float>& stats) {
- float logbleu = 0.0;
- const unsigned int bleu_order = 4;
- for (unsigned int j=0; j<bleu_order; j++) {
- //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
- logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
- }
- logbleu /= bleu_order;
- float brevity = 1.0 - (float)stats[(bleu_order*2)]/stats[1];
- if (brevity < 0.0) {
- logbleu += brevity;
- }
- //cerr << brevity << " -> " << exp(logbleu) << endl;
- return exp(logbleu);
-}
+
}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 6b8b8d046..af889b13e 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -34,7 +34,6 @@ public:
~BleuScorer();
static std::vector<float> ScoreNbestList(const std::string& scoreFile, const std::string& featureFile);
- static float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
@@ -74,7 +73,8 @@ private:
/** Computes sentence-level BLEU+1 score.
* This function is used in PRO.
*/
-float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
+float smoothedSentenceBleu
+ (const std::vector<float>& stats, float smoothing=1.0, bool smoothBP=false);
/** Computes sentence-level BLEU score given a background corpus.
* This function is used in batch MIRA.
diff --git a/mert/BleuScorerTest.cpp b/mert/BleuScorerTest.cpp
index 0d721422c..136f134eb 100644
--- a/mert/BleuScorerTest.cpp
+++ b/mert/BleuScorerTest.cpp
@@ -244,7 +244,7 @@ BOOST_AUTO_TEST_CASE(calculate_actual_score) {
// reference-length
stats[8] = 7;
- BOOST_CHECK(IsAlmostEqual(0.5115f, scorer.calculateScore(stats)));
+ BOOST_CHECK_CLOSE(0.5115f, scorer.calculateScore(stats), 0.01);
}
BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
@@ -270,5 +270,7 @@ BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
// reference-length
stats[8] = 7.0;
- BOOST_CHECK(IsAlmostEqual(0.5985f, sentenceLevelBleuPlusOne(stats)));
+ BOOST_CHECK_CLOSE(0.5985f, smoothedSentenceBleu(stats), 0.01);
+ BOOST_CHECK_CLOSE(0.5624f, smoothedSentenceBleu(stats, 0.5), 0.01 );
+ BOOST_CHECK_CLOSE(0.5067f, smoothedSentenceBleu(stats, 1.0, true), 0.01);
}
diff --git a/mert/pro.cpp b/mert/pro.cpp
index 8055b19bd..3777d0470 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -105,6 +105,8 @@ int main(int argc, char** argv)
const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
const unsigned int n_samples = 50; // Xi, in Hopkins & May
const float min_diff = 0.05;
+ bool smoothBP = false;
+ const float bleuSmoothing = 1.0f;
po::options_description desc("Allowed options");
desc.add_options()
@@ -113,6 +115,7 @@ int main(int argc, char** argv)
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
("output-file,o", po::value<string>(&outputFile), "Output file")
+ ("smooth-brevity-penalty,b", po::value(&smoothBP)->zero_tokens()->default_value(false), "Smooth the brevity penalty, as in Nakov et al. (Coling 2012)")
;
po::options_description cmdline_options;
@@ -201,11 +204,11 @@ int main(int argc, char** argv)
for(size_t i=0; i<n_candidates; i++) {
size_t rand1 = rand() % n_translations;
pair<size_t,size_t> translation1 = hypotheses[rand1];
- float bleu1 = sentenceLevelBleuPlusOne(scoreDataIters[translation1.first]->operator[](translation1.second));
+ float bleu1 = smoothedSentenceBleu(scoreDataIters[translation1.first]->operator[](translation1.second), bleuSmoothing, smoothBP);
size_t rand2 = rand() % n_translations;
pair<size_t,size_t> translation2 = hypotheses[rand2];
- float bleu2 = sentenceLevelBleuPlusOne(scoreDataIters[translation2.first]->operator[](translation2.second));
+ float bleu2 = smoothedSentenceBleu(scoreDataIters[translation2.first]->operator[](translation2.second), bleuSmoothing, smoothBP);
/*
cerr << "t(" << translation1.first << "," << translation1.second << ") = " << bleu1 <<
diff --git a/mert/sentence-bleu.cpp b/mert/sentence-bleu.cpp
index 085c81efd..17a9737f2 100644
--- a/mert/sentence-bleu.cpp
+++ b/mert/sentence-bleu.cpp
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
vector<ScoreStats>::const_iterator sentIt;
for (sentIt = entries.begin(); sentIt != entries.end(); sentIt++) {
vector<float> stats(sentIt->getArray(), sentIt->getArray() + sentIt->size());
- cout << BleuScorer::sentenceLevelBleuPlusOne(stats) << "\n";
+ cout << smoothedSentenceBleu(stats) << "\n";
}
return 0;
}
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index d1cee4582..0ac3b414f 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -148,6 +148,7 @@ my $mertdir = undef; # path to new mert directory
my $mertargs = undef; # args to pass through to mert & extractor
my $mertmertargs = undef; # args to pass through to mert only
my $extractorargs = undef; # args to pass through to extractor only
+my $proargs = undef; # args to pass through to pro only
# Args to pass through to batch mira only. This flags is useful to
# change MIRA's hyperparameters such as regularization parameter C,
@@ -197,6 +198,7 @@ GetOptions(
"mertdir=s" => \$mertdir,
"mertargs=s" => \$mertargs,
"extractorargs=s" => \$extractorargs,
+ "proargs=s" => \$proargs,
"mertmertargs=s" => \$mertmertargs,
"rootdir=s" => \$SCRIPTS_ROOTDIR,
"filtercmd=s" => \$filtercmd, # allow to override the default location
@@ -398,6 +400,8 @@ $mert_extract_args .= " $extractorargs";
$mertmertargs = "" if !defined $mertmertargs;
+$proargs = "" unless $proargs;
+
my $mert_mert_args = "$mertargs $mertmertargs";
$mert_mert_args =~ s/\-+(binary|b)\b//;
$mert_mert_args .= " $scconfig";
@@ -788,11 +792,11 @@ while (1) {
my %sparse_weights; # sparse features
my $pro_optimizer_cmd = "$pro_optimizer $megam_default_options run$run.pro.data";
if ($___PAIRWISE_RANKED_OPTIMIZER) { # pro optimization
- $cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer_cmd";
+ $cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer_cmd";
&submit_or_exec($cmd, $mert_outfile, $mert_logfile);
} elsif ($___PRO_STARTING_POINT) { # First, run pro, then mert
# run pro...
- my $pro_cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer_cmd";
+ my $pro_cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer_cmd";
&submit_or_exec($pro_cmd, "run$run.pro.out", "run$run.pro.err");
# ... get results ...
($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%sparse_weights);