Batch k-best MIRA is written and integrated into mert-moses.pl

All regression tests pass, and kbmira seems to work fine on a Hansard French->English task. The HypPackEnumerator class may be of interest to pro.cpp and future optimizers, as it abstracts away a lot of the boilerplate involved in enumerating multiple k-best lists. MiraWeightVector is not really MIRA-specific; it is just a weight vector that enables efficient averaging, so it could be useful to a perceptron as well. The same goes for MiraFeatureVector. Interaction with sparse features is written but untested.
author: Colin Cherry <colin.a.cherry@gmail.com> 2012-05-29 21:38:57 +0400
committer: Colin Cherry <colin.a.cherry@gmail.com> 2012-05-29 21:38:57 +0400
commit: fd577d7a65cab923b9102d61873a032654d573a1 (patch)
tree: 24dddd8e7a412f29f2f55e8ecad0b6055f8530c0 /mert/BleuScorer.cpp
parent: 6d1165654caf8edc995a41a4c6c9666e65ebce96 (diff)
1 files changed, 41 insertions, 0 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 22ce81798..a8a0256f2 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -232,3 +232,44 @@ float sentenceLevelBleuPlusOne(const vector<float>& stats) {
   }
   return exp(logbleu);
 }
+
+float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
+{
+  // Sum sent and background element-wise; BLEU is then computed on the combined stats.
+  std::vector<float> stats;
+  CHECK(sent.size()==bg.size());
+  CHECK(sent.size()==kBleuNgramOrder*2+1);  // kBleuNgramOrder (numerator, denominator) pairs + 1 trailing entry
+  for(size_t i=0;i<sent.size();i++) 
+    stats.push_back(sent[i]+bg[i]);
+  // NOTE(review): zero entries are not guarded below; log(0) or a zero stats[1] may yield non-finite values.
+  // Calculate BLEU: mean over n-gram orders of log(stats[2j] / stats[2j+1]).
+  float logbleu = 0.0;
+  for (int j = 0; j < kBleuNgramOrder; j++) {
+    logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);  // presumably matched vs. total (j+1)-gram counts - confirm
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];  // 1 - reference length / stats[1] (presumably hypothesis length)
+  // Apply the penalty only when brevity < 0, i.e. when the ratio above exceeds 1.
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+
+  // Exponentiate and scale by reference length (as per Chiang et al 08); the length used is the sent+bg sum.
+  return exp(logbleu) * stats[kBleuNgramOrder*2];
+}
+
+float unsmoothedBleu(const std::vector<float>& stats) {  // BLEU computed directly from stats; nothing is added to the entries first
+  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);  // kBleuNgramOrder (numerator, denominator) pairs + 1 trailing entry
+  // Mean over n-gram orders of log(stats[2j] / stats[2j+1]); zero entries are not guarded (log(0)).
+  float logbleu = 0.0;
+  for (int j = 0; j < kBleuNgramOrder; j++) {
+    logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);  // presumably matched vs. total (j+1)-gram counts - confirm
+  }
+  logbleu /= kBleuNgramOrder;
+  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];  // trailing entry is presumably the reference length - confirm
+  // Apply the penalty only when brevity < 0, i.e. when the ratio above exceeds 1.
+  if (brevity < 0.0) {
+    logbleu += brevity;
+  }
+  return exp(logbleu);  // unlike sentenceLevelBackgroundBleu, the result is not scaled by the trailing entry
+}
author	Colin Cherry <colin.a.cherry@gmail.com>	2012-05-29 21:38:57 +0400
committer	Colin Cherry <colin.a.cherry@gmail.com>	2012-05-29 21:38:57 +0400
commit	fd577d7a65cab923b9102d61873a032654d573a1 (patch)
tree	24dddd8e7a412f29f2f55e8ecad0b6055f8530c0 /mert/BleuScorer.cpp
parent	6d1165654caf8edc995a41a4c6c9666e65ebce96 (diff)