
github.com/moses-smt/mosesdecoder.git
author    Eva Hasler <evahasler@gmail.com>  2012-04-01 22:59:00 +0400
committer Eva Hasler <evahasler@gmail.com>  2012-04-01 22:59:00 +0400
commit    1b1459283cec71dbd3bbb9cc200f204cdf33101b (patch)
tree      d22f56b5da0dc10ca87a476d5fb335333fd7a30b /moses
parent    0a537a9f58b27d43098095531820db0c7a6c0a2a (diff)
Implement EvaluateChart for BleuScoreFeature, add mira parameter --avg-ref-length (either shortest or average reference length), clean up CalculateBleu(..), clean up Decoder.cpp
Diffstat (limited to 'moses')
-rw-r--r--  moses/src/BleuScoreFeature.cpp | 415
-rw-r--r--  moses/src/BleuScoreFeature.h   |  63
-rw-r--r--  moses/src/StaticData.cpp       |   6
3 files changed, 340 insertions, 144 deletions
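
The new --avg-ref-length mira parameter mentioned in the commit message switches m_cur_ref_length between the shortest and the average reference length, which is also why the header change below widens m_cur_ref_length from size_t to float. A minimal standalone sketch of the two computations added in SetCurrShortestRefLength/SetCurrAvgRefLength (the map type and free functions are stand-ins, not the Moses RefCounts API):

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

// sent_id -> lengths of all references for that sentence; a stand-in for the
// first component of the patch's m_refs[sent_id] pair.
typedef std::map<size_t, std::vector<size_t> > RefLengths;

// Mirrors SetCurrShortestRefLength: pick the shortest reference length.
float ShortestRefLength(const RefLengths& refs, size_t sent_id) {
  const std::vector<size_t>& lengths = refs.find(sent_id)->second;
  size_t shortest = lengths[0];
  for (size_t i = 1; i < lengths.size(); ++i)
    if (lengths[i] < shortest) shortest = lengths[i];
  return (float)shortest;
}

// Mirrors SetCurrAvgRefLength: average over all references; the result is
// fractional in general, hence the size_t -> float change in the header.
float AvgRefLength(const RefLengths& refs, size_t sent_id) {
  const std::vector<size_t>& lengths = refs.find(sent_id)->second;
  size_t sum = 0;
  for (size_t i = 0; i < lengths.size(); ++i) sum += lengths[i];
  return (float)sum / lengths.size();
}

int main() {
  RefLengths refs;
  refs[0].push_back(12); refs[0].push_back(9); refs[0].push_back(15);
  std::cout << ShortestRefLength(refs, 0) << " "    // 9
            << AvgRefLength(refs, 0) << std::endl;  // 12
  return 0;
}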
diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp
index 3dd401eed..536ffa337 100644
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@@ -9,7 +9,7 @@ namespace Moses {
size_t BleuScoreState::bleu_order = 4;
BleuScoreState::BleuScoreState(): m_words(1),
-// m_source_length(0),
+ m_source_length(0),
m_target_length(0),
m_scaled_ref_length(0),
m_ngram_counts(bleu_order),
@@ -24,10 +24,10 @@ int BleuScoreState::Compare(const FFState& o) const
const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
-// if (m_source_length < other.m_source_length)
-// return -1;
-// if (m_source_length > other.m_source_length)
-// return 1;
+ if (m_source_length < other.m_source_length)
+ return -1;
+ if (m_source_length > other.m_source_length)
+ return 1;
if (m_target_length < other.m_target_length)
return -1;
@@ -62,7 +62,8 @@ std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) {
}
void BleuScoreState::print(std::ostream& out) const {
- out << "ref=" << m_scaled_ref_length //<< ";source=" << m_source_length
+ out << "ref=" << m_scaled_ref_length
+ << ";source=" << m_source_length
<< ";target=" << m_target_length << ";counts=";
for (size_t i = 0; i < bleu_order; ++i) {
out << m_ngram_matches[i] << "/" << m_ngram_counts[i] << ",";
@@ -71,6 +72,14 @@ void BleuScoreState::print(std::ostream& out) const {
}
+void BleuScoreState::AddNgramCountAndMatches(std::vector< size_t >& counts,
+ std::vector< size_t >& matches) {
+ for (size_t order = 0; order < BleuScoreState::bleu_order; ++order) {
+ m_ngram_counts[order] += counts[order];
+ m_ngram_matches[order] += matches[order];
+ }
+}
+
void BleuScoreFeature::PrintHistory(std::ostream& out) const {
out << "source length history=" << m_source_length_history << endl;
out << "target length history=" << m_target_length_history << endl;
@@ -97,17 +106,19 @@ void BleuScoreFeature::SetBleuParameters(bool sentenceBleu, bool scaleByInputLen
m_useSourceLengthHistory = useSourceLengthHistory;
}
+// Incoming references (refs) are stored as refs[file_id][[sent_id][reference]]
+// This data structure: m_refs[sent_id][[vector<length>][ngrams]]
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
m_refs.clear();
FactorCollection& fc = FactorCollection::Instance();
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
- for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
- const string& ref = refs[file_id][ref_id];
+ for (size_t sent_id = 0; sent_id < refs[file_id].size(); sent_id++) {
+ const string& ref = refs[file_id][sent_id];
vector<string> refTokens = Tokenize(ref);
if (file_id == 0)
- m_refs[ref_id] = pair<vector<size_t>,NGrams>();
- pair<vector<size_t>,NGrams>& ref_pair = m_refs[ref_id];
+ m_refs[sent_id] = pair<vector<size_t>,NGrams>();
+ pair<vector<size_t>,NGrams>& ref_pair = m_refs[sent_id];
(ref_pair.first).push_back(refTokens.size());
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
@@ -124,24 +135,54 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
}
}
+// cerr << "Number of ref files: " << refs.size() << endl;
// for (size_t i = 0; i < m_refs.size(); ++i) {
-// cerr << "ref id " << i << ", number of entries: " << (m_refs[i].first).size() << endl;
+// cerr << "Sent id " << i << ", number of references: " << (m_refs[i].first).size() << endl;
// }
}
-void BleuScoreFeature::SetCurrentSourceLength(size_t source_length) {
+void BleuScoreFeature::SetCurrSourceLength(size_t source_length) {
m_cur_source_length = source_length;
}
-void BleuScoreFeature::SetCurrentShortestReference(size_t ref_id) {
+// m_refs[sent_id][[vector<length>][ngrams]]
+void BleuScoreFeature::SetCurrShortestRefLength(size_t sent_id) {
+ // look for shortest reference
+ int shortestRef = -1;
+ for (size_t i = 0; i < (m_refs[sent_id].first).size(); ++i) {
+ if (shortestRef == -1 || (m_refs[sent_id].first)[i] < shortestRef)
+ shortestRef = (m_refs[sent_id].first)[i];
+ }
+ m_cur_ref_length = shortestRef;
+// cerr << "Set shortest cur_ref_length: " << m_cur_ref_length << endl;
+}
+
+void BleuScoreFeature::SetCurrAvgRefLength(size_t sent_id) {
+ // compute average reference length
+ size_t sum = 0;
+ size_t numberRefs = (m_refs[sent_id].first).size();
+ for (size_t i = 0; i < numberRefs; ++i) {
+ sum += (m_refs[sent_id].first)[i];
+ }
+ m_cur_ref_length = (float)sum/numberRefs;
+// cerr << "Set average cur_ref_length: " << m_cur_ref_length << endl;
+}
+
+void BleuScoreFeature::SetCurrReferenceNgrams(size_t sent_id) {
+ m_cur_ref_ngrams = m_refs[sent_id].second;
+}
+
+size_t BleuScoreFeature::GetShortestRefIndex(size_t ref_id) {
// look for shortest reference
int shortestRef = -1;
+ size_t shortestRefIndex = 0;
for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
- if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef)
+ if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef) {
shortestRef = (m_refs[ref_id].first)[i];
+ shortestRefIndex = i;
+ }
}
- m_cur_ref_length = shortestRef;
- m_cur_ref_ngrams = m_refs[ref_id].second;
+ return shortestRefIndex;
}
/*
@@ -183,7 +224,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
// set current source and reference information for each oracle in the batch
size_t cur_source_length = sourceLengths[ref_id];
size_t hypo_length = hypos[ref_id].size();
- size_t cur_ref_length = GetClosestReferenceLength(ref_ids[ref_id], hypo_length);
+ size_t cur_ref_length = GetClosestRefLength(ref_ids[ref_id], hypo_length);
NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second;
cerr << "reference length: " << cur_ref_length << endl;
@@ -229,18 +270,17 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
}
}*/
-size_t BleuScoreFeature::GetClosestReferenceLength(size_t ref_id, int hypoLength) {
+size_t BleuScoreFeature::GetClosestRefLength(size_t ref_id, int hypoLength) {
// look for closest reference
int currentDist = -1;
- int closestRef = -1;
+ int closestRefLength = -1;
for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
- if (closestRef == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) {
- closestRef = (m_refs[ref_id].first)[i];
+ if (closestRefLength == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) {
+ closestRefLength = (m_refs[ref_id].first)[i];
currentDist = abs(hypoLength - (int)(m_refs[ref_id].first)[i]);
}
}
- size_t cur_ref_length = closestRef;
- return cur_ref_length;
+ return (size_t)closestRefLength;
}
/*
@@ -274,6 +314,65 @@ void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase,
}
}
+// score ngrams of words that have been added before the previous word span
+void BleuScoreFeature::GetNgramMatchCounts_prefix(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t new_start_indices,
+ size_t last_end_index) const
+{
+ NGrams::const_iterator ref_ngram_counts_iter;
+ size_t ngram_start_idx, ngram_end_idx;
+
+ // Chiang et al (2008) use unclipped counts of ngram matches
+ for (size_t start_idx = 0; start_idx < new_start_indices; start_idx++) {
+ for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
+ ngram_start_idx = start_idx;
+ ngram_end_idx = start_idx + order;
+ if (order > ngram_end_idx) break;
+ if (ngram_end_idx > last_end_index) break;
+
+ Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
+ ret_counts[order]++;
+
+ ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
+ if (ref_ngram_counts_iter != ref_ngram_counts.end())
+ ret_matches[order]++;
+ }
+ }
+}
+
+// score ngrams around the overlap of two previously scored phrases
+void BleuScoreFeature::GetNgramMatchCounts_overlap(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t overlap_index) const
+{
+ NGrams::const_iterator ref_ngram_counts_iter;
+ size_t ngram_start_idx, ngram_end_idx;
+
+ // Chiang et al (2008) use unclipped counts of ngram matches
+ for (size_t end_idx = overlap_index; end_idx < phrase.GetSize(); end_idx++) {
+ if (end_idx >= (overlap_index+BleuScoreState::bleu_order-1)) break;
+ for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
+ if (order > end_idx) break;
+
+ ngram_end_idx = end_idx;
+ ngram_start_idx = end_idx - order;
+ if (ngram_start_idx >= overlap_index) continue; // only score ngrams that span the overlap point
+
+ Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
+ ret_counts[order]++;
+
+ ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
+ if (ref_ngram_counts_iter != ref_ngram_counts.end())
+ ret_matches[order]++;
+ }
+ }
+}
+
void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
const NGrams& ref_ngram_counts,
std::vector< size_t >& ret_counts,
@@ -352,7 +451,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
m_cur_ref_ngrams,
new_state->m_ngram_counts,
new_state->m_ngram_matches,
- new_state->m_words.GetSize());
+ new_state->m_words.GetSize()); // number of words in previous states
// Update state variables
ctx_end_idx = new_words.GetSize()-1;
@@ -372,79 +471,138 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
// we need a scaled reference length to compare the current target phrase to the corresponding reference phrase
new_state->m_scaled_ref_length = m_cur_ref_length *
- ((float)coverageVector.GetNumWordsCovered() / coverageVector.GetSize());
+ ((float)coverageVector.GetNumWordsCovered()/coverageVector.GetSize());
// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
-
return new_state;
}
-/*FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
- ScoreComponentCollection accumulator ) const {
+FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
+ ScoreComponentCollection* accumulator ) const {
NGrams::const_iterator reference_ngrams_iter;
- ChartHypothesis prev_hypo = cur_hypo.GetPrevHypo(0);
- prev_hypo.GetFFState()
+ const Phrase& curr_target_phrase = static_cast<const Phrase&>(cur_hypo.GetCurrTargetPhrase());
+// cerr << "\nCur target phrase: " << cur_hypo.GetTargetLHS() << " --> " << curr_target_phrase << endl;
- const BleuScoreState& ps = dynamic_cast<const BleuScoreState&>(*prev_state);
- BleuScoreState* new_state = new BleuScoreState(ps);
- //cerr << "PS: " << ps << endl;
+ // Calculate old bleu of previous states
+ float old_bleu = 0, new_bleu = 0;
+ size_t num_old_words = 0, num_words_first_prev = 0;
+ size_t num_words_added_left = 0, num_words_added_right = 0;
- float old_bleu, new_bleu;
- size_t num_new_words, ctx_start_idx, ctx_end_idx;
-
- // Calculate old bleu;
- old_bleu = CalculateBleu(new_state);
-
- // Get context and append new words.
- num_new_words = cur_hypo.GetCurrTargetLength();
- if (num_new_words == 0) {
- return new_state;
+ // double-check cases where more than two previous hypotheses were combined
+ assert(cur_hypo.GetPrevHypos().size() <= 2);
+ BleuScoreState* new_state;
+ if (cur_hypo.GetPrevHypos().size() == 0)
+ new_state = new BleuScoreState();
+ else {
+ const FFState* prev_state_zero = cur_hypo.GetPrevHypo(0)->GetFFState(featureID);
+ const BleuScoreState& ps_zero = dynamic_cast<const BleuScoreState&>(*prev_state_zero);
+ new_state = new BleuScoreState(ps_zero);
+ num_words_first_prev = ps_zero.m_target_length;
+
+ for (size_t i = 0; i < cur_hypo.GetPrevHypos().size(); ++i) {
+ const FFState* prev_state = cur_hypo.GetPrevHypo(i)->GetFFState(featureID);
+ const BleuScoreState* ps = dynamic_cast<const BleuScoreState*>(prev_state);
+ BleuScoreState* ps_nonConst = const_cast<BleuScoreState*>(ps);
+// cerr << "prev phrase: " << cur_hypo.GetPrevHypo(i)->GetOutputPhrase()
+// << " ( " << cur_hypo.GetPrevHypo(i)->GetTargetLHS() << ")" << endl;
+
+ old_bleu += CalculateBleu(ps_nonConst);
+ num_old_words += ps->m_target_length;
+
+ if (i > 0)
+ // add ngram matches from other previous states
+ new_state->AddNgramCountAndMatches(ps_nonConst->m_ngram_counts, ps_nonConst->m_ngram_matches);
+ }
}
- Phrase new_words = ps.m_words;
- new_words.Append(cur_hypo.GetCurrTargetPhrase());
- //cerr << "NW: " << new_words << endl;
+ // set new context
+ Phrase new_words = cur_hypo.GetOutputPhrase();
+ new_state->m_words = new_words;
+ size_t num_curr_words = new_words.GetSize();
// get ngram matches for new words
- GetNgramMatchCounts(new_words,
- m_cur_ref_ngrams,
- new_state->m_ngram_counts,
- new_state->m_ngram_matches,
- new_state->m_words.GetSize());
+ if (num_old_words == 0) {
+// cerr << "compute right ngram context" << endl;
+ GetNgramMatchCounts(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ 0);
+ }
+ else if (new_words.GetSize() == num_old_words) {
+ // two hypotheses were glued together, compute new ngrams on the basis of first hypothesis
+ num_words_added_right = num_curr_words - num_words_first_prev;
+ // score around overlap point
+// cerr << "compute overlap ngram context (" << (num_words_first_prev) << ")" << endl;
+ GetNgramMatchCounts_overlap(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_first_prev);
+ }
+ else if (num_old_words + curr_target_phrase.GetNumTerminals() == num_curr_words) {
+ assert(curr_target_phrase.GetSize() == curr_target_phrase.GetNumTerminals()+1);
+ // previous hypothesis + rule with 1 non-terminal were combined (NT substituted by Ts)
+ for (size_t i = 0; i < curr_target_phrase.GetSize(); ++i)
+ if (curr_target_phrase.GetWord(i).IsNonTerminal()) {
+ num_words_added_left = i;
+ num_words_added_right = curr_target_phrase.GetSize() - (i+1);
+ break;
+ }
+
+ // left context
+// cerr << "compute left ngram context" << endl;
+ if (num_words_added_left > 0)
+ GetNgramMatchCounts_prefix(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_added_left,
+ num_curr_words - num_words_added_right - 1);
+
+ // right context
+// cerr << "compute right ngram context" << endl;
+ if (num_words_added_right > 0)
+ GetNgramMatchCounts(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_added_left + num_old_words);
+ }
+ else {
+ cerr << "undefined state.. " << endl;
+ exit(1);
+ }
// Update state variables
- ctx_end_idx = new_words.GetSize()-1;
+ size_t ctx_start_idx = 0;
+ size_t ctx_end_idx = new_words.GetSize()-1;
size_t bleu_context_length = BleuScoreState::bleu_order -1;
if (ctx_end_idx > bleu_context_length) {
ctx_start_idx = ctx_end_idx - bleu_context_length;
- } else {
- ctx_start_idx = 0;
}
- WordsBitmap coverageVector = cur_hypo.GetWordsBitmap();
- new_state->m_source_length = coverageVector.GetNumWordsCovered();
-
- new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
- ctx_end_idx));
- new_state->m_target_length += cur_hypo.GetCurrTargetLength();
+ new_state->m_source_length = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
+ new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx, ctx_end_idx));
+ new_state->m_target_length = cur_hypo.GetOutputPhrase().GetSize();
- // we need a scaled reference length to compare the current target phrase to the corresponding reference phrase
- new_state->m_scaled_ref_length = m_cur_ref_length *
- ((float)coverageVector.GetNumWordsCovered() / coverageVector.GetSize());
+ // we need a scaled reference length to compare the current target phrase to the corresponding
+ // reference phrase
+ size_t cur_source_length = m_cur_source_length;
+ new_state->m_scaled_ref_length = m_cur_ref_length * (float(new_state->m_source_length)/cur_source_length);
// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
-
return new_state;
-}*/
+}
/*
* Calculate Bleu score for a partial hypothesis given as state.
@@ -457,62 +615,61 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
float smooth = 1;
float smoothed_count, smoothed_matches;
- // revised: compute Bleu in the context of the pseudo-document
- // B(b) = size_of_oracle_doc * (Bleu(B_hist + b) - Bleu(B_hist))
-
- // Calculate geometric mean of modified ngram precisions
- // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
- // = BP * 4th root(PRODUCT_1_4 p_n)
- for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
- if (state->m_ngram_counts[i]) {
- smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
- smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
-
- switch (m_smoothing_scheme) {
- case PLUS_ONE:
- default:
- if (i > 0) {
- // smoothing for all n > 1
- smoothed_matches += 1;
- smoothed_count += 1;
- }
- break;
- case LIGHT:
- if (i > 0) {
- // smoothing for all n > 1
- smoothed_matches += 0.1;
- smoothed_count += 0.1;
- }
- break;
- case PAPINENI:
- if (state->m_ngram_matches[i] == 0) {
- smooth *= 0.5;
- smoothed_matches += smooth;
- smoothed_count += smooth;
- }
- break;
+ if (m_sentence_bleu) {
+ // Calculate geometric mean of modified ngram precisions
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
+ // = BP * 4th root(PRODUCT_1_4 p_n)
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
+ if (state->m_ngram_counts[i]) {
+ smoothed_matches = state->m_ngram_matches[i];
+ smoothed_count = state->m_ngram_counts[i];
+
+ switch (m_smoothing_scheme) {
+ case PLUS_ONE:
+ default:
+ if (i > 0) {
+ // smoothing for all n > 1
+ smoothed_matches += 1;
+ smoothed_count += 1;
+ }
+ break;
+ case LIGHT:
+ if (i > 0) {
+ // smoothing for all n > 1
+ smoothed_matches += 0.1;
+ smoothed_count += 0.1;
+ }
+ break;
+ case PAPINENI:
+ if (state->m_ngram_matches[i] == 0) {
+ smooth *= 0.5;
+ smoothed_matches += smooth;
+ smoothed_count += smooth;
+ }
+ break;
+ }
+
+ precision *= smoothed_matches / smoothed_count;
}
-
- precision *= smoothed_matches / smoothed_count;
}
- }
- // take geometric mean
- precision = pow(precision, (float)1/4);
-
- // Apply brevity penalty if applicable.
- // BP = 1 if c > r
- // BP = e^(1- r/c)) if c <= r
- // where
- // c: length of the candidate translation
- // r: effective reference length (sum of best match lengths for each candidate sentence)
- if (state->m_target_length < (state->m_scaled_ref_length * m_relax_BP)) {
- float smoothed_target_length = m_target_length_history + state->m_target_length;
- float smoothed_ref_length = m_ref_length_history + (state->m_scaled_ref_length * m_relax_BP);
- precision *= exp(1 - (smoothed_ref_length/ smoothed_target_length));
- }
+ // take geometric mean
+ precision = pow(precision, (float)1/4);
+
+ // Apply brevity penalty if applicable.
+ // BP = 1 if c > r
+ // BP = e^(1- r/c)) if c <= r
+ // where
+ // c: length of the candidate translation
+ // r: effective reference length (sum of best match lengths for each candidate sentence)
+ if (state->m_target_length < (state->m_scaled_ref_length * m_relax_BP)) {
+ float smoothed_target_length = m_target_length_history + state->m_target_length;
+ float smoothed_ref_length = m_ref_length_history + (state->m_scaled_ref_length * m_relax_BP);
+ precision *= exp(1 - (smoothed_ref_length/ smoothed_target_length));
+ }
+
+// cerr << "precision: " << precision << endl;
- if (m_sentence_bleu) {
// Approximate bleu score as of Chiang/Resnik is scaled by the size of the input:
// B(e;f,{r_k}) = (O_f + |f|) * BLEU(O + c(e;{r_k}))
// where c(e;) is a vector of reference length, ngram counts and ngram matches
@@ -532,6 +689,28 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
return precision * m_scale_by_x;
}
else {
+ // Revised history BLEU: compute Bleu in the context of the pseudo-document
+ // B(b) = size_of_oracle_doc * (Bleu(B_hist + b) - Bleu(B_hist))
+ // Calculate geometric mean of modified ngram precisions
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
+ // = BP * 4th root(PRODUCT_1_4 p_n)
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
+ if (state->m_ngram_counts[i]) {
+ smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
+ smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
+ precision *= smoothed_matches / smoothed_count;
+ }
+ }
+
+ // take geometric mean
+ precision = pow(precision, (float)1/4);
+
+ // BP
+ if (m_target_length_history + state->m_target_length < m_ref_length_history + state->m_scaled_ref_length)
+ precision *= exp(1 - (m_ref_length_history + state->m_scaled_ref_length/m_target_length_history + state->m_target_length));
+
+// cerr << "precision: " << precision << endl;
+
// **BLEU score of pseudo-document**
float precision_pd = 1.0;
if (m_target_length_history > 0) {
@@ -549,6 +728,8 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
precision_pd = 0;
// **end BLEU of pseudo-document**
+// cerr << "precision pd: " << precision_pd << endl;
+
float sentence_impact;
if (m_target_length_history > 0)
sentence_impact = m_target_length_history * (precision - precision_pd);
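
To summarize the CalculateBleu() restructuring above: in the m_sentence_bleu branch the ngram precisions now come from the state alone, without the pseudo-document history counts, smoothed per the selected scheme. A self-contained sketch of that branch with the default PLUS_ONE scheme, following the geometric-mean and brevity-penalty formulas quoted in the patch comments (the function name and standalone setup are illustrative, not Moses code):

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// BLEU = BP * 4th root(PRODUCT_1_4 p_n), with plus-one smoothing for n > 1,
// mirroring the m_sentence_bleu branch of the patched CalculateBleu().
float SentenceBleu(const std::vector<size_t>& counts,
                   const std::vector<size_t>& matches,
                   float target_length, float scaled_ref_length) {
  const size_t bleu_order = 4;
  float precision = 1.0f;
  for (size_t i = 0; i < bleu_order; ++i) {
    if (counts[i] == 0) continue;        // patch skips orders with no counts
    float m = matches[i], c = counts[i];
    if (i > 0) { m += 1; c += 1; }       // PLUS_ONE smoothing for all n > 1
    precision *= m / c;
  }
  precision = std::pow(precision, 1.0f / 4);  // geometric mean
  // BP = e^(1 - r/c) if the candidate is shorter than the scaled reference
  // length, 1 otherwise (relax_BP omitted here for brevity).
  if (target_length < scaled_ref_length)
    precision *= std::exp(1 - scaled_ref_length / target_length);
  return precision;
}

int main() {
  std::vector<size_t> counts(4), matches(4);
  counts[0] = 5; counts[1] = 4; counts[2] = 3; counts[3] = 2;   // hypothetical
  matches[0] = 4; matches[1] = 2; matches[2] = 1; matches[3] = 0;
  std::cout << SentenceBleu(counts, matches, 5.0f, 6.0f) << std::endl;
  return 0;
}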
diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h
index 732e09e4b..988db6a27 100644
--- a/moses/src/BleuScoreFeature.h
+++ b/moses/src/BleuScoreFeature.h
@@ -36,8 +36,11 @@ private:
std::vector< size_t > m_ngram_counts;
std::vector< size_t > m_ngram_matches;
+
+ void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches);
};
+
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
@@ -55,14 +58,14 @@ public:
m_match_history(BleuScoreState::bleu_order),
m_source_length_history(0),
m_target_length_history(0),
- m_useSourceLengthHistory(0),
m_ref_length_history(0),
m_scale_by_input_length(true),
m_scale_by_avg_input_length(false),
m_scale_by_inverse_length(false),
m_scale_by_avg_inverse_length(false),
m_scale_by_x(1),
- m_historySmoothing(0.7),
+ m_historySmoothing(0.9),
+ m_useSourceLengthHistory(0),
m_smoothing_scheme(PLUS_ONE),
m_relax_BP(1) {}
@@ -78,44 +81,50 @@ public:
void PrintHistory(std::ostream& out) const;
void LoadReferences(const std::vector< std::vector< std::string > > &);
- void SetCurrentSourceLength(size_t);
- void SetCurrentShortestReference(size_t);
+ void SetCurrSourceLength(size_t);
+ void SetCurrShortestRefLength(size_t);
+ void SetCurrAvgRefLength(size_t sent_id);
+ void SetAvgInputLength (float l) { m_avg_input_length = l; }
+ void SetCurrReferenceNgrams(size_t sent_id);
+ size_t GetShortestRefIndex(size_t ref_id);
+ size_t GetClosestRefLength(size_t ref_id, int hypoLength);
void UpdateHistory(const std::vector< const Word* >&);
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
- void PrintReferenceLength(const std::vector<size_t>& ref_ids);
- size_t GetReferenceLength(size_t ref_id);
- size_t GetClosestReferenceLength(size_t ref_id, int hypoLength);
+ void PrintRefLength(const std::vector<size_t>& ref_ids);
void SetBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP,
bool useSourceLengthHistory);
- void SetAvgInputLength (float l) { m_avg_input_length = l; }
+
void GetNgramMatchCounts(Phrase&,
const NGrams&,
std::vector< size_t >&,
std::vector< size_t >&,
size_t skip = 0) const;
+ void GetNgramMatchCounts_prefix(Phrase&,
+ const NGrams&,
+ std::vector< size_t >&,
+ std::vector< size_t >&,
+ size_t new_start_indices,
+ size_t last_end_index) const;
+ void GetNgramMatchCounts_overlap(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t overlap_index) const;
void GetClippedNgramMatchesAndCounts(Phrase&,
- const NGrams&,
- std::vector< size_t >&,
- std::vector< size_t >&,
- size_t skip = 0) const;
+ const NGrams&,
+ std::vector< size_t >&,
+ std::vector< size_t >&,
+ size_t skip = 0) const;
FFState* Evaluate( const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const
- {
- /* Not implemented */
- CHECK(0);
- }
-
-
- FFState* EvaluateChart( const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection) const;
+ FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
+
float CalculateBleu(BleuScoreState*) const;
const FFState* EmptyHypothesisState(const InputType&) const;
@@ -124,6 +133,8 @@ public:
float GetAverageInputLength() { return m_avg_input_length; }
private:
+ bool m_sentence_bleu;
+
// counts for pseudo-document
std::vector< float > m_count_history;
std::vector< float > m_match_history;
@@ -134,9 +145,7 @@ private:
size_t m_cur_source_length;
RefCounts m_refs;
NGrams m_cur_ref_ngrams;
- size_t m_cur_ref_length;
-
- bool m_sentence_bleu;
+ float m_cur_ref_length;
// scale BLEU score by history of input length
bool m_scale_by_input_length;
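
Among the declarations above, GetNgramMatchCounts_overlap is the piece that makes EvaluateChart work when two previously scored hypotheses are glued together: it credits only ngrams that start left of the seam and end at or beyond it, so counts already accumulated in each child state are not double-counted. A small sketch of the same index walk with plain string tokens in place of the Moses Phrase type (reference matching omitted):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Enumerate the ngrams (up to order 4) that cross the seam between two glued
// hypotheses, mirroring the loop bounds of GetNgramMatchCounts_overlap.
void SpanningNgrams(const std::vector<std::string>& words, size_t overlap_index) {
  const size_t bleu_order = 4;
  for (size_t end_idx = overlap_index; end_idx < words.size(); ++end_idx) {
    if (end_idx >= overlap_index + bleu_order - 1) break;  // 3-word window
    for (size_t order = 0; order < bleu_order; ++order) {
      if (order > end_idx) break;
      size_t start_idx = end_idx - order;
      if (start_idx >= overlap_index) continue;  // must start left of the seam
      for (size_t i = start_idx; i <= end_idx; ++i)
        std::cout << words[i] << (i < end_idx ? " " : "\n");
    }
  }
}

int main() {
  std::vector<std::string> words;  // "a b | c d e": seam before index 2
  words.push_back("a"); words.push_back("b");
  words.push_back("c"); words.push_back("d"); words.push_back("e");
  SpanningNgrams(words, 2);  // prints only ngrams containing both "b" and "c"
  return 0;
}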
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 33160d841..780ac66a2 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1525,6 +1525,7 @@ bool StaticData::LoadReferences()
{
vector<string> bleuWeightStr = m_parameter->GetParam("weight-bl");
vector<string> referenceFiles = m_parameter->GetParam("references");
+ cerr << "Loading reference file " << referenceFiles[0] << endl;
if ((!referenceFiles.size() && bleuWeightStr.size()) || (referenceFiles.size() && !bleuWeightStr.size())) {
UserMessage::Add("You cannot use the bleu feature without references, and vice-versa");
return false;
@@ -1551,6 +1552,11 @@ bool StaticData::LoadReferences()
}
string line;
while (getline(in,line)) {
+ if (GetSearchAlgorithm() == ChartDecoding) {
+ stringstream tmp;
+ tmp << "<s> " << line << " </s>";
+ line = tmp.str();
+ }
references[i].push_back(line);
}
if (i > 0) {
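
Finally, the StaticData::LoadReferences() hunk above wraps every reference line in sentence boundary markers when the chart decoder is active, so reference ngrams can match the <s> and </s> tokens present in chart decoder output. A trivial standalone sketch of the transformation (the free function is illustrative):

#include <iostream>
#include <sstream>
#include <string>

// Mirrors the new ChartDecoding branch in StaticData::LoadReferences():
// surround a raw reference line with sentence boundary markers.
std::string WrapForChartDecoding(const std::string& line) {
  std::stringstream tmp;
  tmp << "<s> " << line << " </s>";
  return tmp.str();
}

int main() {
  std::cout << WrapForChartDecoding("the cat sat") << std::endl;
  // prints: <s> the cat sat </s>
  return 0;
}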