
github.com/moses-smt/mosesdecoder.git
author    Eva Hasler <evahasler@gmail.com>  2012-04-01 22:59:00 +0400
committer Eva Hasler <evahasler@gmail.com>  2012-04-01 22:59:00 +0400
commit    1b1459283cec71dbd3bbb9cc200f204cdf33101b (patch)
tree      d22f56b5da0dc10ca87a476d5fb335333fd7a30b /moses
parent    0a537a9f58b27d43098095531820db0c7a6c0a2a (diff)
Implement EvaluateChart for BleuScoreFeature, add mira parameter --avg-ref-length (either shortest or average reference length), clean up CalculateBleu(..), clean up Decoder.cpp
Diffstat (limited to 'moses')
-rw-r--r--  moses/src/BleuScoreFeature.cpp | 415
-rw-r--r--  moses/src/BleuScoreFeature.h   |  63
-rw-r--r--  moses/src/StaticData.cpp       |   6
3 files changed, 340 insertions, 144 deletions
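
The new --avg-ref-length mira parameter mentioned in the commit message switches m_cur_ref_length between the shortest and the average reference length, which is also why the header change below widens m_cur_ref_length from size_t to float. A minimal standalone sketch of the two computations added in SetCurrShortestRefLength/SetCurrAvgRefLength (the map type and free functions are stand-ins, not the Moses RefCounts API):

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

// sent_id -> lengths of all references for that sentence; a stand-in for the
// first component of the patch's m_refs[sent_id] pair.
typedef std::map<size_t, std::vector<size_t> > RefLengths;

// Mirrors SetCurrShortestRefLength: pick the shortest reference length.
float ShortestRefLength(const RefLengths& refs, size_t sent_id) {
  const std::vector<size_t>& lengths = refs.find(sent_id)->second;
  size_t shortest = lengths[0];
  for (size_t i = 1; i < lengths.size(); ++i)
    if (lengths[i] < shortest) shortest = lengths[i];
  return (float)shortest;
}

// Mirrors SetCurrAvgRefLength: average over all references; the result is
// fractional in general, hence the size_t -> float change in the header.
float AvgRefLength(const RefLengths& refs, size_t sent_id) {
  const std::vector<size_t>& lengths = refs.find(sent_id)->second;
  size_t sum = 0;
  for (size_t i = 0; i < lengths.size(); ++i) sum += lengths[i];
  return (float)sum / lengths.size();
}

int main() {
  RefLengths refs;
  refs[0].push_back(12); refs[0].push_back(9); refs[0].push_back(15);
  std::cout << ShortestRefLength(refs, 0) << " "    // 9
            << AvgRefLength(refs, 0) << std::endl;  // 12
  return 0;
}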
diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp
index 3dd401eed..536ffa337 100644
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@@ -9,7 +9,7 @@ namespace Moses {
size_t BleuScoreState::bleu_order = 4;
BleuScoreState::BleuScoreState(): m_words(1),
-// m_source_length(0),
+ m_source_length(0),
m_target_length(0),
m_scaled_ref_length(0),
m_ngram_counts(bleu_order),
@@ -24,10 +24,10 @@ int BleuScoreState::Compare(const FFState& o) const
const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
-// if (m_source_length < other.m_source_length)
-// return -1;
-// if (m_source_length > other.m_source_length)
-// return 1;
+ if (m_source_length < other.m_source_length)
+ return -1;
+ if (m_source_length > other.m_source_length)
+ return 1;
if (m_target_length < other.m_target_length)
return -1;
@@ -62,7 +62,8 @@ std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) {
}
void BleuScoreState::print(std::ostream& out) const {
- out << "ref=" << m_scaled_ref_length //<< ";source=" << m_source_length
+ out << "ref=" << m_scaled_ref_length
+ << ";source=" << m_source_length
<< ";target=" << m_target_length << ";counts=";
for (size_t i = 0; i < bleu_order; ++i) {
out << m_ngram_matches[i] << "/" << m_ngram_counts[i] << ",";
@@ -71,6 +72,14 @@ void BleuScoreState::print(std::ostream& out) const {
}
+void BleuScoreState::AddNgramCountAndMatches(std::vector< size_t >& counts,
+ std::vector< size_t >& matches) {
+ for (size_t order = 0; order < BleuScoreState::bleu_order; ++order) {
+ m_ngram_counts[order] += counts[order];
+ m_ngram_matches[order] += matches[order];
+ }
+}
+
void BleuScoreFeature::PrintHistory(std::ostream& out) const {
out << "source length history=" << m_source_length_history << endl;
out << "target length history=" << m_target_length_history << endl;
@@ -97,17 +106,19 @@ void BleuScoreFeature::SetBleuParameters(bool sentenceBleu, bool scaleByInputLen
m_useSourceLengthHistory = useSourceLengthHistory;
}
+// Incoming references (refs) are stored as refs[file_id][[sent_id][reference]]
+// This data structure: m_refs[sent_id][[vector<length>][ngrams]]
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
m_refs.clear();
FactorCollection& fc = FactorCollection::Instance();
for (size_t file_id = 0; file_id < refs.size(); file_id++) {
- for (size_t ref_id = 0; ref_id < refs[file_id].size(); ref_id++) {
- const string& ref = refs[file_id][ref_id];
+ for (size_t sent_id = 0; sent_id < refs[file_id].size(); sent_id++) {
+ const string& ref = refs[file_id][sent_id];
vector<string> refTokens = Tokenize(ref);
if (file_id == 0)
- m_refs[ref_id] = pair<vector<size_t>,NGrams>();
- pair<vector<size_t>,NGrams>& ref_pair = m_refs[ref_id];
+ m_refs[sent_id] = pair<vector<size_t>,NGrams>();
+ pair<vector<size_t>,NGrams>& ref_pair = m_refs[sent_id];
(ref_pair.first).push_back(refTokens.size());
for (size_t order = 1; order <= BleuScoreState::bleu_order; order++) {
for (size_t end_idx = order; end_idx <= refTokens.size(); end_idx++) {
@@ -124,24 +135,54 @@ void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::strin
}
}
+// cerr << "Number of ref files: " << refs.size() << endl;
// for (size_t i = 0; i < m_refs.size(); ++i) {
-// cerr << "ref id " << i << ", number of entries: " << (m_refs[i].first).size() << endl;
+// cerr << "Sent id " << i << ", number of references: " << (m_refs[i].first).size() << endl;
// }
}
-void BleuScoreFeature::SetCurrentSourceLength(size_t source_length) {
+void BleuScoreFeature::SetCurrSourceLength(size_t source_length) {
m_cur_source_length = source_length;
}
-void BleuScoreFeature::SetCurrentShortestReference(size_t ref_id) {
+// m_refs[sent_id][[vector<length>][ngrams]]
+void BleuScoreFeature::SetCurrShortestRefLength(size_t sent_id) {
+ // look for shortest reference
+ int shortestRef = -1;
+ for (size_t i = 0; i < (m_refs[sent_id].first).size(); ++i) {
+ if (shortestRef == -1 || (m_refs[sent_id].first)[i] < shortestRef)
+ shortestRef = (m_refs[sent_id].first)[i];
+ }
+ m_cur_ref_length = shortestRef;
+// cerr << "Set shortest cur_ref_length: " << m_cur_ref_length << endl;
+}
+
+void BleuScoreFeature::SetCurrAvgRefLength(size_t sent_id) {
+ // compute average reference length
+ size_t sum = 0;
+ size_t numberRefs = (m_refs[sent_id].first).size();
+ for (size_t i = 0; i < numberRefs; ++i) {
+ sum += (m_refs[sent_id].first)[i];
+ }
+ m_cur_ref_length = (float)sum/numberRefs;
+// cerr << "Set average cur_ref_length: " << m_cur_ref_length << endl;
+}
+
+void BleuScoreFeature::SetCurrReferenceNgrams(size_t sent_id) {
+ m_cur_ref_ngrams = m_refs[sent_id].second;
+}
+
+size_t BleuScoreFeature::GetShortestRefIndex(size_t ref_id) {
// look for shortest reference
int shortestRef = -1;
+ size_t shortestRefIndex = 0;
for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
- if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef)
+ if (shortestRef == -1 || (m_refs[ref_id].first)[i] < shortestRef) {
shortestRef = (m_refs[ref_id].first)[i];
+ shortestRefIndex = i;
+ }
}
- m_cur_ref_length = shortestRef;
- m_cur_ref_ngrams = m_refs[ref_id].second;
+ return shortestRefIndex;
}
/*
@@ -183,7 +224,7 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
// set current source and reference information for each oracle in the batch
size_t cur_source_length = sourceLengths[ref_id];
size_t hypo_length = hypos[ref_id].size();
- size_t cur_ref_length = GetClosestReferenceLength(ref_ids[ref_id], hypo_length);
+ size_t cur_ref_length = GetClosestRefLength(ref_ids[ref_id], hypo_length);
NGrams cur_ref_ngrams = m_refs[ref_ids[ref_id]].second;
cerr << "reference length: " << cur_ref_length << endl;
@@ -229,18 +270,17 @@ void BleuScoreFeature::UpdateHistory(const vector< vector< const Word* > >& hypo
}
}*/
-size_t BleuScoreFeature::GetClosestReferenceLength(size_t ref_id, int hypoLength) {
+size_t BleuScoreFeature::GetClosestRefLength(size_t ref_id, int hypoLength) {
// look for closest reference
int currentDist = -1;
- int closestRef = -1;
+ int closestRefLength = -1;
for (size_t i = 0; i < (m_refs[ref_id].first).size(); ++i) {
- if (closestRef == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) {
- closestRef = (m_refs[ref_id].first)[i];
+ if (closestRefLength == -1 || abs(hypoLength - (int)(m_refs[ref_id].first)[i]) < currentDist) {
+ closestRefLength = (m_refs[ref_id].first)[i];
currentDist = abs(hypoLength - (int)(m_refs[ref_id].first)[i]);
}
}
- size_t cur_ref_length = closestRef;
- return cur_ref_length;
+ return (size_t)closestRefLength;
}
/*
@@ -274,6 +314,65 @@ void BleuScoreFeature::GetNgramMatchCounts(Phrase& phrase,
}
}
+// score ngrams of words that have been added before the previous word span
+void BleuScoreFeature::GetNgramMatchCounts_prefix(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t new_start_indices,
+ size_t last_end_index) const
+{
+ NGrams::const_iterator ref_ngram_counts_iter;
+ size_t ngram_start_idx, ngram_end_idx;
+
+ // Chiang et al (2008) use unclipped counts of ngram matches
+ for (size_t start_idx = 0; start_idx < new_start_indices; start_idx++) {
+ for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
+ ngram_start_idx = start_idx;
+ ngram_end_idx = start_idx + order;
+ if (order > ngram_end_idx) break;
+ if (ngram_end_idx > last_end_index) break;
+
+ Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
+ ret_counts[order]++;
+
+ ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
+ if (ref_ngram_counts_iter != ref_ngram_counts.end())
+ ret_matches[order]++;
+ }
+ }
+}
+
+// score ngrams around the overlap of two previously scored phrases
+void BleuScoreFeature::GetNgramMatchCounts_overlap(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t overlap_index) const
+{
+ NGrams::const_iterator ref_ngram_counts_iter;
+ size_t ngram_start_idx, ngram_end_idx;
+
+ // Chiang et al (2008) use unclipped counts of ngram matches
+ for (size_t end_idx = overlap_index; end_idx < phrase.GetSize(); end_idx++) {
+ if (end_idx >= (overlap_index+BleuScoreState::bleu_order-1)) break;
+ for (size_t order = 0; order < BleuScoreState::bleu_order; order++) {
+ if (order > end_idx) break;
+
+ ngram_end_idx = end_idx;
+ ngram_start_idx = end_idx - order;
+ if (ngram_start_idx >= overlap_index) continue; // only score ngrams that span the overlap point
+
+ Phrase ngram = phrase.GetSubString(WordsRange(ngram_start_idx, ngram_end_idx), 0);
+ ret_counts[order]++;
+
+ ref_ngram_counts_iter = ref_ngram_counts.find(ngram);
+ if (ref_ngram_counts_iter != ref_ngram_counts.end())
+ ret_matches[order]++;
+ }
+ }
+}
+
void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
const NGrams& ref_ngram_counts,
std::vector< size_t >& ret_counts,
@@ -352,7 +451,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
m_cur_ref_ngrams,
new_state->m_ngram_counts,
new_state->m_ngram_matches,
- new_state->m_words.GetSize());
+ new_state->m_words.GetSize()); // number of words in previous states
// Update state variables
ctx_end_idx = new_words.GetSize()-1;
@@ -372,79 +471,138 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
// we need a scaled reference length to compare the current target phrase to the corresponding reference phrase
new_state->m_scaled_ref_length = m_cur_ref_length *
- ((float)coverageVector.GetNumWordsCovered() / coverageVector.GetSize());
+ ((float)coverageVector.GetNumWordsCovered()/coverageVector.GetSize());
// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
-
return new_state;
}
-/*FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
- ScoreComponentCollection accumulator ) const {
+FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
+ ScoreComponentCollection* accumulator ) const {
NGrams::const_iterator reference_ngrams_iter;
- ChartHypothesis prev_hypo = cur_hypo.GetPrevHypo(0);
- prev_hypo.GetFFState()
+ const Phrase& curr_target_phrase = static_cast<const Phrase&>(cur_hypo.GetCurrTargetPhrase());
+// cerr << "\nCur target phrase: " << cur_hypo.GetTargetLHS() << " --> " << curr_target_phrase << endl;
- const BleuScoreState& ps = dynamic_cast<const BleuScoreState&>(*prev_state);
- BleuScoreState* new_state = new BleuScoreState(ps);
- //cerr << "PS: " << ps << endl;
+ // Calculate old bleu of previous states
+ float old_bleu = 0, new_bleu = 0;
+ size_t num_old_words = 0, num_words_first_prev = 0;
+ size_t num_words_added_left = 0, num_words_added_right = 0;
- float old_bleu, new_bleu;
- size_t num_new_words, ctx_start_idx, ctx_end_idx;
-
- // Calculate old bleu;
- old_bleu = CalculateBleu(new_state);
-
- // Get context and append new words.
- num_new_words = cur_hypo.GetCurrTargetLength();
- if (num_new_words == 0) {
- return new_state;
+ // double-check cases where more than two previous hypotheses were combined
+ assert(cur_hypo.GetPrevHypos().size() <= 2);
+ BleuScoreState* new_state;
+ if (cur_hypo.GetPrevHypos().size() == 0)
+ new_state = new BleuScoreState();
+ else {
+ const FFState* prev_state_zero = cur_hypo.GetPrevHypo(0)->GetFFState(featureID);
+ const BleuScoreState& ps_zero = dynamic_cast<const BleuScoreState&>(*prev_state_zero);
+ new_state = new BleuScoreState(ps_zero);
+ num_words_first_prev = ps_zero.m_target_length;
+
+ for (size_t i = 0; i < cur_hypo.GetPrevHypos().size(); ++i) {
+ const FFState* prev_state = cur_hypo.GetPrevHypo(i)->GetFFState(featureID);
+ const BleuScoreState* ps = dynamic_cast<const BleuScoreState*>(prev_state);
+ BleuScoreState* ps_nonConst = const_cast<BleuScoreState*>(ps);
+// cerr << "prev phrase: " << cur_hypo.GetPrevHypo(i)->GetOutputPhrase()
+// << " ( " << cur_hypo.GetPrevHypo(i)->GetTargetLHS() << ")" << endl;
+
+ old_bleu += CalculateBleu(ps_nonConst);
+ num_old_words += ps->m_target_length;
+
+ if (i > 0)
+ // add ngram matches from other previous states
+ new_state->AddNgramCountAndMatches(ps_nonConst->m_ngram_counts, ps_nonConst->m_ngram_matches);
+ }
}
- Phrase new_words = ps.m_words;
- new_words.Append(cur_hypo.GetCurrTargetPhrase());
- //cerr << "NW: " << new_words << endl;
+ // set new context
+ Phrase new_words = cur_hypo.GetOutputPhrase();
+ new_state->m_words = new_words;
+ size_t num_curr_words = new_words.GetSize();
// get ngram matches for new words
- GetNgramMatchCounts(new_words,
- m_cur_ref_ngrams,
- new_state->m_ngram_counts,
- new_state->m_ngram_matches,
- new_state->m_words.GetSize());
+ if (num_old_words == 0) {
+// cerr << "compute right ngram context" << endl;
+ GetNgramMatchCounts(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ 0);
+ }
+ else if (new_words.GetSize() == num_old_words) {
+ // two hypotheses were glued together, compute new ngrams on the basis of first hypothesis
+ num_words_added_right = num_curr_words - num_words_first_prev;
+ // score around overlap point
+// cerr << "compute overlap ngram context (" << (num_words_first_prev) << ")" << endl;
+ GetNgramMatchCounts_overlap(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_first_prev);
+ }
+ else if (num_old_words + curr_target_phrase.GetNumTerminals() == num_curr_words) {
+ assert(curr_target_phrase.GetSize() == curr_target_phrase.GetNumTerminals()+1);
+ // previous hypothesis + rule with 1 non-terminal were combined (NT substituted by Ts)
+ for (size_t i = 0; i < curr_target_phrase.GetSize(); ++i)
+ if (curr_target_phrase.GetWord(i).IsNonTerminal()) {
+ num_words_added_left = i;
+ num_words_added_right = curr_target_phrase.GetSize() - (i+1);
+ break;
+ }
+
+ // left context
+// cerr << "compute left ngram context" << endl;
+ if (num_words_added_left > 0)
+ GetNgramMatchCounts_prefix(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_added_left,
+ num_curr_words - num_words_added_right - 1);
+
+ // right context
+// cerr << "compute right ngram context" << endl;
+ if (num_words_added_right > 0)
+ GetNgramMatchCounts(new_words,
+ m_cur_ref_ngrams,
+ new_state->m_ngram_counts,
+ new_state->m_ngram_matches,
+ num_words_added_left + num_old_words);
+ }
+ else {
+ cerr << "undefined state.. " << endl;
+ exit(1);
+ }
// Update state variables
- ctx_end_idx = new_words.GetSize()-1;
+ size_t ctx_start_idx = 0;
+ size_t ctx_end_idx = new_words.GetSize()-1;
size_t bleu_context_length = BleuScoreState::bleu_order -1;
if (ctx_end_idx > bleu_context_length) {
ctx_start_idx = ctx_end_idx - bleu_context_length;
- } else {
- ctx_start_idx = 0;
}
- WordsBitmap coverageVector = cur_hypo.GetWordsBitmap();
- new_state->m_source_length = coverageVector.GetNumWordsCovered();
-
- new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
- ctx_end_idx));
- new_state->m_target_length += cur_hypo.GetCurrTargetLength();
+ new_state->m_source_length = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
+ new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx, ctx_end_idx));
+ new_state->m_target_length = cur_hypo.GetOutputPhrase().GetSize();
- // we need a scaled reference length to compare the current target phrase to the corresponding reference phrase
- new_state->m_scaled_ref_length = m_cur_ref_length *
- ((float)coverageVector.GetNumWordsCovered() / coverageVector.GetSize());
+ // we need a scaled reference length to compare the current target phrase to the corresponding
+ // reference phrase
+ size_t cur_source_length = m_cur_source_length;
+ new_state->m_scaled_ref_length = m_cur_ref_length * (float(new_state->m_source_length)/cur_source_length);
// Calculate new bleu.
new_bleu = CalculateBleu(new_state);
// Set score to new Bleu score
accumulator->PlusEquals(this, new_bleu - old_bleu);
-
return new_state;
-}*/
+}
/*
* Calculate Bleu score for a partial hypothesis given as state.
@@ -457,62 +615,61 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
float smooth = 1;
float smoothed_count, smoothed_matches;
- // revised: compute Bleu in the context of the pseudo-document
- // B(b) = size_of_oracle_doc * (Bleu(B_hist + b) - Bleu(B_hist))
-
- // Calculate geometric mean of modified ngram precisions
- // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
- // = BP * 4th root(PRODUCT_1_4 p_n)
- for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
- if (state->m_ngram_counts[i]) {
- smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
- smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
-
- switch (m_smoothing_scheme) {
- case PLUS_ONE:
- default:
- if (i > 0) {
- // smoothing for all n > 1
- smoothed_matches += 1;
- smoothed_count += 1;
- }
- break;
- case LIGHT:
- if (i > 0) {
- // smoothing for all n > 1
- smoothed_matches += 0.1;
- smoothed_count += 0.1;
- }
- break;
- case PAPINENI:
- if (state->m_ngram_matches[i] == 0) {
- smooth *= 0.5;
- smoothed_matches += smooth;
- smoothed_count += smooth;
- }
- break;
+ if (m_sentence_bleu) {
+ // Calculate geometric mean of modified ngram precisions
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
+ // = BP * 4th root(PRODUCT_1_4 p_n)
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
+ if (state->m_ngram_counts[i]) {
+ smoothed_matches = state->m_ngram_matches[i];
+ smoothed_count = state->m_ngram_counts[i];
+
+ switch (m_smoothing_scheme) {
+ case PLUS_ONE:
+ default:
+ if (i > 0) {
+ // smoothing for all n > 1
+ smoothed_matches += 1;
+ smoothed_count += 1;
+ }
+ break;
+ case LIGHT:
+ if (i > 0) {
+ // smoothing for all n > 1
+ smoothed_matches += 0.1;
+ smoothed_count += 0.1;
+ }
+ break;
+ case PAPINENI:
+ if (state->m_ngram_matches[i] == 0) {
+ smooth *= 0.5;
+ smoothed_matches += smooth;
+ smoothed_count += smooth;
+ }
+ break;
+ }
+
+ precision *= smoothed_matches / smoothed_count;
}
-
- precision *= smoothed_matches / smoothed_count;
}
- }
- // take geometric mean
- precision = pow(precision, (float)1/4);
-
- // Apply brevity penalty if applicable.
- // BP = 1 if c > r
- // BP = e^(1- r/c)) if c <= r
- // where
- // c: length of the candidate translation
- // r: effective reference length (sum of best match lengths for each candidate sentence)
- if (state->m_target_length < (state->m_scaled_ref_length * m_relax_BP)) {
- float smoothed_target_length = m_target_length_history + state->m_target_length;
- float smoothed_ref_length = m_ref_length_history + (state->m_scaled_ref_length * m_relax_BP);
- precision *= exp(1 - (smoothed_ref_length/ smoothed_target_length));
- }
+ // take geometric mean
+ precision = pow(precision, (float)1/4);
+
+ // Apply brevity penalty if applicable.
+ // BP = 1 if c > r
+ // BP = e^(1- r/c)) if c <= r
+ // where
+ // c: length of the candidate translation
+ // r: effective reference length (sum of best match lengths for each candidate sentence)
+ if (state->m_target_length < (state->m_scaled_ref_length * m_relax_BP)) {
+ float smoothed_target_length = m_target_length_history + state->m_target_length;
+ float smoothed_ref_length = m_ref_length_history + (state->m_scaled_ref_length * m_relax_BP);
+ precision *= exp(1 - (smoothed_ref_length/ smoothed_target_length));
+ }
+
+// cerr << "precision: " << precision << endl;
- if (m_sentence_bleu) {
// Approximate bleu score as of Chiang/Resnik is scaled by the size of the input:
// B(e;f,{r_k}) = (O_f + |f|) * BLEU(O + c(e;{r_k}))
// where c(e;) is a vector of reference length, ngram counts and ngram matches
@@ -532,6 +689,28 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
return precision * m_scale_by_x;
}
else {
+ // Revised history BLEU: compute Bleu in the context of the pseudo-document
+ // B(b) = size_of_oracle_doc * (Bleu(B_hist + b) - Bleu(B_hist))
+ // Calculate geometric mean of modified ngram precisions
+ // BLEU = BP * exp(SUM_1_4 1/4 * log p_n)
+ // = BP * 4th root(PRODUCT_1_4 p_n)
+ for (size_t i = 0; i < BleuScoreState::bleu_order; i++) {
+ if (state->m_ngram_counts[i]) {
+ smoothed_matches = m_match_history[i] + state->m_ngram_matches[i];
+ smoothed_count = m_count_history[i] + state->m_ngram_counts[i];
+ precision *= smoothed_matches / smoothed_count;
+ }
+ }
+
+ // take geometric mean
+ precision = pow(precision, (float)1/4);
+
+ // BP
+ if (m_target_length_history + state->m_target_length < m_ref_length_history + state->m_scaled_ref_length)
+ precision *= exp(1 - (m_ref_length_history + state->m_scaled_ref_length/m_target_length_history + state->m_target_length));
+
+// cerr << "precision: " << precision << endl;
+
// **BLEU score of pseudo-document**
float precision_pd = 1.0;
if (m_target_length_history > 0) {
@@ -549,6 +728,8 @@ float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
precision_pd = 0;
// **end BLEU of pseudo-document**
+// cerr << "precision pd: " << precision_pd << endl;
+
float sentence_impact;
if (m_target_length_history > 0)
sentence_impact = m_target_length_history * (precision - precision_pd);
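
To summarize the CalculateBleu() restructuring above: in the m_sentence_bleu branch the ngram precisions now come from the state alone, without the pseudo-document history counts, smoothed per the selected scheme. A self-contained sketch of that branch with the default PLUS_ONE scheme, following the geometric-mean and brevity-penalty formulas quoted in the patch comments (the function name and standalone setup are illustrative, not Moses code):

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// BLEU = BP * 4th root(PRODUCT_1_4 p_n), with plus-one smoothing for n > 1,
// mirroring the m_sentence_bleu branch of the patched CalculateBleu().
float SentenceBleu(const std::vector<size_t>& counts,
                   const std::vector<size_t>& matches,
                   float target_length, float scaled_ref_length) {
  const size_t bleu_order = 4;
  float precision = 1.0f;
  for (size_t i = 0; i < bleu_order; ++i) {
    if (counts[i] == 0) continue;        // patch skips orders with no counts
    float m = matches[i], c = counts[i];
    if (i > 0) { m += 1; c += 1; }       // PLUS_ONE smoothing for all n > 1
    precision *= m / c;
  }
  precision = std::pow(precision, 1.0f / 4);  // geometric mean
  // BP = e^(1 - r/c) if the candidate is shorter than the scaled reference
  // length, 1 otherwise (relax_BP omitted here for brevity).
  if (target_length < scaled_ref_length)
    precision *= std::exp(1 - scaled_ref_length / target_length);
  return precision;
}

int main() {
  std::vector<size_t> counts(4), matches(4);
  counts[0] = 5; counts[1] = 4; counts[2] = 3; counts[3] = 2;   // hypothetical
  matches[0] = 4; matches[1] = 2; matches[2] = 1; matches[3] = 0;
  std::cout << SentenceBleu(counts, matches, 5.0f, 6.0f) << std::endl;
  return 0;
}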
diff --git a/moses/src/BleuScoreFeature.h b/moses/src/BleuScoreFeature.h
index 732e09e4b..988db6a27 100644
--- a/moses/src/BleuScoreFeature.h
+++ b/moses/src/BleuScoreFeature.h
@@ -36,8 +36,11 @@ private:
std::vector< size_t > m_ngram_counts;
std::vector< size_t > m_ngram_matches;
+
+ void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches);
};
+
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
@@ -55,14 +58,14 @@ public:
m_match_history(BleuScoreState::bleu_order),
m_source_length_history(0),
m_target_length_history(0),
- m_useSourceLengthHistory(0),
m_ref_length_history(0),
m_scale_by_input_length(true),
m_scale_by_avg_input_length(false),
m_scale_by_inverse_length(false),
m_scale_by_avg_inverse_length(false),
m_scale_by_x(1),
- m_historySmoothing(0.7),
+ m_historySmoothing(0.9),
+ m_useSourceLengthHistory(0),
m_smoothing_scheme(PLUS_ONE),
m_relax_BP(1) {}
@@ -78,44 +81,50 @@ public:
void PrintHistory(std::ostream& out) const;
void LoadReferences(const std::vector< std::vector< std::string > > &);
- void SetCurrentSourceLength(size_t);
- void SetCurrentShortestReference(size_t);
+ void SetCurrSourceLength(size_t);
+ void SetCurrShortestRefLength(size_t);
+ void SetCurrAvgRefLength(size_t sent_id);
+ void SetAvgInputLength (float l) { m_avg_input_length = l; }
+ void SetCurrReferenceNgrams(size_t sent_id);
+ size_t GetShortestRefIndex(size_t ref_id);
+ size_t GetClosestRefLength(size_t ref_id, int hypoLength);
void UpdateHistory(const std::vector< const Word* >&);
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
- void PrintReferenceLength(const std::vector<size_t>& ref_ids);
- size_t GetReferenceLength(size_t ref_id);
- size_t GetClosestReferenceLength(size_t ref_id, int hypoLength);
+ void PrintRefLength(const std::vector<size_t>& ref_ids);
void SetBleuParameters(bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, float relaxBP,
bool useSourceLengthHistory);
- void SetAvgInputLength (float l) { m_avg_input_length = l; }
+
void GetNgramMatchCounts(Phrase&,
const NGrams&,
std::vector< size_t >&,
std::vector< size_t >&,
size_t skip = 0) const;
+ void GetNgramMatchCounts_prefix(Phrase&,
+ const NGrams&,
+ std::vector< size_t >&,
+ std::vector< size_t >&,
+ size_t new_start_indices,
+ size_t last_end_index) const;
+ void GetNgramMatchCounts_overlap(Phrase& phrase,
+ const NGrams& ref_ngram_counts,
+ std::vector< size_t >& ret_counts,
+ std::vector< size_t >& ret_matches,
+ size_t overlap_index) const;
void GetClippedNgramMatchesAndCounts(Phrase&,
- const NGrams&,
- std::vector< size_t >&,
- std::vector< size_t >&,
- size_t skip = 0) const;
+ const NGrams&,
+ std::vector< size_t >&,
+ std::vector< size_t >&,
+ size_t skip = 0) const;
FFState* Evaluate( const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
- virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
- ScoreComponentCollection* ) const
- {
- /* Not implemented */
- CHECK(0);
- }
-
-
- FFState* EvaluateChart( const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection) const;
+ FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+ int featureID,
+ ScoreComponentCollection* accumulator) const;
+
float CalculateBleu(BleuScoreState*) const;
const FFState* EmptyHypothesisState(const InputType&) const;
@@ -124,6 +133,8 @@ public:
float GetAverageInputLength() { return m_avg_input_length; }
private:
+ bool m_sentence_bleu;
+
// counts for pseudo-document
std::vector< float > m_count_history;
std::vector< float > m_match_history;
@@ -134,9 +145,7 @@ private:
size_t m_cur_source_length;
RefCounts m_refs;
NGrams m_cur_ref_ngrams;
- size_t m_cur_ref_length;
-
- bool m_sentence_bleu;
+ float m_cur_ref_length;
// scale BLEU score by history of input length
bool m_scale_by_input_length;
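
Among the declarations above, GetNgramMatchCounts_overlap is the piece that makes EvaluateChart work when two previously scored hypotheses are glued together: it credits only ngrams that start left of the seam and end at or beyond it, so counts already accumulated in each child state are not double-counted. A small sketch of the same index walk with plain string tokens in place of the Moses Phrase type (reference matching omitted):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Enumerate the ngrams (up to order 4) that cross the seam between two glued
// hypotheses, mirroring the loop bounds of GetNgramMatchCounts_overlap.
void SpanningNgrams(const std::vector<std::string>& words, size_t overlap_index) {
  const size_t bleu_order = 4;
  for (size_t end_idx = overlap_index; end_idx < words.size(); ++end_idx) {
    if (end_idx >= overlap_index + bleu_order - 1) break;  // 3-word window
    for (size_t order = 0; order < bleu_order; ++order) {
      if (order > end_idx) break;
      size_t start_idx = end_idx - order;
      if (start_idx >= overlap_index) continue;  // must start left of the seam
      for (size_t i = start_idx; i <= end_idx; ++i)
        std::cout << words[i] << (i < end_idx ? " " : "\n");
    }
  }
}

int main() {
  std::vector<std::string> words;  // "a b | c d e": seam before index 2
  words.push_back("a"); words.push_back("b");
  words.push_back("c"); words.push_back("d"); words.push_back("e");
  SpanningNgrams(words, 2);  // prints only ngrams containing both "b" and "c"
  return 0;
}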
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 33160d841..780ac66a2 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -1525,6 +1525,7 @@ bool StaticData::LoadReferences()
{
vector<string> bleuWeightStr = m_parameter->GetParam("weight-bl");
vector<string> referenceFiles = m_parameter->GetParam("references");
+ cerr << "Loading reference file " << referenceFiles[0] << endl;
if ((!referenceFiles.size() && bleuWeightStr.size()) || (referenceFiles.size() && !bleuWeightStr.size())) {
UserMessage::Add("You cannot use the bleu feature without references, and vice-versa");
return false;
@@ -1551,6 +1552,11 @@ bool StaticData::LoadReferences()
}
string line;
while (getline(in,line)) {
+ if (GetSearchAlgorithm() == ChartDecoding) {
+ stringstream tmp;
+ tmp << "<s> " << line << " </s>";
+ line = tmp.str();
+ }
references[i].push_back(line);
}
if (i > 0) {
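
Finally, the StaticData::LoadReferences() hunk above wraps every reference line in sentence boundary markers when the chart decoder is active, so reference ngrams can match the <s> and </s> tokens present in chart decoder output. A trivial standalone sketch of the transformation (the free function is illustrative):

#include <iostream>
#include <sstream>
#include <string>

// Mirrors the new ChartDecoding branch in StaticData::LoadReferences():
// surround a raw reference line with sentence boundary markers.
std::string WrapForChartDecoding(const std::string& line) {
  std::stringstream tmp;
  tmp << "<s> " << line << " </s>";
  return tmp.str();
}

int main() {
  std::cout << WrapForChartDecoding("the cat sat") << std::endl;
  // prints: <s> the cat sat </s>
  return 0;
}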