// // SentenceLevelScorer.cpp // mert_lib // // Created by Hieu Hoang on 22/06/2012. // Copyright 2012 __MyCompanyName__. All rights reserved. // #include "SentenceLevelScorer.h" #include #include using namespace std; namespace MosesTuning { SentenceLevelScorer::SentenceLevelScorer(const string& name, const string& config) : Scorer(name, config), m_regularisationStrategy(REG_NONE), m_regularisationWindow(0) { Init(); } SentenceLevelScorer::~SentenceLevelScorer() {} void SentenceLevelScorer::Init() { // Configure regularisation. static string KEY_TYPE = "regtype"; static string KEY_WINDOW = "regwin"; static string KEY_CASE = "case"; static string TYPE_NONE = "none"; static string TYPE_AVERAGE = "average"; static string TYPE_MINIMUM = "min"; static string TRUE = "true"; static string FALSE = "false"; const string type = getConfig(KEY_TYPE, TYPE_NONE); if (type == TYPE_NONE) { m_regularisationStrategy = REG_NONE; } else if (type == TYPE_AVERAGE) { m_regularisationStrategy = REG_AVERAGE; } else if (type == TYPE_MINIMUM) { m_regularisationStrategy = REG_MINIMUM; } else { throw boost::lexer::runtime_error("Unknown scorer regularisation strategy: " + type); } cerr << "Using scorer regularisation strategy: " << type << endl; const string window = getConfig(KEY_WINDOW, "0"); m_regularisationWindow = atoi(window.c_str()); cerr << "Using scorer regularisation window: " << m_regularisationWindow << endl; const string preservecase = getConfig(KEY_CASE, TRUE); if (preservecase == TRUE) { m_enable_preserve_case = true; } else if (preservecase == FALSE) { m_enable_preserve_case = false; } cerr << "Using case preservation: " << m_enable_preserve_case << endl; } void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores) { //cout << "*******SentenceLevelScorer::score" << endl; if (!m_score_data) { throw runtime_error("Score data not loaded"); } //calculate the score for the candidates if (m_score_data->size() == 0) { throw runtime_error("Score data is empty"); } if (candidates.size() == 0) { throw runtime_error("No candidates supplied"); } const int numCounts = m_score_data->get(0,candidates[0]).size(); vector totals(numCounts); for (size_t i = 0; i < candidates.size(); ++i) { //cout << " i " << i << " candi " << candidates[i] ; ScoreStats stats = m_score_data->get(i,candidates[i]); if (stats.size() != totals.size()) { stringstream msg; msg << "Statistics for (" << "," << candidates[i] << ") have incorrect " << "number of fields. Found: " << stats.size() << " Expected: " << totals.size(); throw runtime_error(msg.str()); } //Add up scores for all sentences, would normally be just one score for (size_t k = 0; k < totals.size(); ++k) { totals[k] += stats.get(k); //cout << " stats " << stats.get(k) ; } //cout << endl; } //take average for (size_t k = 0; k < totals.size(); ++k) { //cout << "totals = " << totals[k] << endl; //cout << "cand = " << candidates.size() << endl; totals[k] /= candidates.size(); //cout << "finaltotals = " << totals[k] << endl; } scores.push_back(calculateScore(totals)); candidates_t last_candidates(candidates); //apply each of the diffs, and get new scores for (size_t i = 0; i < diffs.size(); ++i) { for (size_t j = 0; j < diffs[i].size(); ++j) { const size_t sid = diffs[i][j].first; const size_t nid = diffs[i][j].second; //cout << "sid = " << sid << endl; //cout << "nid = " << nid << endl; const size_t last_nid = last_candidates[sid]; for (size_t k = 0; k < totals.size(); ++k) { const float diff = m_score_data->get(sid,nid).get(k) - m_score_data->get(sid,last_nid).get(k); //cout << "diff = " << diff << endl; totals[k] += diff/candidates.size(); //cout << "totals = " << totals[k] << endl; } last_candidates[sid] = nid; } scores.push_back(calculateScore(totals)); } //regularisation. This can either be none, or the min or average as described in //Cer, Jurafsky and Manning at WMT08 if (m_regularisationStrategy == REG_NONE || m_regularisationWindow <= 0) { //no regularisation return; } //window size specifies the +/- in each direction statscores_t raw_scores(scores);//copy scores for (size_t i = 0; i < scores.size(); ++i) { size_t start = 0; if (i >= m_regularisationWindow) { start = i - m_regularisationWindow; } const size_t end = min(scores.size(), i + m_regularisationWindow+1); if (m_regularisationStrategy == REG_AVERAGE) { scores[i] = score_average(raw_scores, start, end); } else { scores[i] = score_min(raw_scores, start, end); } } } }