#include #include #include #include #include #include #include #include "Scorer.h" #include "Timer.h" #include "Util.h" #include "ScorerFactory.h" using namespace std; Scorer* scorer; int bootstrap = 0; void evaluate(const string& candFile); void addStats(vector& stats1, const vector& stats2); float average(const vector& list); float stdDeviation(const vector& list, float avg); string int2string(int n); void usage() { cerr<<"usage: evaluator [options] --reference ref1[,ref2[,ref3...]] --candidate cand1[,cand2[,cand3...]] "< refFiles; vector candFiles; if (reference.length() == 0) throw runtime_error("You have to specify at least one reference file."); split(reference,',',refFiles); if (candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file."); split(candidate,',',candFiles); scorer = ScorerFactory::getScorer(scorerType,scorerConfig); cerr << "Using scorer: " << scorer->getName() << endl; scorer->setReferenceFiles(refFiles); PrintUserTime("Reference files loaded"); for (vector::const_iterator it = candFiles.begin(); it != candFiles.end(); ++it) { evaluate(*it); } PrintUserTime("Evaluation done"); delete scorer; return EXIT_SUCCESS; } catch (const exception& e) { cerr << "Exception: " << e.what() << endl; return EXIT_FAILURE; } } void evaluate(const string& candFile) { ifstream cand(candFile.c_str()); if (!cand.good()) throw runtime_error("Error opening candidate file"); vector entries; // Loading sentences and preparing statistics ScoreStats scoreentry; string line; while (getline(cand, line)) { scorer->prepareStats(entries.size(), line, scoreentry); entries.push_back(scoreentry); } PrintUserTime("Candidate file " + candFile + " loaded and stats prepared"); int n = entries.size(); if (bootstrap) { vector scores; for (int i = 0; i < bootstrap; ++i) { // TODO: Use smart pointer for exceptional-safety. ScoreData* scoredata = new ScoreData(*scorer); for (int j = 0; j < n; ++j) { int randomIndex = random() % n; string str_j = int2string(j); scoredata->add(entries[randomIndex], str_j); } scorer->setScoreData(scoredata); candidates_t candidates(n, 0); float score = scorer->score(candidates); scores.push_back(score); delete scoredata; } float avg = average(scores); float dev = stdDeviation(scores, avg); cout.setf(ios::fixed,ios::floatfield); cout.precision(4); cout << "File: " << candFile << "\t" << scorer->getName() << " Average score: " << avg << "\tStandard deviation: " << dev << endl; } else { // TODO: Use smart pointer for exceptional-safety. ScoreData* scoredata = new ScoreData(*scorer); for (int sid = 0; sid < n; ++sid) { string str_sid = int2string(sid); scoredata->add(entries[sid], str_sid); } scorer->setScoreData(scoredata); candidates_t candidates(n, 0); float score = scorer->score(candidates); delete scoredata; cout.setf(ios::fixed,ios::floatfield); cout.precision(4); cout << "File: " << candFile << "\t" << scorer->getName() << " Score: " << score << endl; } } string int2string(int n) { stringstream ss; ss << n; return ss.str(); } float average(const vector& list) { float sum = 0; for (vector::const_iterator it = list.begin(); it != list.end(); ++it) sum += *it; return sum / list.size(); } float stdDeviation(const vector& list, float avg) { vector tmp; for (vector::const_iterator it = list.begin(); it != list.end(); ++it) tmp.push_back(pow(*it - avg, 2)); return sqrt(average(tmp)); }