diff options
author | Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> | 2012-06-07 14:16:52 +0400 |
---|---|---|
committer | Eva Hasler <ehasler@saxnot.inf.ed.ac.uk> | 2012-06-07 14:16:52 +0400 |
commit | e1c1a5343c6b15a38370885c851786b3d5109140 (patch) | |
tree | 14e965d9b6d4b28712d530e17892aa3fe7fd8827 /mert/BleuScorer.h | |
parent | 3c1728f30d74f7b7e1e7b04f639aa286c6f23264 (diff) | |
parent | 3c44d04baf6f4cdb938ac4e4e318b0591d3f553a (diff) |
merge
Diffstat (limited to 'mert/BleuScorer.h')
-rw-r--r-- | mert/BleuScorer.h | 94 |
1 files changed, 49 insertions, 45 deletions
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h index abd80478c..d0da10bac 100644 --- a/mert/BleuScorer.h +++ b/mert/BleuScorer.h @@ -1,7 +1,7 @@ -#ifndef __BLEUSCORER_H__ -#define __BLEUSCORER_H__ +#ifndef MERT_BLEU_SCORER_H_ +#define MERT_BLEU_SCORER_H_ -#include <iostream> +#include <ostream> #include <string> #include <vector> @@ -10,10 +10,12 @@ #include "Scorer.h" #include "ScopedVector.h" -using namespace std; +const int kBleuNgramOrder = 4; -enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST }; +class NgramCounts; +class Reference; +using namespace std; /** * Bleu scoring @@ -21,64 +23,66 @@ enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST }; class BleuScorer: public StatisticsBasedScorer { public: - explicit BleuScorer(const string& config = ""); + enum ReferenceLengthType { + AVERAGE, + CLOSEST, + SHORTEST + }; + + explicit BleuScorer(const std::string& config = ""); ~BleuScorer(); static vector<float> ScoreNbestList(string scoreFile, string featureFile); static float sentenceLevelBleuPlusOne(const vector<float>& stats); - virtual void setReferenceFiles(const vector<string>& referenceFiles); - virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry); - virtual float calculateScore(const vector<int>& comps) const; + virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles); + virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry); + virtual float calculateScore(const std::vector<int>& comps) const; + virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; } - virtual size_t NumberOfScores() const { - return 2 * kLENGTH + 1; - } + int CalcReferenceLength(std::size_t sentence_id, std::size_t length); -private: - //Used to construct the ngram map - struct CompareNgrams { - bool operator()(const vector<int>& a, const vector<int>& b) const { - size_t i; - const size_t as = a.size(); - const size_t bs = b.size(); - for (i = 0; i < as && i < bs; ++i) { - if (a[i] < b[i]) { - //cerr << "true" << endl; - return true; - } - if (a[i] > b[i]) { - //cerr << "false" << endl; - return false; - } - } - //entries are equal, shortest wins - return as < bs;; - } - }; + ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; } + void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; } - typedef map<vector<int>,int,CompareNgrams> counts_t; - typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator; - typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator; - typedef ScopedVector<counts_t> refcounts_t; + const std::vector<Reference*>& GetReferences() const { return m_references.get(); } /** * Count the ngrams of each type, up to the given length in the input line. */ - size_t countNgrams(const string& line, counts_t& counts, unsigned int n); + std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n); + + void DumpCounts(std::ostream* os, const NgramCounts& counts) const; + + bool OpenReference(const char* filename, std::size_t file_id); - void dump_counts(counts_t& counts) const; + // NOTE: this function is used for unit testing. + bool OpenReferenceStream(std::istream* is, std::size_t file_id); - const int kLENGTH; - BleuReferenceLengthStrategy _refLengthStrategy; +private: + ReferenceLengthType m_ref_length_type; - // data extracted from reference files - refcounts_t _refcounts; - vector<vector<size_t> > _reflengths; + // reference translations. + ScopedVector<Reference> m_references; // no copying allowed BleuScorer(const BleuScorer&); BleuScorer& operator=(const BleuScorer&); }; -#endif // __BLEUSCORER_H__ +/** Computes sentence-level BLEU+1 score. + * This function is used in PRO. + */ +float sentenceLevelBleuPlusOne(const std::vector<float>& stats); + +/** Computes sentence-level BLEU score given a background corpus. + * This function is used in batch MIRA. + */ +float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); + +/** + * Computes plain old BLEU from a vector of stats + */ +float unsmoothedBleu(const std::vector<float>& stats); + +#endif // MERT_BLEU_SCORER_H_ |