Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEva Hasler <ehasler@saxnot.inf.ed.ac.uk>2012-06-07 14:16:52 +0400
committerEva Hasler <ehasler@saxnot.inf.ed.ac.uk>2012-06-07 14:16:52 +0400
commite1c1a5343c6b15a38370885c851786b3d5109140 (patch)
tree14e965d9b6d4b28712d530e17892aa3fe7fd8827 /mert/BleuScorer.h
parent3c1728f30d74f7b7e1e7b04f639aa286c6f23264 (diff)
parent3c44d04baf6f4cdb938ac4e4e318b0591d3f553a (diff)
merge
Diffstat (limited to 'mert/BleuScorer.h')
-rw-r--r--mert/BleuScorer.h94
1 files changed, 49 insertions, 45 deletions
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index abd80478c..d0da10bac 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -1,7 +1,7 @@
-#ifndef __BLEUSCORER_H__
-#define __BLEUSCORER_H__
+#ifndef MERT_BLEU_SCORER_H_
+#define MERT_BLEU_SCORER_H_
-#include <iostream>
+#include <ostream>
#include <string>
#include <vector>
@@ -10,10 +10,12 @@
#include "Scorer.h"
#include "ScopedVector.h"
-using namespace std;
+const int kBleuNgramOrder = 4;
-enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST };
+class NgramCounts;
+class Reference;
+using namespace std;
/**
* Bleu scoring
@@ -21,64 +23,66 @@ enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST };
class BleuScorer: public StatisticsBasedScorer
{
public:
- explicit BleuScorer(const string& config = "");
+ enum ReferenceLengthType {
+ AVERAGE,
+ CLOSEST,
+ SHORTEST
+ };
+
+ explicit BleuScorer(const std::string& config = "");
~BleuScorer();
static vector<float> ScoreNbestList(string scoreFile, string featureFile);
static float sentenceLevelBleuPlusOne(const vector<float>& stats);
- virtual void setReferenceFiles(const vector<string>& referenceFiles);
- virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
- virtual float calculateScore(const vector<int>& comps) const;
+ virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
+ virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
+ virtual float calculateScore(const std::vector<int>& comps) const;
+ virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
- virtual size_t NumberOfScores() const {
- return 2 * kLENGTH + 1;
- }
+ int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
-private:
- //Used to construct the ngram map
- struct CompareNgrams {
- bool operator()(const vector<int>& a, const vector<int>& b) const {
- size_t i;
- const size_t as = a.size();
- const size_t bs = b.size();
- for (i = 0; i < as && i < bs; ++i) {
- if (a[i] < b[i]) {
- //cerr << "true" << endl;
- return true;
- }
- if (a[i] > b[i]) {
- //cerr << "false" << endl;
- return false;
- }
- }
- //entries are equal, shortest wins
- return as < bs;;
- }
- };
+ ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
+ void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
- typedef map<vector<int>,int,CompareNgrams> counts_t;
- typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator;
- typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator;
- typedef ScopedVector<counts_t> refcounts_t;
+ const std::vector<Reference*>& GetReferences() const { return m_references.get(); }
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
- size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
+ std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n);
+
+ void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
+
+ bool OpenReference(const char* filename, std::size_t file_id);
- void dump_counts(counts_t& counts) const;
+ // NOTE: this function is used for unit testing.
+ bool OpenReferenceStream(std::istream* is, std::size_t file_id);
- const int kLENGTH;
- BleuReferenceLengthStrategy _refLengthStrategy;
+private:
+ ReferenceLengthType m_ref_length_type;
- // data extracted from reference files
- refcounts_t _refcounts;
- vector<vector<size_t> > _reflengths;
+ // reference translations.
+ ScopedVector<Reference> m_references;
// no copying allowed
BleuScorer(const BleuScorer&);
BleuScorer& operator=(const BleuScorer&);
};
-#endif // __BLEUSCORER_H__
+/** Computes sentence-level BLEU+1 score.
+ * This function is used in PRO.
+ */
+float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
+
+/** Computes sentence-level BLEU score given a background corpus.
+ * This function is used in batch MIRA.
+ */
+float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg);
+
+/**
+ * Computes plain old BLEU from a vector of stats
+ */
+float unsmoothedBleu(const std::vector<float>& stats);
+
+#endif // MERT_BLEU_SCORER_H_