Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matthias Huck <mhuck@inf.ed.ac.uk> 2015-05-01 00:26:30 +0300
committer Matthias Huck <mhuck@inf.ed.ac.uk> 2015-05-01 00:26:30 +0300
commit 4ee8f2dec1c200858c4ed82becc95ed1f7c1017f (patch)
tree 07ecd1b03f6aa7926bfa3fdeaf176833b836ed7b /mert/BleuScorer.h
parent 1d86b8fde7f62dbd0897815479d44df981de5d84 (diff)
sentence-bleu less greedy regarding memory
Don't load all references, read them line by line. Corpora with millions of sentences can now be evaluated without consuming gigabytes of RAM.
Diffstat (limited to 'mert/BleuScorer.h')
-rw-r--r-- mert/BleuScorer.h 25
1 file changed, 13 insertions, 12 deletions
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index e90915822..d7ee8e4e7 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -1,23 +1,23 @@
-#ifndef MERT_BLEU_SCORER_H_
-#define MERT_BLEU_SCORER_H_
+#pragma once
-#include <ostream>
+#include <fstream>
#include <string>
#include <vector>
-#include "Types.h"
+#include <boost/shared_ptr.hpp>
+
+#include "Ngram.h"
+#include "Reference.h"
+#include "ScopedVector.h"
#include "ScoreData.h"
#include "StatisticsBasedScorer.h"
-#include "ScopedVector.h"
+#include "Types.h"
namespace MosesTuning
{
const size_t kBleuNgramOrder = 4;
-class NgramCounts;
-class Reference;
-
/**
* Bleu scoring
*/
@@ -42,9 +42,9 @@ public:
return 2 * kBleuNgramOrder + 1;
}
- void CalcBleuStats(const Reference* ref, const std::string& text, ScoreStats& entry) const;
+ void CalcBleuStats(const Reference& ref, const std::string& text, ScoreStats& entry) const;
- int CalcReferenceLength(const Reference* ref, std::size_t length) const;
+ int CalcReferenceLength(const Reference& ref, std::size_t length) const;
ReferenceLengthType GetReferenceLengthType() const {
return m_ref_length_type;
@@ -65,7 +65,7 @@ public:
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
- std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const;
+ size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const;
void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
@@ -74,6 +74,8 @@ public:
void ProcessReferenceLine(const std::string& line, Reference* ref) const;
+ bool GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const;
+
//private:
protected:
ReferenceLengthType m_ref_length_type;
@@ -102,4 +104,3 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec
}
-#endif // MERT_BLEU_SCORER_H_