From 4ee8f2dec1c200858c4ed82becc95ed1f7c1017f Mon Sep 17 00:00:00 2001 From: Matthias Huck Date: Thu, 30 Apr 2015 22:26:30 +0100 Subject: sentence-bleu less greedy regarding memory Don't load all references, read them line by line. Corpora with millions of sentences can now be evaluated without consuming gigabytes of RAM. --- mert/BleuScorer.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'mert/BleuScorer.h') diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h index e90915822..d7ee8e4e7 100644 --- a/mert/BleuScorer.h +++ b/mert/BleuScorer.h @@ -1,23 +1,23 @@ -#ifndef MERT_BLEU_SCORER_H_ -#define MERT_BLEU_SCORER_H_ +#pragma once -#include <ostream> +#include <fstream> #include <string> #include <vector> -#include "Types.h" +#include <boost/shared_ptr.hpp> + +#include "Ngram.h" +#include "Reference.h" +#include "ScopedVector.h" #include "ScoreData.h" #include "StatisticsBasedScorer.h" -#include "ScopedVector.h" +#include "Types.h" namespace MosesTuning { const size_t kBleuNgramOrder = 4; -class NgramCounts; -class Reference; - /** * Bleu scoring */ @@ -42,9 +42,9 @@ public: return 2 * kBleuNgramOrder + 1; } - void CalcBleuStats(const Reference* ref, const std::string& text, ScoreStats& entry) const; + void CalcBleuStats(const Reference& ref, const std::string& text, ScoreStats& entry) const; - int CalcReferenceLength(const Reference* ref, std::size_t length) const; + int CalcReferenceLength(const Reference& ref, std::size_t length) const; ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; @@ -65,7 +65,7 @@ public: /** * Count the ngrams of each type, up to the given length in the input line.
*/ - std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const; + size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const; void DumpCounts(std::ostream* os, const NgramCounts& counts) const; @@ -74,6 +74,8 @@ public: void ProcessReferenceLine(const std::string& line, Reference* ref) const; + bool GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const; + //private: protected: ReferenceLengthType m_ref_length_type; @@ -102,4 +104,3 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec } -#endif // MERT_BLEU_SCORER_H_ -- cgit v1.2.3