Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-19 17:45:15 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-19 17:45:15 +0400
commitf686e8771a5db09e32474ed0735dbdef275158d3 (patch)
treeb61c91008d5fb45b3e569da7f22063b549417372
parent54233d327052793807fc44583334dd3b396176e5 (diff)
Add some functions to BleuScorer for unit testing.
This commit also fixes a typo, fixes indentation, and adds 'const' to Scorer::applyFactors().
-rw-r--r--mert/BleuScorer.cpp16
-rw-r--r--mert/BleuScorer.h21
-rw-r--r--mert/OptimizerFactoryTest.cpp8
-rw-r--r--mert/Reference.h6
-rw-r--r--mert/Scorer.cpp22
-rw-r--r--mert/Scorer.h2
6 files changed, 42 insertions, 33 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index f143df66b..5862dd3e1 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -1,6 +1,7 @@
#include "BleuScorer.h"
#include <algorithm>
+#include <cassert>
#include <cmath>
#include <climits>
#include <fstream>
@@ -37,9 +38,10 @@ BleuScorer::BleuScorer(const string& config)
BleuScorer::~BleuScorer() {}
-size_t BleuScorer::countNgrams(const string& line, NgramCounts& counts,
+size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
unsigned int n)
{
+ assert(n > 0);
vector<int> encoded_tokens;
TokenizeAndEncode(line, encoded_tokens);
for (size_t k = 1; k <= n; ++k) {
@@ -74,7 +76,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
string line;
size_t sid = 0; //sentence counter
while (getline(refin,line)) {
- line = this->applyFactors(line);
+ line = applyFactors(line);
if (i == 0) {
Reference* ref = new Reference;
m_references.push_back(ref); // Take ownership of the Reference object.
@@ -83,7 +85,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
}
NgramCounts counts;
- size_t length = countNgrams(line, counts, kBleuNgramOrder);
+ size_t length = CountNgrams(line, counts, kBleuNgramOrder);
//for any counts larger than those already there, merge them in
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
@@ -117,8 +119,8 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
NgramCounts testcounts;
// stats for this line
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
- string sentence = this->applyFactors(text);
- const size_t length = countNgrams(sentence, testcounts, kBleuNgramOrder);
+ string sentence = applyFactors(text);
+ const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);
const int reference_len = CalcReferenceLength(sid, length);
stats.push_back(reference_len);
@@ -176,8 +178,8 @@ int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length) {
}
}
-void BleuScorer::dump_counts(ostream* os,
- const NgramCounts& counts) const {
+void BleuScorer::DumpCounts(ostream* os,
+ const NgramCounts& counts) const {
for (NgramCounts::const_iterator it = counts.begin();
it != counts.end(); ++it) {
*os << "(";
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index d58277a41..2f2c2a153 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -23,6 +23,12 @@ class Reference;
class BleuScorer: public StatisticsBasedScorer
{
public:
+ enum ReferenceLengthType {
+ AVERAGE,
+ CLOSEST,
+ SHORTEST
+ };
+
explicit BleuScorer(const string& config = "");
~BleuScorer();
@@ -33,20 +39,19 @@ public:
int CalcReferenceLength(size_t sentence_id, size_t length);
-private:
- enum ReferenceLengthType {
- AVERAGE,
- SHORTEST,
- CLOSEST
- };
+ ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
+ void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
+
+ const std::vector<Reference*>& GetReferences() const { return m_references.get(); }
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
- size_t countNgrams(const string& line, NgramCounts& counts, unsigned int n);
+ size_t CountNgrams(const string& line, NgramCounts& counts, unsigned int n);
- void dump_counts(std::ostream* os, const NgramCounts& counts) const;
+ void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
+private:
ReferenceLengthType m_ref_length_type;
// reference translations.
diff --git a/mert/OptimizerFactoryTest.cpp b/mert/OptimizerFactoryTest.cpp
index 30734ab67..7fb1bdc0b 100644
--- a/mert/OptimizerFactoryTest.cpp
+++ b/mert/OptimizerFactoryTest.cpp
@@ -8,10 +8,10 @@
namespace {
inline void CheckBuildOptimizer(unsigned dim,
- const vector<unsigned>& to_optimize,
- const vector<parameter_t>& start,
- const string& type,
- unsigned int num_random) {
+ const vector<unsigned>& to_optimize,
+ const vector<parameter_t>& start,
+ const string& type,
+ unsigned int num_random) {
boost::scoped_ptr<Optimizer> optimizer(
OptimizerFactory::BuildOptimizer(dim, to_optimize, start, type, num_random));
BOOST_CHECK(optimizer.get() != NULL);
diff --git a/mert/Reference.h b/mert/Reference.h
index de5a6fecc..15e7acd69 100644
--- a/mert/Reference.h
+++ b/mert/Reference.h
@@ -3,12 +3,14 @@
#include <algorithm>
#include <climits>
-#include <iostream>
#include <vector>
#include "Ngram.h"
-// Refernece class is a reference translation for an output translation.
+/**
+ * Reference class represents reference translations for an output
+ * translation used in calculating BLEU score.
+ */
class Reference {
public:
// for m_length
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index 6b36c8f14..1ff3f0067 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -107,40 +107,40 @@ void Scorer::setFactors(const string& factors)
for(vector<string>::iterator it = factors_vec.begin(); it != factors_vec.end(); ++it)
{
int factor = atoi(it->c_str());
- m_factors.push_back(factor);
+ m_factors.push_back(factor);
}
}
/**
* Take the factored sentence and return the desired factors
*/
-string Scorer::applyFactors(const string& sentence)
+string Scorer::applyFactors(const string& sentence) const
{
if (m_factors.size() == 0) return sentence;
-
+
vector<string> tokens;
split(sentence, ' ', tokens);
-
- stringstream sstream;
+
+ stringstream sstream;
for (size_t i = 0; i < tokens.size(); ++i)
{
- if (tokens[i] == "") continue;
+ if (tokens[i] == "") continue;
vector<string> factors;
split(tokens[i], '|', factors);
int fsize = factors.size();
-
- if (i>0) sstream << " ";
-
+
+ if (i > 0) sstream << " ";
+
for (size_t j = 0; j < m_factors.size(); ++j)
{
int findex = m_factors[j];
if (findex < 0 || findex >= fsize) throw runtime_error("Factor index is out of range.");
- if (j>0) sstream << "|";
+ if (j > 0) sstream << "|";
sstream << factors[findex];
- }
+ }
}
return sstream.str();
}
diff --git a/mert/Scorer.h b/mert/Scorer.h
index 880f4e228..9a46f0c92 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -105,7 +105,7 @@ class Scorer
/**
* Take the factored sentence and return the desired factors
*/
- virtual string applyFactors(const string& sentece);
+ virtual string applyFactors(const string& sentece) const;
private:
class Encoder {