Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: nicolabertoldi <nicolabertoldi@1f5c12ca-751b-0410-a591-d2e778427230> 2009-08-05 19:38:35 +0400
committer: nicolabertoldi <nicolabertoldi@1f5c12ca-751b-0410-a591-d2e778427230> 2009-08-05 19:38:35 +0400
commit: 0393183eb489aa4a7334cbf0af37cfa60b4957db (patch)
tree: 8e05ca63754b96158d7f035251dd3bd5fbb86954 /mert
parent: c50596d1e18894ee0f81dcdd8cf1569a2238d75b (diff)
The mert software now supports different reference-length policies (shortest, average, and closest, which is the default) and handles case information (the default is to preserve case). Note that both defaults differ from the previous version, which used the shortest reference length and was case-insensitive.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2459 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'mert')
-rw-r--r-- mert/BleuScorer.cpp 14
-rw-r--r-- mert/BleuScorer.h 21
-rw-r--r-- mert/Scorer.h 14
-rwxr-xr-x mert/mert.cpp 3
4 files changed, 42 insertions, 10 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index f2e83dc5c..5f6349b55 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -53,7 +53,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles) {
string line;
size_t sid = 0; //sentence counter
while (getline(refin,line)) {
- //§cerr << line << endl;
+ //cerr << line << endl;
if (i == 0) {
counts_t* counts = new counts_t(); //these get leaked
_refcounts.push_back(counts);
@@ -116,13 +116,19 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
stats.push_back(mean);
} else if (_refLengthStrategy == BLEU_CLOSEST) {
int min_diff = INT_MAX;
+ int min_idx = 0;
for (size_t i = 0; i < _reflengths[sid].size(); ++i) {
int reflength = _reflengths[sid][i];
- if (abs(reflength-(int)length) < abs(min_diff)) {
+ if (abs(reflength-(int)length) < abs(min_diff)) { //look for the closest reference
min_diff = reflength-length;
- }
+ min_idx = i;
+ }else if (abs(reflength-(int)length) == abs(min_diff)) { // if two references has the same closest length, take the shortest
+ if (reflength < (int)_reflengths[sid][min_idx]){
+ min_idx = i;
+ }
+ }
}
- stats.push_back(length + min_diff);
+ stats.push_back(_reflengths[sid][min_idx]);
} else {
throw runtime_error("Unsupported reflength strategy");
}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index fc5d5f332..cd2471b55 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -25,7 +25,26 @@ enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST };
**/
class BleuScorer: public StatisticsBasedScorer {
public:
- BleuScorer(const string& config = "") : StatisticsBasedScorer("BLEU",config),_refLengthStrategy(BLEU_SHORTEST) {}
+ BleuScorer(const string& config = "") : StatisticsBasedScorer("BLEU",config),_refLengthStrategy(BLEU_CLOSEST) {
+ //configure regularisation
+ static string KEY_REFLEN = "reflen";
+ static string REFLEN_AVERAGE = "average";
+ static string REFLEN_SHORTEST = "shortest";
+ static string REFLEN_CLOSEST = "closest";
+
+
+ string reflen = getConfig(KEY_REFLEN,REFLEN_CLOSEST);
+ if (reflen == REFLEN_AVERAGE) {
+ _refLengthStrategy = BLEU_AVERAGE;
+ } else if (reflen == REFLEN_SHORTEST) {
+ _refLengthStrategy = BLEU_SHORTEST;
+ } else if (reflen == REFLEN_CLOSEST) {
+ _refLengthStrategy = BLEU_CLOSEST;
+ } else {
+ throw runtime_error("Unknown reference length strategy: " + reflen);
+ }
+ cerr << "Using reference length strategy: " << reflen << endl;
+}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
static const int LENGTH;
diff --git a/mert/Scorer.h b/mert/Scorer.h
index 9be1047a2..ebe80c3de 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -31,7 +31,7 @@ class Scorer {
public:
- Scorer(const string& name, const string& config): _name(name), _scoreData(0),_preserveCase(false){
+ Scorer(const string& name, const string& config): _name(name), _scoreData(0), _preserveCase(true){
cerr << "Scorer config string: " << config << endl;
size_t start = 0;
while (start < config.size()) {
@@ -196,9 +196,12 @@ class StatisticsBasedScorer : public Scorer {
//configure regularisation
static string KEY_TYPE = "regtype";
static string KEY_WINDOW = "regwin";
+ static string KEY_CASE = "case";
static string TYPE_NONE = "none";
static string TYPE_AVERAGE = "average";
static string TYPE_MINIMUM = "min";
+ static string TRUE = "true";
+ static string FALSE = "false";
string type = getConfig(KEY_TYPE,TYPE_NONE);
@@ -217,8 +220,15 @@ class StatisticsBasedScorer : public Scorer {
_regularisationWindow = atoi(window.c_str());
cerr << "Using scorer regularisation window: " << _regularisationWindow << endl;
+ string preservecase = getConfig(KEY_CASE,TRUE);
+ if (preservecase == TRUE) {
+ _preserveCase = true;
+ }else if (preservecase == FALSE) {
+ _preserveCase = false;
+ }
+ cerr << "Using case preservation: " << _preserveCase << endl;
+
-
}
~StatisticsBasedScorer(){};
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 203906b1b..8ddb8c5b3 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -242,7 +242,4 @@ int main (int argc, char **argv) {
res<<bestP<<endl;
PrintUserTime("Stopping...");
- /*
- timer.stop("Stopping...");
- */
}