Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author bhaddow <bhaddow@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-15 20:03:49 +0400
committer bhaddow <bhaddow@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-15 20:03:49 +0400
commit c0643d47f24424766b50b8de226ae50667642af4 (patch)
tree 6158a8a47f8c617ca5f0212df8fbfae866433ab6 /mert
parent f320cf51749f26658533e7d5ed33fff41f05a0e0 (diff)
Add scorer factory. Fix compile error in Optimizer
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1706 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'mert')
-rw-r--r-- mert/BleuScorer.cpp 2
-rw-r--r-- mert/BleuScorer.h 102
-rwxr-xr-x mert/Makefile 2
-rw-r--r-- mert/Optimizer.cpp 24
-rw-r--r-- mert/Optimizer.h 2
-rw-r--r-- mert/PerScorer.cpp 2
-rw-r--r-- mert/PerScorer.h 50
-rw-r--r-- mert/Scorer.cpp 2
-rw-r--r-- mert/Scorer.h 121
-rw-r--r-- mert/extractor.cpp 2
-rw-r--r-- mert/test_scorer.cpp 3
11 files changed, 138 insertions, 174 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index f3cbdd320..d07f79008 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -1,4 +1,4 @@
-#include "BleuScorer.h"
+#include "Scorer.h"
const int BleuScorer::LENGTH = 4;
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
deleted file mode 100644
index ce541345a..000000000
--- a/mert/BleuScorer.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef __BLEUSCORER_H__
-#define __BLEUSCORER_H__
-
-#include <cctype>
-#include <cmath>
-#include <ctime>
-#include <stdexcept>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Scorer.h"
-#include "ScoreData.h"
-
-using namespace std;
-
-enum BleuReferenceLengthStrategy { AVERAGE, SHORTEST, CLOSEST };
-
-
-/**
- * Bleu scoring
- **/
-class BleuScorer: public StatisticsBasedScorer {
- public:
- BleuScorer() : StatisticsBasedScorer("BLEU"),_refLengthStrategy(SHORTEST) {}
- virtual void setReferenceFiles(const vector<string>& referenceFiles);
- virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
- static const int LENGTH;
-
- protected:
- float calculateScore(const vector<int>& comps);
-
- private:
- //no copy
- BleuScorer(const BleuScorer&);
- BleuScorer& operator=(const BleuScorer&);
-
-
- //Used to construct the ngram map
- struct CompareNgrams {
- int operator() (const vector<int>& a, const vector<int>& b) {
- /*
- cerr << "compare:";
- copy(a.begin(), a.end(), ostream_iterator<int>(cerr," "));
- cerr << " with ";
- copy(b.begin(), b.end(), ostream_iterator<int>(cerr," "));
- cerr << endl;
- */
- size_t i;
- size_t as = a.size();
- size_t bs = b.size();
- for (i = 0; i < as && i < bs; ++i) {
- if (a[i] < b[i]) {
- //cerr << "true" << endl;
- return true;
- }
- if (a[i] > b[i]) {
- //cerr << "false" << endl;
- return false;
- }
- }
- //entries are equal, shortest wins
- /*if (as < bs) {
- cerr << "true" << endl;
- } else {
- cerr << "false" << endl;
- }*/
- return as < bs;;
- }
- };
-
- typedef map<vector<int>,int,CompareNgrams> counts_t;
- typedef map<vector<int>,int,CompareNgrams>::iterator counts_it;
-
- typedef vector<counts_t*> refcounts_t;
-
- size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
-
- void dump_counts(counts_t& counts) {
- for (counts_it i = counts.begin(); i != counts.end(); ++i) {
- cerr << "(";
- copy(i->first.begin(), i->first.end(), ostream_iterator<int>(cerr," "));
- cerr << ") " << i->second << ", ";
- }
- cerr << endl;
- }
- BleuReferenceLengthStrategy _refLengthStrategy;
-
- // data extracted from reference files
- refcounts_t _refcounts;
- vector<vector<size_t> > _reflengths;
-};
-
-
-
-
-#endif //__BLEUSCORER_H
diff --git a/mert/Makefile b/mert/Makefile
index eb52da940..5c2d5cdf5 100755
--- a/mert/Makefile
+++ b/mert/Makefile
@@ -27,7 +27,7 @@ test_scorer
clean:
rm -f *.o
-%.o : %.cpp %.h
+%.o : %.cpp
$(GCC) -c $(CFLAGS) $< -o $@
feature_extractor: $(OBJS) feature_extractor.cpp
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 43973e27b..c4520272b 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -1,14 +1,16 @@
#include <cassert>
-#include "Optimizer.h"
#include <vector>
-#include<list>
+#include <limits>
+#include <list>
#include <cfloat>
#include <iostream>
+#include "Optimizer.h"
+
using namespace std;
-static const float MINFLOAT=numeric_limits<float>::min();
-static const float MAXFLOAT=numeric_limits<float>::max();
+static const float MIN_FLOAT=numeric_limits<float>::min();
+static const float MAX_FLOAT=numeric_limits<float>::max();
enum OptType{POWELL=0,NOPTIMIZER};//Add new optimizetr here
@@ -74,7 +76,7 @@ statscore Optimizer::GetStatScore(const Point& param)const{
/**compute the intersection of 2 lines*/
float intersect (float m1, float b1,float m2,float b2){
if(m1==m2)
- return MAXFLOAT;//parrallel lines
+ return MAX_FLOAT;//parrallel lines
return((b2-b1)/(m1-m2));
}
@@ -85,7 +87,7 @@ statscore Optimizer::LineOptimize(const Point& origin,const Point& direction,Poi
typedef pair<float,vector<unsigned> > threshold;
list<threshold> thresholdlist;
- thresholdlist.push_back(pair<float,vector<unsigned> >(MINFLOAT,vector<unsigned>()));
+ thresholdlist.push_back(pair<float,vector<unsigned> >(MIN_FLOAT,vector<unsigned>()));
for(int S=0;S<size();S++){
//first we determine the translation with the best feature score for each sentence and each value of x
@@ -111,7 +113,7 @@ statscore Optimizer::LineOptimize(const Point& origin,const Point& direction,Poi
index=it->second;//the highest line is the one with he highest f0
}
--it;//we went one step too far in the while loop
- onebest.push_back(pair<float,unsigned>(MINFLOAT,index));//first 1best is the lowest gradient.
+ onebest.push_back(pair<float,unsigned>(MIN_FLOAT,index));//first 1best is the lowest gradient.
//now we look for the intersections points indicating a change of 1 best
//we use the fact that the function is convex, which means that the gradient can only go up
while(it!=gradient.end()){
@@ -199,7 +201,7 @@ statscore Optimizer::LineOptimize(const Point& origin,const Point& direction,Poi
//last thing to do is compute the Stat score (ie BLEU) and find the minimum
list<threshold>::iterator best;
list<threshold>::iterator lit2;
- statscore bestscore=MINFLOAT;
+ statscore bestscore=MIN_FLOAT;
for(lit2=thresholdlist.begin();lit2!=thresholdlist.end();lit2){
assert(lit2->second.size()==FData->size());
statscore cur=GetStatScore(lit2->second);
@@ -222,7 +224,7 @@ void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const{
bests.resize(size());
for(unsigned i=0;i<size();i++){
- float bestfs=MINFLOAT;
+ float bestfs=MIN_FLOAT;
unsigned idx=0;
unsigned j;
for(j=0;j<FData->get(i).size();j++){
@@ -252,8 +254,8 @@ statscore Optimizer::Run(Point& P)const{
}
statscore SimpleOptimizer::TrueRun(Point& P)const{
- statscore prevscore=MAXFLOAT;
- statscore bestscore=MAXFLOAT;
+ statscore prevscore=MAX_FLOAT;
+ statscore bestscore=MAX_FLOAT;
do{
Point best;
Point linebest;
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index d479c5020..c3000e52c 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -1,6 +1,6 @@
#ifndef OPTIMIZER_H
#define OPTIMIZER_H
-#include<vector>
+#include <vector>
#include "FeatureStats.h"
#include "FeatureData.h"
#include "FeatureArray.h"
diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp
index be5e29aef..d766874a9 100644
--- a/mert/PerScorer.cpp
+++ b/mert/PerScorer.cpp
@@ -1,4 +1,4 @@
-#include "PerScorer.h"
+#include "Scorer.h"
void PerScorer::setReferenceFiles(const vector<string>& referenceFiles) {
diff --git a/mert/PerScorer.h b/mert/PerScorer.h
deleted file mode 100644
index 868326638..000000000
--- a/mert/PerScorer.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef __PERSCORER_H__
-#define __PERSCORER_H__
-
-#include <stdexcept>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Scorer.h"
-#include "ScoreData.h"
-
-using namespace std;
-
-
-/**
- * Implementation of position-independent word error rate. This is defined
- * as 1 - (correct - max(0,output_length - ref_length)) / ref_length
- * In fact, we ignore the " 1 - " so that it can be maximised.
- **/
-class PerScorer: public StatisticsBasedScorer {
- public:
- PerScorer() : StatisticsBasedScorer("PER") {}
- virtual void setReferenceFiles(const vector<string>& referenceFiles);
- virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
-
- protected:
-
- virtual float calculateScore(const vector<int>& comps) ;
-
- private:
-
- //no copy
- PerScorer(const PerScorer&);
- PerScorer& operator=(const PerScorer&);
-
- // data extracted from reference files
- vector<size_t> _reflengths;
- vector<multiset<int> > _reftokens;
-};
-
-
-
-
-#endif //__PERSCORER_H
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index 329a401d8..400c857ff 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -1,7 +1,5 @@
#include "Scorer.h"
-
-
void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
scores_t& scores) {
if (!_scoreData) {
diff --git a/mert/Scorer.h b/mert/Scorer.h
index cde8a238f..294cb8533 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -2,7 +2,11 @@
#define __SCORER_H__
#include <algorithm>
+#include <cmath>
#include <iostream>
+#include <iterator>
+#include <set>
+#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
@@ -29,7 +33,6 @@ class Scorer {
Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {}
-
/**
* set the reference files. This must be called before prepareStats.
**/
@@ -152,4 +155,120 @@ class StatisticsBasedScorer : public Scorer {
};
+enum BleuReferenceLengthStrategy { AVERAGE, SHORTEST, CLOSEST };
+
+
+/**
+ * Bleu scoring
+ **/
+class BleuScorer: public StatisticsBasedScorer {
+ public:
+ BleuScorer() : StatisticsBasedScorer("BLEU"),_refLengthStrategy(SHORTEST) {}
+ virtual void setReferenceFiles(const vector<string>& referenceFiles);
+ virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
+ static const int LENGTH;
+
+ protected:
+ float calculateScore(const vector<int>& comps);
+
+ private:
+ //no copy
+ BleuScorer(const BleuScorer&);
+ BleuScorer& operator=(const BleuScorer&);
+
+
+ //Used to construct the ngram map
+ struct CompareNgrams {
+ int operator() (const vector<int>& a, const vector<int>& b) {
+ size_t i;
+ size_t as = a.size();
+ size_t bs = b.size();
+ for (i = 0; i < as && i < bs; ++i) {
+ if (a[i] < b[i]) {
+ //cerr << "true" << endl;
+ return true;
+ }
+ if (a[i] > b[i]) {
+ //cerr << "false" << endl;
+ return false;
+ }
+ }
+ //entries are equal, shortest wins
+ return as < bs;;
+ }
+ };
+
+ typedef map<vector<int>,int,CompareNgrams> counts_t;
+ typedef map<vector<int>,int,CompareNgrams>::iterator counts_it;
+
+ typedef vector<counts_t*> refcounts_t;
+
+ size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
+
+ void dump_counts(counts_t& counts) {
+ for (counts_it i = counts.begin(); i != counts.end(); ++i) {
+ cerr << "(";
+ copy(i->first.begin(), i->first.end(), ostream_iterator<int>(cerr," "));
+ cerr << ") " << i->second << ", ";
+ }
+ cerr << endl;
+ }
+ BleuReferenceLengthStrategy _refLengthStrategy;
+
+ // data extracted from reference files
+ refcounts_t _refcounts;
+ vector<vector<size_t> > _reflengths;
+};
+
+
+
+
+/**
+ * Implementation of position-independent word error rate. This is defined
+ * as 1 - (correct - max(0,output_length - ref_length)) / ref_length
+ * In fact, we ignore the " 1 - " so that it can be maximised.
+ **/
+class PerScorer: public StatisticsBasedScorer {
+ public:
+ PerScorer() : StatisticsBasedScorer("PER") {}
+ virtual void setReferenceFiles(const vector<string>& referenceFiles);
+ virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
+
+ protected:
+
+ virtual float calculateScore(const vector<int>& comps) ;
+
+ private:
+
+ //no copy
+ PerScorer(const PerScorer&);
+ PerScorer& operator=(const PerScorer&);
+
+ // data extracted from reference files
+ vector<size_t> _reflengths;
+ vector<multiset<int> > _reftokens;
+};
+
+
+class ScorerFactory {
+
+ public:
+ vector<string> getTypes() {
+ vector<string> types;
+ types.push_back(string("BLEU"));
+ types.push_back(string("PER"));
+ return types;
+ }
+
+ Scorer* getScorer(string type) {
+ if (type == "BLEU") {
+ return new BleuScorer();
+ } else if (type == "PER") {
+ return new PerScorer();
+ } else {
+ throw runtime_error("Unknown scorer type: " + type);
+ }
+ }
+};
+
#endif //__SCORER_H
diff --git a/mert/extractor.cpp b/mert/extractor.cpp
index 524dcff96..bb47bf0d2 100644
--- a/mert/extractor.cpp
+++ b/mert/extractor.cpp
@@ -7,8 +7,6 @@ using namespace std;
#include "Util.h"
#include "Scorer.h"
-#include "BleuScorer.h"
-#include "PerScorer.h"
#include "Data.h"
int main (int argc, char * argv[]) {
diff --git a/mert/test_scorer.cpp b/mert/test_scorer.cpp
index 8e6b70ffb..bc1ac481f 100644
--- a/mert/test_scorer.cpp
+++ b/mert/test_scorer.cpp
@@ -2,8 +2,7 @@
#include <vector>
#include "ScoreData.h"
-#include "BleuScorer.h"
-#include "PerScorer.h"
+#include "Scorer.h"
using namespace std;