Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mert/BleuScorer.cpp 2
-rw-r--r-- mert/FeatureArray.cpp 2
-rw-r--r-- mert/FeatureData.h 2
-rw-r--r-- mert/FeatureStats.cpp 6
-rwxr-xr-x mert/Makefile 4
-rw-r--r-- mert/Optimizer.cpp 43
-rw-r--r-- mert/PerScorer.cpp 2
-rw-r--r-- mert/Point.cpp 25
-rw-r--r-- mert/Point.h 15
-rw-r--r-- mert/ScoreData.h 2
-rw-r--r-- mert/ScoreStats.cpp 4
-rw-r--r-- mert/Scorer.h 16
-rw-r--r-- mert/Util.cpp 2
-rwxr-xr-x mert/mert.cpp 23
14 files changed, 84 insertions, 64 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 13ef70051..731504741 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -90,7 +90,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles) {
}
-void BleuScorer::prepareStats(int sid, const string& text, ScoreStats& entry) {
+void BleuScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) {
//cerr << text << endl;
//dump_counts(*_refcounts[sid]);
if (sid >= _refcounts.size()) {
diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp
index d9b90654b..a9ee292d1 100644
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@@ -131,6 +131,6 @@ void FeatureArray::load(const std::string &file, bool bin)
void FeatureArray::merge(FeatureArray& e)
{
//dummy implementation
- for (int i=0; i< e.size(); i++)
+ for (unsigned int i=0; i< e.size(); i++)
add(e.get(i));
}
diff --git a/mert/FeatureData.h b/mert/FeatureData.h
index aacf81255..dc54c11be 100644
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@@ -48,7 +48,7 @@ public:
return array_[i];
#endif
}
- inline bool exists(int i){ return (i<array_.size())?true:false; }
+ inline bool exists(unsigned int i){ return (i<array_.size())?true:false; }
inline void setIndex(){ };
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5d85c5655..54ea76295 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -19,7 +19,7 @@ array_(stats.array_)
FeatureStats::FeatureStats(const size_t size)
{
- for(int i = 0; i < size; i++)
+ for(unsigned int i = 0; i < size; i++)
array_.push_back(0);
};
@@ -32,7 +32,7 @@ FeatureStats::FeatureStats(std::string &theString)
void FeatureStats::set(std::string &theString)
{
std::string substring, stringBuf;
- std::string::size_type loc;
+
int nextPound;
FeatureStatsType sc;
// TRACE_ERR("Decompounding string: " << theString << std::endl);
@@ -79,7 +79,7 @@ void FeatureStats::savetxt(std::ofstream& outFile)
outFile << " " << *i;
i++;
}
- outFile << std::endl;
+// outFile << std::endl;
}
diff --git a/mert/Makefile b/mert/Makefile
index b3c0c54d4..312093d36 100755
--- a/mert/Makefile
+++ b/mert/Makefile
@@ -9,9 +9,9 @@ Scorer.o \
Optimizer.o \
ifndef DEBUG
-CFLAGS=-O3 -DTRACE_ENABLE
+CFLAGS=-O3 -DTRACE_ENABLE -Wall
else
-CFLAGS=-DTRACE_ENABLE -g -DDEBUG
+CFLAGS=-DTRACE_ENABLE -g -DDEBUG -Wall
endif
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index cc054e3e9..bc171ffd1 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -30,17 +30,18 @@ Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start)
//warning: the init vector is a full set of parameters, of dimension pdim!
Point::pdim=Pd;
+
assert(start.size()==Pd);
Point::dim=i2O.size();
Point::optindices=i2O;
- if(Point::pdim<Point::dim){
- for(int i=0;i<Point::pdim;i++){
- int j;
+ if (Point::pdim<Point::dim){
+ for (unsigned int i=0;i<Point::pdim;i++){
+ unsigned int j;
for(j=0;j<Point::dim;j++)
- if(i==i2O[j])
- break;
+ if (i==i2O[j])
+ break;
if(j==Point::dim)//the index i wasnt found on optindices, it is a fixed index, we use the valu of hte start vector
- Point::fixedweights[i]=start[i];
+ Point::fixedweights[i]=start[i];
}
}
};
@@ -53,10 +54,9 @@ Optimizer::~Optimizer(){
statscore_t Optimizer::GetStatScore(const Point& param)const{
vector<unsigned> bests;
Get1bests(param,bests);
- //cerr << "1BESTS: ";
//copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," "));
statscore_t score = GetStatScore(bests);
- //cerr << " score = " << score << endl;
+ cerr << "1BESTS: " << param << " => " << score << endl;
return score;
};
@@ -91,11 +91,11 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
float min_int=0.0001;
//typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence
//list<threshold> thresholdlist;
-
+
map<float,diff_t> thresholdmap;
thresholdmap[MIN_FLOAT]=diff_t();
vector<unsigned> first1best;//the vector of nbests for x=-inf
- for(int S=0;S<size();S++){
+ for(unsigned int S=0;S<size();S++){
map<float,diff_t >::iterator previnserted=thresholdmap.begin();
//first we determine the translation with the best feature score for each sentence and each value of x
//cerr << "Sentence " << S << endl;
@@ -212,7 +212,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
statscore_t bestscore=MIN_FLOAT;
float bestx=MIN_FLOAT;
assert(scores.size()==thresholdmap.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best
- for(int sc=0;sc!=scores.size();sc++){
+ for(unsigned int sc=0;sc!=scores.size();sc++){
//cerr << "x=" << thrit->first << " => " << scores[sc] << endl;
if (scores[sc] > bestscore) {
//This is the score for the interval [lit2->first, (lit2+1)->first]
@@ -297,8 +297,12 @@ statscore_t Optimizer::Run(Point& P)const{
cerr<<"error size mismatch between FeatureData and Scorer"<<endl;
exit(2);
}
- if(verboselevel()>1)
- cerr<<"starting point: "<< P;
+
+ statscore_t score=GetStatScore(P);
+ P.score=score;
+
+ if(verboselevel()>1)
+ cerr<<"starting point: "<< P << " => "<< P.score << endl;
statscore_t s=TrueRun(P);
P.score=s;//just in case its not done in TrueRun
if (verboselevel()>1)
@@ -323,6 +327,9 @@ vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<
float SimpleOptimizer::eps=0.0001;
statscore_t SimpleOptimizer::TrueRun(Point& P)const{
+
+ statscore_t score=GetStatScore(P);
+
statscore_t prevscore=0;
statscore_t bestscore=MIN_FLOAT;
Point best;
@@ -335,13 +342,13 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{
Point linebest;
- for(int d=0;d<Point::getdim();d++){
+ for(unsigned int d=0;d<Point::getdim();d++){
if(verboselevel()>4){
// cerr<<"minimizing along direction "<<d<<endl;
cerr<<"starting point: " << P << " => " << prevscore << endl;
}
Point direction;
- for(int i=0;i<Point::getdim();i++)
+ for(unsigned int i=0;i<Point::getdim();i++)
direction[i];
direction[d]=1.0;
statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line
@@ -378,7 +385,7 @@ Just return a random point*/
statscore_t RandomOptimizer::TrueRun(Point& P)const{
vector<parameter_t> min(Point::getdim());
vector<parameter_t> max(Point::getdim());
- for(int d=0;d<Point::getdim();d++){
+ for(unsigned int d=0;d<Point::getdim();d++){
min[d]=0.0;
max[d]=1.0;
}
@@ -405,7 +412,7 @@ vector<string> OptimizerFactory::GetTypeNames(){
}
OptimizerFactory::OptType OptimizerFactory::GetOType(string type){
- int thetype;
+ unsigned int thetype;
if(typenames.empty())
SetTypeNames();
for(thetype=0;thetype<typenames.size();thetype++)
@@ -420,7 +427,7 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,ve
if(T==NOPTIMIZER){
cerr<<"Error: unknown Optimizer type "<<type<<endl;
cerr<<"Known Algorithm are:"<<endl;
- int thetype;
+ unsigned int thetype;
for(thetype=0;thetype<typenames.size();thetype++)
cerr<<typenames[thetype]<<endl;
throw ("unknown Optimizer Type");
diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp
index d766874a9..6d63de301 100644
--- a/mert/PerScorer.cpp
+++ b/mert/PerScorer.cpp
@@ -32,7 +32,7 @@ void PerScorer::setReferenceFiles(const vector<string>& referenceFiles) {
}
-void PerScorer::prepareStats(int sid, const string& text, ScoreStats& entry) {
+void PerScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) {
if (sid >= _reflengths.size()) {
stringstream msg;
msg << "Sentence id (" << sid << ") not found in reference set";
diff --git a/mert/Point.cpp b/mert/Point.cpp
index b61bd1f73..af9316679 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -16,17 +16,17 @@ unsigned Point::ncall=0;
void Point::Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max){
assert(min.size()==Point::dim);
assert(max.size()==Point::dim);
- for (int i=0; i<size(); i++)
+ for (unsigned int i=0; i<size(); i++)
operator[](i)= min[i] + (float)random()/(float)RAND_MAX * (float)(max[i]-min[i]);
}
void Point::Normalize(){
parameter_t norm=0.0;
- for (int i=0; i<size(); i++)
+ for (unsigned int i=0; i<size(); i++)
norm+= operator[](i)*operator[](i);
if(norm!=0.0){
norm=sqrt(norm);
- for (int i=0; i<size(); i++)
+ for (unsigned int i=0; i<size(); i++)
operator[](i)/=norm;
}
}
@@ -34,11 +34,11 @@ void Point::Normalize(){
//Can initialize from a vector of dim or pdim
Point::Point(const vector<parameter_t>& init):vector<parameter_t>(Point::dim){
if(init.size()==dim){
- for (int i=0; i<Point::dim; i++)
+ for (unsigned int i=0; i<Point::dim; i++)
operator[](i)=init[i];
}else{
assert(init.size()==pdim);
- for (int i=0; i<Point::dim; i++)
+ for (unsigned int i=0; i<Point::dim; i++)
operator[](i)=init[optindices[i]];
}
};
@@ -77,10 +77,11 @@ Point Point::operator*(float l)const{
ostream& operator<<(ostream& o,const Point& P){
vector<parameter_t> w=P.GetAllWeights();
- for(int i=0;i<Point::pdim;i++)
- o<<w[i]<<' ';
-// o<<endl;
- return o;
+// o << "[" << Point::pdim << "] ";
+ for(unsigned int i=0;i<Point::pdim;i++)
+ o << w[i] << " ";
+// o << "=> " << P.GetScore();
+ return o;
};
vector<parameter_t> Point::GetAllWeights()const{
@@ -89,10 +90,10 @@ vector<parameter_t> Point::GetAllWeights()const{
w=*this;
}else{
w.resize(pdim);
- for (int i=0; i<size(); i++)
+ for (unsigned int i=0; i<size(); i++)
w[optindices[i]]=operator[](i);
- for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++)
- w[it->first]=it->second;
+ for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++)
+ w[it->first]=it->second;
}
return w;
};
diff --git a/mert/Point.h b/mert/Point.h
index e7f48d897..ac6dd6ccb 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -14,17 +14,17 @@ class Point:public vector<parameter_t>{
friend class Optimizer;
private:
/**The indices over which we optimize*/
- static vector<unsigned> optindices;
+ static vector<unsigned int> optindices;
/**dimension of optindices and of the parent vector*/
- static unsigned dim;
+ static unsigned int dim;
/**fixed weights in case of partial optimzation*/
- static map<unsigned,parameter_t> fixedweights;
+ static map<unsigned int,parameter_t> fixedweights;
/**total size of the parameter space; we have pdim=FixedWeight.size()+optinidices.size()*/
- static unsigned pdim;
- static unsigned ncall;
+ static unsigned int pdim;
+ static unsigned int ncall;
public:
- static unsigned getdim(){return dim;}
- static unsigned getpdim(){return pdim;}
+ static unsigned int getdim(){return dim;}
+ static unsigned int getpdim(){return pdim;}
static bool OptimizeAll(){return fixedweights.empty();};
statscore_t score;
Point():vector<parameter_t>(dim){};
@@ -39,6 +39,7 @@ class Point:public vector<parameter_t>{
void Normalize();
/**return a vector of size pdim where all weights have been put(including fixed ones)*/
vector<parameter_t> GetAllWeights()const;
+ statscore_t GetScore()const { return score; };
};
#endif
diff --git a/mert/ScoreData.h b/mert/ScoreData.h
index c2c046c45..d8d4a80ce 100644
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@@ -38,7 +38,7 @@ public:
inline void clear() { array_.clear(); }
inline ScoreArray get(int i){ return array_.at(i); }
- inline bool exists(int i){ return (i<array_.size())?true:false; }
+ inline bool exists(unsigned int i){ return (i<array_.size())?true:false; }
inline ScoreStats get(int i, int j){ return array_.at(i).get(j); }
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index 8f571aac3..f62f9df45 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -19,7 +19,7 @@ array_(stats.array_)
ScoreStats::ScoreStats(const size_t size)
{
- for(int i = 0; i < size; i++)
+ for(unsigned int i = 0; i < size; i++)
array_.push_back(0);
};
@@ -32,7 +32,7 @@ ScoreStats::ScoreStats(std::string &theString)
void ScoreStats::set(std::string &theString)
{
std::string substring, stringBuf;
- std::string::size_type loc;
+
int nextPound;
ScoreStatsType sc;
while (!theString.empty()){
diff --git a/mert/Scorer.h b/mert/Scorer.h
index bef8b84b0..30e4fc641 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -28,8 +28,9 @@ class Scorer {
public:
- Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {}
-
+ Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {};
+ virtual ~Scorer(){};
+
/**
* set the reference files. This must be called before prepareStats.
**/
@@ -141,8 +142,9 @@ class Scorer {
class StatisticsBasedScorer : public Scorer {
public:
- StatisticsBasedScorer(const string& name): Scorer(name) {}
- virtual void score(const candidates_t& candidates, const diffs_t& diffs,
+ StatisticsBasedScorer(const string& name): Scorer(name) {}
+ ~StatisticsBasedScorer(){};
+ virtual void score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores);
protected:
@@ -162,7 +164,7 @@ class BleuScorer: public StatisticsBasedScorer {
public:
BleuScorer() : StatisticsBasedScorer("BLEU"),_refLengthStrategy(SHORTEST) {}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
- virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
+ virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry);
static const int LENGTH;
protected:
@@ -171,6 +173,7 @@ class BleuScorer: public StatisticsBasedScorer {
private:
//no copy
BleuScorer(const BleuScorer&);
+ ~BleuScorer(){};
BleuScorer& operator=(const BleuScorer&);
@@ -229,7 +232,7 @@ class PerScorer: public StatisticsBasedScorer {
public:
PerScorer() : StatisticsBasedScorer("PER") {}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
- virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
+ virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry);
protected:
@@ -239,6 +242,7 @@ class PerScorer: public StatisticsBasedScorer {
//no copy
PerScorer(const PerScorer&);
+ ~PerScorer(){};
PerScorer& operator=(const PerScorer&);
// data extracted from reference files
diff --git a/mert/Util.cpp b/mert/Util.cpp
index 4531a8645..aff734e1b 100644
--- a/mert/Util.cpp
+++ b/mert/Util.cpp
@@ -22,7 +22,7 @@ int setverboselevel(int v){
int getNextPound(std::string &theString, std::string &substring, const std::string delimiter)
{
- int pos = 0;
+ unsigned int pos = 0;
//skip all occurrences of delimiter
while ( pos == 0 )
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 902c99ae8..70297493c 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -31,8 +31,9 @@ void usage(void) {
cerr<<"[-t\tthe optimizer(default powell)]"<<endl;
cerr<<"[--sctype|-s] the scorer type (default BLEU)"<<endl;
cerr<<"[--scfile|-S] the scorer data file (default score.data)"<<endl;
- cerr<<"[--ffile|-F] the feature data file data file (default feature.data)"<<endl;
- cerr<<"[-v] verbose level"<<endl;
+ cerr<<"[--ffile|-F] the feature data file (default feature.data)"<<endl;
+ cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl;
+ cerr<<"[-v] verbose level"<<endl;
exit(1);
}
@@ -45,7 +46,8 @@ static struct option long_options[] =
{"sctype",1,0,'s'},
{"scfile",1,0,'S'},
{"ffile",1,0,'F'},
- {"verbose",1,0,'v'},
+ {"ifile",1,0,'i'},
+ {"verbose",1,0,'v'},
{0, 0, 0, 0}
};
int option_index;
@@ -58,7 +60,8 @@ int main (int argc, char **argv) {
string scorertype("BLEU");
string scorerfile("statscore.data");
string featurefile("features.data");
- vector<unsigned> tooptimize;
+ string initfile("init.opt");
+ vector<unsigned> tooptimize;
vector<parameter_t> start;
while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) {
switch (c) {
@@ -80,6 +83,9 @@ int main (int argc, char **argv) {
case 'F':
featurefile=string(optarg);
break;
+ case 'i':
+ initfile=string(optarg);
+ break;
case 'v':
setverboselevel(strtol(optarg,NULL,10));
break;
@@ -98,9 +104,9 @@ int main (int argc, char **argv) {
for(i=0;i<pdim;i++)
tooptimize[i]=i;
}
- ifstream opt("init.opt");
+ ifstream opt(initfile.c_str());
if(opt.fail()){
- cerr<<"could not open init.opt"<<endl;
+ cerr<<"could not open initfile: " << initfile << endl;
exit(3);
}
start.resize(pdim);//to do:read from file
@@ -108,7 +114,7 @@ int main (int argc, char **argv) {
for( j=0;j<pdim&&!opt.fail();j++)
opt>>start[j];
if(j<pdim){
- cerr<<"error could not initialize start point with init.opt"<<endl;
+ cerr<<"error could not initialize start point with " << initfile << endl;
exit(3);
}
@@ -136,7 +142,7 @@ int main (int argc, char **argv) {
vector<parameter_t> min(Point::getdim());
vector<parameter_t> max(Point::getdim());
- for(int d=0;d<Point::getdim();d++){
+ for(unsigned int d=0;d<Point::getdim();d++){
min[d]=0.0;
max[d]=1.0;
}
@@ -158,6 +164,7 @@ int main (int argc, char **argv) {
if(ntry>1)
cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl;
cerr<<"best score: "<<best<<endl;
+ cerr << "Best point: " << bestP << " => " << best << endl;
ofstream res("weights.txt");
res<<bestP<<endl;
timer.stop("Stopping...");