diff options
author | jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> | 2008-05-16 14:57:24 +0400 |
---|---|---|
committer | jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> | 2008-05-16 14:57:24 +0400 |
commit | e9df80a05c442b8b22aad79206872b90ab8c97c1 (patch) | |
tree | 74ddf98cf565dad31ef72c7f2219ae795c4d08d6 /mert | |
parent | bb42cb0dd591fa91d3ae63a2380a1ba6caed83a2 (diff) |
bugfixes
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1726 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'mert')
-rw-r--r-- | mert/Optimizer.cpp | 124 | ||||
-rw-r--r-- | mert/Optimizer.h | 27 | ||||
-rw-r--r-- | mert/Point.cpp | 23 | ||||
-rw-r--r-- | mert/Point.h | 12 | ||||
-rw-r--r-- | mert/Types.h | 2 | ||||
-rwxr-xr-x | mert/mert.cpp | 101 |
6 files changed, 193 insertions, 96 deletions
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp index 68a37b29d..a150b0efe 100644 --- a/mert/Optimizer.cpp +++ b/mert/Optimizer.cpp @@ -12,24 +12,6 @@ using namespace std; static const float MIN_FLOAT=numeric_limits<float>::min(); static const float MAX_FLOAT=numeric_limits<float>::max(); -enum OptType{POWELL=0,NOPTIMIZER};//Add new optimizetr here - -string names[NOPTIMIZER]={string("powell")}; - -Optimizer *BuildOptimizer(unsigned dim,vector<unsigned>to,vector<parameter_t>s,string type){ - int thetype; - for(thetype=0;thetype<(int)NOPTIMIZER;thetype++) - if(names[thetype]==type) - break; - switch((OptType)thetype){ - case POWELL: - return new SimpleOptimizer(dim,to,s); - case NOPTIMIZER: - cerr<<"error unknwon optimizer"<<type<<endl; - return NULL; - } - return NULL;//Should never go there -}; void Optimizer::SetScorer(Scorer *S){ @@ -106,8 +88,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P multimap<float,unsigned>::iterator it=gradient.begin(); float smallest=it->first;//smallest gradient unsigned index=it->second; - float biggestf0=f0[index]; - //several candidates can have the lowest slope (eg for word penalty where the gradient is an integer) + //several candidates can have the lowest slope (eg for word penalty where the gradient is an integer ) it++; while(it!=gradient.end()&&it->first==smallest){ if(f0[it->second]>f0[index]) @@ -129,7 +110,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P int d=0; for(;it2!=gradient.end();it2++){ //cerr<<"--"<<d++<<' '<<it2->first<<' '<<it2->second<<endl; - //look for all candidate with a gradient bigger than the current one and fond the one with the leftmost intersection + //look for all candidate with a gradient bigger than the current one and find the one with the leftmost intersection float curintersect=intersect(m,b,it2->first,f0[it2->second]); if(curintersect<leftmostx){ //we have found and intersection to the left of the leftmost we had so far. @@ -148,7 +129,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P right of the penultimate one. In that case, the Points would switch places in the sort, resulting in a bogus score for that interval. */ - if((leftmostx-onebest.back().first)<min_int) + if(abs(leftmostx-onebest.back().first)<min_int) onebest.back()=pair<float,unsigned>(leftmostx,leftmost->second);//leftmost->first is the gradient, we are interested in the value of the intersection else //normal case: we add a new threshold onebest.push_back(pair<float,unsigned>(leftmostx,leftmost->second)); @@ -194,6 +175,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P //it contains a list of all the parameter_ts where the function changed its value, along with the nbest list for the interval after each threshold //last thing to do is compute the Stat score (ie BLEU) and find the minimum + // cerr<<"thesholdlist size"<<thresholdlist.size()<<endl; list<threshold>::iterator lit2=thresholdlist.begin(); ++lit2; @@ -204,31 +186,33 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P lit2=thresholdlist.begin(); statscore_t bestscore=MIN_FLOAT; - float bestx; + float bestx=MIN_FLOAT; assert(scores.size()==thresholdlist.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best - for(int sc=0;sc!=scores.size();sc++,lit2++){ -//We move the list iterator and the vector index at the same time -//because we need to get the value of lambda back from the list + for(int sc=0;sc!=scores.size();sc++){ + lit2++;//We move the list iterator and the vector index at the same time + //because we need to get the value of lambda back from the list + //cerr<<lit2->first<<endl; if(scores[sc]>bestscore){ bestscore=scores[sc]; if(lit2!=thresholdlist.end()){ //we dont want to stay exactly at the threshold where the function is discontinuous so we move just a little to the right + //but we dont want to cross a threshold bestx=lit2->first; lit2++; - bestx+=lit2->first; - bestx/=2.0; + if(lit2->first-bestx>0.0001)//distance to next threshold + bestx+=0.0001; lit2--; }else - bestx=lit2->first+0.001; + bestx=lit2->first+0.0001; } } - - //finally! we manage to extract the best score; - //nowwe convert bestx (position on the line) to a point! - - bestpoint=direction*bestx+origin; - bestpoint.score=bestscore; - return bestscore; + //finally! we manage to extract the best score; + //now we convert bestx (position on the line) to a point! + if(verboselevel()>3) + cerr<<"end Lineopt, bestx="<<bestx<<endl; + bestpoint=direction*bestx+origin; + bestpoint.score=bestscore; + return bestscore; }; void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const{ @@ -293,7 +277,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{ statscore_t prevscore=MAX_FLOAT; statscore_t bestscore=MIN_FLOAT; - int nrun; + int nrun=0; do{ ++nrun; if(verboselevel()>2) @@ -312,15 +296,81 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{ if(curscore>bestscore){ bestscore=curscore; best=linebest; + if(verboselevel()>3) cerr<<"new best d"<<d<<" ("<<nrun<<")"<<endl; } } P=best;//update the current vector with the best points on all line tested }while(bestscore-prevscore<eps); + if(verboselevel()>2){ + cerr<<"end Powell Algo, nrun="<<nrun<<endl; + cerr<<"last diff="<<bestscore-prevscore<<endl; +} return bestscore; } +/**RandomOptimizer to use as beaseline and test.\n +Just return a random point*/ +statscore_t RandomOptimizer::TrueRun(Point& P)const{ + vector<parameter_t> min(Point::getdim()); + vector<parameter_t> max(Point::getdim()); + for(int d=0;d<Point::getdim();d++){ + min[d]=0.0; + max[d]=1.0; + } + P.Randomize(min,max); + statscore_t score=GetStatScore(P); + P.score=score; + return score; +} +//-------------------------------------- +vector<string> OptimizerFactory::typenames; +void OptimizerFactory::SetTypeNames(){ + if(typenames.empty()){ + typenames.resize(NOPTIMIZER); + typenames[POWELL]="powell"; + typenames[RANDOM]="random"; + //add new type there + } +} +vector<string> OptimizerFactory::GetTypeNames(){ + if(typenames.empty()) + SetTypeNames(); + return typenames; +} + +OptimizerFactory::OptType OptimizerFactory::GetOType(string type){ + int thetype; + if(typenames.empty()) + SetTypeNames(); + for(thetype=0;thetype<typenames.size();thetype++) + if(typenames[thetype]==type) + break; + return((OptType)thetype); +}; + +Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,vector<parameter_t> start,string type){ + + OptType T=GetOType(type); + if(T==NOPTIMIZER){ + cerr<<"Error unknow Optimizer type "<<type<<endl; + cerr<<"Known Algorithm are:"<<endl; + int thetype; + for(thetype=0;thetype<typenames.size();thetype++) + cerr<<typenames[thetype]<<endl; + throw ("unknown Optimizer Type"); + } + + switch((OptType)T){ + case POWELL: + return new SimpleOptimizer(dim,i2o,start); + break; + case NOPTIMIZER: + cerr<<"error unknwon optimizer"<<type<<endl; + return NULL; + } +} diff --git a/mert/Optimizer.h b/mert/Optimizer.h index d5c066bb6..23df73e67 100644 --- a/mert/Optimizer.h +++ b/mert/Optimizer.h @@ -20,7 +20,7 @@ class Optimizer{ Scorer * scorer; //no accessor for them only child can use them FeatureData * FData;//no accessor for them only child can use them public: - Optimizer(unsigned Pd,vector<unsigned> i2O,parameters_t start); + Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start); void SetScorer(Scorer *S); void SetFData(FeatureData *F); virtual ~Optimizer(); @@ -46,11 +46,32 @@ class SimpleOptimizer: public Optimizer{ private: static float eps; public: - SimpleOptimizer(unsigned dim,vector<unsigned> i2O,parameters_t start):Optimizer(dim,i2O,start){}; + SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start):Optimizer(dim,i2O,start){}; virtual statscore_t TrueRun(Point&)const; }; -Optimizer *BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,parameters_t start,string type); +class RandomOptimizer: public Optimizer{ +public: + RandomOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start):Optimizer(dim,i2O,start){}; + virtual statscore_t TrueRun(Point&)const; +}; + + + +class OptimizerFactory{ + public: + // unsigned dim; + //Point Start; + static vector<string> GetTypeNames(); + static Optimizer* BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,vector<parameter_t> start,string type); + private: + enum OptType{POWELL=0,RANDOM,NOPTIMIZER};//Add new optimizer here BEFORE NOPTIMZER + static OptType GetOType(string); + static vector<string> typenames; + static void SetTypeNames(); + +}; + #endif diff --git a/mert/Point.cpp b/mert/Point.cpp index 2fc365baf..18bc8e521 100644 --- a/mert/Point.cpp +++ b/mert/Point.cpp @@ -13,7 +13,9 @@ map<unsigned,statscore_t> Point::fixedweights; unsigned Point::pdim=0; unsigned Point::ncall=0; -void Point::Randomize(const parameters_t& min,const parameters_t& max){ +void Point::Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max){ + assert(min.size()==Point::dim); + assert(max.size()==Point::dim); for (int i=0; i<size(); i++) operator[](i)= min[i] + (float)random()/(float)RAND_MAX * (float)(max[i]-min[i]); } @@ -29,6 +31,19 @@ void Point::Normalize(){ } } +//Can initialize from a vector of dim or pdim +Point::Point(const vector<parameter_t>& init):vector<parameter_t>(Point::dim){ + if(init.size()==dim){ + for (int i=0; i<Point::dim; i++) + operator[](i)=init[i]; + }else{ + assert(init.size()==pdim); + for (int i=0; i<Point::dim; i++) + operator[](i)=init[optindices[i]]; + } +}; + + double Point::operator*(const FeatureStats& F)const{ ncall++;//to track performance double prod=0.0; @@ -61,15 +76,15 @@ Point Point::operator*(float l)const{ }; ostream& operator<<(ostream& o,const Point& P){ - parameters_t w=P.GetAllWeights(); + vector<parameter_t> w=P.GetAllWeights(); for(int i=0;i<Point::pdim;i++) o<<w[i]<<' '; o<<endl; return o; }; -parameters_t Point::GetAllWeights()const{ - parameters_t w; +vector<parameter_t> Point::GetAllWeights()const{ + vector<parameter_t> w; if(OptimizeAll()){ w=*this; }else{ diff --git a/mert/Point.h b/mert/Point.h index 150ae10b7..e7f48d897 100644 --- a/mert/Point.h +++ b/mert/Point.h @@ -10,7 +10,7 @@ class Optimizer; /**class that handle the subset of the Feature weight on which we run the optimization*/ -class Point:public parameters_t{ +class Point:public vector<parameter_t>{ friend class Optimizer; private: /**The indices over which we optimize*/ @@ -27,9 +27,9 @@ class Point:public parameters_t{ static unsigned getpdim(){return pdim;} static bool OptimizeAll(){return fixedweights.empty();}; statscore_t score; - Point():parameters_t(dim){}; - Point(parameters_t init):parameters_t(init){assert(init.size()==dim);}; - void Randomize(const parameters_t& min,const parameters_t& max); + Point():vector<parameter_t>(dim){}; + Point(const vector<parameter_t>& init); + void Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max); double operator*(const FeatureStats&)const;//compute the feature function Point operator+(const Point&)const; @@ -37,8 +37,8 @@ class Point:public parameters_t{ /**write the Whole featureweight to a stream (ie pdim float)*/ friend ostream& operator<<(ostream& o,const Point& P); void Normalize(); - /**return a vector of size pdim where all weights have been put*/ - parameters_t GetAllWeights()const; + /**return a vector of size pdim where all weights have been put(including fixed ones)*/ + vector<parameter_t> GetAllWeights()const; }; #endif diff --git a/mert/Types.h b/mert/Types.h index 04f849806..c1120e908 100644 --- a/mert/Types.h +++ b/mert/Types.h @@ -4,7 +4,7 @@ using namespace std; typedef float parameter_t; -typedef vector<parameter_t> parameters_t; +//typedef vector<parameter_t> parameters_t;confusing; use vector<parameter_t> typedef vector<pair<unsigned int, unsigned int> > diff_t; typedef vector<diff_t> diffs_t; diff --git a/mert/mert.cpp b/mert/mert.cpp index 72c4f5089..4f56eafa3 100755 --- a/mert/mert.cpp +++ b/mert/mert.cpp @@ -37,7 +37,7 @@ void usage(void) { static struct option long_options[] = { - {"dim", 1, 0, 'd'}, + {"pdim", 1, 0, 'd'}, {"ntry",1,0,'n'}, {"optimize",1,0,'o'}, {"type",1,0,'t'}, @@ -50,8 +50,8 @@ static struct option long_options[] = int option_index; int main (int argc, char **argv) { - int c,dim,i; - dim=-1; + int c,pdim,i; + pdim=-1; int ntry=1; string type("powell"); string scorertype("BLEU"); @@ -62,7 +62,7 @@ int main (int argc, char **argv) { while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) { switch (c) { case 'd': - dim = strtol(optarg, NULL, 10); + pdim = strtol(optarg, NULL, 10); break; case 'n': ntry=strtol(optarg, NULL, 10); @@ -85,57 +85,68 @@ int main (int argc, char **argv) { usage(); } } - if (dim < 0) + if (pdim < 0) usage(); - if(tooptimize.empty()){//We'll optimize on everything - tooptimize.resize(dim); - for(i=0;i<dim;i++) + if(tooptimize.empty()){ + tooptimize.resize(pdim);//We'll optimize on everything + for(i=0;i<pdim;i++) tooptimize[i]=i; } + ifstream opt("init.opt"); + if(opt.fail()){ + cerr<<"could not open init.opt"<<endl; + exit(3); + } + start.resize(pdim);//to do:read from file + int j; + for( j=0;j<pdim&&!opt.fail();j++) + opt>>start[j]; + if(j<pdim){ + cerr<<"error could not initialize start point with init.opt"<<endl; + exit(3); + } + + opt.close(); + //it make sense to know what parameter set were used to generate the nbest ScorerFactory SF; - Optimizer *O; - Scorer *TheScorer=NULL;; - FeatureData *FD=NULL; -; - start.resize(dim); - float score; - float best=numeric_limits<float>::min(); - float mean=0; - float var=0; - Point bestP; - //it make sense to know what parameter set where used to generate the nbest - O=BuildOptimizer(dim,tooptimize,start,"powell"); - - TheScorer=SF.getScorer(scorertype); + Scorer *TheScorer=SF.getScorer(scorertype); ScoreData *SD=new ScoreData(*TheScorer); - FD=new FeatureData(); - FD->load(featurefile); SD->load(scorerfile); + FeatureData *FD=new FeatureData(); + FD->load(featurefile); + Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start,type); O->SetScorer(TheScorer); O->SetFData(FD); - Point min;//to: initialize - Point max; - //note: thos min and maw are the bound for the starting poitns of the algorithm, not strict bound on the result! - for(int d=0;d<Point::getdim();d++){ + Point P(start);//Generate from the full feature set. Warning: must ne done after Optimiezr initialiazation + Point bestP=P; + statscore_t best=O->Run(P); + statscore_t mean=best; + statscore_t var=best*best; + + vector<parameter_t> min(Point::getdim()); + vector<parameter_t> max(Point::getdim()); + + for(int d=0;d<Point::getdim();d++){ min[d]=0.0; max[d]=1.0; } - for(int i=0;i<ntry;i++){ - Point P; - P.Randomize(min,max); - score=O->Run(P); - if(score>best){ - best=score; - bestP=P; - } - mean+=score; - var+=(score*score); + //note: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result! + + for(int i=1;i<ntry;i++){ + P.Randomize(min,max); + statscore_t score=O->Run(P); + if(score>best){ + best=score; + bestP=P; } - mean/=(float)ntry; - var/=(float)ntry; - var=sqrt(abs(var-mean*mean)); - cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl; - cerr<<"best score"<<best<<endl; - ofstream res("weights.txt"); - res<<bestP<<endl; + mean+=score; + var+=(score*score); + } + mean/=(float)ntry; + var/=(float)ntry; + var=sqrt(abs(var-mean*mean)); + cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl; + cerr<<"best score"<<best<<endl; + ofstream res("weights.txt"); + res<<bestP<<endl; } |