Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-16 14:57:24 +0400
committer jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-16 14:57:24 +0400
commit e9df80a05c442b8b22aad79206872b90ab8c97c1 (patch)
tree 74ddf98cf565dad31ef72c7f2219ae795c4d08d6
parent bb42cb0dd591fa91d3ae63a2380a1ba6caed83a2 (diff)
bugfixes
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1726 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r-- mert/Optimizer.cpp 124
-rw-r--r-- mert/Optimizer.h 27
-rw-r--r-- mert/Point.cpp 23
-rw-r--r-- mert/Point.h 12
-rw-r--r-- mert/Types.h 2
-rwxr-xr-x mert/mert.cpp 101
6 files changed, 193 insertions, 96 deletions
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 68a37b29d..a150b0efe 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -12,24 +12,6 @@ using namespace std;
static const float MIN_FLOAT=numeric_limits<float>::min();
static const float MAX_FLOAT=numeric_limits<float>::max();
-enum OptType{POWELL=0,NOPTIMIZER};//Add new optimizetr here
-
-string names[NOPTIMIZER]={string("powell")};
-
-Optimizer *BuildOptimizer(unsigned dim,vector<unsigned>to,vector<parameter_t>s,string type){
- int thetype;
- for(thetype=0;thetype<(int)NOPTIMIZER;thetype++)
- if(names[thetype]==type)
- break;
- switch((OptType)thetype){
- case POWELL:
- return new SimpleOptimizer(dim,to,s);
- case NOPTIMIZER:
- cerr<<"error unknwon optimizer"<<type<<endl;
- return NULL;
- }
- return NULL;//Should never go there
-};
void Optimizer::SetScorer(Scorer *S){
@@ -106,8 +88,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
multimap<float,unsigned>::iterator it=gradient.begin();
float smallest=it->first;//smallest gradient
unsigned index=it->second;
- float biggestf0=f0[index];
- //several candidates can have the lowest slope (eg for word penalty where the gradient is an integer)
+ //several candidates can have the lowest slope (eg for word penalty where the gradient is an integer )
it++;
while(it!=gradient.end()&&it->first==smallest){
if(f0[it->second]>f0[index])
@@ -129,7 +110,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
int d=0;
for(;it2!=gradient.end();it2++){
//cerr<<"--"<<d++<<' '<<it2->first<<' '<<it2->second<<endl;
- //look for all candidate with a gradient bigger than the current one and fond the one with the leftmost intersection
+ //look for all candidate with a gradient bigger than the current one and find the one with the leftmost intersection
float curintersect=intersect(m,b,it2->first,f0[it2->second]);
if(curintersect<leftmostx){
//we have found and intersection to the left of the leftmost we had so far.
@@ -148,7 +129,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
right of the penultimate one. In that case, the Points would
switch places in the sort, resulting in a bogus score for
that interval. */
- if((leftmostx-onebest.back().first)<min_int)
+ if(abs(leftmostx-onebest.back().first)<min_int)
onebest.back()=pair<float,unsigned>(leftmostx,leftmost->second);//leftmost->first is the gradient, we are interested in the value of the intersection
else //normal case: we add a new threshold
onebest.push_back(pair<float,unsigned>(leftmostx,leftmost->second));
@@ -194,6 +175,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
//it contains a list of all the parameter_ts where the function changed its value, along with the nbest list for the interval after each threshold
//last thing to do is compute the Stat score (ie BLEU) and find the minimum
+ // cerr<<"thesholdlist size"<<thresholdlist.size()<<endl;
list<threshold>::iterator lit2=thresholdlist.begin();
++lit2;
@@ -204,31 +186,33 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
lit2=thresholdlist.begin();
statscore_t bestscore=MIN_FLOAT;
- float bestx;
+ float bestx=MIN_FLOAT;
assert(scores.size()==thresholdlist.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best
- for(int sc=0;sc!=scores.size();sc++,lit2++){
-//We move the list iterator and the vector index at the same time
-//because we need to get the value of lambda back from the list
+ for(int sc=0;sc!=scores.size();sc++){
+ lit2++;//We move the list iterator and the vector index at the same time
+ //because we need to get the value of lambda back from the list
+ //cerr<<lit2->first<<endl;
if(scores[sc]>bestscore){
bestscore=scores[sc];
if(lit2!=thresholdlist.end()){
//we dont want to stay exactly at the threshold where the function is discontinuous so we move just a little to the right
+ //but we dont want to cross a threshold
bestx=lit2->first;
lit2++;
- bestx+=lit2->first;
- bestx/=2.0;
+ if(lit2->first-bestx>0.0001)//distance to next threshold
+ bestx+=0.0001;
lit2--;
}else
- bestx=lit2->first+0.001;
+ bestx=lit2->first+0.0001;
}
}
-
- //finally! we manage to extract the best score;
- //nowwe convert bestx (position on the line) to a point!
-
- bestpoint=direction*bestx+origin;
- bestpoint.score=bestscore;
- return bestscore;
+ //finally! we manage to extract the best score;
+ //now we convert bestx (position on the line) to a point!
+ if(verboselevel()>3)
+ cerr<<"end Lineopt, bestx="<<bestx<<endl;
+ bestpoint=direction*bestx+origin;
+ bestpoint.score=bestscore;
+ return bestscore;
};
void Optimizer::Get1bests(const Point& P,vector<unsigned>& bests)const{
@@ -293,7 +277,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{
statscore_t prevscore=MAX_FLOAT;
statscore_t bestscore=MIN_FLOAT;
- int nrun;
+ int nrun=0;
do{
++nrun;
if(verboselevel()>2)
@@ -312,15 +296,81 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{
if(curscore>bestscore){
bestscore=curscore;
best=linebest;
+
if(verboselevel()>3)
cerr<<"new best d"<<d<<" ("<<nrun<<")"<<endl;
}
}
P=best;//update the current vector with the best points on all line tested
}while(bestscore-prevscore<eps);
+ if(verboselevel()>2){
+ cerr<<"end Powell Algo, nrun="<<nrun<<endl;
+ cerr<<"last diff="<<bestscore-prevscore<<endl;
+}
return bestscore;
}
+/**RandomOptimizer to use as beaseline and test.\n
+Just return a random point*/
+statscore_t RandomOptimizer::TrueRun(Point& P)const{
+ vector<parameter_t> min(Point::getdim());
+ vector<parameter_t> max(Point::getdim());
+ for(int d=0;d<Point::getdim();d++){
+ min[d]=0.0;
+ max[d]=1.0;
+ }
+ P.Randomize(min,max);
+ statscore_t score=GetStatScore(P);
+ P.score=score;
+ return score;
+}
+//--------------------------------------
+vector<string> OptimizerFactory::typenames;
+void OptimizerFactory::SetTypeNames(){
+ if(typenames.empty()){
+ typenames.resize(NOPTIMIZER);
+ typenames[POWELL]="powell";
+ typenames[RANDOM]="random";
+ //add new type there
+ }
+}
+vector<string> OptimizerFactory::GetTypeNames(){
+ if(typenames.empty())
+ SetTypeNames();
+ return typenames;
+}
+
+OptimizerFactory::OptType OptimizerFactory::GetOType(string type){
+ int thetype;
+ if(typenames.empty())
+ SetTypeNames();
+ for(thetype=0;thetype<typenames.size();thetype++)
+ if(typenames[thetype]==type)
+ break;
+ return((OptType)thetype);
+};
+
+Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,vector<parameter_t> start,string type){
+
+ OptType T=GetOType(type);
+ if(T==NOPTIMIZER){
+ cerr<<"Error unknow Optimizer type "<<type<<endl;
+ cerr<<"Known Algorithm are:"<<endl;
+ int thetype;
+ for(thetype=0;thetype<typenames.size();thetype++)
+ cerr<<typenames[thetype]<<endl;
+ throw ("unknown Optimizer Type");
+ }
+
+ switch((OptType)T){
+ case POWELL:
+ return new SimpleOptimizer(dim,i2o,start);
+ break;
+ case NOPTIMIZER:
+ cerr<<"error unknwon optimizer"<<type<<endl;
+ return NULL;
+ }
+}
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index d5c066bb6..23df73e67 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -20,7 +20,7 @@ class Optimizer{
Scorer * scorer; //no accessor for them only child can use them
FeatureData * FData;//no accessor for them only child can use them
public:
- Optimizer(unsigned Pd,vector<unsigned> i2O,parameters_t start);
+ Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start);
void SetScorer(Scorer *S);
void SetFData(FeatureData *F);
virtual ~Optimizer();
@@ -46,11 +46,32 @@ class SimpleOptimizer: public Optimizer{
private:
static float eps;
public:
- SimpleOptimizer(unsigned dim,vector<unsigned> i2O,parameters_t start):Optimizer(dim,i2O,start){};
+ SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start):Optimizer(dim,i2O,start){};
virtual statscore_t TrueRun(Point&)const;
};
-Optimizer *BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,parameters_t start,string type);
+class RandomOptimizer: public Optimizer{
+public:
+ RandomOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start):Optimizer(dim,i2O,start){};
+ virtual statscore_t TrueRun(Point&)const;
+};
+
+
+
+class OptimizerFactory{
+ public:
+ // unsigned dim;
+ //Point Start;
+ static vector<string> GetTypeNames();
+ static Optimizer* BuildOptimizer(unsigned dim,vector<unsigned>tooptimize,vector<parameter_t> start,string type);
+ private:
+ enum OptType{POWELL=0,RANDOM,NOPTIMIZER};//Add new optimizer here BEFORE NOPTIMZER
+ static OptType GetOType(string);
+ static vector<string> typenames;
+ static void SetTypeNames();
+
+};
+
#endif
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 2fc365baf..18bc8e521 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -13,7 +13,9 @@ map<unsigned,statscore_t> Point::fixedweights;
unsigned Point::pdim=0;
unsigned Point::ncall=0;
-void Point::Randomize(const parameters_t& min,const parameters_t& max){
+void Point::Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max){
+ assert(min.size()==Point::dim);
+ assert(max.size()==Point::dim);
for (int i=0; i<size(); i++)
operator[](i)= min[i] + (float)random()/(float)RAND_MAX * (float)(max[i]-min[i]);
}
@@ -29,6 +31,19 @@ void Point::Normalize(){
}
}
+//Can initialize from a vector of dim or pdim
+Point::Point(const vector<parameter_t>& init):vector<parameter_t>(Point::dim){
+ if(init.size()==dim){
+ for (int i=0; i<Point::dim; i++)
+ operator[](i)=init[i];
+ }else{
+ assert(init.size()==pdim);
+ for (int i=0; i<Point::dim; i++)
+ operator[](i)=init[optindices[i]];
+ }
+};
+
+
double Point::operator*(const FeatureStats& F)const{
ncall++;//to track performance
double prod=0.0;
@@ -61,15 +76,15 @@ Point Point::operator*(float l)const{
};
ostream& operator<<(ostream& o,const Point& P){
- parameters_t w=P.GetAllWeights();
+ vector<parameter_t> w=P.GetAllWeights();
for(int i=0;i<Point::pdim;i++)
o<<w[i]<<' ';
o<<endl;
return o;
};
-parameters_t Point::GetAllWeights()const{
- parameters_t w;
+vector<parameter_t> Point::GetAllWeights()const{
+ vector<parameter_t> w;
if(OptimizeAll()){
w=*this;
}else{
diff --git a/mert/Point.h b/mert/Point.h
index 150ae10b7..e7f48d897 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -10,7 +10,7 @@ class Optimizer;
/**class that handle the subset of the Feature weight on which we run the optimization*/
-class Point:public parameters_t{
+class Point:public vector<parameter_t>{
friend class Optimizer;
private:
/**The indices over which we optimize*/
@@ -27,9 +27,9 @@ class Point:public parameters_t{
static unsigned getpdim(){return pdim;}
static bool OptimizeAll(){return fixedweights.empty();};
statscore_t score;
- Point():parameters_t(dim){};
- Point(parameters_t init):parameters_t(init){assert(init.size()==dim);};
- void Randomize(const parameters_t& min,const parameters_t& max);
+ Point():vector<parameter_t>(dim){};
+ Point(const vector<parameter_t>& init);
+ void Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max);
double operator*(const FeatureStats&)const;//compute the feature function
Point operator+(const Point&)const;
@@ -37,8 +37,8 @@ class Point:public parameters_t{
/**write the Whole featureweight to a stream (ie pdim float)*/
friend ostream& operator<<(ostream& o,const Point& P);
void Normalize();
- /**return a vector of size pdim where all weights have been put*/
- parameters_t GetAllWeights()const;
+ /**return a vector of size pdim where all weights have been put(including fixed ones)*/
+ vector<parameter_t> GetAllWeights()const;
};
#endif
diff --git a/mert/Types.h b/mert/Types.h
index 04f849806..c1120e908 100644
--- a/mert/Types.h
+++ b/mert/Types.h
@@ -4,7 +4,7 @@
using namespace std;
typedef float parameter_t;
-typedef vector<parameter_t> parameters_t;
+//typedef vector<parameter_t> parameters_t;confusing; use vector<parameter_t>
typedef vector<pair<unsigned int, unsigned int> > diff_t;
typedef vector<diff_t> diffs_t;
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 72c4f5089..4f56eafa3 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -37,7 +37,7 @@ void usage(void) {
static struct option long_options[] =
{
- {"dim", 1, 0, 'd'},
+ {"pdim", 1, 0, 'd'},
{"ntry",1,0,'n'},
{"optimize",1,0,'o'},
{"type",1,0,'t'},
@@ -50,8 +50,8 @@ static struct option long_options[] =
int option_index;
int main (int argc, char **argv) {
- int c,dim,i;
- dim=-1;
+ int c,pdim,i;
+ pdim=-1;
int ntry=1;
string type("powell");
string scorertype("BLEU");
@@ -62,7 +62,7 @@ int main (int argc, char **argv) {
while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) {
switch (c) {
case 'd':
- dim = strtol(optarg, NULL, 10);
+ pdim = strtol(optarg, NULL, 10);
break;
case 'n':
ntry=strtol(optarg, NULL, 10);
@@ -85,57 +85,68 @@ int main (int argc, char **argv) {
usage();
}
}
- if (dim < 0)
+ if (pdim < 0)
usage();
- if(tooptimize.empty()){//We'll optimize on everything
- tooptimize.resize(dim);
- for(i=0;i<dim;i++)
+ if(tooptimize.empty()){
+ tooptimize.resize(pdim);//We'll optimize on everything
+ for(i=0;i<pdim;i++)
tooptimize[i]=i;
}
+ ifstream opt("init.opt");
+ if(opt.fail()){
+ cerr<<"could not open init.opt"<<endl;
+ exit(3);
+ }
+ start.resize(pdim);//to do:read from file
+ int j;
+ for( j=0;j<pdim&&!opt.fail();j++)
+ opt>>start[j];
+ if(j<pdim){
+ cerr<<"error could not initialize start point with init.opt"<<endl;
+ exit(3);
+ }
+
+ opt.close();
+ //it make sense to know what parameter set were used to generate the nbest
ScorerFactory SF;
- Optimizer *O;
- Scorer *TheScorer=NULL;;
- FeatureData *FD=NULL;
-;
- start.resize(dim);
- float score;
- float best=numeric_limits<float>::min();
- float mean=0;
- float var=0;
- Point bestP;
- //it make sense to know what parameter set where used to generate the nbest
- O=BuildOptimizer(dim,tooptimize,start,"powell");
-
- TheScorer=SF.getScorer(scorertype);
+ Scorer *TheScorer=SF.getScorer(scorertype);
ScoreData *SD=new ScoreData(*TheScorer);
- FD=new FeatureData();
- FD->load(featurefile);
SD->load(scorerfile);
+ FeatureData *FD=new FeatureData();
+ FD->load(featurefile);
+ Optimizer *O=OptimizerFactory::BuildOptimizer(pdim,tooptimize,start,type);
O->SetScorer(TheScorer);
O->SetFData(FD);
- Point min;//to: initialize
- Point max;
- //note: thos min and maw are the bound for the starting poitns of the algorithm, not strict bound on the result!
- for(int d=0;d<Point::getdim();d++){
+ Point P(start);//Generate from the full feature set. Warning: must ne done after Optimiezr initialiazation
+ Point bestP=P;
+ statscore_t best=O->Run(P);
+ statscore_t mean=best;
+ statscore_t var=best*best;
+
+ vector<parameter_t> min(Point::getdim());
+ vector<parameter_t> max(Point::getdim());
+
+ for(int d=0;d<Point::getdim();d++){
min[d]=0.0;
max[d]=1.0;
}
- for(int i=0;i<ntry;i++){
- Point P;
- P.Randomize(min,max);
- score=O->Run(P);
- if(score>best){
- best=score;
- bestP=P;
- }
- mean+=score;
- var+=(score*score);
+ //note: those mins and max are the bound for the starting points of the algorithm, not strict bound on the result!
+
+ for(int i=1;i<ntry;i++){
+ P.Randomize(min,max);
+ statscore_t score=O->Run(P);
+ if(score>best){
+ best=score;
+ bestP=P;
}
- mean/=(float)ntry;
- var/=(float)ntry;
- var=sqrt(abs(var-mean*mean));
- cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl;
- cerr<<"best score"<<best<<endl;
- ofstream res("weights.txt");
- res<<bestP<<endl;
+ mean+=score;
+ var+=(score*score);
+ }
+ mean/=(float)ntry;
+ var/=(float)ntry;
+ var=sqrt(abs(var-mean*mean));
+ cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl;
+ cerr<<"best score"<<best<<endl;
+ ofstream res("weights.txt");
+ res<<bestP<<endl;
}