diff options
-rw-r--r-- | mert/BleuScorer.cpp | 2 | ||||
-rw-r--r-- | mert/FeatureArray.cpp | 2 | ||||
-rw-r--r-- | mert/FeatureData.h | 2 | ||||
-rw-r--r-- | mert/FeatureStats.cpp | 6 | ||||
-rwxr-xr-x | mert/Makefile | 4 | ||||
-rw-r--r-- | mert/Optimizer.cpp | 43 | ||||
-rw-r--r-- | mert/PerScorer.cpp | 2 | ||||
-rw-r--r-- | mert/Point.cpp | 25 | ||||
-rw-r--r-- | mert/Point.h | 15 | ||||
-rw-r--r-- | mert/ScoreData.h | 2 | ||||
-rw-r--r-- | mert/ScoreStats.cpp | 4 | ||||
-rw-r--r-- | mert/Scorer.h | 16 | ||||
-rw-r--r-- | mert/Util.cpp | 2 | ||||
-rwxr-xr-x | mert/mert.cpp | 23 |
14 files changed, 84 insertions, 64 deletions
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp index 13ef70051..731504741 100644 --- a/mert/BleuScorer.cpp +++ b/mert/BleuScorer.cpp @@ -90,7 +90,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles) { } -void BleuScorer::prepareStats(int sid, const string& text, ScoreStats& entry) { +void BleuScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) { //cerr << text << endl; //dump_counts(*_refcounts[sid]); if (sid >= _refcounts.size()) { diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp index d9b90654b..a9ee292d1 100644 --- a/mert/FeatureArray.cpp +++ b/mert/FeatureArray.cpp @@ -131,6 +131,6 @@ void FeatureArray::load(const std::string &file, bool bin) void FeatureArray::merge(FeatureArray& e) { //dummy implementation - for (int i=0; i< e.size(); i++) + for (unsigned int i=0; i< e.size(); i++) add(e.get(i)); } diff --git a/mert/FeatureData.h b/mert/FeatureData.h index aacf81255..dc54c11be 100644 --- a/mert/FeatureData.h +++ b/mert/FeatureData.h @@ -48,7 +48,7 @@ public: return array_[i]; #endif } - inline bool exists(int i){ return (i<array_.size())?true:false; } + inline bool exists(unsigned int i){ return (i<array_.size())?true:false; } inline void setIndex(){ }; diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp index 5d85c5655..54ea76295 100644 --- a/mert/FeatureStats.cpp +++ b/mert/FeatureStats.cpp @@ -19,7 +19,7 @@ array_(stats.array_) FeatureStats::FeatureStats(const size_t size) { - for(int i = 0; i < size; i++) + for(unsigned int i = 0; i < size; i++) array_.push_back(0); }; @@ -32,7 +32,7 @@ FeatureStats::FeatureStats(std::string &theString) void FeatureStats::set(std::string &theString) { std::string substring, stringBuf; - std::string::size_type loc; + int nextPound; FeatureStatsType sc; // TRACE_ERR("Decompounding string: " << theString << std::endl); @@ -79,7 +79,7 @@ void FeatureStats::savetxt(std::ofstream& outFile) outFile << " " << *i; i++; } - outFile << std::endl; +// outFile << std::endl; } diff --git a/mert/Makefile b/mert/Makefile index b3c0c54d4..312093d36 100755 --- a/mert/Makefile +++ b/mert/Makefile @@ -9,9 +9,9 @@ Scorer.o \ Optimizer.o \ ifndef DEBUG -CFLAGS=-O3 -DTRACE_ENABLE +CFLAGS=-O3 -DTRACE_ENABLE -Wall else -CFLAGS=-DTRACE_ENABLE -g -DDEBUG +CFLAGS=-DTRACE_ENABLE -g -DDEBUG -Wall endif diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp index cc054e3e9..bc171ffd1 100644 --- a/mert/Optimizer.cpp +++ b/mert/Optimizer.cpp @@ -30,17 +30,18 @@ Optimizer::Optimizer(unsigned Pd,vector<unsigned> i2O,vector<parameter_t> start) //warning: the init vector is a full set of parameters, of dimension pdim! Point::pdim=Pd; + assert(start.size()==Pd); Point::dim=i2O.size(); Point::optindices=i2O; - if(Point::pdim<Point::dim){ - for(int i=0;i<Point::pdim;i++){ - int j; + if (Point::pdim<Point::dim){ + for (unsigned int i=0;i<Point::pdim;i++){ + unsigned int j; for(j=0;j<Point::dim;j++) - if(i==i2O[j]) - break; + if (i==i2O[j]) + break; if(j==Point::dim)//the index i wasnt found on optindices, it is a fixed index, we use the valu of hte start vector - Point::fixedweights[i]=start[i]; + Point::fixedweights[i]=start[i]; } } }; @@ -53,10 +54,9 @@ Optimizer::~Optimizer(){ statscore_t Optimizer::GetStatScore(const Point& param)const{ vector<unsigned> bests; Get1bests(param,bests); - //cerr << "1BESTS: "; //copy(bests.begin(),bests.end(),ostream_iterator<unsigned>(cerr," ")); statscore_t score = GetStatScore(bests); - //cerr << " score = " << score << endl; + cerr << "1BESTS: " << param << " => " << score << endl; return score; }; @@ -91,11 +91,11 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P float min_int=0.0001; //typedef pair<unsigned,unsigned> diff;//first the sentence that changes, second is the new 1best for this sentence //list<threshold> thresholdlist; - + map<float,diff_t> thresholdmap; thresholdmap[MIN_FLOAT]=diff_t(); vector<unsigned> first1best;//the vector of nbests for x=-inf - for(int S=0;S<size();S++){ + for(unsigned int S=0;S<size();S++){ map<float,diff_t >::iterator previnserted=thresholdmap.begin(); //first we determine the translation with the best feature score for each sentence and each value of x //cerr << "Sentence " << S << endl; @@ -212,7 +212,7 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P statscore_t bestscore=MIN_FLOAT; float bestx=MIN_FLOAT; assert(scores.size()==thresholdmap.size());//we skipped the first el of thresholdlist but GetIncStatScore return 1 more for first1best - for(int sc=0;sc!=scores.size();sc++){ + for(unsigned int sc=0;sc!=scores.size();sc++){ //cerr << "x=" << thrit->first << " => " << scores[sc] << endl; if (scores[sc] > bestscore) { //This is the score for the interval [lit2->first, (lit2+1)->first] @@ -297,8 +297,12 @@ statscore_t Optimizer::Run(Point& P)const{ cerr<<"error size mismatch between FeatureData and Scorer"<<endl; exit(2); } - if(verboselevel()>1) - cerr<<"starting point: "<< P; + + statscore_t score=GetStatScore(P); + P.score=score; + + if(verboselevel()>1) + cerr<<"starting point: "<< P << " => "<< P.score << endl; statscore_t s=TrueRun(P); P.score=s;//just in case its not done in TrueRun if (verboselevel()>1) @@ -323,6 +327,9 @@ vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector< float SimpleOptimizer::eps=0.0001; statscore_t SimpleOptimizer::TrueRun(Point& P)const{ + + statscore_t score=GetStatScore(P); + statscore_t prevscore=0; statscore_t bestscore=MIN_FLOAT; Point best; @@ -335,13 +342,13 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const{ Point linebest; - for(int d=0;d<Point::getdim();d++){ + for(unsigned int d=0;d<Point::getdim();d++){ if(verboselevel()>4){ // cerr<<"minimizing along direction "<<d<<endl; cerr<<"starting point: " << P << " => " << prevscore << endl; } Point direction; - for(int i=0;i<Point::getdim();i++) + for(unsigned int i=0;i<Point::getdim();i++) direction[i]; direction[d]=1.0; statscore_t curscore=LineOptimize(P,direction,linebest);//find the minimum on the line @@ -378,7 +385,7 @@ Just return a random point*/ statscore_t RandomOptimizer::TrueRun(Point& P)const{ vector<parameter_t> min(Point::getdim()); vector<parameter_t> max(Point::getdim()); - for(int d=0;d<Point::getdim();d++){ + for(unsigned int d=0;d<Point::getdim();d++){ min[d]=0.0; max[d]=1.0; } @@ -405,7 +412,7 @@ vector<string> OptimizerFactory::GetTypeNames(){ } OptimizerFactory::OptType OptimizerFactory::GetOType(string type){ - int thetype; + unsigned int thetype; if(typenames.empty()) SetTypeNames(); for(thetype=0;thetype<typenames.size();thetype++) @@ -420,7 +427,7 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,vector<unsigned> i2o,ve if(T==NOPTIMIZER){ cerr<<"Error: unknown Optimizer type "<<type<<endl; cerr<<"Known Algorithm are:"<<endl; - int thetype; + unsigned int thetype; for(thetype=0;thetype<typenames.size();thetype++) cerr<<typenames[thetype]<<endl; throw ("unknown Optimizer Type"); diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp index d766874a9..6d63de301 100644 --- a/mert/PerScorer.cpp +++ b/mert/PerScorer.cpp @@ -32,7 +32,7 @@ void PerScorer::setReferenceFiles(const vector<string>& referenceFiles) { } -void PerScorer::prepareStats(int sid, const string& text, ScoreStats& entry) { +void PerScorer::prepareStats(unsigned int sid, const string& text, ScoreStats& entry) { if (sid >= _reflengths.size()) { stringstream msg; msg << "Sentence id (" << sid << ") not found in reference set"; diff --git a/mert/Point.cpp b/mert/Point.cpp index b61bd1f73..af9316679 100644 --- a/mert/Point.cpp +++ b/mert/Point.cpp @@ -16,17 +16,17 @@ unsigned Point::ncall=0; void Point::Randomize(const vector<parameter_t>& min,const vector<parameter_t>& max){ assert(min.size()==Point::dim); assert(max.size()==Point::dim); - for (int i=0; i<size(); i++) + for (unsigned int i=0; i<size(); i++) operator[](i)= min[i] + (float)random()/(float)RAND_MAX * (float)(max[i]-min[i]); } void Point::Normalize(){ parameter_t norm=0.0; - for (int i=0; i<size(); i++) + for (unsigned int i=0; i<size(); i++) norm+= operator[](i)*operator[](i); if(norm!=0.0){ norm=sqrt(norm); - for (int i=0; i<size(); i++) + for (unsigned int i=0; i<size(); i++) operator[](i)/=norm; } } @@ -34,11 +34,11 @@ void Point::Normalize(){ //Can initialize from a vector of dim or pdim Point::Point(const vector<parameter_t>& init):vector<parameter_t>(Point::dim){ if(init.size()==dim){ - for (int i=0; i<Point::dim; i++) + for (unsigned int i=0; i<Point::dim; i++) operator[](i)=init[i]; }else{ assert(init.size()==pdim); - for (int i=0; i<Point::dim; i++) + for (unsigned int i=0; i<Point::dim; i++) operator[](i)=init[optindices[i]]; } }; @@ -77,10 +77,11 @@ Point Point::operator*(float l)const{ ostream& operator<<(ostream& o,const Point& P){ vector<parameter_t> w=P.GetAllWeights(); - for(int i=0;i<Point::pdim;i++) - o<<w[i]<<' '; -// o<<endl; - return o; +// o << "[" << Point::pdim << "] "; + for(unsigned int i=0;i<Point::pdim;i++) + o << w[i] << " "; +// o << "=> " << P.GetScore(); + return o; }; vector<parameter_t> Point::GetAllWeights()const{ @@ -89,10 +90,10 @@ vector<parameter_t> Point::GetAllWeights()const{ w=*this; }else{ w.resize(pdim); - for (int i=0; i<size(); i++) + for (unsigned int i=0; i<size(); i++) w[optindices[i]]=operator[](i); - for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++) - w[it->first]=it->second; + for(map<unsigned,float >::iterator it=fixedweights.begin();it!=fixedweights.end();it++) + w[it->first]=it->second; } return w; }; diff --git a/mert/Point.h b/mert/Point.h index e7f48d897..ac6dd6ccb 100644 --- a/mert/Point.h +++ b/mert/Point.h @@ -14,17 +14,17 @@ class Point:public vector<parameter_t>{ friend class Optimizer; private: /**The indices over which we optimize*/ - static vector<unsigned> optindices; + static vector<unsigned int> optindices; /**dimension of optindices and of the parent vector*/ - static unsigned dim; + static unsigned int dim; /**fixed weights in case of partial optimzation*/ - static map<unsigned,parameter_t> fixedweights; + static map<unsigned int,parameter_t> fixedweights; /**total size of the parameter space; we have pdim=FixedWeight.size()+optinidices.size()*/ - static unsigned pdim; - static unsigned ncall; + static unsigned int pdim; + static unsigned int ncall; public: - static unsigned getdim(){return dim;} - static unsigned getpdim(){return pdim;} + static unsigned int getdim(){return dim;} + static unsigned int getpdim(){return pdim;} static bool OptimizeAll(){return fixedweights.empty();}; statscore_t score; Point():vector<parameter_t>(dim){}; @@ -39,6 +39,7 @@ class Point:public vector<parameter_t>{ void Normalize(); /**return a vector of size pdim where all weights have been put(including fixed ones)*/ vector<parameter_t> GetAllWeights()const; + statscore_t GetScore()const { return score; }; }; #endif diff --git a/mert/ScoreData.h b/mert/ScoreData.h index c2c046c45..d8d4a80ce 100644 --- a/mert/ScoreData.h +++ b/mert/ScoreData.h @@ -38,7 +38,7 @@ public: inline void clear() { array_.clear(); } inline ScoreArray get(int i){ return array_.at(i); } - inline bool exists(int i){ return (i<array_.size())?true:false; } + inline bool exists(unsigned int i){ return (i<array_.size())?true:false; } inline ScoreStats get(int i, int j){ return array_.at(i).get(j); } diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp index 8f571aac3..f62f9df45 100644 --- a/mert/ScoreStats.cpp +++ b/mert/ScoreStats.cpp @@ -19,7 +19,7 @@ array_(stats.array_) ScoreStats::ScoreStats(const size_t size) { - for(int i = 0; i < size; i++) + for(unsigned int i = 0; i < size; i++) array_.push_back(0); }; @@ -32,7 +32,7 @@ ScoreStats::ScoreStats(std::string &theString) void ScoreStats::set(std::string &theString) { std::string substring, stringBuf; - std::string::size_type loc; + int nextPound; ScoreStatsType sc; while (!theString.empty()){ diff --git a/mert/Scorer.h b/mert/Scorer.h index bef8b84b0..30e4fc641 100644 --- a/mert/Scorer.h +++ b/mert/Scorer.h @@ -28,8 +28,9 @@ class Scorer { public: - Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {} - + Scorer(const string& name): _name(name), _scoreData(0),_preserveCase(false) {}; + virtual ~Scorer(){}; + /** * set the reference files. This must be called before prepareStats. **/ @@ -141,8 +142,9 @@ class Scorer { class StatisticsBasedScorer : public Scorer { public: - StatisticsBasedScorer(const string& name): Scorer(name) {} - virtual void score(const candidates_t& candidates, const diffs_t& diffs, + StatisticsBasedScorer(const string& name): Scorer(name) {} + ~StatisticsBasedScorer(){}; + virtual void score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores); protected: @@ -162,7 +164,7 @@ class BleuScorer: public StatisticsBasedScorer { public: BleuScorer() : StatisticsBasedScorer("BLEU"),_refLengthStrategy(SHORTEST) {} virtual void setReferenceFiles(const vector<string>& referenceFiles); - virtual void prepareStats(int sid, const string& text, ScoreStats& entry); + virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry); static const int LENGTH; protected: @@ -171,6 +173,7 @@ class BleuScorer: public StatisticsBasedScorer { private: //no copy BleuScorer(const BleuScorer&); + ~BleuScorer(){}; BleuScorer& operator=(const BleuScorer&); @@ -229,7 +232,7 @@ class PerScorer: public StatisticsBasedScorer { public: PerScorer() : StatisticsBasedScorer("PER") {} virtual void setReferenceFiles(const vector<string>& referenceFiles); - virtual void prepareStats(int sid, const string& text, ScoreStats& entry); + virtual void prepareStats(unsigned int sid, const string& text, ScoreStats& entry); protected: @@ -239,6 +242,7 @@ class PerScorer: public StatisticsBasedScorer { //no copy PerScorer(const PerScorer&); + ~PerScorer(){}; PerScorer& operator=(const PerScorer&); // data extracted from reference files diff --git a/mert/Util.cpp b/mert/Util.cpp index 4531a8645..aff734e1b 100644 --- a/mert/Util.cpp +++ b/mert/Util.cpp @@ -22,7 +22,7 @@ int setverboselevel(int v){ int getNextPound(std::string &theString, std::string &substring, const std::string delimiter) { - int pos = 0; + unsigned int pos = 0; //skip all occurrences of delimiter while ( pos == 0 ) diff --git a/mert/mert.cpp b/mert/mert.cpp index 902c99ae8..70297493c 100755 --- a/mert/mert.cpp +++ b/mert/mert.cpp @@ -31,8 +31,9 @@ void usage(void) { cerr<<"[-t\tthe optimizer(default powell)]"<<endl; cerr<<"[--sctype|-s] the scorer type (default BLEU)"<<endl; cerr<<"[--scfile|-S] the scorer data file (default score.data)"<<endl; - cerr<<"[--ffile|-F] the feature data file data file (default feature.data)"<<endl; - cerr<<"[-v] verbose level"<<endl; + cerr<<"[--ffile|-F] the feature data file (default feature.data)"<<endl; + cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl; + cerr<<"[-v] verbose level"<<endl; exit(1); } @@ -45,7 +46,8 @@ static struct option long_options[] = {"sctype",1,0,'s'}, {"scfile",1,0,'S'}, {"ffile",1,0,'F'}, - {"verbose",1,0,'v'}, + {"ifile",1,0,'i'}, + {"verbose",1,0,'v'}, {0, 0, 0, 0} }; int option_index; @@ -58,7 +60,8 @@ int main (int argc, char **argv) { string scorertype("BLEU"); string scorerfile("statscore.data"); string featurefile("features.data"); - vector<unsigned> tooptimize; + string initfile("init.opt"); + vector<unsigned> tooptimize; vector<parameter_t> start; while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:v:", long_options, &option_index)) != -1) { switch (c) { @@ -80,6 +83,9 @@ int main (int argc, char **argv) { case 'F': featurefile=string(optarg); break; + case 'i': + initfile=string(optarg); + break; case 'v': setverboselevel(strtol(optarg,NULL,10)); break; @@ -98,9 +104,9 @@ int main (int argc, char **argv) { for(i=0;i<pdim;i++) tooptimize[i]=i; } - ifstream opt("init.opt"); + ifstream opt(initfile.c_str()); if(opt.fail()){ - cerr<<"could not open init.opt"<<endl; + cerr<<"could not open initfile: " << initfile << endl; exit(3); } start.resize(pdim);//to do:read from file @@ -108,7 +114,7 @@ int main (int argc, char **argv) { for( j=0;j<pdim&&!opt.fail();j++) opt>>start[j]; if(j<pdim){ - cerr<<"error could not initialize start point with init.opt"<<endl; + cerr<<"error could not initialize start point with " << initfile << endl; exit(3); } @@ -136,7 +142,7 @@ int main (int argc, char **argv) { vector<parameter_t> min(Point::getdim()); vector<parameter_t> max(Point::getdim()); - for(int d=0;d<Point::getdim();d++){ + for(unsigned int d=0;d<Point::getdim();d++){ min[d]=0.0; max[d]=1.0; } @@ -158,6 +164,7 @@ int main (int argc, char **argv) { if(ntry>1) cerr<<"variance of the score (for "<<ntry<<" try):"<<var<<endl; cerr<<"best score: "<<best<<endl; + cerr << "Best point: " << bestP << " => " << best << endl; ofstream res("weights.txt"); res<<bestP<<endl; timer.stop("Stopping..."); |