Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-16 00:49:49 +0400
committer: jfouet <jfouet@1f5c12ca-751b-0410-a591-d2e778427230> 2008-05-16 00:49:49 +0400
commit: ad79af41dfb7160c11521bdae1b1e3db63d18a56 (patch)
tree: eb0f2bc98683499eef9979f208b7477a8ea26802 /mert
parent: 165f7d049fe42d46315b7512268ea6e4891f2986 (diff)
bug fixes
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1719 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'mert')
-rw-r--r-- mert/Optimizer.cpp 19
-rw-r--r-- mert/ScoreData.cpp 2
-rwxr-xr-x mert/mert.cpp 40
3 files changed, 48 insertions, 13 deletions
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 118d285fe..655c46322 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -90,9 +90,11 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
thresholdlist.push_back(threshold(MIN_FLOAT,vector<diff>()));
vector<unsigned> first1best;//the vector of nbrests for x=-inf
for(int S=0;S<size();S++){
+ cerr<<"SENTENCE"<<S;
//first we determine the translation with the best feature score for each sentence and each value of x
multimap<float,unsigned> gradient;
vector<float> f0;
+ f0.resize(size());
for(unsigned j=0;j<FData->get(S).size();j++){
gradient.insert(pair<float,unsigned>(direction*(FData->get(S,j)),j));//gradient of the feature function for this particular target sentence
f0[j]=origin*FData->get(S,j);//compute the feature function at the origin point
@@ -111,19 +113,24 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
while(it!=gradient.end()&&it->first==smallest){
if(f0[it->second]>f0[index])
index=it->second;//the highest line is the one with he highest f0
- }
+ it++;
+ }
--it;//we went one step too far in the while loop
onebest.push_back(pair<float,unsigned>(MIN_FLOAT,index));//first 1best is the lowest gradient.
//now we look for the intersections points indicating a change of 1 best
//we use the fact that the function is convex, which means that the gradient can only go up
+ int c=0;
while(it!=gradient.end()){
+ cerr<<c++<<endl;
map<float,unsigned>::iterator leftmost=it;
float leftmostx=onebest.back().first;
float m=it->first;
float b=f0[it->second];
multimap<float,unsigned>::iterator it2=it;
it2++;
+ int d=0;
for(;it2!=gradient.end();it2++){
+ //cerr<<"--"<<d++<<' '<<it2->first<<' '<<it2->second<<endl;
//look for all candidate with a gradient bigger than the current one and fond the one with the leftmost intersection
float curintersect=intersect(m,b,it2->first,f0[it2->second]);
if(curintersect<leftmostx){
@@ -147,7 +154,11 @@ statscore_t Optimizer::LineOptimize(const Point& origin,const Point& direction,P
onebest.back()=pair<float,unsigned>(leftmostx,leftmost->second);//leftmost->first is the gradient, we are interested in the value of the intersection
else //normal case: we add a new threshold
onebest.push_back(pair<float,unsigned>(leftmostx,leftmost->second));
- it=leftmost;
+ if(it==leftmost)
+ it=gradient.end();
+ else
+ cerr<<"ok"<<endl;
+ it=leftmost;
}
//we have the onebest list and the threshold for the current sentence.
//now we update the thresholdlist: we add the new threshold and the value of the onebest.
@@ -253,6 +264,10 @@ statscore_t Optimizer::Run(Point& P)const{
cerr<<"error trying to optimize without a Scorer loaded"<<endl;
exit(2);
}
+ if (scorer->getReferenceSize()!=FData->size()){
+ cerr<<"errror size mismatch between FeatureData and Scorer"<<endl;
+ exit(2);
+ }
statscore_t s=TrueRun(P);
P.score=s;//just in case its not done in TrueRun
return s;
diff --git a/mert/ScoreData.cpp b/mert/ScoreData.cpp
index 6b5e7975f..fb1d4b9fb 100644
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@@ -17,7 +17,7 @@ theScorer(&ptr)
{
score_type = theScorer->getName();
TRACE_ERR("score_type:" << score_type << std::endl);
-
+ theScorer->setScoreData(this);//this is not dangerous: we dont use the this pointer in SetScoreData
};
void ScoreData::save(std::ofstream& outFile, bool bin)
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 02834888f..590c74f91 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -24,20 +24,25 @@ float min_interval = 1e-3;
using namespace std;
void usage(void) {
- cerr<<"usage: mert -d <dimensions>"<<endl;
- cerr<<"[-n retry ntimes]"<<endl;
- cerr<<"[-o indexes to optimize]"<<endl;
- cerr<<"[-t optimizer]"<<endl;
- cerr<<"[-s scorer]"<<endl;
+ cerr<<"usage: mert -d <dimensions> (mandatory )"<<endl;
+ cerr<<"[-n retry ntimes (default 1)]"<<endl;
+ cerr<<"[-o\tthe indexes to optimize(default all)]"<<endl;
+ cerr<<"[-t\tthe optimizer(default Powell)]"<<endl;
+ cerr<<"[-sctype] the scorer type (default BLEU)"<<endl;
+ cerr<<"[-scfile] the scorer data file (default score.data)"<<endl;
+ cerr<<"[-ffile] the feature data file data file (default feature.data)"<<endl;
exit(1);
}
static struct option long_options[] =
{
{"dim", 1, 0, 'd'},
- {"only",1,0,'o'},
+ {"ntry",1,0,'n'},
+ {"optimize",1,0,'o'},
{"type",1,0,'t'},
- {"scorer",1,0,'s'},
+ {"sctype",1,0,'s'},
+ {"scfile",1,0,'S'},
+ {"ffile",1,0,'F'},
{0, 0, 0, 0}
};
int option_index;
@@ -48,22 +53,30 @@ int main (int argc, char **argv) {
int ntry=1;
string type("powell");
string scorertype("BLEU");
+ string scorerfile("statscore.data");
+ string featurefile("features.data");
vector<unsigned> tooptimize;
vector<parameter_t> start;
- while (getopt_long (argc, argv, "d:n:o:t:s:", long_options, &option_index) != -1) {
+ while ((c=getopt_long (argc, argv, "d:n:t:s:S:F:", long_options, &option_index)) != -1) {
switch (c) {
case 'd':
dim = strtol(optarg, NULL, 10);
+ cerr<<dim;
break;
case 'n':
ntry=strtol(optarg, NULL, 10);
break;
- case 'o':
- //TODO
case 't':
type=string(optarg);
+ break;
case's':
scorertype=string(optarg);
+ break;
+ case 'S':
+ scorerfile=string(optarg);
+ case 'F':
+ featurefile=string(optarg);
+ break;
default:
usage();
}
@@ -73,6 +86,7 @@ int main (int argc, char **argv) {
for(i=0;i<dim;i++)
tooptimize[i]=i;
}
+ ScorerFactory SF;
Optimizer *O;
Scorer *TheScorer=NULL;;
FeatureData *FD=NULL;
@@ -86,6 +100,12 @@ int main (int argc, char **argv) {
Point bestP;
//it make sense to know what parameter set where used to generate the nbest
O=BuildOptimizer(dim,tooptimize,start,"powell");
+
+ TheScorer=SF.getScorer(scorertype);
+ ScoreData *SD=new ScoreData(*TheScorer);
+ FD=new FeatureData();
+ FD->load(featurefile);
+ SD->load(scorerfile);
O->SetScorer(TheScorer);
O->SetFData(FD);
Point min;//to: initialize