Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author Barry Haddow <barry.haddow@gmail.com> 2011-11-16 21:43:54 +0400
committer Barry Haddow <barry.haddow@gmail.com> 2011-11-16 21:43:54 +0400
commit 71c777f01df14c9523e20f118c90396d51678c02 (patch)
tree f017487e82a931d6978ebf05cde367b6cc6a7c4e /mert
parent 7aa9073abec82ae76f1d3ed02c64e90a061a707f (diff)
Remove old PRO. Fix running of PRO from mert script.
Diffstat (limited to 'mert')
-rw-r--r-- mert/Data.cpp 131
-rw-r--r-- mert/Data.h 3
-rwxr-xr-x mert/mert.cpp 11
3 files changed, 0 insertions, 145 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 0acfbeac3..806308cb1 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -126,137 +126,6 @@ void Data::mergeSparseFeatures() {
exit(1);
}
-// really not the right place...
-float sentenceLevelBleuPlusOne( ScoreStats &stats ) {
- float logbleu = 0.0;
- const unsigned int bleu_order = 4;
- for (unsigned int j=0; j<bleu_order; j++) {
- //cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
- logbleu += log(stats.get(2*j)+1) - log(stats.get(2*j+1)+1);
- }
- logbleu /= bleu_order;
- float brevity = 1.0 - (float)stats.get(bleu_order*2)/stats.get(1);
- if (brevity < 0.0) {
- logbleu += brevity;
- }
- //cerr << brevity << " -> " << exp(logbleu) << endl;
- return exp(logbleu);
-}
-
-class SampledPair {
-private:
- unsigned int translation1;
- unsigned int translation2;
- float scoreDiff;
-public:
- SampledPair( unsigned int t1, unsigned int t2, float diff ) {
- if (diff > 0) {
- translation1 = t1;
- translation2 = t2;
- scoreDiff = diff;
- }
- else {
- translation1 = t2;
- translation2 = t1;
- scoreDiff = -diff;
- }
- }
- float getDiff() { return scoreDiff; }
- unsigned int getTranslation1() { return translation1; }
- unsigned int getTranslation2() { return translation2; }
-};
-
-
-void Data::sampleRankedPairs( const std::string &rankedpairfile ) {
- cout << "Sampling ranked pairs." << endl;
-
- ostream* out = NULL;
- ofstream* outFile = NULL;
- if (rankedpairfile == "stdout") {
- out = &cout;
- } else {
- outFile = new ofstream();
- outFile->open( rankedpairfile.c_str() );
- out = outFile;
- }
-
- const unsigned int n_samplings = 5000;
- const unsigned int n_samples = 50;
- const float min_diff = 0.05;
-
- // loop over all sentences
- for(unsigned int S=0; S<featdata->size(); S++) {
- unsigned int n_translations = featdata->get(S).size();
- // sample a fixed number of times
- vector< SampledPair* > samples;
- vector< float > scores;
- for(unsigned int i=0; i<n_samplings; i++) {
- unsigned int translation1 = rand() % n_translations;
- float bleu1 = sentenceLevelBleuPlusOne(scoredata->get(S,translation1));
-
- unsigned int translation2 = rand() % n_translations;
- float bleu2 = sentenceLevelBleuPlusOne(scoredata->get(S,translation2));
-
- if (abs(bleu1-bleu2) < min_diff)
- continue;
-
- samples.push_back( new SampledPair( translation1, translation2, bleu1-bleu2) );
- scores.push_back( 1.0 - abs(bleu1-bleu2) );
- }
- //cerr << "sampled " << samples.size() << " pairs\n";
-
- float min_diff = -1.0;
- if (samples.size() > n_samples) {
- nth_element(scores.begin(), scores.begin()+(n_samples-1), scores.end());
- min_diff = 0.99999-scores[n_samples-1];
- //cerr << "min_diff = " << min_diff << endl;
- }
-
- unsigned int collected = 0;
- for(unsigned int i=0; i<samples.size() && collected < n_samples; i++) {
- if (samples[i]->getDiff() >= min_diff) {
- collected++;
-
- *out << "1";
- outputSample( *out, featdata->get(S,samples[i]->getTranslation1()),
- featdata->get(S,samples[i]->getTranslation2()) );
- *out << endl;
- *out << "0";
- outputSample( *out, featdata->get(S,samples[i]->getTranslation2()),
- featdata->get(S,samples[i]->getTranslation1()) );
- *out << endl;
- }
- delete samples[i];
- }
- //cerr << "collected " << collected << endl;
- }
- out->flush();
- if (outFile) {
- outFile->close();
- delete outFile;
- }
-}
-
-void Data::outputSample( ostream &out, const FeatureStats &f1, const FeatureStats &f2 )
-{
- // difference in score in regular features
- for(unsigned int j=0; j<f1.size(); j++)
- if (abs(f1.get(j)-f2.get(j)) > 0.00001)
- out << " F" << j << " " << (f1.get(j)-f2.get(j));
-
- if (!hasSparseFeatures())
- return;
-
- out << " ";
-
- // sparse features
- const SparseVector &s1 = f1.getSparse();
- const SparseVector &s2 = f2.getSparse();
- SparseVector diff = s1 - s2;
- diff.write(out);
-}
-
-
void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
std::vector<Data>& shards)
{
diff --git a/mert/Data.h b/mert/Data.h
index 47400c34a..84dcb884e 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -99,9 +99,6 @@ public:
return featdata->getFeatureIndex(name);
};
- void sampleRankedPairs( const std::string &rankedPairFile );
- void outputSample( std::ostream &out, const FeatureStats &f1, const FeatureStats &f2 );
-
/**
* Create shard_count shards. If shard_size == 0, then the shards are non-overlapping
* and exhaust the data. If 0 < shard_size <= 1, then shards are chosen by sampling
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 128398b9b..91fa71e7b 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -38,7 +38,6 @@ void usage(int ret)
cerr<<"[-o] the indexes to optimize(default all)"<<endl;
cerr<<"[-t] the optimizer(default powell)"<<endl;
cerr<<"[-r] the random seed (defaults to system clock)"<<endl;
- cerr<<"[-p] only create data for paired ranked optimizer"<<endl;
cerr<<"[--sctype|-s] the scorer type (default BLEU)"<<endl;
cerr<<"[--scconfig|-c] configuration string passed to scorer"<<endl;
cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl;
@@ -137,7 +136,6 @@ int main (int argc, char **argv)
string scorerfile("statscore.data");
string featurefile("features.data");
string initfile("init.opt");
- string pairedrankfile("");
string tooptimizestr("");
vector<unsigned> tooptimize;
@@ -151,9 +149,6 @@ int main (int argc, char **argv)
case 'o':
tooptimizestr = string(optarg);
break;
- case 'p':
- pairedrankfile = string(optarg);
- break;
case 'd':
pdim = strtol(optarg, NULL, 10);
break;
@@ -335,12 +330,6 @@ int main (int argc, char **argv)
}
}
- if (pairedrankfile.compare("") != 0) {
- D.sampleRankedPairs(pairedrankfile);
- PrintUserTime("Stopping...");
- exit(0);
- }
-
// treat sparse features just like regular features
if (D.hasSparseFeatures()) {
D.mergeSparseFeatures();