Remove old PRO. Fix running of PRO from mert script.

author: Barry Haddow <barry.haddow@gmail.com> 2011-11-16 21:43:54 +0400
committer: Barry Haddow <barry.haddow@gmail.com> 2011-11-16 21:43:54 +0400
commit: 71c777f01df14c9523e20f118c90396d51678c02 (patch)
tree: f017487e82a931d6978ebf05cde367b6cc6a7c4e /mert
parent: 7aa9073abec82ae76f1d3ed02c64e90a061a707f (diff)
3 files changed, 0 insertions, 145 deletions
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 0acfbeac3..806308cb1 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -126,137 +126,6 @@ void Data::mergeSparseFeatures() {
   exit(1);
 }
 
-// really not the right place...
-float sentenceLevelBleuPlusOne( ScoreStats &stats ) {
-	float logbleu = 0.0;
-	const unsigned int bleu_order = 4;
-	for (unsigned int j=0; j<bleu_order; j++) {
-		//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
-		logbleu += log(stats.get(2*j)+1) - log(stats.get(2*j+1)+1);
-	}
-	logbleu /= bleu_order;
-	float brevity = 1.0 - (float)stats.get(bleu_order*2)/stats.get(1);
-	if (brevity < 0.0) {
-		logbleu += brevity;
-	}
-	//cerr << brevity << " -> " << exp(logbleu) << endl;
-	return exp(logbleu);
-}
-
-class SampledPair {
-private:
-	unsigned int translation1;
-	unsigned int translation2;
-	float scoreDiff;
-public:
-	SampledPair( unsigned int t1, unsigned int t2, float diff ) {
-		if (diff > 0) {
-			translation1 = t1;
-			translation2 = t2;
-			scoreDiff = diff;
-		}
-		else {
-			translation1 = t2;
-			translation2 = t1;
-			scoreDiff = -diff;
-		}			
-	}
-	float getDiff() { return scoreDiff; }
-	unsigned int getTranslation1() { return translation1; }
-	unsigned int getTranslation2() { return translation2; }
-};
-	
-
-void Data::sampleRankedPairs( const std::string &rankedpairfile ) {
-	cout << "Sampling ranked pairs." << endl;
-
-  ostream* out = NULL;
-  ofstream*  outFile = NULL;
-  if (rankedpairfile == "stdout") {
-    out = &cout;
-  } else {
-    outFile = new ofstream();
-    outFile->open( rankedpairfile.c_str() );
-    out = outFile;
-  }
-
-	const unsigned int n_samplings = 5000;
-	const unsigned int n_samples = 50;
-	const float min_diff = 0.05;
-
-	// loop over all sentences
-  for(unsigned int S=0; S<featdata->size(); S++) {
-		unsigned int n_translations = featdata->get(S).size();
-		// sample a fixed number of times
-		vector< SampledPair* > samples;
-		vector< float > scores;
-		for(unsigned int i=0; i<n_samplings; i++) {
-			unsigned int translation1 = rand() % n_translations;
-			float bleu1 = sentenceLevelBleuPlusOne(scoredata->get(S,translation1));
-
-			unsigned int translation2 = rand() % n_translations;
-			float bleu2 = sentenceLevelBleuPlusOne(scoredata->get(S,translation2));
-			
-			if (abs(bleu1-bleu2) < min_diff)
-				continue;
-			
-			samples.push_back( new SampledPair( translation1, translation2, bleu1-bleu2) );
-			scores.push_back( 1.0 - abs(bleu1-bleu2) );
-		}
-		//cerr << "sampled " << samples.size() << " pairs\n";
-
-		float min_diff = -1.0;
-		if (samples.size() > n_samples) {
-			nth_element(scores.begin(), scores.begin()+(n_samples-1), scores.end());
-			min_diff = 0.99999-scores[n_samples-1];
-			//cerr << "min_diff = " << min_diff << endl;
-		}
-
-		unsigned int collected = 0;
-		for(unsigned int i=0; i<samples.size() && collected < n_samples; i++) {
-			if (samples[i]->getDiff() >= min_diff) {
-				collected++;
-
-				*out << "1";
-        outputSample( *out, featdata->get(S,samples[i]->getTranslation1()),
-                            featdata->get(S,samples[i]->getTranslation2()) );
-        *out << endl;
-				*out << "0";
-        outputSample( *out, featdata->get(S,samples[i]->getTranslation2()),
-                            featdata->get(S,samples[i]->getTranslation1()) );
-        *out << endl;
-			}
-			delete samples[i];
-		}
-		//cerr << "collected " << collected << endl;
-	}
-	out->flush();
-  if (outFile) {
-    outFile->close();
-    delete outFile;
-  }
-}
-
-void Data::outputSample( ostream &out, const FeatureStats &f1, const FeatureStats &f2 ) 
-{
-  // difference in score in regular features
-	for(unsigned int j=0; j<f1.size(); j++)
-		if (abs(f1.get(j)-f2.get(j)) > 0.00001)
-			out << " F" << j << " " << (f1.get(j)-f2.get(j));
-
-  if (!hasSparseFeatures())
-    return;
-
-  out << " ";
-
-  // sparse features
-  const SparseVector &s1 = f1.getSparse();
-  const SparseVector &s2 = f2.getSparse();
-  SparseVector diff = s1 - s2;
-  diff.write(out);
-}
-
-
 void Data::createShards(size_t shard_count, float shard_size, const string& scorerconfig,
       std::vector<Data>& shards) 
 {
diff --git a/mert/Data.h b/mert/Data.h
index 47400c34a..84dcb884e 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -99,9 +99,6 @@ public:
     return featdata->getFeatureIndex(name);
   };
 
-	void sampleRankedPairs( const std::string &rankedPairFile );
-  void outputSample( std::ostream &out, const FeatureStats &f1, const FeatureStats &f2 );
-
   /**
    *  Create shard_count shards. If shard_size == 0, then the shards are non-overlapping
    *  and exhaust the data. If 0 < shard_size <= 1, then shards are chosen by sampling 
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 128398b9b..91fa71e7b 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -38,7 +38,6 @@ void usage(int ret)
   cerr<<"[-o] the indexes to optimize(default all)"<<endl;
   cerr<<"[-t] the optimizer(default powell)"<<endl;
   cerr<<"[-r] the random seed (defaults to system clock)"<<endl;
-	cerr<<"[-p] only create data for paired ranked optimizer"<<endl;
   cerr<<"[--sctype|-s] the scorer type (default BLEU)"<<endl;
   cerr<<"[--scconfig|-c] configuration string passed to scorer"<<endl;
   cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl;
@@ -137,7 +136,6 @@ int main (int argc, char **argv)
   string scorerfile("statscore.data");
   string featurefile("features.data");
   string initfile("init.opt");
-	string pairedrankfile("");
 
   string tooptimizestr("");
   vector<unsigned> tooptimize;
@@ -151,9 +149,6 @@ int main (int argc, char **argv)
     case 'o':
       tooptimizestr = string(optarg);
       break;
-		case 'p':
-			pairedrankfile = string(optarg);
-			break;
     case 'd':
       pdim = strtol(optarg, NULL, 10);
       break;
@@ -335,12 +330,6 @@ int main (int argc, char **argv)
     }
   }
 
-	if (pairedrankfile.compare("") != 0) {
-		D.sampleRankedPairs(pairedrankfile);
-		PrintUserTime("Stopping...");
-		exit(0);
-	}
-
   // treat sparse features just like regular features
   if (D.hasSparseFeatures()) {
     D.mergeSparseFeatures();
author	Barry Haddow <barry.haddow@gmail.com>	2011-11-16 21:43:54 +0400
committer	Barry Haddow <barry.haddow@gmail.com>	2011-11-16 21:43:54 +0400
commit	71c777f01df14c9523e20f118c90396d51678c02 (patch)
tree	f017487e82a931d6978ebf05cde367b6cc6a7c4e /mert
parent	7aa9073abec82ae76f1d3ed02c64e90a061a707f (diff)