Diffstat (limited to 'mira/MiraOptimiser.cpp')
-rw-r--r-- [-rwxr-xr-x]  mira/MiraOptimiser.cpp  421
1 file changed, 416 insertions(+), 5 deletions(-)
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index c3ccfb011..a0fb07238 100755..100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -8,7 +8,6 @@ using namespace std;
 namespace Mira {
 
 size_t MiraOptimiser::updateWeights(
-  ScoreComponentCollection& currWeights,
   ScoreComponentCollection& weightUpdate,
   const vector<vector<ScoreComponentCollection> >& featureValues,
   const vector<vector<float> >& losses,
@@ -152,7 +151,6 @@ size_t MiraOptimiser::updateWeights(
 }
 
 size_t MiraOptimiser::updateWeightsHopeFear(
-  Moses::ScoreComponentCollection& currWeights,
   Moses::ScoreComponentCollection& weightUpdate,
   const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
   const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
@@ -315,7 +313,6 @@ size_t MiraOptimiser::updateWeightsHopeFear(
 }
 
 size_t MiraOptimiser::updateWeightsAnalytically(
-  ScoreComponentCollection& currWeights,
   ScoreComponentCollection& weightUpdate,
   ScoreComponentCollection& featureValuesHope,
   ScoreComponentCollection& featureValuesFear,
@@ -457,7 +454,6 @@ size_t MiraOptimiser::updateWeightsAnalytically(
 }
 
 size_t MiraOptimiser::updateWeightsRankModel(
-  Moses::ScoreComponentCollection& currWeights,
   Moses::ScoreComponentCollection& weightUpdate,
   const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
   const std::vector<std::vector<float> >& bleuScores,
@@ -478,11 +474,12 @@ size_t MiraOptimiser::updateWeightsRankModel(
   float oldDistanceFromOptimum = 0;
 
   // iterate over input sentences (1 (online) or more (batch))
-  float minBleuDifference = 1.0;
+  float minBleuDifference = 0.005;
   for (size_t i = 0; i < featureValues.size(); ++i) {
     // Build all pairs where the first has higher Bleu than the second
     for (size_t j = 0; j < featureValues[i].size(); ++j) {
       for (size_t k = 0; k < featureValues[i].size(); ++k) {
+        if (j == k) continue;
         if (bleuScores[i][j] - minBleuDifference < bleuScores[i][k])
           // need at least a positive Bleu difference of specified amount
           continue;
@@ -598,5 +595,419 @@ size_t MiraOptimiser::updateWeightsRankModel(
   return 0;
 }
 
+size_t MiraOptimiser::updateWeightsHopeFearSelective(
+  Moses::ScoreComponentCollection& weightUpdate,
+  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+  const std::vector<std::vector<float> >& bleuScoresHope,
+  const std::vector<std::vector<float> >& bleuScoresFear,
+  const std::vector<std::vector<float> >& modelScoresHope,
+  const std::vector<std::vector<float> >& modelScoresFear,
+  float learning_rate,
+  size_t rank,
+  size_t epoch,
+  int updatePosition) {
+
+  // vector of feature values differences for all created constraints
+  vector<ScoreComponentCollection> nonZeroFeatures;
+  vector<float> lossMinusModelScoreDiffs;
+
+  // Make constraints for new hypothesis translations
+  float epsilon = 0.0001;
+  int violatedConstraintsBefore = 0;
+
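+  // This "selective" variant splits each hope/fear pair into one constraint
+  // per non-zero feature of the difference vector, so the Hildreth optimiser
+  // below can assign an individual alpha to every active feature.
+  // updatePosition == -1 appears to mean "build constraints for the whole
+  // batch"; otherwise only the sentence at that batch position is used.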
+  // iterate over input sentences (1 (online) or more (batch))
+  for (size_t i = 0; i < featureValuesHope.size(); ++i) {
+    if (updatePosition != -1) {
+      if (i < updatePosition)
+        continue;
+      else if (i > updatePosition)
+        break;
+    }
+
+    // Pick all pairs[j,j] of hope and fear translations for one input sentence
+    for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
+      ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
+      featureValueDiff.MinusEquals(featureValuesFear[i][j]);
+      if (featureValueDiff.GetL1Norm() == 0) {
+        cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
+        continue;
+      }
+
+      // check if constraint is violated
+      float loss = bleuScoresHope[i][j] - bleuScoresFear[i][j];
+      float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j];
+      float diff = 0;
+      if (loss > modelScoreDiff)
+        diff = loss - modelScoreDiff;
+      if (diff > epsilon)
+        ++violatedConstraintsBefore;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
+
+      // iterate over difference vector and add a constraint for every non-zero feature
+      FVector features = featureValueDiff.GetScoresVector();
+      size_t n_core = 0, n_sparse = 0, n_sparse_hope = 0, n_sparse_fear = 0;
+      for (size_t i=0; i<features.coreSize(); ++i) {
+        if (features[i] != 0.0) {
+          ++n_core;
+          ScoreComponentCollection f;
+          f.Assign(i, features[i]);
+          nonZeroFeatures.push_back(f);
+        }
+      }
+
+      // 1 + 2
+      /*for (FVector::iterator i = features.begin(); i != features.end(); ++i) {
+        if (i->second != 0.0) {
+          ++n_sparse;
+          ScoreComponentCollection f;
+          f.Assign((i->first).name(), i->second);
+          nonZeroFeatures.push_back(f);
+          cerr << "Rank " << rank << ", epoch " << epoch << ", f: " << f << endl;
+        }
+      }
+      cerr << "Rank " << rank << ", epoch " << epoch << ", non-zero features: " << nonZeroFeatures.size() << endl;*/
+
+      // 3
+      vector<ScoreComponentCollection> nonZeroFeaturesHope;
+      vector<ScoreComponentCollection> nonZeroFeaturesFear;
+      for (FVector::iterator i = features.begin(); i != features.end(); ++i) {
+        if (i->second != 0.0) {
+          ScoreComponentCollection f;
+          f.Assign((i->first).name(), i->second);
+          cerr << "Rank " << rank << ", epoch " << epoch << ", f: " << f << endl;
+
+          if (i->second > 0.0) {
+            ++n_sparse_hope;
+            nonZeroFeaturesHope.push_back(f);
+          }
+          else {
+            ++n_sparse_fear;
+            nonZeroFeaturesFear.push_back(f);
+          }
+        }
+      }
+
+      //1
+      /*float n = n_core + n_sparse;
+      for (size_t i=0; i<n; ++i)
+        lossMinusModelScoreDiffs.push_back(diff/n);
+
+      //2
+      float diff_10 = diff * 0.1;
+      float diff_90 = diff * 0.9;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", core diff: " << diff_10/n_core << endl;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", sparse diff: " << diff_90/n_sparse << endl;
+      for (size_t i=0; i<n_core; ++i)
+        lossMinusModelScoreDiffs.push_back(diff_10/n_core);
+      for (size_t i=0; i<n_sparse; ++i)
+        lossMinusModelScoreDiffs.push_back(diff_90/n_sparse);*/
+
+      // 3
+      float n = n_core + n_sparse_hope + n_sparse_fear;
+      for (size_t i=0; i<n_core; ++i)
+        lossMinusModelScoreDiffs.push_back(diff/n);
+      for (size_t i=0; i<n_sparse_hope; ++i) {
+        nonZeroFeatures.push_back(nonZeroFeaturesHope[i]);
+        lossMinusModelScoreDiffs.push_back((diff/n)*1.1);
+      }
+      for (size_t i=0; i<n_sparse_fear; ++i) {
+        nonZeroFeatures.push_back(nonZeroFeaturesFear[i]);
+        lossMinusModelScoreDiffs.push_back(diff/n);
+      }
+      cerr << "Rank " << rank << ", epoch " << epoch << ", core diff: " << diff/n << endl;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", hope diff: " << ((diff/n)*1.1) << endl;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", fear diff: " << diff/n << endl;
+    }
+  }
+
+  assert(nonZeroFeatures.size() == lossMinusModelScoreDiffs.size());
+
+  // run optimisation: compute alphas for all given constraints
+  vector<float> alphas;
+  ScoreComponentCollection summedUpdate;
+  if (violatedConstraintsBefore > 0) {
+    cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << nonZeroFeatures.size() << endl;
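+    // Hildreth's algorithm solves the dual of the QP over this constraint
+    // set, returning one alpha per constraint; m_slack (if > 0) appears to
+    // act as the upper bound C on each alpha, matching the Crammer & Singer
+    // formula cited in the analytic variants below.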
+    alphas = Hildreth::optimise(nonZeroFeatures, lossMinusModelScoreDiffs, m_slack);
+
+    // Update the weight vector according to the alphas and the feature value differences
+    // * w' = w' + SUM alpha_i * (h_i(oracle) - h_i(hypothesis))
+    for (size_t k = 0; k < nonZeroFeatures.size(); ++k) {
+      float alpha = alphas[k];
+      cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
+      if (alpha != 0) {
+        ScoreComponentCollection update(nonZeroFeatures[k]);
+        update.MultiplyEquals(alpha);
+
+        // sum updates
+        summedUpdate.PlusEquals(update);
+      }
+    }
+  }
+  else {
+    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
+    // return 0;
+    return 1;
+  }
+
+  // apply learning rate
+  if (learning_rate != 1) {
+    cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl;
+    summedUpdate.MultiplyEquals(learning_rate);
+  }
+
+  // scale update by BLEU of oracle (for batch size 1 only)
+  if (featureValuesHope.size() == 1) {
+    if (m_scale_update) {
+      cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << bleuScoresHope[0][0] << endl;
+      summedUpdate.MultiplyEquals(bleuScoresHope[0][0]);
+    }
+  }
+
+  //cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << summedUpdate << endl;
+  weightUpdate.PlusEquals(summedUpdate);
+  return 0;
+}
+
+size_t MiraOptimiser::updateWeightsHopeFearSummed(
+  Moses::ScoreComponentCollection& weightUpdate,
+  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+  const std::vector<std::vector<float> >& bleuScoresHope,
+  const std::vector<std::vector<float> >& bleuScoresFear,
+  const std::vector<std::vector<float> >& modelScoresHope,
+  const std::vector<std::vector<float> >& modelScoresFear,
+  float learning_rate,
+  size_t rank,
+  size_t epoch,
+  bool rescaleSlack,
+  bool rewardHope,
+  bool makePairs) {
+
+  // vector of feature values differences for all created constraints
+  ScoreComponentCollection averagedFeatureDiffs;
+  float averagedViolations = 0;
+
+  // Make constraints for new hypothesis translations
+  float epsilon = 0.0001;
+  int violatedConstraintsBefore = 0;
+
+  if (!makePairs) {
+    ScoreComponentCollection featureValueDiff;
+    float lossHope = 0, lossFear = 0, modelScoreHope = 0, modelScoreFear = 0, hopeCount = 0, fearCount = 0;
+    // add all hope vectors
+    for (size_t i = 0; i < featureValuesHope.size(); ++i) {
+      for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
+        featureValueDiff.PlusEquals(featureValuesHope[i][j]);
+        lossHope += bleuScoresHope[i][j];
+        modelScoreHope += modelScoresHope[i][j];
+        ++hopeCount;
+      }
+    }
+    lossHope /= hopeCount;
+    modelScoreHope /= hopeCount;
+
+    // subtract all fear vectors
+    for (size_t i = 0; i < featureValuesFear.size(); ++i) {
+      for (size_t j = 0; j < featureValuesFear[i].size(); ++j) {
+        featureValueDiff.MinusEquals(featureValuesFear[i][j]);
+        lossFear += bleuScoresFear[i][j];
+        modelScoreFear += modelScoresFear[i][j];
+        ++fearCount;
+      }
+    }
+    lossFear /= fearCount;
+    modelScoreFear /= fearCount;
+
+    if (featureValueDiff.GetL1Norm() == 0) {
+      cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
+      cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
+      return 1;
+    }
+
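+    // In this !makePairs mode a single aggregate constraint is built: the
+    // feature difference is the sum of all hope vectors minus the sum of all
+    // fear vectors, while the Bleu losses and model scores are averaged.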
+    // check if constraint is violated
+    float lossDiff = lossHope - lossFear;
+    float modelScoreDiff = modelScoreHope - modelScoreFear;
+    float diff = 0;
+    if (lossDiff > modelScoreDiff)
+      diff = lossDiff - modelScoreDiff;
+    if (diff > epsilon)
+      ++violatedConstraintsBefore;
+    cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << lossDiff << " (current violation: " << diff << ")" << endl;
+
+    // add constraint
+    averagedFeatureDiffs = featureValueDiff;
+    averagedViolations = diff;
+  }
+  else {
+    // iterate over input sentences (1 (online) or more (batch))
+    for (size_t i = 0; i < featureValuesHope.size(); ++i) {
+      // Pick all pairs[j,j] of hope and fear translations for one input sentence and add them up
+      for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
+        ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
+        featureValueDiff.MinusEquals(featureValuesFear[i][j]);
+        if (featureValueDiff.GetL1Norm() == 0) {
+          cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
+          continue;
+        }
+
+        // check if constraint is violated
+        float lossDiff = bleuScoresHope[i][j] - bleuScoresFear[i][j];
+        float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j];
+        if (rescaleSlack) {
+          cerr << "Rank " << rank << ", epoch " << epoch << ", modelScoreDiff scaled by lossDiff: " << modelScoreDiff << " --> " << modelScoreDiff*lossDiff << endl;
+          modelScoreDiff *= lossDiff;
+        }
+        float diff = 0;
+        if (lossDiff > modelScoreDiff)
+          diff = lossDiff - modelScoreDiff;
+        if (diff > epsilon)
+          ++violatedConstraintsBefore;
+        cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << lossDiff << " (current violation: " << diff << ")" << endl;
+
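+        // With rescaleSlack, both the model score difference above and the
+        // feature difference below are multiplied by the loss, i.e. slack
+        // rescaling in the sense of Joachims 2009 (OP5), as cited further down.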
+        // add constraint
+        if (rescaleSlack) {
+          featureValueDiff.MultiplyEquals(lossDiff);
+          cerr << "Rank " << rank << ", epoch " << epoch << ", featureValueDiff scaled by lossDiff." << endl;
+        }
+        averagedFeatureDiffs.PlusEquals(featureValueDiff);
+        averagedViolations += diff;
+      }
+    }
+  }
+
+  // divide by number of constraints (1/n)
+  if (!makePairs) {
+    averagedFeatureDiffs.DivideEquals(featureValuesHope[0].size());
+  }
+  else {
+    averagedFeatureDiffs.DivideEquals(featureValuesHope[0].size());
+    averagedViolations /= featureValuesHope[0].size();
+  }
+  cerr << "Rank " << rank << ", epoch " << epoch << ", averaged feature diffs: " << averagedFeatureDiffs << endl;
+  cerr << "Rank " << rank << ", epoch " << epoch << ", averaged violations: " << averagedViolations << endl;
+
+  if (violatedConstraintsBefore > 0) {
+    if (rewardHope) {
+      averagedFeatureDiffs.RedistributeMass(0.1);
+      cerr << "Rank " << rank << ", epoch " << epoch << ", redistributed feature diffs: " << averagedFeatureDiffs << endl;
+    }
+
+    // compute alpha for given constraint: (loss diff - model score diff) / || feature value diff ||^2
+    // featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff)
+    // from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2}
+    // adjusted for 1 slack according to Joachims 2009, OP4 (margin rescaling), OP5 (slack rescaling)
+    float squaredNorm = averagedFeatureDiffs.GetL2Norm() * averagedFeatureDiffs.GetL2Norm();
+    float alpha = averagedViolations / squaredNorm;
+    cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << alpha << endl;
+    if (m_slack > 0) {
+      if (alpha > m_slack) {
+        alpha = m_slack;
+      }
+      else if (alpha < m_slack*(-1)) {
+        alpha = m_slack*(-1);
+      }
+    }
+    cerr << "Rank " << rank << ", epoch " << epoch << ", clipped alpha: " << alpha << endl;
+
+    // compute update
+    averagedFeatureDiffs.MultiplyEquals(alpha);
+    weightUpdate.PlusEquals(averagedFeatureDiffs);
+    return 0;
+  }
+  else {
+    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
+    return 1;
+  }
+}
+
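+// Summed variant of updateWeightsRankModel: all ordered pairs (j, k) whose
+// Bleu advantage is at least minBleuDifference are averaged into a single
+// 1-slack constraint, then applied analytically with
+//   alpha = min(C, averagedViolation / ||averaged feature diff||^2)
+// following the Crammer & Singer 2006 formula cited in the body below.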
+size_t MiraOptimiser::updateWeightsRankModelSummed(
+  Moses::ScoreComponentCollection& weightUpdate,
+  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValues,
+  const std::vector<std::vector<float> >& bleuScores,
+  const std::vector<std::vector<float> >& modelScores,
+  float learning_rate,
+  size_t rank,
+  size_t epoch) {
+
+  // vector of feature values differences for all created constraints
+  ScoreComponentCollection averagedFeatureDiffs;
+  float averagedViolations = 0;
+
+  // Make constraints for new hypothesis translations
+  float epsilon = 0.0001;
+  int violatedConstraintsBefore = 0;
+  float oldDistanceFromOptimum = 0;
+
+  // iterate over input sentences (1 (online) or more (batch))
+  float minBleuDifference = 0.005;
+  size_t numConstraints = 0;
+  for (size_t i = 0; i < featureValues.size(); ++i) {
+    // Build all pairs where the first has higher Bleu than the second
+    for (size_t j = 0; j < featureValues[i].size(); ++j) {
+      for (size_t k = 0; k < featureValues[i].size(); ++k) {
+        if (j == k) continue;
+        if (bleuScores[i][j] - minBleuDifference < bleuScores[i][k])
+          // need at least a positive Bleu difference of specified amount
+          continue;
+
+        ScoreComponentCollection featureValueDiff = featureValues[i][j];
+        featureValueDiff.MinusEquals(featureValues[i][k]);
+        if (featureValueDiff.GetL1Norm() == 0) {
+          cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
+          continue;
+        }
+
+        // check if constraint is violated
+        float loss = bleuScores[i][j] - bleuScores[i][k];
+        float modelScoreDiff = modelScores[i][j] - modelScores[i][k];
+        float diff = 0;
+        if (loss > modelScoreDiff)
+          diff = loss - modelScoreDiff;
+        if (diff > epsilon)
+          ++violatedConstraintsBefore;
+        cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
+
+        // add constraint
+        averagedFeatureDiffs.PlusEquals(featureValueDiff);
+        averagedViolations += diff;
+        ++numConstraints;
+      }
+    }
+  }
+
+  // divide by number of constraints
+  averagedFeatureDiffs.DivideEquals(numConstraints);
+  averagedViolations /= numConstraints;
+
+  cerr << "Rank " << rank << ", epoch " << epoch << ", averaged feature diffs: " << averagedFeatureDiffs << endl;
+  cerr << "Rank " << rank << ", epoch " << epoch << ", averaged violations: " << averagedViolations << endl;
+
+  if (violatedConstraintsBefore > 0) {
+    // compute alpha for given constraint: (loss - model score diff) / || feature value diff ||^2
+    // featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm() == featureValueDiff.InnerProduct(featureValueDiff)
+    // from Crammer&Singer 2006: alpha = min {C , l_t/ ||x||^2}
+    float squaredNorm = averagedFeatureDiffs.GetL2Norm() * averagedFeatureDiffs.GetL2Norm();
+    float alpha = averagedViolations / squaredNorm;
+    cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << alpha << endl;
+    if (m_slack > 0) {
+      if (alpha > m_slack) {
+        alpha = m_slack;
+      }
+      else if (alpha < m_slack*(-1)) {
+        alpha = m_slack*(-1);
+      }
+    }
+    cerr << "Rank " << rank << ", epoch " << epoch << ", clipped alpha: " << alpha << endl;
+
+    // compute update
+    averagedFeatureDiffs.MultiplyEquals(alpha);
+    weightUpdate.PlusEquals(averagedFeatureDiffs);
+    return 0;
+  }
+  else {
+    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
+    return 1;
+  }
+}
+
 }