1 files changed, 0 insertions, 178 deletions
diff --git a/moses-cmd/mbr.cpp b/moses-cmd/mbr.cpp
deleted file mode 100644
index 6a8dfa823..000000000
--- a/moses-cmd/mbr.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <iomanip>
-#include <vector>
-#include <map>
-#include <stdlib.h>
-#include <math.h>
-#include <algorithm>
-#include <stdio.h>
-#include "moses/TrellisPathList.h"
-#include "moses/TrellisPath.h"
-#include "moses/StaticData.h"
-#include "moses/Util.h"
-#include "mbr.h"
-
-using namespace std ;
-using namespace Moses;
-
-
-/* Input :
-   1. a sorted  n-best list, with duplicates filtered out in the following  format
-   0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432
-
-   2. a weight vector
-   3. bleu order ( default = 4)
-   4. scaling factor to weigh the weight vector (default = 1.0)
-
-   Output :
-   translations that minimise the Bayes Risk of the n-best list
-
-
-*/
-
-int BLEU_ORDER = 4;
-int SMOOTH = 1;
-float min_interval = 1e-4;
-void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const Factor* >, int >  & allngrams)
-{
-  vector< const Factor* > ngram;
-  for (int k = 0; k < BLEU_ORDER; k++) {
-    for(int i =0; i < max((int)sentence.size()-k,0); i++) {
-      for ( int j = i; j<= i+k; j++) {
-        ngram.push_back(sentence[j]);
-      }
-      ++allngrams[ngram];
-      ngram.clear();
-    }
-  }
-}
-
-float calculate_score(const vector< vector<const Factor*> > & sents, int ref, int hyp,  vector < map < vector < const Factor *>, int > > & ngram_stats )
-{
-  int comps_n = 2*BLEU_ORDER+1;
-  vector<int> comps(comps_n);
-  float logbleu = 0.0, brevity;
-
-  int hyp_length = sents[hyp].size();
-
-  for (int i =0; i<BLEU_ORDER; i++) {
-    comps[2*i] = 0;
-    comps[2*i+1] = max(hyp_length-i,0);
-  }
-
-  map< vector < const Factor * > ,int > & hyp_ngrams = ngram_stats[hyp] ;
-  map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ;
-
-  for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin();
-       it != hyp_ngrams.end(); it++) {
-    map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first);
-    if(ref_it != ref_ngrams.end()) {
-      comps[2* (it->first.size()-1)] += min(ref_it->second,it->second);
-    }
-  }
-  comps[comps_n-1] = sents[ref].size();
-
-  for (int i=0; i<BLEU_ORDER; i++) {
-    if (comps[0] == 0)
-      return 0.0;
-    if ( i > 0 )
-      logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
-    else
-      logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
-  }
-  logbleu /= BLEU_ORDER;
-  brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
-  if (brevity < 0.0)
-    logbleu += brevity;
-  return exp(logbleu);
-}
-
-const TrellisPath doMBR(const TrellisPathList& nBestList)
-{
-  float marginal = 0;
-
-  vector<float> joint_prob_vec;
-  vector< vector<const Factor*> > translations;
-  float joint_prob;
-  vector< map < vector <const Factor *>, int > > ngram_stats;
-
-  TrellisPathList::const_iterator iter;
-
-  // get max score to prevent underflow
-  float maxScore = -1e20;
-  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
-    const TrellisPath &path = **iter;
-    float score = StaticData::Instance().GetMBRScale()
-                  * path.GetScoreBreakdown().GetWeightedScore();
-    if (maxScore < score) maxScore = score;
-  }
-
-  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
-    const TrellisPath &path = **iter;
-    joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().GetWeightedScore() - maxScore);
-    marginal += joint_prob;
-    joint_prob_vec.push_back(joint_prob);
-
-    // get words in translation
-    vector<const Factor*> translation;
-    GetOutputFactors(path, translation);
-
-    // collect n-gram counts
-    map < vector < const Factor *>, int > counts;
-    extract_ngrams(translation,counts);
-
-    ngram_stats.push_back(counts);
-    translations.push_back(translation);
-  }
-
-  vector<float> mbr_loss;
-  float bleu, weightedLoss;
-  float weightedLossCumul = 0;
-  float minMBRLoss = 1000000;
-  int minMBRLossIdx = -1;
-
-  /* Main MBR computation done here */
-  iter = nBestList.begin();
-  for (unsigned int i = 0; i < nBestList.GetSize(); i++) {
-    weightedLossCumul = 0;
-    for (unsigned int j = 0; j < nBestList.GetSize(); j++) {
-      if ( i != j) {
-        bleu = calculate_score(translations, j, i,ngram_stats );
-        weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal);
-        weightedLossCumul += weightedLoss;
-        if (weightedLossCumul > minMBRLoss)
-          break;
-      }
-    }
-    if (weightedLossCumul < minMBRLoss) {
-      minMBRLoss = weightedLossCumul;
-      minMBRLossIdx = i;
-    }
-    iter++;
-  }
-  /* Find sentence that minimises Bayes Risk under 1- BLEU loss */
-  return nBestList.at(minMBRLossIdx);
-  //return translations[minMBRLossIdx];
-}
-
-void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation)
-{
-  const std::vector<const Hypothesis *> &edges = path.GetEdges();
-  const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
-  assert (outputFactorOrder.size() == 1);
-
-  // print the surface factor of the translation
-  for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
-    const Hypothesis &edge = *edges[currEdge];
-    const Phrase &phrase = edge.GetCurrTargetPhrase();
-    size_t size = phrase.GetSize();
-    for (size_t pos = 0 ; pos < size ; pos++) {
-
-      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
-      translation.push_back(factor);
-    }
-  }
-}
-