// contrib/reranking/src/NBest.cpp

/*
 *  nbest: tool to process moses n-best lists
 *
 *  File: NBest.cpp
 *        basic functions on n-best lists
 *
 *  Created by Holger Schwenk, University of Le Mans, 05/16/2008
 *
 */


#include "NBest.h"

#include "Util.h"  // from Moses

#include <sstream>
#include <algorithm>

//NBest::NBest() {
//cerr << "NBEST: constructor called" << endl;
//}


/*
 * Read one line of an n-best file and, unless it is skipped, append the
 * hypothesis it describes to this n-best list.
 *
 * A line is split at NBEST_DELIM into blocks:
 *   blocks[0]  n-best ID (scanned as int)
 *   blocks[1]  passed unchanged to the Hypo constructor
 *   blocks[2]  space separated feature block; tokens containing ':' are
 *              treated as feature names and ignored, all other tokens are
 *              scanned as float scores
 *   blocks[3]  scanned as float and passed to the Hypo constructor
 * More than four blocks (segmentation information) is reported via Error().
 *
 * inpf: stream the lines are read from
 * n:    if > 0, at most n hypotheses are stored; further lines with the same
 *       ID are consumed but not parsed
 *
 * Returns true when a line belonging to the current ID was consumed, false
 * at end of input or when the line carries a different ID than the previous
 * one. In the latter case the line stays in the static buffer so that the
 * next call starts with it.
 *
 * NOTE: the line buffer and the previous ID are function-local statics, i.e.
 * they are shared by all NBest objects.
 */
bool NBest::ParseLine(ifstream &inpf, const int n)
{
  static string line; // buffered input line, kept across calls when a new ID starts
  static int prev_id=-1; // used to detect a change of the n-best ID

  if (line.empty()) {
    getline(inpf,line);
    // Stop only when the end of the input was hit without reading anything;
    // a final line that lacks the trailing newline also raises eof() but
    // must still be processed.
    if (inpf.eof() && line.empty()) return false;
  }

  // split line into blocks
  vector<string> blocks;
  const string::size_type delim_len=strlen(NBEST_DELIM);
  string::size_type start=0;
  string::size_type hit;
  while ((hit=line.find(NBEST_DELIM,start))!=string::npos) {
    blocks.push_back(line.substr(start,hit-start));
    start=hit+delim_len;
  }
  blocks.push_back(line.substr(start));

  if (blocks.size()<4) {
    cerr << line << endl;
    Error("can't parse the above line");
  }

  // parse ID
  const int id=Scan<int>(blocks[0]);
  if (prev_id>=0 && id!=prev_id) {
    prev_id=id;  // new nbest list has started, keep the line for the next call
    return false;
  }
  prev_id=id;

  if (n>0 && nbest.size() >= static_cast<size_t>(n)) {
    line.clear();
    return true; // skip parsing of unused hypos
  }

  // parse feature function scores
  // Tokens are separated by one or more spaces. The last token is taken
  // even when it is not followed by a space, and runs of spaces never yield
  // an empty token.
  vector<float> f;
  const string &feats=blocks[2];
  string::size_type tok_begin=feats.find_first_not_of(' ');
  while (tok_begin!=string::npos) {
    string::size_type tok_end=feats.find(' ',tok_begin);
    if (tok_end==string::npos) tok_end=feats.size();
    const string feat=feats.substr(tok_begin,tok_end-tok_begin);
    if (feat.find(':')==string::npos) {
      f.push_back(Scan<float>(feat)); // plain value; "name:" tokens are ignored
    }
    tok_begin=feats.find_first_not_of(' ',tok_end);
  }

  // eventually parse segmentation
  if (blocks.size()>4) {
    Error("parsing segmentation not yet supported");
  }

  nbest.push_back(Hypo(id, blocks[1], f, Scan<float>(blocks[3])));

  line.clear(); // force read of new line

  return true;
}


/*
 * Build an n-best list by calling ParseLine() on inpf until it returns
 * false. n is handed to ParseLine() unchanged.
 */
NBest::NBest(ifstream &inpf, const int n)
{
  for (;;) {
    const bool consumed = ParseLine(inpf,n);
    if (!consumed) break;
  }
}


/*
 * Destructor. The body is empty: no explicit clean-up is performed here.
 */
NBest::~NBest()
{
  //cerr << "NBEST: destructor called" << endl;
}

/*
 * Write the first n hypotheses to outf, in their current order, by calling
 * Write() on each of them.
 * If n is smaller than 1 or larger than the number of stored hypotheses,
 * all hypotheses are written.
 */
void NBest::Write(ofstream &outf, int n)
{
  const bool take_all = (n<1) || (n>nbest.size());
  if (take_all) n=nbest.size();

  int idx=0;
  while (idx<n) {
    nbest[idx].Write(outf);
    ++idx;
  }
}


/*
 * Call CalcGlobal(w) on every hypothesis of this n-best list.
 *
 * w: weights handed to each hypothesis (passed by non-const reference, so
 *    the callee may modify them)
 *
 * Returns 0. The function is declared to return float but previously ended
 * without a return statement, which is undefined behaviour. No aggregate
 * value is computed here, so a fixed, well-defined value is returned.
 * NOTE(review): if callers expect a meaningful value (e.g. the best global
 * score), that has to be defined together with Hypo::CalcGlobal - confirm.
 */
float NBest::CalcGlobal(Weights &w)
{
  //cerr << "NBEST: calc global of size " << nbest.size() << endl;
  for (vector<Hypo>::iterator i = nbest.begin(); i != nbest.end(); ++i) {
    i->CalcGlobal(w);
  }
  return 0.0f;
}


void NBest::Sort()
{
  sort(nbest.begin(),nbest.end());
}