Welcome to mirror list, hosted at ThFree Co, Russian Federation.

MeteorScorer.cpp « mert - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d030e52bde57b5edc950c304fe212a9400038c47 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#include "MeteorScorer.h"

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <cstdio>
#include <string>
#include <vector>

#include <boost/thread/mutex.hpp>

#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
#include "Fdstream.h"
#endif

#include "ScoreStats.h"
#include "Util.h"

using namespace std;

namespace MosesTuning
{

// Meteor supported
#if (defined(__GLIBCXX__) || defined(__GLIBCPP__)) && !defined(_WIN32)

// Readable names for the four pipe ends, named from the point of view of the
// child process. They expand to elements of the local arrays pipefds_input
// and pipefds_output, so both arrays must be in scope wherever a name is used.
// Index 0 of each array is used as the read end and index 1 as the write end.
#define CHILD_STDIN_READ pipefds_input[0]
#define CHILD_STDIN_WRITE pipefds_input[1]
#define CHILD_STDOUT_READ pipefds_output[0]
#define CHILD_STDOUT_WRITE pipefds_output[1]

// Reads the Meteor options from the scorer config and launches the Meteor jar
// as a child process whose stdin/stdout are connected to this object through
// two pipes (m_to_meteor writes to the child, m_from_meteor reads from it).
//
// Config keys read here (default in parentheses): jar (""), lang ("en"),
// task ("tune"), m (""), p (""), w (""). "jar" is mandatory.
//
// Throws runtime_error if no jar is configured, or if a pipe or the child
// process cannot be created. No file descriptors are leaked on those paths.
//
// NOTE(review): the command line is run through "bash -c", so the configured
// values are interpreted by the shell; jar, task and lang are not quoted.
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  meteor_jar = getConfig("jar", "");
  meteor_lang = getConfig("lang", "en");
  meteor_task = getConfig("task", "tune");
  meteor_m = getConfig("m", "");
  meteor_p = getConfig("p", "");
  meteor_w = getConfig("w", "");
  if (meteor_jar == "") {
    throw runtime_error("Meteor jar required, see MeteorScorer.h for full list of options: --scconfig jar:/path/to/meteor-1.4.jar");
  }
  int pipefds_input[2];
  int pipefds_output[2];
  // Create pipes for process communication
  if (pipe(pipefds_input) == -1) {
    throw runtime_error("Error creating pipe");
  }
  if (pipe(pipefds_output) == -1) {
    // Do not leak the first pipe when the second one cannot be created
    close(CHILD_STDIN_READ);
    close(CHILD_STDIN_WRITE);
    throw runtime_error("Error creating pipe");
  }
  // Fork
  const pid_t pid = fork();
  if (pid == pid_t(-1)) {
    // No child exists: release both pipes and report the failure instead of
    // continuing as if Meteor were running
    close(CHILD_STDIN_READ);
    close(CHILD_STDIN_WRITE);
    close(CHILD_STDOUT_READ);
    close(CHILD_STDOUT_WRITE);
    throw runtime_error("Error forking Meteor process");
  }
  if (pid == pid_t(0)) {
    // Child's IO: read requests on stdin, write replies on stdout
    if (dup2(CHILD_STDIN_READ, 0) == -1 || dup2(CHILD_STDOUT_WRITE, 1) == -1) {
      TRACE_ERR("Error redirecting IO for Meteor process\n");
      _exit(127);
    }
    close(CHILD_STDIN_WRITE);
    close(CHILD_STDOUT_READ);
    // Call Meteor
    stringstream meteor_cmd;
    meteor_cmd << "java -Xmx1G -jar " << meteor_jar << " - - -stdio -lower -t " << meteor_task << " -l " << meteor_lang;
    if (meteor_m != "") {
      meteor_cmd << " -m '" << meteor_m << "'";
    }
    if (meteor_p != "") {
      meteor_cmd << " -p '" << meteor_p << "'";
    }
    if (meteor_w != "") {
      meteor_cmd << " -w '" << meteor_w << "'";
    }
    TRACE_ERR("Executing: " + meteor_cmd.str() + "\n");
    execl("/bin/bash", "bash", "-c", meteor_cmd.str().c_str(), (char*)NULL);
    // execl only returns on failure. This is a forked copy of the parent, so
    // it must terminate immediately: throwing here would unwind the parent's
    // call stack (handlers, destructors, buffered output) a second time.
    TRACE_ERR("Error: unable to execute Meteor command\n");
    _exit(127);
  }
  // Parent's IO: keep only the ends used to talk to the child
  close(CHILD_STDIN_READ);
  close(CHILD_STDOUT_WRITE);
  m_to_meteor = new ofdstream(CHILD_STDIN_WRITE);
  m_from_meteor = new ifdstream(CHILD_STDOUT_READ);
}

// Destroys the two stream objects allocated by the constructor.
// NOTE(review): presumably destroying the streams also closes the underlying
// pipe descriptors, which would signal end-of-input to the Meteor process --
// confirm against Fdstream.h. The child process is not waited for here.
MeteorScorer::~MeteorScorer()
{
  // Cleanup IO: the stream writing to Meteor goes first, then the one
  // reading from it
  delete m_to_meteor;
  delete m_from_meteor;
}

void MeteorScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
  // Just store strings since we're sending lines to an external process
  for (int incRefs = 0; incRefs < (int)referenceFiles.size(); incRefs++) {
    m_references.clear();
    ifstream in(referenceFiles.at(incRefs).c_str());
    if (!in) {
      throw runtime_error("Unable to open " + referenceFiles.at(incRefs));
    }
    string line;
    while (getline(in, line)) {
      line = this->preprocessSentence(line);
      m_references.push_back(line);
    }
    m_multi_references.push_back(m_references);
  }
  m_references=m_multi_references.at(0);
}

void MeteorScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
  string sentence = this->preprocessSentence(text);
  string stats_str;
  stringstream input;
  // SCORE ||| ref1 ||| ref2 ||| ... ||| text
  input << "SCORE";
  for (int incRefs = 0; incRefs < (int)m_multi_references.size(); incRefs++) {
    if (sid >= m_multi_references.at(incRefs).size()) {
      stringstream msg;
      msg << "Sentence id (" << sid << ") not found in reference set";
      throw runtime_error(msg.str());
    }
    string ref = m_multi_references.at(incRefs).at(sid);
    input << " ||| " << ref;
  }
  input << " ||| " << text << "\n";
  // Threadsafe IO
#ifdef WITH_THREADS
  mtx.lock();
#endif
  //TRACE_ERR ( "in: " + input.str() );
  *m_to_meteor << input.str();
  m_from_meteor->getline(stats_str);
  //TRACE_ERR ( "out: " + stats_str + "\n" );
#ifdef WITH_THREADS
  mtx.unlock();
#endif
  entry.set(stats_str);
}

float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
  string score;
  stringstream input;
  // EVAL ||| stats
  input << "EVAL |||";
  copy(comps.begin(), comps.end(), ostream_iterator<int>(input, " "));
  input << "\n";
  // Threadsafe IO
#ifdef WITH_THREADS
  mtx.lock();
#endif
  //TRACE_ERR ( "in: " + input.str() );
  *m_to_meteor << input.str();
  m_from_meteor->getline(score);
  //TRACE_ERR ( "out: " + score + "\n" );
#ifdef WITH_THREADS
  mtx.unlock();
#endif
  return atof(score.c_str());
}

#else

// Meteor unsupported, throw error if used

// Fallback constructor for builds without Meteor support: always throws
// runtime_error, so no MeteorScorer object can be created.
MeteorScorer::MeteorScorer(const string& config)
  : StatisticsBasedScorer("METEOR",config)
{
  const string reason("Meteor unsupported, requires GLIBCXX");
  throw runtime_error(reason);
}

// Fallback destructor for builds without Meteor support: nothing to release.
MeteorScorer::~MeteorScorer() {}

// Fallback for builds without Meteor support: does nothing.
void MeteorScorer::setReferenceFiles(const vector<string>& referenceFiles) {}

// Fallback for builds without Meteor support: does nothing and leaves entry
// untouched.
void MeteorScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry) {}

// Fallback for builds without Meteor support: ignores comps and returns zero.
float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
  // Not expected to run: the fallback constructor in this branch always throws
  return 0.0f;
}

#endif

}