move c++ code out of /script/ to /

author: Hieu Hoang <fishandfrolick@gmail.com> 2012-05-31 20:58:10 +0400
committer: Hieu Hoang <fishandfrolick@gmail.com> 2012-05-31 20:58:10 +0400
commit: a5ca652a766ddb687891adac8e7ef252fa2f430d (patch)
tree: 7cac031a4a7d688369e0fd4538a65d855b6c390e /phrase-extract
parent: 4eef94b1217a82eb979242dd3e06d8a4b6255e6e (diff)
8 files changed, 1013 insertions, 2 deletions
diff --git a/phrase-extract/extract-rules.cpp b/phrase-extract/extract-rules.cpp
index 997038224..762327681 100644
--- a/phrase-extract/extract-rules.cpp
+++ b/phrase-extract/extract-rules.cpp
@@ -46,8 +46,8 @@
 #include "XmlTree.h"
 #include "InputFileStream.h"
 #include "OutputFileStream.h"
-#include "../../../moses/src/ThreadPool.h"
-#include "../../../moses/src/OutputCollector.h"
+#include "../moses/src/ThreadPool.h"
+#include "../moses/src/OutputCollector.h"
 
 #define LINE_MAX_LENGTH 500000
 
diff --git a/phrase-extract/lexical-reordering/InputFileStream.cpp b/phrase-extract/lexical-reordering/InputFileStream.cpp
new file mode 100755
index 000000000..013781c36
--- /dev/null
+++ b/phrase-extract/lexical-reordering/InputFileStream.cpp
@@ -0,0 +1,67 @@
+// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include "InputFileStream.h"
+#include "gzfilebuf.h"
+#include <iostream>
+
+using namespace std;
+
+namespace Moses
+{
+InputFileStream::InputFileStream(const std::string &filePath)
+  : std::istream(NULL)   // no buffer yet; Open() attaches one through init()
+  , m_streambuf(NULL)
+{
+  Open(filePath);
+}
+
+InputFileStream::~InputFileStream()
+{
+  Close();
+}
+
+void InputFileStream::Open(const std::string &filePath)   // attaches a buffer for filePath to this stream
+{
+  if (filePath.size() > 3 &&
+      filePath.substr(filePath.size() - 3, 3) == ".gz") {   // a ".gz" suffix selects the zlib-backed buffer
+    m_streambuf = new gzfilebuf(filePath.c_str());   // NOTE(review): no open-failure check on this branch
+  } else {
+    std::filebuf* fb = new std::filebuf();
+    fb = fb->open(filePath.c_str(), std::ios::in);   // open() returns NULL on failure
+    if (! fb) {
+      cerr << "Can't read " << filePath.c_str() << endl;
+      exit(1);
+    }
+    m_streambuf = fb;
+  }
+  this->init(m_streambuf);
+}
+
+void InputFileStream::Close()   // deletes the buffer; the stream still points at it until Open() runs again
+{
+  delete m_streambuf;
+  m_streambuf = NULL;
+}
+
+
+}
+
diff --git a/phrase-extract/lexical-reordering/InputFileStream.h b/phrase-extract/lexical-reordering/InputFileStream.h
new file mode 100755
index 000000000..1f37715fd
--- /dev/null
+++ b/phrase-extract/lexical-reordering/InputFileStream.h
@@ -0,0 +1,49 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#ifndef moses_InputFileStream_h
+#define moses_InputFileStream_h
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+namespace Moses
+{
+
+/** Used in place of std::istream, can read zipped files if it ends in .gz
+ */
+class InputFileStream : public std::istream
+{
+protected:
+  std::streambuf *m_streambuf;
+public:
+
+  InputFileStream(const std::string &filePath);
+  ~InputFileStream();
+
+  void Open(const std::string &filePath);
+  void Close();
+};
+
+}
+
+#endif
diff --git a/phrase-extract/lexical-reordering/Jamfile b/phrase-extract/lexical-reordering/Jamfile
new file mode 100644
index 000000000..a53465577
--- /dev/null
+++ b/phrase-extract/lexical-reordering/Jamfile
@@ -0,0 +1,2 @@
+exe lexical-reordering-score : InputFileStream.cpp reordering_classes.cpp score.cpp ../..//z ;
+
diff --git a/phrase-extract/lexical-reordering/gzfilebuf.h b/phrase-extract/lexical-reordering/gzfilebuf.h
new file mode 100755
index 000000000..b5b0ce87f
--- /dev/null
+++ b/phrase-extract/lexical-reordering/gzfilebuf.h
@@ -0,0 +1,85 @@
+#ifndef moses_gzfile_buf_h
+#define moses_gzfile_buf_h
+
+#include <streambuf>
+#include <zlib.h>
+#include <cstring>
+
+class gzfilebuf : public std::streambuf   // read-only stream buffer that pulls its bytes through zlib's gzread
+{
+public:
+  gzfilebuf(const char *filename) {
+    _gzf = gzopen(filename, "rb");   // NOTE(review): result is not checked here
+    setg (_buff+sizeof(int),     // beginning of putback area
+          _buff+sizeof(int),     // read position
+          _buff+sizeof(int));    // end position
+  }
+  ~gzfilebuf() {
+    gzclose(_gzf);
+  }
+protected:
+  virtual int_type overflow (int_type c) {
+    throw;   // writing is unsupported; a bare throw with no active exception calls std::terminate
+  }
+
+  // write multiple characters
+  virtual
+  std::streamsize xsputn (const char* s,
+                          std::streamsize num) {
+    throw;   // unsupported, see overflow()
+  }
+
+  virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ) {
+    throw;   // seeking is unsupported, see overflow()
+  }
+
+  //read one character
+  virtual int_type underflow () {
+    // is read position before end of _buff?
+    if (gptr() < egptr()) {
+      return traits_type::to_int_type(*gptr());
+    }
+
+    /* process size of putback area
+     * - use number of characters read
+     * - but at most four
+     */
+    unsigned int numPutback = gptr() - eback();
+    if (numPutback > sizeof(int)) {
+      numPutback = sizeof(int);
+    }
+
+    /* copy up to four characters previously read into
+     * the putback _buff (area of first four characters)
+     */
+    std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
+                  numPutback);
+
+    // read new characters
+    int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
+    if (num <= 0) {
+      // ERROR or EOF
+      return EOF;
+    }
+
+    // reset _buff pointers
+    setg (_buff+(sizeof(int)-numPutback),   // beginning of putback area
+          _buff+sizeof(int),                // read position
+          _buff+sizeof(int)+num);           // end of buffer
+
+    // return next character
+    return traits_type::to_int_type(*gptr());
+  }
+
+  std::streamsize xsgetn (char* s,
+                          std::streamsize num) {
+    return std::streambuf::xsgetn(s, num);   // drain the get area first; a direct gzread would skip buffered bytes
+  }
+
+private:
+  gzFile _gzf;
+  static const unsigned int _buffsize = 1024;
+  char _buff[_buffsize];
+};
+
+#endif
diff --git a/phrase-extract/lexical-reordering/reordering_classes.cpp b/phrase-extract/lexical-reordering/reordering_classes.cpp
new file mode 100644
index 000000000..2f159e4fa
--- /dev/null
+++ b/phrase-extract/lexical-reordering/reordering_classes.cpp
@@ -0,0 +1,437 @@
+
+#include <vector>
+#include <iostream>
+#include <cstdlib>
+#include <numeric>
+#include <cstdio>
+#include <sstream>
+#include <string>
+#include "zlib.h"
+
+#include "reordering_classes.h"
+
+using namespace std;
+
+ModelScore::ModelScore()
+{
+  for(int i=MONO; i<=NOMONO; ++i) {
+    count_fe_prev.push_back(0);
+    count_fe_next.push_back(0);
+    count_f_prev.push_back(0);
+    count_f_next.push_back(0);
+  }
+}
+
+ModelScore::~ModelScore() {}
+
+ModelScore* ModelScore::createModelScore(const string& modeltype)
+{
+  if (modeltype.compare("mslr") == 0) {
+    return new ModelScoreMSLR();
+  } else if (modeltype.compare("msd") == 0) {
+    return new ModelScoreMSD();
+  } else if (modeltype.compare("monotonicity") == 0 ) {
+    return new ModelScoreMonotonicity();
+  } else if (modeltype.compare("leftright") == 0) {
+    return new ModelScoreLR();
+  } else {
+    cerr << "Illegal model type given for lexical reordering model scoring: " << modeltype << ". The allowed types are: mslr, msd, monotonicity, leftright" << endl;
+    exit(1);
+  }
+}
+
+void ModelScore::reset_fe()
+{
+  for(int i=MONO; i<=NOMONO; ++i) {
+    count_fe_prev[i] = 0;
+    count_fe_next[i] = 0;
+  }
+}
+
+void ModelScore::reset_f()
+{
+  for(int i=MONO; i<=NOMONO; ++i) {
+    count_f_prev[i] = 0;
+    count_f_next[i] = 0;
+  }
+}
+
+void ModelScore::add_example(const string& previous, string& next)
+{
+  count_fe_prev[getType(previous)]++;
+  count_f_prev[getType(previous)]++;
+  count_fe_next[getType(next)]++;
+  count_f_next[getType(next)]++;
+}
+
+const vector<double>& ModelScore::get_scores_fe_prev() const
+{
+  return count_fe_prev;
+}
+
+const vector<double>& ModelScore::get_scores_fe_next() const
+{
+  return count_fe_next;
+}
+
+const vector<double>& ModelScore::get_scores_f_prev() const
+{
+  return count_f_prev;
+}
+
+const vector<double>& ModelScore::get_scores_f_next() const
+{
+  return count_f_next;
+}
+
+
+ORIENTATION ModelScore::getType(const string& s)
+{
+  if (s.compare("mono") == 0) {
+    return MONO;
+  } else if (s.compare("swap") == 0) {
+    return SWAP;
+  } else if (s.compare("dright") == 0) {
+    return DRIGHT;
+  } else if (s.compare("dleft") == 0) {
+    return DLEFT;
+  } else if (s.compare("other") == 0) {
+    return OTHER;
+  } else if (s.compare("nomono") == 0) {
+    return NOMONO;
+  } else {
+    cerr << "Illegal reordering type used: " << s << endl;
+    exit(1);
+  }
+}
+
+
+ORIENTATION ModelScoreMSLR::getType(const string& s)
+{
+  if (s.compare("mono") == 0) {
+    return MONO;
+  } else if (s.compare("swap") == 0) {
+    return SWAP;
+  } else if (s.compare("dright") == 0) {
+    return DRIGHT;
+  } else if (s.compare("dleft") == 0) {
+    return DLEFT;
+  } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
+    cerr << "Illegal reordering type used: " << s << " for model type mslr. You have to re-run step 5 in order to train such a model." <<  endl;
+    exit(1);
+  } else {
+    cerr << "Illegal reordering type used: " << s << endl;
+    exit(1);
+  }
+}
+
+
+ORIENTATION ModelScoreLR::getType(const string& s)
+{
+  if (s.compare("mono") == 0 || s.compare("dright") == 0) {
+    return DRIGHT;
+  } else if (s.compare("swap") == 0 || s.compare("dleft") == 0) {
+    return DLEFT;
+  } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
+    cerr << "Illegal reordering type used: " << s << " for model type LeftRight. You have to re-run step 5 in order to train such a model." <<  endl;
+    exit(1);
+  } else {
+    cerr << "Illegal reordering type used: " << s << endl;
+    exit(1);
+  }
+}
+
+
+ORIENTATION ModelScoreMSD::getType(const string& s)
+{
+  if (s.compare("mono") == 0) {
+    return MONO;
+  } else if (s.compare("swap") == 0) {
+    return SWAP;
+  } else if (s.compare("dleft") == 0 ||
+             s.compare("dright") == 0 ||
+             s.compare("other") == 0) {
+    return OTHER;
+  } else if (s.compare("nomono") == 0) {
+    cerr << "Illegal reordering type used: " << s << " for model type msd. You have to re-run step 5 in order to train such a model." <<  endl;
+    exit(1);
+  } else {
+    cerr << "Illegal reordering type used: " << s << endl;
+    exit(1);
+  }
+}
+
+ORIENTATION ModelScoreMonotonicity::getType(const string& s)
+{
+  if (s.compare("mono") == 0) {
+    return MONO;
+  } else if (s.compare("swap") == 0 ||
+             s.compare("dleft") == 0 ||
+             s.compare("dright") == 0 ||
+             s.compare("other") == 0 ||
+             s.compare("nomono") == 0 ) {
+    return NOMONO;
+  } else {
+    cerr << "Illegal reordering type used: " << s << endl;
+    exit(1);
+  }
+}
+
+
+
+void ScorerMSLR::score(const vector<double>&  all_scores, vector<double>&  scores) const
+{
+  scores.push_back(all_scores[MONO]);
+  scores.push_back(all_scores[SWAP]);
+  scores.push_back(all_scores[DLEFT]);
+  scores.push_back(all_scores[DRIGHT]);
+}
+
+void ScorerMSD::score(const vector<double>&  all_scores, vector<double>&  scores) const
+{
+  scores.push_back(all_scores[MONO]);
+  scores.push_back(all_scores[SWAP]);
+  scores.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]);
+}
+
+void ScorerMonotonicity::score(const vector<double>&  all_scores, vector<double>&  scores) const
+{
+  scores.push_back(all_scores[MONO]);
+  scores.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]);
+}
+
+
+void ScorerLR::score(const vector<double>&  all_scores, vector<double>&  scores) const
+{
+  scores.push_back(all_scores[MONO]+all_scores[DRIGHT]);
+  scores.push_back(all_scores[SWAP]+all_scores[DLEFT]);
+}
+
+
+void ScorerMSLR::createSmoothing(const vector<double>&  scores, double weight, vector<double>& smoothing) const
+{
+  double total = accumulate(scores.begin(), scores.end(), 0.0);   // double seed; an int 0 would make accumulate sum in int
+  smoothing.push_back(weight*(scores[MONO]+0.1)/total);     // appended in the order ScorerMSLR::score() emits:
+  smoothing.push_back(weight*(scores[SWAP]+0.1)/total);     // mono, swap, dleft, dright
+  smoothing.push_back(weight*(scores[DLEFT]+0.1)/total);    // each count gets a 0.1 floor before scaling
+  smoothing.push_back(weight*(scores[DRIGHT]+0.1)/total);   // NOTE(review): total == 0 is not guarded
+}
+
+void ScorerMSLR::createConstSmoothing(double weight, vector<double>& smoothing) const
+{
+  for (int i=1; i<=4; ++i) {
+    smoothing.push_back(weight);
+  }
+}
+
+
+void ScorerMSD::createSmoothing(const vector<double>&  scores, double weight, vector<double>& smoothing) const
+{
+  double total = accumulate(scores.begin(), scores.end(), 0.0);   // double seed; an int 0 would make accumulate sum in int
+  smoothing.push_back(weight*(scores[MONO]+0.1)/total);   // mono
+  smoothing.push_back(weight*(scores[SWAP]+0.1)/total);   // swap
+  smoothing.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total);   // everything else, pooled
+}
+
+void ScorerMSD::createConstSmoothing(double weight, vector<double>& smoothing) const
+{
+  for (int i=1; i<=3; ++i) {
+    smoothing.push_back(weight);
+  }
+}
+
+void ScorerMonotonicity::createSmoothing(const vector<double>&  scores, double weight, vector<double>& smoothing) const
+{
+  double total = accumulate(scores.begin(), scores.end(), 0.0);   // double seed; an int 0 would make accumulate sum in int
+  smoothing.push_back(weight*(scores[MONO]+0.1)/total);   // mono
+  smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total);   // all non-mono counts, pooled
+}
+
+void ScorerMonotonicity::createConstSmoothing(double weight, vector<double>& smoothing) const
+{
+  for (int i=1; i<=2; ++i) {   // one entry per value ScorerMonotonicity::score() emits; int counter like the sibling scorers
+    smoothing.push_back(weight);
+  }
+}
+
+
+void ScorerLR::createSmoothing(const vector<double>&  scores, double weight, vector<double>& smoothing) const
+{
+  double total = accumulate(scores.begin(), scores.end(), 0.0);   // double seed; an int 0 would make accumulate sum in int
+  smoothing.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total);   // right-hand class
+  smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+0.1)/total);    // left-hand class; 0.1 floor added to match every other scorer
+}
+
+void ScorerLR::createConstSmoothing(double weight, vector<double>& smoothing) const
+{
+  for (int i=1; i<=2; ++i) {
+    smoothing.push_back(weight);
+  }
+}
+
+void Model::score_fe(const string& f, const string& e)
+{
+  if (!fe)    //Make sure we do not do anything if it is not a fe model
+    return;
+  fprintf(file,"%s ||| %s ||| ",f.c_str(),e.c_str());
+  //condition on the previous phrase
+  if (previous) {
+    vector<double> scores;
+    scorer->score(modelscore->get_scores_fe_prev(), scores);
+    double sum = 0;
+    for(size_t i=0; i<scores.size(); ++i) {
+      scores[i] += smoothing_prev[i];
+      sum += scores[i];
+    }
+    for(size_t i=0; i<scores.size(); ++i) {
+      fprintf(file,"%f ",scores[i]/sum);
+    }
+    //fprintf(file, "||| ");
+  }
+  //condition on the next phrase
+  if (next) {
+    vector<double> scores;
+    scorer->score(modelscore->get_scores_fe_next(), scores);
+    double sum = 0;
+    for(size_t i=0; i<scores.size(); ++i) {
+      scores[i] += smoothing_next[i];
+      sum += scores[i];
+    }
+    for(size_t i=0; i<scores.size(); ++i) {
+      fprintf(file, "%f ", scores[i]/sum);
+    }
+  }
+  fprintf(file,"\n");
+}
+
+void Model::score_f(const string& f)
+{
+  if (fe)      //Make sure we do not do anything if it is not a f model
+    return;
+  fprintf(file, "%s ||| ", f.c_str());
+  //condition on the previous phrase
+  if (previous) {
+    vector<double> scores;
+    scorer->score(modelscore->get_scores_f_prev(), scores);
+    double sum = 0;
+    for(size_t i=0; i<scores.size(); ++i) {
+      scores[i] += smoothing_prev[i];
+      sum += scores[i];
+    }
+    for(size_t i=0; i<scores.size(); ++i) {
+      fprintf(file, "%f ", scores[i]/sum);
+    }
+    //fprintf(file, "||| ");
+  }
+  //condition on the next phrase
+  if (next) {
+    vector<double> scores;
+    scorer->score(modelscore->get_scores_f_next(), scores);
+    double sum = 0;
+    for(size_t i=0; i<scores.size(); ++i) {
+      scores[i] += smoothing_next[i];
+      sum += scores[i];
+    }
+    for(size_t i=0; i<scores.size(); ++i) {
+      fprintf(file, "%f ", scores[i]/sum);
+    }
+  }
+  fprintf(file, "\n");
+}
+
+Model::Model(ModelScore* ms, Scorer* sc, const string& dir, const string& lang, const string& fn)
+  : modelscore(ms), scorer(sc), filename(fn)
+{
+
+  file = fopen(filename.c_str(),"w");
+  if (!file) {
+    cerr << "Could not open the model output file: " << filename << endl;
+    exit(1);
+  }
+
+  fe = false;
+  if (lang.compare("fe") == 0) {
+    fe = true;
+  } else if (lang.compare("f") != 0) {
+    cerr << "You have given an illegal language to condition on: "  << lang
+         << "\nLegal types: fe (on both languages), f (only on source language)\n";
+    exit(1);
+  }
+
+  previous = true;
+  next = true;
+  if (dir.compare("backward") == 0) {
+    next = false;
+  } else if (dir.compare("forward") == 0) {
+    previous = false;
+  }
+}
+
+Model::~Model()
+{
+  if (file) fclose(file);   // zipFile() clears the handle once it has closed it
+  delete modelscore;        // NOTE(review): main() hands one ModelScore to several Models - confirm ownership
+  delete scorer;
+}
+
+void Model::zipFile()   // copies the finished table into <filename>.gz and removes the plain file
+{
+  fclose(file);
+  file = fopen(filename.c_str(), "rb");   // NOTE(review): fopen/gzopen results are not checked
+  gzFile gzfile = gzopen((filename+".gz").c_str(),"wb");
+  char inbuffer[128];
+  int num_read;
+  while ((num_read = fread(inbuffer, 1, sizeof(inbuffer), file)) > 0) {
+    gzwrite(gzfile, inbuffer, num_read);
+  }
+  fclose(file);
+  gzclose(gzfile);
+  file = NULL;   // the handle is closed; keep ~Model() from closing it a second time
+  //Remove the unzipped file
+  remove(filename.c_str());
+}
+
+void Model::split_config(const string& config, string& dir, string& lang, string& orient)
+{
+  istringstream is(config);   // config is consumed as '-'-separated fields, in the order read below
+  string type;
+  getline(is, type, '-');     // 1st field: read but not handed back to the caller
+  getline(is, orient, '-');   // 2nd field -> orient
+  getline(is, dir, '-');      // 3rd field -> dir
+  getline(is, lang, '-');     // 4th field -> lang
+}
+
+Model* Model::createModel(ModelScore* modelscore, const string& config, const string& filepath)
+{
+  string dir, lang, orient, filename;
+  split_config(config,dir,lang,orient);
+
+  filename = filepath + config;
+  if (orient.compare("mslr") == 0) {
+    return new Model(modelscore, new ScorerMSLR(), dir, lang, filename);
+  } else if (orient.compare("msd") == 0) {
+    return new Model(modelscore, new ScorerMSD(), dir, lang, filename);
+  } else if (orient.compare("monotonicity") == 0) {
+    return new Model(modelscore, new ScorerMonotonicity(), dir, lang, filename);
+  } else if (orient.compare("leftright") == 0) {
+    return new Model(modelscore, new ScorerLR(), dir, lang, filename);
+  } else {
+    cerr << "Illegal orientation type of reordering model: " << orient
+         << "\n allowed types: mslr, msd, monotonicity, leftright\n";
+    exit(1);
+  }
+}
+
+
+
+void Model::createSmoothing(double w)
+{
+  scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev);
+  scorer->createSmoothing(modelscore->get_scores_fe_next(), w, smoothing_next);
+}
+
+void Model::createConstSmoothing(double w)
+{
+  scorer->createConstSmoothing(w, smoothing_prev);
+  scorer->createConstSmoothing(w, smoothing_next);
+}
diff --git a/phrase-extract/lexical-reordering/reordering_classes.h b/phrase-extract/lexical-reordering/reordering_classes.h
new file mode 100644
index 000000000..4d0b56240
--- /dev/null
+++ b/phrase-extract/lexical-reordering/reordering_classes.h
@@ -0,0 +1,146 @@
+/*
+ * reordering_classes.h
+ * Utility classes for lexical reordering table scoring
+ *
+ *      Created by: Sara Stymne - Linköping University
+ *      Machine Translation Marathon 2010, Dublin
+ */
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <fstream>
+
+
+enum ORIENTATION {MONO, SWAP, DRIGHT, DLEFT, OTHER, NOMONO};
+
+
+//Keeps the counts for the different reordering types
+//(Instantiated in 1-3 instances, one for each type of model (hier, phrase, wbe))
+class ModelScore
+{
+private:
+  std::vector<double> count_fe_prev;
+  std::vector<double> count_fe_next;
+  std::vector<double> count_f_prev;
+  std::vector<double> count_f_next;
+
+protected:
+  virtual ORIENTATION getType(const std::string& s);
+
+public:
+  ModelScore();
+  virtual ~ModelScore();
+  void add_example(const std::string& previous, std::string& next);
+  void reset_fe();
+  void reset_f();
+  const std::vector<double>& get_scores_fe_prev() const;
+  const std::vector<double>& get_scores_fe_next() const;
+  const std::vector<double>& get_scores_f_prev() const;
+  const std::vector<double>& get_scores_f_next() const;
+
+  static ModelScore* createModelScore(const std::string& modeltype);
+};
+
+class ModelScoreMSLR : public ModelScore
+{
+protected:
+  virtual ORIENTATION getType(const std::string& s);
+};
+
+class ModelScoreLR : public ModelScore
+{
+protected:
+  virtual ORIENTATION getType(const std::string& s);
+};
+
+class ModelScoreMSD : public ModelScore
+{
+protected:
+  virtual ORIENTATION getType(const std::string& s);
+};
+
+class ModelScoreMonotonicity : public ModelScore
+{
+protected:
+  virtual ORIENTATION getType(const std::string& s);
+};
+
+//Class for calculating total counts, and to calculate smoothing
+class Scorer
+{
+public:
+  virtual ~Scorer() {}
+  virtual void score(const std::vector<double>&, std::vector<double>&) const = 0;
+  virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const = 0;
+  virtual void createConstSmoothing(double, std::vector<double>&) const = 0;
+};
+
+class ScorerMSLR : public Scorer
+{
+public:
+  virtual void score(const std::vector<double>&, std::vector<double>&) const;
+  virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
+  virtual void createConstSmoothing(double, std::vector<double>&) const;
+};
+
+class ScorerMSD : public Scorer
+{
+public:
+  virtual void score(const std::vector<double>&, std::vector<double>&) const;
+  virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
+  virtual void createConstSmoothing(double, std::vector<double>&) const;
+};
+
+class ScorerMonotonicity : public Scorer
+{
+public:
+  virtual void score(const std::vector<double>&, std::vector<double>&) const;
+  virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
+  virtual void createConstSmoothing(double, std::vector<double>&) const;
+};
+
+class ScorerLR : public Scorer
+{
+public:
+  virtual void score(const std::vector<double>&, std::vector<double>&) const;
+  virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
+  virtual void createConstSmoothing(double, std::vector<double>&) const;
+};
+
+
+//Class for representing each model
+//Contains a modelscore and scorer (which can be of different model types (mslr, msd...)),
+//and file handling.
+//This class also keeps track of bidirectionality, and which language to condition on
+class Model
+{
+private:
+  ModelScore* modelscore;
+  Scorer* scorer;
+
+  std::FILE* file;
+  std::string filename;
+
+  bool fe;
+  bool previous;
+  bool next;
+
+  std::vector<double> smoothing_prev;
+  std::vector<double> smoothing_next;
+
+  static void split_config(const std::string& config, std::string& dir,
+                           std::string& lang, std::string& orient);
+public:
+  Model(ModelScore* ms, Scorer* sc, const std::string& dir,
+        const std::string& lang, const std::string& fn);
+  ~Model();
+  static Model* createModel(ModelScore*, const std::string&, const std::string&);
+  void createSmoothing(double w);
+  void createConstSmoothing(double w);
+  void score_fe(const std::string& f, const std::string& e);
+  void score_f(const std::string& f);
+  void zipFile();
+};
+
diff --git a/phrase-extract/lexical-reordering/score.cpp b/phrase-extract/lexical-reordering/score.cpp
new file mode 100644
index 000000000..7f14b9fc8
--- /dev/null
+++ b/phrase-extract/lexical-reordering/score.cpp
@@ -0,0 +1,225 @@
+/*
+ * score_reordering.cpp
+ *
+ *      Created by: Sara Stymne - Linköping University
+ *      Machine Translation Marathon 2010, Dublin
+ */
+
+#include <string>
+#include <vector>
+#include <map>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <cstdlib>
+#include <cstring>
+#include "InputFileStream.h"
+
+#include "reordering_classes.h"
+
+using namespace std;
+
+void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier);
+void get_orientations(const string& pair, string& previous, string& next);
+
+
+int main(int argc, char* argv[])
+{
+
+  cerr << "Lexical Reordering Scorer\n"
+       << "scores lexical reordering models of several types (hierarchical, phrase-based and word-based-extraction\n";
+
+  if (argc < 4) {   // extractFile, smoothingValue and filepath are all required (argv[1..3])
+    cerr << "syntax: score_reordering extractFile smoothingValue filepath (--model \"type max-orientation (specification-strings)\" )+\n";
+    exit(1);
+  }
+
+  char* extractFileName = argv[1];
+  double smoothingValue = atof(argv[2]);
+  string filepath = argv[3];
+
+  Moses::InputFileStream eFile(extractFileName);
+  if (!eFile) {
+    cerr << "Could not open the extract file " << extractFileName << " for scoring of lexical reordering models\n";
+    exit(1);
+  }
+
+  bool smoothWithCounts = false;
+  map<string,ModelScore*> modelScores;
+  vector<Model*> models;
+  bool hier = false;
+  bool phrase = false;
+  bool wbe = false;
+
+  string e,f,w,p,h;
+  string prev, next;
+
+  int i = 4;   // optional arguments start after the three positional ones
+  while (i<argc) {
+    if (strcmp(argv[i],"--SmoothWithCounts") == 0) {
+      smoothWithCounts = true;
+    } else if (strcmp(argv[i],"--model") == 0) {
+      if (i+1 >= argc) {
+        cerr << "score: syntax error, no model information provided to the option" << argv[i] << endl;
+        exit(1);
+      }
+      istringstream is(argv[++i]);   // "<model> <type> <config>..." packed into one argument
+      string m,t;
+      is >> m >> t;
+      modelScores[m] = ModelScore::createModelScore(t);
+      if (m.compare("hier") == 0) {
+        hier = true;
+      } else if (m.compare("phrase") == 0) {
+        phrase = true;
+      }
+      if (m.compare("wbe") == 0) {
+        wbe = true;
+      }
+
+      if (!hier && !phrase && !wbe) {
+        cerr << "WARNING: No models specified for lexical reordering. No lexical reordering table will be trained.\n";
+        return 0;
+      }
+
+      string config;
+      //Store all models
+      while (is >> config) {
+        models.push_back(Model::createModel(modelScores[m],config,filepath));
+      }
+    } else {
+      cerr << "illegal option given to lexical reordering model score\n";
+      exit(1);
+    }
+    i++;
+  }
+
+  ////////////////////////////////////
+  //calculate smoothing
+  if (smoothWithCounts) {
+    string line;
+    while (getline(eFile,line)) {
+      split_line(line,f,e,w,p,h);   // same argument order as the scoring pass below
+      if (hier) {
+        get_orientations(h, prev, next);
+        modelScores["hier"]->add_example(prev,next);
+      }
+      if (phrase) {
+        get_orientations(p, prev, next);
+        modelScores["phrase"]->add_example(prev,next);
+      }
+      if (wbe) {
+        get_orientations(w, prev, next);
+        modelScores["wbe"]->add_example(prev,next);
+      }
+    }
+
+    // calculate smoothing for each model
+    for (size_t i=0; i<models.size(); ++i) {
+      models[i]->createSmoothing(smoothingValue);
+    }
+
+    //reopen eFile
+    eFile.Close();
+    eFile.Open(extractFileName);
+  } else {
+    //constant smoothing
+    for (size_t i=0; i<models.size(); ++i) {
+      models[i]->createConstSmoothing(smoothingValue);
+    }
+  }
+
+  ////////////////////////////////////
+  //calculate scores for reordering table
+  string line,f_current,e_current;
+  bool first = true;
+  while (getline(eFile, line)) {
+    split_line(line,f,e,w,p,h);
+
+    if (first) {
+      f_current = f;
+      e_current = e;
+      first = false;
+    } else if (f.compare(f_current) != 0 || e.compare(e_current) != 0) {
+      //fe - score
+      for (size_t i=0; i<models.size(); ++i) {
+        models[i]->score_fe(f_current,e_current);
+      }
+      //reset
+      for(map<string,ModelScore*>::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) {
+        it->second->reset_fe();
+      }
+
+      if (f.compare(f_current) != 0) {
+        //f - score
+        for (size_t i=0; i<models.size(); ++i) {
+          models[i]->score_f(f_current);
+        }
+        //reset
+        for(map<string,ModelScore*>::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) {
+          it->second->reset_f();
+        }
+      }
+      f_current = f;
+      e_current = e;
+    }
+
+    // update counts
+    if (hier) {
+      get_orientations(h, prev, next);
+      modelScores["hier"]->add_example(prev,next);
+    }
+    if (phrase) {
+      get_orientations(p, prev, next);
+      modelScores["phrase"]->add_example(prev,next);
+    }
+    if (wbe) {
+      get_orientations(w, prev, next);
+      modelScores["wbe"]->add_example(prev,next);
+    }
+  }
+  //Score the last phrases
+  for (size_t i=0; i<models.size(); ++i) {
+    models[i]->score_fe(f,e);
+  }
+  for (size_t i=0; i<models.size(); ++i) {
+    models[i]->score_f(f);
+  }
+
+  //Zip all files
+  for (size_t i=0; i<models.size(); ++i) {
+    models[i]->zipFile();
+  }
+
+  return 0;
+}
+
+
+
+void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier)
+{
+  // Splits one extract line laid out as
+  //   foreign ||| english ||| wbe | phrase | hier
+  // into its five fields. When a separator is missing, the rest of the line
+  // goes to the field being read and the later fields are emptied, so blank
+  // or truncated lines cannot trigger an out-of-range substring request.
+  const char* const separators[4] = {" ||| ", " ||| ", " | ", " | "};
+  string* const fields[5] = {&foreign, &english, &wbe, &phrase, &hier};
+  string::size_type begin = 0;
+  size_t k = 0;
+  for (; k < 4; ++k) {
+    const string::size_type end = line.find(separators[k], begin);
+    if (end == string::npos) break;
+    fields[k]->assign(line, begin, end - begin);
+    begin = end + strlen(separators[k]);
+  }
+  fields[k]->assign(line, begin, string::npos);   // last field, or the remainder after a missing separator
+  for (++k; k < 5; ++k) {
+    fields[k]->clear();
+  }
+}
+
+void get_orientations(const string& pair, string& previous, string& next)
+{
+  istringstream is(pair);   // pair is read as whitespace-separated tokens
+  is >> previous >> next;   // first token -> previous, second token -> next
+}
author	Hieu Hoang <fishandfrolick@gmail.com>	2012-05-31 20:58:10 +0400
committer	Hieu Hoang <fishandfrolick@gmail.com>	2012-05-31 20:58:10 +0400
commit	a5ca652a766ddb687891adac8e7ef252fa2f430d (patch)
tree	7cac031a4a7d688369e0fd4538a65d855b6c390e /phrase-extract
parent	4eef94b1217a82eb979242dd3e06d8a4b6255e6e (diff)