Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <fishandfrolick@gmail.com> 2012-05-31 20:58:10 +0400
committer Hieu Hoang <fishandfrolick@gmail.com> 2012-05-31 20:58:10 +0400
commit a5ca652a766ddb687891adac8e7ef252fa2f430d (patch)
tree 7cac031a4a7d688369e0fd4538a65d855b6c390e /scripts/training
parent 4eef94b1217a82eb979242dd3e06d8a4b6255e6e (diff)
move c++ code out of /script/ to /
Diffstat (limited to 'scripts/training')
-rw-r--r-- scripts/training/Jamfile | 9
-rwxr-xr-x scripts/training/lexical-reordering/InputFileStream.cpp | 67
-rwxr-xr-x scripts/training/lexical-reordering/InputFileStream.h | 49
-rw-r--r-- scripts/training/lexical-reordering/Jamfile | 2
-rwxr-xr-x scripts/training/lexical-reordering/gzfilebuf.h | 85
-rw-r--r-- scripts/training/lexical-reordering/reordering_classes.cpp | 437
-rw-r--r-- scripts/training/lexical-reordering/reordering_classes.h | 146
-rw-r--r-- scripts/training/lexical-reordering/score.cpp | 225
8 files changed, 0 insertions, 1020 deletions
diff --git a/scripts/training/Jamfile b/scripts/training/Jamfile
index e290bb0cb..e69de29bb 100644
--- a/scripts/training/Jamfile
+++ b/scripts/training/Jamfile
@@ -1,9 +0,0 @@
-if $(WITH-GIZA) != no || $(CLEANING) != no {
- make train-model.perl : train-model.perl.missing_bin_dir : @missing_bin_dir ;
- actions missing_bin_dir {
- sed 's#^my \$BINDIR\s*=.*#my\ \$BINDIR=\"$(WITH-GIZA)\";#' $(>) >$(<)
- chmod +x $(<)
- }
-
- install legacy : train-model.perl : <location>. ;
-}
diff --git a/scripts/training/lexical-reordering/InputFileStream.cpp b/scripts/training/lexical-reordering/InputFileStream.cpp
deleted file mode 100755
index 013781c36..000000000
--- a/scripts/training/lexical-reordering/InputFileStream.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "InputFileStream.h"
-#include "gzfilebuf.h"
-#include <iostream>
-
-using namespace std;
-
-namespace Moses
-{
-InputFileStream::InputFileStream(const std::string &filePath)
- : std::istream(NULL)
- , m_streambuf(NULL)
-{
- Open(filePath);
-}
-
-InputFileStream::~InputFileStream()
-{
- Close();
-}
-
-void InputFileStream::Open(const std::string &filePath)
-{
- if (filePath.size() > 3 &&
- filePath.substr(filePath.size() - 3, 3) == ".gz") {
- m_streambuf = new gzfilebuf(filePath.c_str());
- } else {
- std::filebuf* fb = new std::filebuf();
- fb = fb->open(filePath.c_str(), std::ios::in);
- if (! fb) {
- cerr << "Can't read " << filePath.c_str() << endl;
- exit(1);
- }
- m_streambuf = fb;
- }
- this->init(m_streambuf);
-}
-
-void InputFileStream::Close()
-{
- delete m_streambuf;
- m_streambuf = NULL;
-}
-
-
-}
-
diff --git a/scripts/training/lexical-reordering/InputFileStream.h b/scripts/training/lexical-reordering/InputFileStream.h
deleted file mode 100755
index 1f37715fd..000000000
--- a/scripts/training/lexical-reordering/InputFileStream.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#ifndef moses_InputFileStream_h
-#define moses_InputFileStream_h
-
-#include <cstdlib>
-#include <fstream>
-#include <string>
-
-namespace Moses
-{
-
-/** Used in place of std::istream, can read zipped files if it ends in .gz
- */
-class InputFileStream : public std::istream
-{
-protected:
- std::streambuf *m_streambuf;
-public:
-
- InputFileStream(const std::string &filePath);
- ~InputFileStream();
-
- void Open(const std::string &filePath);
- void Close();
-};
-
-}
-
-#endif
diff --git a/scripts/training/lexical-reordering/Jamfile b/scripts/training/lexical-reordering/Jamfile
deleted file mode 100644
index 0ff1af829..000000000
--- a/scripts/training/lexical-reordering/Jamfile
+++ /dev/null
@@ -1,2 +0,0 @@
-exe lexical-reordering-score : InputFileStream.cpp reordering_classes.cpp score.cpp ../../..//z ;
-
diff --git a/scripts/training/lexical-reordering/gzfilebuf.h b/scripts/training/lexical-reordering/gzfilebuf.h
deleted file mode 100755
index b5b0ce87f..000000000
--- a/scripts/training/lexical-reordering/gzfilebuf.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef moses_gzfile_buf_h
-#define moses_gzfile_buf_h
-
-#include <streambuf>
-#include <zlib.h>
-#include <cstring>
-
-class gzfilebuf : public std::streambuf
-{
-public:
- gzfilebuf(const char *filename) {
- _gzf = gzopen(filename, "rb");
- setg (_buff+sizeof(int), // beginning of putback area
- _buff+sizeof(int), // read position
- _buff+sizeof(int)); // end position
- }
- ~gzfilebuf() {
- gzclose(_gzf);
- }
-protected:
- virtual int_type overflow (int_type c) {
- throw;
- }
-
- // write multiple characters
- virtual
- std::streamsize xsputn (const char* s,
- std::streamsize num) {
- throw;
- }
-
- virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ) {
- throw;
- }
-
- //read one character
- virtual int_type underflow () {
- // is read position before end of _buff?
- if (gptr() < egptr()) {
- return traits_type::to_int_type(*gptr());
- }
-
- /* process size of putback area
- * - use number of characters read
- * - but at most four
- */
- unsigned int numPutback = gptr() - eback();
- if (numPutback > sizeof(int)) {
- numPutback = sizeof(int);
- }
-
- /* copy up to four characters previously read into
- * the putback _buff (area of first four characters)
- */
- std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
- numPutback);
-
- // read new characters
- int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
- if (num <= 0) {
- // ERROR or EOF
- return EOF;
- }
-
- // reset _buff pointers
- setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
- _buff+sizeof(int), // read position
- _buff+sizeof(int)+num); // end of buffer
-
- // return next character
- return traits_type::to_int_type(*gptr());
- }
-
- std::streamsize xsgetn (char* s,
- std::streamsize num) {
- return gzread(_gzf,s,num);
- }
-
-private:
- gzFile _gzf;
- static const unsigned int _buffsize = 1024;
- char _buff[_buffsize];
-};
-
-#endif
diff --git a/scripts/training/lexical-reordering/reordering_classes.cpp b/scripts/training/lexical-reordering/reordering_classes.cpp
deleted file mode 100644
index 2f159e4fa..000000000
--- a/scripts/training/lexical-reordering/reordering_classes.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-
-#include <vector>
-#include <iostream>
-#include <cstdlib>
-#include <numeric>
-#include <cstdio>
-#include <sstream>
-#include <string>
-#include "zlib.h"
-
-#include "reordering_classes.h"
-
-using namespace std;
-
-ModelScore::ModelScore()
-{
- for(int i=MONO; i<=NOMONO; ++i) {
- count_fe_prev.push_back(0);
- count_fe_next.push_back(0);
- count_f_prev.push_back(0);
- count_f_next.push_back(0);
- }
-}
-
-ModelScore::~ModelScore() {}
-
-ModelScore* ModelScore::createModelScore(const string& modeltype)
-{
- if (modeltype.compare("mslr") == 0) {
- return new ModelScoreMSLR();
- } else if (modeltype.compare("msd") == 0) {
- return new ModelScoreMSD();
- } else if (modeltype.compare("monotonicity") == 0 ) {
- return new ModelScoreMonotonicity();
- } else if (modeltype.compare("leftright") == 0) {
- return new ModelScoreLR();
- } else {
- cerr << "Illegal model type given for lexical reordering model scoring: " << modeltype << ". The allowed types are: mslr, msd, monotonicity, leftright" << endl;
- exit(1);
- }
-}
-
-void ModelScore::reset_fe()
-{
- for(int i=MONO; i<=NOMONO; ++i) {
- count_fe_prev[i] = 0;
- count_fe_next[i] = 0;
- }
-}
-
-void ModelScore::reset_f()
-{
- for(int i=MONO; i<=NOMONO; ++i) {
- count_f_prev[i] = 0;
- count_f_next[i] = 0;
- }
-}
-
-void ModelScore::add_example(const string& previous, string& next)
-{
- count_fe_prev[getType(previous)]++;
- count_f_prev[getType(previous)]++;
- count_fe_next[getType(next)]++;
- count_f_next[getType(next)]++;
-}
-
-const vector<double>& ModelScore::get_scores_fe_prev() const
-{
- return count_fe_prev;
-}
-
-const vector<double>& ModelScore::get_scores_fe_next() const
-{
- return count_fe_next;
-}
-
-const vector<double>& ModelScore::get_scores_f_prev() const
-{
- return count_f_prev;
-}
-
-const vector<double>& ModelScore::get_scores_f_next() const
-{
- return count_f_next;
-}
-
-
-ORIENTATION ModelScore::getType(const string& s)
-{
- if (s.compare("mono") == 0) {
- return MONO;
- } else if (s.compare("swap") == 0) {
- return SWAP;
- } else if (s.compare("dright") == 0) {
- return DRIGHT;
- } else if (s.compare("dleft") == 0) {
- return DLEFT;
- } else if (s.compare("other") == 0) {
- return OTHER;
- } else if (s.compare("nomono") == 0) {
- return NOMONO;
- } else {
- cerr << "Illegal reordering type used: " << s << endl;
- exit(1);
- }
-}
-
-
-ORIENTATION ModelScoreMSLR::getType(const string& s)
-{
- if (s.compare("mono") == 0) {
- return MONO;
- } else if (s.compare("swap") == 0) {
- return SWAP;
- } else if (s.compare("dright") == 0) {
- return DRIGHT;
- } else if (s.compare("dleft") == 0) {
- return DLEFT;
- } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
- cerr << "Illegal reordering type used: " << s << " for model type mslr. You have to re-run step 5 in order to train such a model." << endl;
- exit(1);
- } else {
- cerr << "Illegal reordering type used: " << s << endl;
- exit(1);
- }
-}
-
-
-ORIENTATION ModelScoreLR::getType(const string& s)
-{
- if (s.compare("mono") == 0 || s.compare("dright") == 0) {
- return DRIGHT;
- } else if (s.compare("swap") == 0 || s.compare("dleft") == 0) {
- return DLEFT;
- } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
- cerr << "Illegal reordering type used: " << s << " for model type LeftRight. You have to re-run step 5 in order to train such a model." << endl;
- exit(1);
- } else {
- cerr << "Illegal reordering type used: " << s << endl;
- exit(1);
- }
-}
-
-
-ORIENTATION ModelScoreMSD::getType(const string& s)
-{
- if (s.compare("mono") == 0) {
- return MONO;
- } else if (s.compare("swap") == 0) {
- return SWAP;
- } else if (s.compare("dleft") == 0 ||
- s.compare("dright") == 0 ||
- s.compare("other") == 0) {
- return OTHER;
- } else if (s.compare("nomono") == 0) {
- cerr << "Illegal reordering type used: " << s << " for model type msd. You have to re-run step 5 in order to train such a model." << endl;
- exit(1);
- } else {
- cerr << "Illegal reordering type used: " << s << endl;
- exit(1);
- }
-}
-
-ORIENTATION ModelScoreMonotonicity::getType(const string& s)
-{
- if (s.compare("mono") == 0) {
- return MONO;
- } else if (s.compare("swap") == 0 ||
- s.compare("dleft") == 0 ||
- s.compare("dright") == 0 ||
- s.compare("other") == 0 ||
- s.compare("nomono") == 0 ) {
- return NOMONO;
- } else {
- cerr << "Illegal reordering type used: " << s << endl;
- exit(1);
- }
-}
-
-
-
-void ScorerMSLR::score(const vector<double>& all_scores, vector<double>& scores) const
-{
- scores.push_back(all_scores[MONO]);
- scores.push_back(all_scores[SWAP]);
- scores.push_back(all_scores[DLEFT]);
- scores.push_back(all_scores[DRIGHT]);
-}
-
-void ScorerMSD::score(const vector<double>& all_scores, vector<double>& scores) const
-{
- scores.push_back(all_scores[MONO]);
- scores.push_back(all_scores[SWAP]);
- scores.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]);
-}
-
-void ScorerMonotonicity::score(const vector<double>& all_scores, vector<double>& scores) const
-{
- scores.push_back(all_scores[MONO]);
- scores.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]);
-}
-
-
-void ScorerLR::score(const vector<double>& all_scores, vector<double>& scores) const
-{
- scores.push_back(all_scores[MONO]+all_scores[DRIGHT]);
- scores.push_back(all_scores[SWAP]+all_scores[DLEFT]);
-}
-
-
-void ScorerMSLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
-{
- double total = accumulate(scores.begin(), scores.end(), 0);
- smoothing.push_back(weight*(scores[MONO]+0.1)/total);
- smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
- smoothing.push_back(weight*(scores[DLEFT]+0.1)/total);
- smoothing.push_back(weight*(scores[DRIGHT]+0.1)/total);
-}
-
-void ScorerMSLR::createConstSmoothing(double weight, vector<double>& smoothing) const
-{
- for (int i=1; i<=4; ++i) {
- smoothing.push_back(weight);
- }
-}
-
-
-void ScorerMSD::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
-{
- double total = accumulate(scores.begin(), scores.end(), 0);
- smoothing.push_back(weight*(scores[MONO]+0.1)/total);
- smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
- smoothing.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total);
-}
-
-void ScorerMSD::createConstSmoothing(double weight, vector<double>& smoothing) const
-{
- for (int i=1; i<=3; ++i) {
- smoothing.push_back(weight);
- }
-}
-
-void ScorerMonotonicity::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
-{
- double total = accumulate(scores.begin(), scores.end(), 0);
- smoothing.push_back(weight*(scores[MONO]+0.1)/total);
- smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total);
-}
-
-void ScorerMonotonicity::createConstSmoothing(double weight, vector<double>& smoothing) const
-{
- for (double i=1; i<=2; ++i) {
- smoothing.push_back(weight);
- }
-}
-
-
-void ScorerLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
-{
- double total = accumulate(scores.begin(), scores.end(), 0);
- smoothing.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total);
- smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT])/total);
-}
-
-void ScorerLR::createConstSmoothing(double weight, vector<double>& smoothing) const
-{
- for (int i=1; i<=2; ++i) {
- smoothing.push_back(weight);
- }
-}
-
-void Model::score_fe(const string& f, const string& e)
-{
- if (!fe) //Make sure we do not do anything if it is not a fe model
- return;
- fprintf(file,"%s ||| %s ||| ",f.c_str(),e.c_str());
- //condition on the previous phrase
- if (previous) {
- vector<double> scores;
- scorer->score(modelscore->get_scores_fe_prev(), scores);
- double sum = 0;
- for(size_t i=0; i<scores.size(); ++i) {
- scores[i] += smoothing_prev[i];
- sum += scores[i];
- }
- for(size_t i=0; i<scores.size(); ++i) {
- fprintf(file,"%f ",scores[i]/sum);
- }
- //fprintf(file, "||| ");
- }
- //condition on the next phrase
- if (next) {
- vector<double> scores;
- scorer->score(modelscore->get_scores_fe_next(), scores);
- double sum = 0;
- for(size_t i=0; i<scores.size(); ++i) {
- scores[i] += smoothing_next[i];
- sum += scores[i];
- }
- for(size_t i=0; i<scores.size(); ++i) {
- fprintf(file, "%f ", scores[i]/sum);
- }
- }
- fprintf(file,"\n");
-}
-
-void Model::score_f(const string& f)
-{
- if (fe) //Make sure we do not do anything if it is not a f model
- return;
- fprintf(file, "%s ||| ", f.c_str());
- //condition on the previous phrase
- if (previous) {
- vector<double> scores;
- scorer->score(modelscore->get_scores_f_prev(), scores);
- double sum = 0;
- for(size_t i=0; i<scores.size(); ++i) {
- scores[i] += smoothing_prev[i];
- sum += scores[i];
- }
- for(size_t i=0; i<scores.size(); ++i) {
- fprintf(file, "%f ", scores[i]/sum);
- }
- //fprintf(file, "||| ");
- }
- //condition on the next phrase
- if (next) {
- vector<double> scores;
- scorer->score(modelscore->get_scores_f_next(), scores);
- double sum = 0;
- for(size_t i=0; i<scores.size(); ++i) {
- scores[i] += smoothing_next[i];
- sum += scores[i];
- }
- for(size_t i=0; i<scores.size(); ++i) {
- fprintf(file, "%f ", scores[i]/sum);
- }
- }
- fprintf(file, "\n");
-}
-
-Model::Model(ModelScore* ms, Scorer* sc, const string& dir, const string& lang, const string& fn)
- : modelscore(ms), scorer(sc), filename(fn)
-{
-
- file = fopen(filename.c_str(),"w");
- if (!file) {
- cerr << "Could not open the model output file: " << filename << endl;
- exit(1);
- }
-
- fe = false;
- if (lang.compare("fe") == 0) {
- fe = true;
- } else if (lang.compare("f") != 0) {
- cerr << "You have given an illegal language to condition on: " << lang
- << "\nLegal types: fe (on both languages), f (only on source language)\n";
- exit(1);
- }
-
- previous = true;
- next = true;
- if (dir.compare("backward") == 0) {
- next = false;
- } else if (dir.compare("forward") == 0) {
- previous = false;
- }
-}
-
-Model::~Model()
-{
- fclose(file);
- delete modelscore;
- delete scorer;
-}
-
-void Model::zipFile()
-{
- fclose(file);
- file = fopen(filename.c_str(), "rb");
- gzFile gzfile = gzopen((filename+".gz").c_str(),"wb");
- char inbuffer[128];
- int num_read;
- while ((num_read = fread(inbuffer, 1, sizeof(inbuffer), file)) > 0) {
- gzwrite(gzfile, inbuffer, num_read);
- }
- fclose(file);
- gzclose(gzfile);
-
- //Remove the unzipped file
- remove(filename.c_str());
-}
-
-void Model::split_config(const string& config, string& dir, string& lang, string& orient)
-{
- istringstream is(config);
- string type;
- getline(is, type, '-');
- getline(is, orient, '-');
- getline(is, dir, '-');
- getline(is, lang, '-');
-}
-
-Model* Model::createModel(ModelScore* modelscore, const string& config, const string& filepath)
-{
- string dir, lang, orient, filename;
- split_config(config,dir,lang,orient);
-
- filename = filepath + config;
- if (orient.compare("mslr") == 0) {
- return new Model(modelscore, new ScorerMSLR(), dir, lang, filename);
- } else if (orient.compare("msd") == 0) {
- return new Model(modelscore, new ScorerMSD(), dir, lang, filename);
- } else if (orient.compare("monotonicity") == 0) {
- return new Model(modelscore, new ScorerMonotonicity(), dir, lang, filename);
- } else if (orient.compare("leftright") == 0) {
- return new Model(modelscore, new ScorerLR(), dir, lang, filename);
- } else {
- cerr << "Illegal orientation type of reordering model: " << orient
- << "\n allowed types: mslr, msd, monotonicity, leftright\n";
- exit(1);
- }
-}
-
-
-
-void Model::createSmoothing(double w)
-{
- scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev);
- scorer->createSmoothing(modelscore->get_scores_fe_next(), w, smoothing_next);
-}
-
-void Model::createConstSmoothing(double w)
-{
- scorer->createConstSmoothing(w, smoothing_prev);
- scorer->createConstSmoothing(w, smoothing_next);
-}
diff --git a/scripts/training/lexical-reordering/reordering_classes.h b/scripts/training/lexical-reordering/reordering_classes.h
deleted file mode 100644
index 4d0b56240..000000000
--- a/scripts/training/lexical-reordering/reordering_classes.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * reordering_classes.h
- * Utility classes for lexical reordering table scoring
- *
- * Created by: Sara Stymne - Linköping University
- * Machine Translation Marathon 2010, Dublin
- */
-
-#pragma once
-
-#include <vector>
-#include <string>
-#include <fstream>
-
-
-enum ORIENTATION {MONO, SWAP, DRIGHT, DLEFT, OTHER, NOMONO};
-
-
-//Keeps the counts for the different reordering types
-//(Instantiated in 1-3 instances, one for each type of model (hier, phrase, wbe))
-class ModelScore
-{
-private:
- std::vector<double> count_fe_prev;
- std::vector<double> count_fe_next;
- std::vector<double> count_f_prev;
- std::vector<double> count_f_next;
-
-protected:
- virtual ORIENTATION getType(const std::string& s);
-
-public:
- ModelScore();
- virtual ~ModelScore();
- void add_example(const std::string& previous, std::string& next);
- void reset_fe();
- void reset_f();
- const std::vector<double>& get_scores_fe_prev() const;
- const std::vector<double>& get_scores_fe_next() const;
- const std::vector<double>& get_scores_f_prev() const;
- const std::vector<double>& get_scores_f_next() const;
-
- static ModelScore* createModelScore(const std::string& modeltype);
-};
-
-class ModelScoreMSLR : public ModelScore
-{
-protected:
- virtual ORIENTATION getType(const std::string& s);
-};
-
-class ModelScoreLR : public ModelScore
-{
-protected:
- virtual ORIENTATION getType(const std::string& s);
-};
-
-class ModelScoreMSD : public ModelScore
-{
-protected:
- virtual ORIENTATION getType(const std::string& s);
-};
-
-class ModelScoreMonotonicity : public ModelScore
-{
-protected:
- virtual ORIENTATION getType(const std::string& s);
-};
-
-//Class for calculating total counts, and to calculate smoothing
-class Scorer
-{
-public:
- virtual ~Scorer() {}
- virtual void score(const std::vector<double>&, std::vector<double>&) const = 0;
- virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const = 0;
- virtual void createConstSmoothing(double, std::vector<double>&) const = 0;
-};
-
-class ScorerMSLR : public Scorer
-{
-public:
- virtual void score(const std::vector<double>&, std::vector<double>&) const;
- virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
- virtual void createConstSmoothing(double, std::vector<double>&) const;
-};
-
-class ScorerMSD : public Scorer
-{
-public:
- virtual void score(const std::vector<double>&, std::vector<double>&) const;
- virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
- virtual void createConstSmoothing(double, std::vector<double>&) const;
-};
-
-class ScorerMonotonicity : public Scorer
-{
-public:
- virtual void score(const std::vector<double>&, std::vector<double>&) const;
- virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
- virtual void createConstSmoothing(double, std::vector<double>&) const;
-};
-
-class ScorerLR : public Scorer
-{
-public:
- virtual void score(const std::vector<double>&, std::vector<double>&) const;
- virtual void createSmoothing(const std::vector<double>&, double, std::vector<double>&) const;
- virtual void createConstSmoothing(double, std::vector<double>&) const;
-};
-
-
-//Class for representing each model
-//Contains a modelscore and scorer (which can be of different model types (mslr, msd...)),
-//and file handling.
-//This class also keeps track of bidirectionality, and which language to condition on
-class Model
-{
-private:
- ModelScore* modelscore;
- Scorer* scorer;
-
- std::FILE* file;
- std::string filename;
-
- bool fe;
- bool previous;
- bool next;
-
- std::vector<double> smoothing_prev;
- std::vector<double> smoothing_next;
-
- static void split_config(const std::string& config, std::string& dir,
- std::string& lang, std::string& orient);
-public:
- Model(ModelScore* ms, Scorer* sc, const std::string& dir,
- const std::string& lang, const std::string& fn);
- ~Model();
- static Model* createModel(ModelScore*, const std::string&, const std::string&);
- void createSmoothing(double w);
- void createConstSmoothing(double w);
- void score_fe(const std::string& f, const std::string& e);
- void score_f(const std::string& f);
- void zipFile();
-};
-
diff --git a/scripts/training/lexical-reordering/score.cpp b/scripts/training/lexical-reordering/score.cpp
deleted file mode 100644
index 7f14b9fc8..000000000
--- a/scripts/training/lexical-reordering/score.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * score_reordering.cpp
- *
- * Created by: Sara Stymne - Linköping University
- * Machine Translation Marathon 2010, Dublin
- */
-
-#include <string>
-#include <vector>
-#include <map>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <cstdlib>
-#include <cstring>
-#include "InputFileStream.h"
-
-#include "reordering_classes.h"
-
-using namespace std;
-
-void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier);
-void get_orientations(const string& pair, string& previous, string& next);
-
-
-int main(int argc, char* argv[])
-{
-
- cerr << "Lexical Reordering Scorer\n"
- << "scores lexical reordering models of several types (hierarchical, phrase-based and word-based-extraction\n";
-
- if (argc < 3) {
- cerr << "syntax: score_reordering extractFile smoothingValue filepath (--model \"type max-orientation (specification-strings)\" )+\n";
- exit(1);
- }
-
- char* extractFileName = argv[1];
- double smoothingValue = atof(argv[2]);
- string filepath = argv[3];
-
- Moses::InputFileStream eFile(extractFileName);
- if (!eFile) {
- cerr << "Could not open the extract file " << extractFileName <<"for scoring of lexical reordering models\n";
- exit(1);
- }
-
- bool smoothWithCounts = false;
- map<string,ModelScore*> modelScores;
- vector<Model*> models;
- bool hier = false;
- bool phrase = false;
- bool wbe = false;
-
- string e,f,w,p,h;
- string prev, next;
-
- int i = 4;
- while (i<argc) {
- if (strcmp(argv[i],"--SmoothWithCounts") == 0) {
- smoothWithCounts = true;
- } else if (strcmp(argv[i],"--model") == 0) {
- if (i+1 >= argc) {
- cerr << "score: syntax error, no model information provided to the option" << argv[i] << endl;
- exit(1);
- }
- istringstream is(argv[++i]);
- string m,t;
- is >> m >> t;
- modelScores[m] = ModelScore::createModelScore(t);
- if (m.compare("hier") == 0) {
- hier = true;
- } else if (m.compare("phrase") == 0) {
- phrase = true;
- }
- if (m.compare("wbe") == 0) {
- wbe = true;
- }
-
- if (!hier && !phrase && !wbe) {
- cerr << "WARNING: No models specified for lexical reordering. No lexical reordering table will be trained.\n";
- return 0;
- }
-
- string config;
- //Store all models
- while (is >> config) {
- models.push_back(Model::createModel(modelScores[m],config,filepath));
- }
- } else {
- cerr << "illegal option given to lexical reordering model score\n";
- exit(1);
- }
- i++;
- }
-
- ////////////////////////////////////
- //calculate smoothing
- if (smoothWithCounts) {
- string line;
- while (getline(eFile,line)) {
- split_line(line,e,f,w,p,h);
- if (hier) {
- get_orientations(h, prev, next);
- modelScores["hier"]->add_example(prev,next);
- }
- if (phrase) {
- get_orientations(p, prev, next);
- modelScores["phrase"]->add_example(prev,next);
- }
- if (wbe) {
- get_orientations(w, prev, next);
- modelScores["wbe"]->add_example(prev,next);
- }
- }
-
- // calculate smoothing for each model
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->createSmoothing(smoothingValue);
- }
-
- //reopen eFile
- eFile.Close();
- eFile.Open(extractFileName);
- } else {
- //constant smoothing
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->createConstSmoothing(smoothingValue);
- }
- }
-
- ////////////////////////////////////
- //calculate scores for reordering table
- string line,f_current,e_current;
- bool first = true;
- while (getline(eFile, line)) {
- split_line(line,f,e,w,p,h);
-
- if (first) {
- f_current = f;
- e_current = e;
- first = false;
- } else if (f.compare(f_current) != 0 || e.compare(e_current) != 0) {
- //fe - score
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->score_fe(f_current,e_current);
- }
- //reset
- for(map<string,ModelScore*>::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) {
- it->second->reset_fe();
- }
-
- if (f.compare(f_current) != 0) {
- //f - score
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->score_f(f_current);
- }
- //reset
- for(map<string,ModelScore*>::const_iterator it = modelScores.begin(); it != modelScores.end(); ++it) {
- it->second->reset_f();
- }
- }
- f_current = f;
- e_current = e;
- }
-
- // uppdate counts
- if (hier) {
- get_orientations(h, prev, next);
- modelScores["hier"]->add_example(prev,next);
- }
- if (phrase) {
- get_orientations(p, prev, next);
- modelScores["phrase"]->add_example(prev,next);
- }
- if (wbe) {
- get_orientations(w, prev, next);
- modelScores["wbe"]->add_example(prev,next);
- }
- }
- //Score the last phrases
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->score_fe(f,e);
- }
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->score_f(f);
- }
-
- //Zip all files
- for (size_t i=0; i<models.size(); ++i) {
- models[i]->zipFile();
- }
-
- return 0;
-}
-
-
-
-void split_line(const string& line, string& foreign, string& english, string& wbe, string& phrase, string& hier)
-{
-
- int begin = 0;
- int end = line.find(" ||| ");
- foreign = line.substr(begin, end - begin);
-
- begin = end+5;
- end = line.find(" ||| ", begin);
- english = line.substr(begin, end - begin);
-
- begin = end+5;
- end = line.find(" | ", begin);
- wbe = line.substr(begin, end - begin);
-
- begin = end+3;
- end = line.find(" | ", begin);
- phrase = line.substr(begin, end - begin);
-
- begin = end+3;
- hier = line.substr(begin, line.size() - begin);
-}
-
-void get_orientations(const string& pair, string& previous, string& next)
-{
- istringstream is(pair);
- is >> previous >> next;
-}