Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 16:01:37 +0400
committerskyload <skyload@1f5c12ca-751b-0410-a591-d2e778427230>2010-04-21 16:01:37 +0400
commitbac88f75f65ec0849c836ecee50a3aa05f65de73 (patch)
tree1f3736f34201148d356d597eb3efbb40a06c0d61
parent69f9aecaf2b55f858cbae62ae6ca785de3bf50e5 (diff)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/DPR_MOSES@3163 1f5c12ca-751b-0410-a591-d2e778427230
-rw-r--r--reranking/data/README5
-rw-r--r--reranking/data/nbest.small7
-rw-r--r--reranking/data/weights11
-rw-r--r--reranking/src/Hypo.cpp58
-rw-r--r--reranking/src/Hypo.h43
-rw-r--r--reranking/src/Main.cpp92
-rw-r--r--reranking/src/Makefile18
-rw-r--r--reranking/src/NBest.cpp125
-rw-r--r--reranking/src/NBest.h41
-rw-r--r--reranking/src/ParameterNBest.cpp355
-rw-r--r--reranking/src/ParameterNBest.h78
-rw-r--r--reranking/src/Tools.cpp27
-rw-r--r--reranking/src/Tools.h71
13 files changed, 931 insertions, 0 deletions
diff --git a/reranking/data/README b/reranking/data/README
new file mode 100644
index 000000000..59b20b32d
--- /dev/null
+++ b/reranking/data/README
@@ -0,0 +1,5 @@
+
+sample usage:
+
+../src/nbest -input-file nbest.small -output-file nbest.1best 1 -sort -weights weights
+
diff --git a/reranking/data/nbest.small b/reranking/data/nbest.small
new file mode 100644
index 000000000..0fcbc44ce
--- /dev/null
+++ b/reranking/data/nbest.small
@@ -0,0 +1,7 @@
+0 ||| Once a major milestone in the Balkans ||| d: 0 -0.608213 0 0 -0.512647 0 0 lm: -35.7187 tm: -3.97053 -17.5137 -3.24082 -15.8638 2.99969 w: -7 ||| -3.92049
+0 ||| Once a crucial period in the Balkans ||| d: 0 -0.944329 0 0 -1.06468 0 0 lm: -37.5341 tm: -4.27619 -19.441 -3.81074 -14.767 3.99959 w: -7 ||| -4.00353
+1 ||| Since the world is focused on Iraq , North Korea and a possible crisis with Iran on nuclear weapons , Kosovo is somewhat unnoticed . ||| d: -6 -5.80589 -0.65383 -1.29291 -6.19413 -0.0861354 -0.993748 lm: -112.868 tm: -42.7841 -61.6487 -16.5351 -23.8061 21.9977 w: -25 ||| -13.0796
+2 ||| The public will soon turn its attention back to that province during a decision regarding his fate . ||| d: -8 -4.61691 0 -3.62979 -4.85916 0 -4.43407 lm: -81.3478 tm: -46.0407 -63.79 -23.7663 -25.175 14.9984 w: -18 ||| -12.1226
+2 ||| The public will soon be able to turn its attention back into this province during a decision on his fate . ||| d: -8 -5.53064 0 -3.51999 -3.26708 0 -4.44003 lm: -84.7939 tm: -36.2621 -66.32 -21.0804 -33.9136 13.9985 w: -21 ||| -12.1227
+2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
+2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
diff --git a/reranking/data/weights b/reranking/data/weights
new file mode 100644
index 000000000..c6b6c1ac0
--- /dev/null
+++ b/reranking/data/weights
@@ -0,0 +1,11 @@
+0
+1 2 3
+4
+5
+6
+7
+8
+9
+10
+11
+12 13
diff --git a/reranking/src/Hypo.cpp b/reranking/src/Hypo.cpp
new file mode 100644
index 000000000..78019cf7e
--- /dev/null
+++ b/reranking/src/Hypo.cpp
@@ -0,0 +1,58 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: Hypo.cpp
+ * basic functions to process one hypothesis
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+
+#include "Hypo.h"
+#include <iostream>
+
+//const char* NBEST_DELIM = "|||";
+
+// Default constructor: creates an empty hypothesis.
+// id and s are plain scalars, so they are zero-initialised explicitly here;
+// otherwise a default-constructed Hypo would hold indeterminate values.
+Hypo::Hypo() : id(0), s(0)
+{
+}
+
+Hypo::~Hypo()
+{
+ // nothing to release: the members (int, string, vector<float>, float) clean up themselves
+}
+
+// Write this hypothesis to outf as one n-best line:
+//   <id> ||| <translation> ||| <feature scores>||| <global score>
+// Every feature score is followed by one blank, including the last one.
+void Hypo::Write(ofstream &outf)
+{
+  outf << id << NBEST_DELIM2;
+  outf << trg << NBEST_DELIM2;
+
+  const size_t nb_feat = f.size();
+  for (size_t k = 0; k < nb_feat; ++k) {
+    outf << f[k] << " ";
+  }
+
+  outf << NBEST_DELIM << " " << s << endl;
+}
+
+// Recompute the global score as the weighted sum of the feature scores,
+// store it in s and return it.
+// If w holds fewer weights than this hypothesis has features, w is padded
+// with zeros; the caller's object is modified, so the note is printed only
+// for the first hypothesis that needs the padding.
+float Hypo::CalcGlobal(Weights &w)
+{
+  const size_t nb_feat = f.size();
+
+  if (w.val.size() < nb_feat) {
+    cerr << " - NOTE: padding weight vector with " << nb_feat - w.val.size() << " zeros" << endl;
+    w.val.resize(nb_feat);
+  }
+
+  // accumulate directly into the member, weights and features walked in parallel
+  s = 0;
+  vector<float>::const_iterator wi = w.val.begin();
+  for (vector<float>::const_iterator fi = f.begin(); fi != f.end(); ++fi, ++wi) {
+    s += (*wi) * (*fi);
+  }
+  return s;
+}
+
+// Ordering used for sorting: a hypothesis with a HIGHER global score compares
+// as "smaller", so an ascending sort puts the best hypotheses first.
+bool Hypo::operator< (const Hypo &h2) const
+{
+  return h2.s < s;
+}
+
+
diff --git a/reranking/src/Hypo.h b/reranking/src/Hypo.h
new file mode 100644
index 000000000..9d024caa4
--- /dev/null
+++ b/reranking/src/Hypo.h
@@ -0,0 +1,43 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: Hypo.h
+ * basic functions to process one hypothesis
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+
+#ifndef _HYPO_H_
+#define _HYPO_H_
+
+using namespace std;
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "Tools.h"
+
+#define NBEST_DELIM "|||"
+#define NBEST_DELIM2 " ||| "
+
+// One entry of an n-best list.
+class Hypo {
+  int id;            // sentence ID (first field of the n-best line)
+  string trg;        // translation
+  vector<float> f;   // feature function scores
+  float s;           // global score
+  // segmentation
+public:
+  Hypo();
+  // The string and the score vector are only copied, so they are taken by
+  // const reference; temporaries and const objects can be passed as well.
+  Hypo(int p_id, const string &p_trg, const vector<float> &p_f, float p_s)
+    : id(p_id), trg(p_trg), f(p_f), s(p_s) {}
+  ~Hypo();
+  float CalcGlobal(Weights&);          // recompute s from the weights, returns the new s
+  void Write(ofstream&);               // write the hypothesis as one n-best line
+  bool operator< (const Hypo&) const;  // true if this hypothesis has the higher global score
+  // bool CompareLikelihoods (const Hypo&, const Hypo&) const;
+};
+
+#endif
diff --git a/reranking/src/Main.cpp b/reranking/src/Main.cpp
new file mode 100644
index 000000000..170de0980
--- /dev/null
+++ b/reranking/src/Main.cpp
@@ -0,0 +1,92 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: Main.cpp
+ * command line interface
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+#include <iostream>
+#include <fstream>
+#include "ParameterNBest.h"
+#include "NBest.h"
+#include "Tools.h"
+
+#include "Util.h" // from Moses
+
+
+using namespace std;
+
+/*
+ * Command line interface of the nbest tool: reads an n-best file list by
+ * list, optionally recomputes the global scores from a weight file
+ * (-weights) and/or sorts each list (-sort), and writes the result.
+ * Returns 0 on success and 1 if the command line is invalid; I/O failures
+ * and Error() terminate the program through exit(1).
+ */
+int main (int argc, char *argv[]) {
+  // parse parameters; the object has automatic storage so that it is
+  // released on every return path
+  ParameterNBest parameter;
+  if (!parameter.LoadParam(argc, argv)) {
+    parameter.Explain();
+    return 1;
+  }
+
+  // read input: file name, optionally followed by the max. number of hypotheses to read
+  ifstream inpf;
+  PARAM_VEC p=parameter.GetParam("input-file");
+  if (p.size()<1 || p.size()>2) Error("The option -input-file requires one or two arguments");
+  int in_n=p.size()>1 ? Scan<int>(p[1]) : 0;
+  cout << "NBest version 0.1, written by Holger.Schwenk@lium.univ-lemans.fr" << endl
+       << " - reading input from file '" << p[0] << "'";
+  if (in_n>0) cout << " (limited to the first " << in_n << " hypothesis)";
+  cout << endl;
+  inpf.open(p[0].c_str());
+  if (inpf.fail()) { perror ("ERROR"); exit(1); }
+
+  // open output: file name, optionally followed by the max. number of hypotheses to write
+  ofstream outf;
+  p=parameter.GetParam("output-file");
+  if (p.size()<1 || p.size()>2) Error("The option -output-file requires one or two arguments");
+  int out_n=p.size()>1 ? Scan<int>(p[1]) : 0;
+  cout << " - writing output to file '" << p[0] << "'";
+  if (out_n>0) cout << " (limited to the first " << out_n << " hypothesis)";
+  cout << endl;
+  outf.open(p[0].c_str());
+  if (outf.fail()) { perror ("ERROR"); exit(1); }
+
+  // eventually read weights; global scores are recalculated only if weights are given
+  Weights w;
+  bool do_calc=false;
+  if (parameter.isParamSpecified("weights")) {
+    p=parameter.GetParam("weights");
+    if (p.size()<1) Error("The option -weights requires one argument");
+    cout << " - reading weights from file '" << p[0] << "'";
+    int n=w.Read(p[0].c_str());
+    cout << " (found " << n << " values)" << endl;
+    do_calc=true;
+    cout << " - recalculating global scores" << endl;
+  }
+
+  // shall we sort ?
+  bool do_sort = parameter.isParamSpecified("sort");
+  if (do_sort) cout << " - sorting global scores" << endl;
+
+  // main loop: each NBest object consumes the hypotheses of one sentence ID
+  int nb_sent=0, nb_nbest=0;
+  while (!inpf.eof()) {
+    NBest nbest(inpf, in_n);
+
+    if (do_calc) nbest.CalcGlobal(w);
+    if (do_sort) nbest.Sort();
+    nbest.Write(outf, out_n);
+
+    nb_sent++;
+    nb_nbest+=nbest.NbNBest();
+  }
+  inpf.close();
+  outf.close();
+
+  // display final statistics (the loop above runs at least once, so nb_sent>0)
+  cout << " - processed " << nb_nbest << " n-best hypotheses in " << nb_sent << " sentences"
+       << " (average " << (float) nb_nbest/nb_sent << ")" << endl;
+
+  return 0;
+}
diff --git a/reranking/src/Makefile b/reranking/src/Makefile
new file mode 100644
index 000000000..7b0ec6945
--- /dev/null
+++ b/reranking/src/Makefile
@@ -0,0 +1,18 @@
+
+# where to find include files and libraries from Moses
+MOSES_INC=../../moses/src
+LIB_DIR=../../moses/src/
+
+LIBS=-lmoses -lz
+OBJS=Main.o NBest.o Hypo.o Tools.o ParameterNBest.o
+
+CFLAGS=-I$(MOSES_INC)
+
+# nbest-tool and clean do not name files, so declare them phony
+.PHONY: nbest-tool clean
+
+# default goal, kept under its historical name; the binary itself is called nbest
+nbest-tool: nbest
+
+# the rule is named after the file it creates, so make can skip the link
+# step when no object file has changed
+nbest: $(OBJS)
+	$(CXX) -o $@ $(OBJS) -L$(LIB_DIR) $(LIBS)
+
+%.o: %.cpp
+	$(CXX) $(CFLAGS) -o $@ -c $<
+
+clean:
+	-rm $(OBJS) nbest
diff --git a/reranking/src/NBest.cpp b/reranking/src/NBest.cpp
new file mode 100644
index 000000000..5ef119abe
--- /dev/null
+++ b/reranking/src/NBest.cpp
@@ -0,0 +1,125 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: NBest.cpp
+ * basic functions on n-best lists
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+
+#include "NBest.h"
+
+#include "Util.h" // from Moses
+
+#include <sstream>
+#include <algorithm>
+
+//NBest::NBest() {
+ //cerr << "NBEST: constructor called" << endl;
+//}
+
+
+/*
+ * Read one line of the n-best file and append it to this list.
+ *
+ * Expected format: <id> ||| <translation> ||| <features> ||| <global score>
+ * where <features> is a whitespace separated mix of names (tokens containing
+ * ':') and values; only the values are kept.
+ *
+ * inpf  input stream
+ * n     if >0, lines are still consumed once the list holds n hypotheses,
+ *       but they are no longer parsed or stored
+ *
+ * Returns true if the line belonged to the current list. Returns false at
+ * the end of the input or when the line carries a new ID; in the latter case
+ * the line stays in a static buffer and is parsed by the next call. The
+ * static state means that only one input stream can be processed at a time.
+ * Malformed lines terminate the program through Error().
+ */
+bool NBest::ParseLine(ifstream &inpf, const int n) {
+  static string line;       // used internally to buffer an input line
+  static int prev_id=-1;    // used to detect a change of the n-best ID
+  static const string delim(NBEST_DELIM);
+  vector<string> blocks;
+
+  if (line.empty()) {
+    // test the result of getline() rather than eof(): a final line without
+    // trailing newline sets eofbit although it was read successfully
+    if (!getline(inpf,line)) return false;
+  }
+
+  // split line into blocks
+  string::size_type pos=0, epos;
+  while ((epos=line.find(delim,pos))!=string::npos) {
+    blocks.push_back(line.substr(pos,epos-pos));
+    pos=epos+delim.size();
+  }
+  blocks.push_back(line.substr(pos));
+
+  if (blocks.size()<4) {
+    cerr << line << endl;
+    Error("can't parse the above line");
+  }
+
+  // parse ID
+  const int id=Scan<int>(blocks[0]);
+  if (prev_id>=0 && id!=prev_id) {prev_id=id; return false;} // new nbest list has started, line stays buffered
+  prev_id=id;
+
+  if (n>0 && nbest.size() >= static_cast<size_t>(n)) {
+    line.clear();
+    return true; // skip parsing of unused hypos
+  }
+
+  // parse feature function scores; stream extraction splits on any run of
+  // whitespace, so the last value is kept even without a trailing blank
+  vector<float> f;
+  istringstream feats(blocks[2]);
+  string feat;
+  while (feats >> feat) {
+    if (feat.find(':')!=string::npos) continue; // feature name such as "lm:", not a value
+    f.push_back(Scan<float>(feat));
+  }
+
+  // eventually parse segmentation
+  if (blocks.size()>4) {
+    Error("parsing segmentation not yet supported");
+  }
+
+  nbest.push_back(Hypo(id, blocks[1], f, Scan<float>(blocks[3])));
+
+  line.clear(); // force read of new line
+
+  return true;
+}
+
+
+// Build one n-best list: keep calling ParseLine() on inpf until it reports
+// that the current list (or the input) has ended. n is handed through to
+// ParseLine() unchanged.
+NBest::NBest(ifstream &inpf, const int n) {
+  for (;;) {
+    if (!ParseLine(inpf,n)) break;
+  }
+}
+
+
+NBest::~NBest() {
+ // nothing to do here: the body is empty and the members are destroyed automatically
+}
+
+// Write the first n hypotheses of the list to outf, in their current order.
+// n<1 or n larger than the list means "write all of them".
+void NBest::Write(ofstream &outf, int n)
+{
+  const int total = nbest.size();
+  const int limit = (n >= 1 && n <= total) ? n : total;
+
+  const vector<Hypo>::iterator stop = nbest.begin() + limit;
+  for (vector<Hypo>::iterator h = nbest.begin(); h != stop; ++h) {
+    h->Write(outf);
+  }
+}
+
+
+// Recompute the global score of every hypothesis of the list with the
+// weights w (w may get padded with zeros, see Hypo::CalcGlobal).
+// Returns the highest recomputed score, or 0 for an empty list. The function
+// is declared to return float, so it must return a value on every path:
+// flowing off the end of a non-void function is undefined behaviour.
+float NBest::CalcGlobal(Weights &w)
+{
+  float best = 0;
+  for (vector<Hypo>::iterator i = nbest.begin(); i != nbest.end(); ++i) {
+    const float score = i->CalcGlobal(w);
+    if (i == nbest.begin() || score > best) best = score;
+  }
+  return best;
+}
+
+
+// Sort the list by decreasing global score (Hypo::operator< is inverted).
+// stable_sort keeps hypotheses with equal scores in their input order, so
+// the output (in particular a truncated one) does not depend on how the
+// standard library happens to arrange ties.
+void NBest::Sort() {
+  stable_sort(nbest.begin(),nbest.end());
+}
+
diff --git a/reranking/src/NBest.h b/reranking/src/NBest.h
new file mode 100644
index 000000000..24c29f69b
--- /dev/null
+++ b/reranking/src/NBest.h
@@ -0,0 +1,41 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: NBest.h
+ * basic functions on n-best lists
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+
+#ifndef _NBEST_H_
+#define _NBEST_H_
+
+using namespace std;
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "Tools.h"
+#include "Hypo.h"
+
+class NBest { // the hypotheses read from the n-best file for one sentence ID
+ int id; // NOTE(review): not set or read by the code in NBest.cpp
+ string src; // NOTE(review): not set or read by the code in NBest.cpp
+ vector<Hypo> nbest; // the hypotheses, in input order until Sort() is called
+ bool ParseLine(ifstream &inpf, const int n); // reads one line; false at end of list or input
+ public:
+ NBest(ifstream&, const int=0); // reads one list; the int (if >0) limits the hypotheses stored
+ ~NBest();
+ int NbNBest() {return nbest.size(); }; // number of hypotheses stored
+ float CalcGlobal(Weights&); // recompute the global score of every hypothesis
+ void Sort(); // largest values first
+ void Write(ofstream&, int=0); // write the first int hypotheses; <1 means all
+};
+
+void Error(char *msg);
+
+#endif
diff --git a/reranking/src/ParameterNBest.cpp b/reranking/src/ParameterNBest.cpp
new file mode 100644
index 000000000..9ed3bff8e
--- /dev/null
+++ b/reranking/src/ParameterNBest.cpp
@@ -0,0 +1,355 @@
+// $Id: $
+
+/***********************************************************************
+nbest - tool to process Moses n-best list
+Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <iostream>
+#include <iterator>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include "ParameterNBest.h"
+#include "Tools.h"
+
+#include "Util.h" // from Moses
+#include "InputFileStream.h"
+#include "UserMessage.h"
+
+using namespace std;
+
+/** define allowed parameters */
+ParameterNBest::ParameterNBest()
+{
+  // one row per parameter: full name, abbreviation, description
+  static const char* const kParams[][3] = {
+    {"input-file",  "i", "file name of the input n-best list"},
+    {"output-file", "o", "file name of the output n-best list"},
+    {"recalc",      "r", "recalc global scores"},
+    {"weights",     "w", "coefficients of the feature functions"},
+    {"sort",        "s", "sort n-best list according to the global scores"},
+    {"lexical",     "l", "report number of lexically different hypothesis"}
+  };
+  const size_t nbParams = sizeof(kParams) / sizeof(kParams[0]);
+
+  for (size_t k = 0; k < nbParams; ++k) {
+    AddParam(kParams[k][0], kParams[k][1], kParams[k][2]);
+  }
+}
+
+ParameterNBest::~ParameterNBest() // empty: the four map members are destroyed automatically
+{
+}
+
+/** register a parameter that has no abbreviation, helper of the constructor */
+void ParameterNBest::AddParam(const string &paramName, const string &description)
+{
+  m_description[paramName] = description;
+  m_valid[paramName] = true;
+}
+
+/** register a parameter together with its abbreviation, helper of the constructor;
+ *  both the full name and the abbreviation become valid switches */
+void ParameterNBest::AddParam(const string &paramName, const string &abbrevName, const string &description)
+{
+  // name and description are handled exactly as for a parameter without abbreviation
+  AddParam(paramName, description);
+
+  m_abbreviation[paramName] = abbrevName;
+  m_valid[abbrevName] = true;
+}
+
+/** print the usage message on stderr: one line per parameter with its name,
+ *  its abbreviation (if it has one) and its description */
+void ParameterNBest::Explain() {
+  cerr << "Usage:" << endl;
+
+  PARAM_STRING::const_iterator desc = m_description.begin();
+  while (desc != m_description.end())
+  {
+    cerr << "\t-" << desc->first;
+
+    const PARAM_STRING::const_iterator abbr = m_abbreviation.find(desc->first);
+    if (abbr != m_abbreviation.end()) {
+      cerr << " (" << abbr->second << ")";
+    }
+
+    cerr << ": " << desc->second << endl;
+    ++desc;
+  }
+}
+
+/** check whether an item on the command line is a switch or a value
+ * \param token token on the command line to be checked (may be a null pointer)
+ * \return true for a '-' followed by anything but a digit; false for a null
+ *         pointer, the empty string, a lone "-", tokens not starting with '-'
+ *         and negative numbers such as "-5" **/
+
+bool ParameterNBest::isOption(const char* token) {
+  if (!token) return false;
+
+  const std::string text(token);
+  if (text.size() < 2) return false;   // "" and "-" are never switches
+  if (text[0] != '-') return false;
+
+  // a digit right after the dash means a negative number, i.e. a value
+  const bool secondIsDigit = (text[1] >= '0' && text[1] <= '9');
+  return !secondIsDigit;
+}
+
+/** convenience overload: forwards to LoadParam(argc, argv) with the
+ *  synthetic command line "executable -f <filePath>".
+ *  NOTE(review): "-f" is not among the switches registered by the
+ *  constructor, so the other overload presumably rejects it as illegal;
+ *  confirm before relying on this overload. */
+bool ParameterNBest::LoadParam(const string &filePath)
+{
+  const char *args[3];
+  args[0] = "executable";
+  args[1] = "-f";
+  args[2] = filePath.c_str();
+  return LoadParam(3, const_cast<char**>(args));
+}
+
+/** parse the command line: collect the values of all registered switches, log them, reject unknown switches and validate the result; returns false on any error */
+bool ParameterNBest::LoadParam(int argc, char* argv[])
+{
+ // not used at present: the config file handling below is commented out
+ string configPath;
+/*
+ if ( (configPath = FindParam("-f", argc, argv)) == ""
+ && (configPath = FindParam("-config", argc, argv)) == "")
+ {
+ PrintCredit();
+
+ UserMessage::Add("No configuration file was specified. Use -config or -f");
+ return false;
+ }
+ else
+ {
+ if (!ReadConfigFile(configPath))
+ {
+ UserMessage::Add("Could not read "+configPath);
+ return false;
+ }
+ }
+*/
+
+ // collect the values given with the full switch names ("-input-file", ...)
+ for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
+ {
+ const string paramName = iterParam->first;
+ OverwriteParam("-" + paramName, paramName, argc, argv);
+ }
+
+ // ... then the abbreviations ("-i", ...); processed second, so their values replace those of the full name
+ for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++)
+ {
+ const string paramName = iterParam->first;
+ const string paramShortName = iterParam->second;
+ OverwriteParam("-" + paramShortName, paramName, argc, argv);
+ }
+
+ // log the parameters that were set; the constructor registers no "verbose" switch, so verbose normally stays 1
+ int verbose = 1;
+ if (m_setting.find("verbose") != m_setting.end() &&
+ m_setting["verbose"].size() > 0)
+ verbose = Scan<int>(m_setting["verbose"][0]);
+ if (verbose >= 1) { // only if verbose; TRACE_ERR expands to nothing unless TRACE_ENABLE is defined
+ TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
+ for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
+ TRACE_ERR( "\t" << iterParam->first << ": ");
+ for ( size_t i = 0; i < iterParam->second.size(); i++ )
+ TRACE_ERR( iterParam->second[i] << " ");
+ TRACE_ERR( endl);
+ }
+ }
+
+ // check for illegal parameters: every token that looks like a switch must be a registered name or abbreviation
+ bool noErrorFlag = true;
+ for (int i = 0 ; i < argc ; i++)
+ {
+ if (isOption(argv[i]))
+ {
+ string paramSwitch = (string) argv[i];
+ string paramName = paramSwitch.substr(1);
+ if (m_valid.find(paramName) == m_valid.end())
+ {
+ UserMessage::Add("illegal switch: " + paramSwitch);
+ noErrorFlag = false;
+ }
+ }
+ }
+
+ // check if parameters make sense; Validate() is evaluated first, so it always runs
+ return Validate() && noErrorFlag;
+}
+
+/** true if paramName is present in settings with at least one value;
+ *  uses find(), so it never inserts an entry */
+static bool HasValue(const PARAM_MAP &settings, const string &paramName)
+{
+  const PARAM_MAP::const_iterator found = settings.find(paramName);
+  return found != settings.end() && !found->second.empty();
+}
+
+/** check that parameter settings make sense
+ *  \return true if input-file and output-file have a value and, when
+ *          -recalc was given, weights have been specified as well.
+ *  Problems are reported through UserMessage::Add(). Lookups go through
+ *  find() instead of operator[]: operator[] would insert empty entries and
+ *  make isParamSpecified() report switches that were never given. */
+bool ParameterNBest::Validate()
+{
+  bool noErrorFlag = true;
+
+  // required parameters
+  if (!HasValue(m_setting, "input-file")) {
+    UserMessage::Add("No input-file");
+    noErrorFlag = false;
+  }
+
+  if (!HasValue(m_setting, "output-file")) {
+    UserMessage::Add("No output-file");
+    noErrorFlag = false;
+  }
+
+  // -recalc is a switch without values, so test for its presence rather
+  // than for the number of values stored with it
+  const bool recalcRequested = m_setting.find("recalc") != m_setting.end();
+  if (recalcRequested && !HasValue(m_setting, "weights")) {
+    UserMessage::Add("you need to spezify weight when recalculating global scores");
+    noErrorFlag = false;
+  }
+
+  return noErrorFlag;
+}
+
+/** check whether the files named by a parameter exist
+ *  \param paramName     parameter whose values are checked
+ *  \param tokenizeIndex index of the token, within each tokenized value, that holds the path
+ *  \param extensions    the path is accepted if it exists with any one of these suffixes
+ *  \return true if the parameter is not set or every value names an existing
+ *          file; otherwise a message is added through UserMessage and false is returned */
+bool ParameterNBest::FilesExist(const string &paramName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
+{
+  const PARAM_MAP::const_iterator found = m_setting.find(paramName);
+  if (found == m_setting.end())
+    return true; // no param. therefore nothing to check
+
+  const std::vector<std::string> &paths = found->second;
+  for (size_t p = 0; p < paths.size(); ++p)
+  {
+    const std::vector<std::string> tokens = Tokenize(paths[p]);
+    if (tokens.size() <= tokenizeIndex)
+    {
+      stringstream errorMsg("");
+      errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per emtry in '"
+               << paramName << "', but only found "
+               << tokens.size();
+      UserMessage::Add(errorMsg.str());
+      return false;
+    }
+    const string &pathStr = tokens[tokenizeIndex];
+
+    // stop at the first extension for which the file exists
+    bool fileFound = false;
+    for (size_t i = 0; i < extensions.size(); ++i)
+    {
+      if (FileExists(pathStr + extensions[i]))
+      {
+        fileFound = true;
+        break;
+      }
+    }
+
+    if (!fileFound)
+    {
+      stringstream errorMsg("");
+      errorMsg << "File " << pathStr << " does not exist";
+      UserMessage::Add(errorMsg.str());
+      return false;
+    }
+  }
+  return true;
+}
+
+/** look for a switch on the command line and return the token that follows it
+ *  \return the token after the first occurrence of paramSwitch; the empty
+ *          string if the switch is absent or is the last token (the latter
+ *          is also reported through UserMessage) */
+// TODO arg parsing like this does not belong in the library, it belongs
+// in moses-cmd
+string ParameterNBest::FindParam(const string &paramSwitch, int argc, char* argv[])
+{
+  for (int i = 0; i < argc; ++i)
+  {
+    if (paramSwitch != argv[i])
+      continue;
+
+    if (i+1 < argc)
+      return argv[i+1];
+
+    // the switch is the last token, so it has no value
+    stringstream errorMsg("");
+    errorMsg << "Option " << paramSwitch << " requires a parameter!";
+    UserMessage::Add(errorMsg.str());
+    // TODO return some sort of error, not the empty string
+  }
+  return "";
+}
+
+/** update parameter settings with command line switches
+ * \param paramSwitch (potentially short) name of switch
+ * \param paramName full name of parameter
+ * \param argc number of arguments on command line
+ * \param argv values of parameters on command line
+ *
+ * Only the first occurrence of the switch is used. The tokens that follow
+ * it, up to the next switch, replace the values already stored for the
+ * parameter position by position; surplus tokens are appended. */
+void ParameterNBest::OverwriteParam(const string &paramSwitch, const string &paramName, int argc, char* argv[])
+{
+  // locate the switch
+  int pos = 0;
+  while (pos < argc && paramSwitch != argv[pos])
+    ++pos;
+  if (pos >= argc)
+    return;
+
+  // operator[] defines the parameter even without values, important for boolean switches
+  PARAM_VEC &values = m_setting[paramName];
+
+  size_t slot = 0;
+  for (int arg = pos + 1; arg < argc && !isOption(argv[arg]); ++arg, ++slot)
+  {
+    if (slot < values.size())
+      values[slot] = argv[arg];
+    else
+      values.push_back(argv[arg]);
+  }
+}
+
+
+/** read parameters from a configuration file
+ *  A line starting with "[name]" selects the parameter that the following
+ *  non-empty lines are appended to; everything after a '#' is ignored.
+ *  \return always true */
+bool ParameterNBest::ReadConfigFile( string filePath )
+{
+  InputFileStream inFile(filePath);
+  string paramName;
+
+  for (string line; getline(inFile, line); )
+  {
+    // comments
+    const size_t hashPos = line.find('#');
+    if (hashPos != string::npos)
+      line.erase(hashPos);
+    // trim leading and trailing spaces/tabs
+    line = Trim(line);
+
+    if (line.empty())
+      continue;
+
+    if (line[0] != '[')
+    { // add value to parameter
+      m_setting[paramName].push_back(line);
+      continue;
+    }
+
+    // new parameter; without a closing bracket the previous name is kept
+    const size_t closePos = line.find(']');
+    if (closePos != string::npos)
+      paramName = line.substr(1, closePos - 1);
+  }
+  return true;
+}
+
+
+void ParameterNBest::PrintCredit() // print the copyright/licence banner, the build date and the author on stderr
+{
+ cerr << "NBest - A tool to process Moses n-best lists" << endl
+ << "Copyright (C) 2008 Holger Schwenk" << endl << endl
+
+ << "This library is free software; you can redistribute it and/or" << endl
+ << "modify it under the terms of the GNU Lesser General Public" << endl
+ << "License as published by the Free Software Foundation; either" << endl
+ << "version 2.1 of the License, or (at your option) any later version." << endl << endl
+
+ << "This library is distributed in the hope that it will be useful," << endl
+ << "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
+ << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
+ << "Lesser General Public License for more details." << endl << endl
+
+ << "You should have received a copy of the GNU Lesser General Public" << endl
+ << "License along with this library; if not, write to the Free Software" << endl
+ << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
+ << "***********************************************************************" << endl << endl
+ << "Built on " << __DATE__ << endl << endl
+
+ << "Written by Holger Schwenk, Holger.Schwenk@lium.univ-lemans.fr" << endl << endl;
+}
+
diff --git a/reranking/src/ParameterNBest.h b/reranking/src/ParameterNBest.h
new file mode 100644
index 000000000..c653176dc
--- /dev/null
+++ b/reranking/src/ParameterNBest.h
@@ -0,0 +1,78 @@
+// $Id: $
+
+/***********************************************************************
+nbest - tool to process Moses n-best list
+Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef _PARAMETER_NBEST_H_
+#define _PARAMETER_NBEST_H_
+
+#include <string>
+#include <map>
+#include <vector>
+#include "TypeDef.h"
+
+typedef std::vector<std::string> PARAM_VEC;
+typedef std::map<std::string, PARAM_VEC > PARAM_MAP;
+typedef std::map<std::string, bool> PARAM_BOOL;
+typedef std::map<std::string, std::string > PARAM_STRING;
+
+/** Handles the parameter values given on the command line of the nbest tool
+ * (a config file reader exists, but its call in LoadParam is commented out).
+ * Values are stored as raw strings under the full name of the parameter. */
+class ParameterNBest
+{
+protected:
+ PARAM_MAP m_setting; // full parameter name -> values collected for it
+ PARAM_BOOL m_valid; // full names and abbreviations accepted as switches
+ PARAM_STRING m_abbreviation; // full parameter name -> abbreviation
+ PARAM_STRING m_description; // full parameter name -> text printed by Explain()
+
+ std::string FindParam(const std::string &paramSwitch, int argc, char* argv[]);
+ void OverwriteParam(const std::string &paramSwitch, const std::string &paramName, int argc, char* argv[]);
+ bool ReadConfigFile( std::string filePath );
+ bool FilesExist(const std::string &paramName, size_t tokenizeIndex,std::vector<std::string> const& fileExtension=std::vector<std::string>(1,""));
+ bool isOption(const char* token);
+ bool Validate();
+
+ void AddParam(const std::string &paramName, const std::string &description);
+ void AddParam(const std::string &paramName, const std::string &abbrevName, const std::string &description);
+
+ void PrintCredit();
+
+public:
+ ParameterNBest();
+ ~ParameterNBest();
+ bool LoadParam(int argc, char* argv[]);
+ bool LoadParam(const std::string &filePath);
+ void Explain();
+
+ /** return the values stored for the given parameter name; because of operator[] an empty entry is created if there is none, after which isParamSpecified() reports the parameter as specified */
+ const PARAM_VEC &GetParam(const std::string &paramName)
+ {
+ return m_setting[paramName];
+ }
+ /** check if an entry exists for the parameter (its switch was given, or an entry was created by a lookup through operator[]) */
+ bool isParamSpecified(const std::string &paramName)
+ {
+ return m_setting.find( paramName ) != m_setting.end();
+ }
+
+};
+
+#endif
diff --git a/reranking/src/Tools.cpp b/reranking/src/Tools.cpp
new file mode 100644
index 000000000..bbee84cbc
--- /dev/null
+++ b/reranking/src/Tools.cpp
@@ -0,0 +1,27 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: Tools.cpp
+ * basic utility functions
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+#include "Tools.h"
+
+// Read whitespace separated float values from the file fname and append them
+// to val; reading stops at the first token that is not a number.
+// If the file cannot be opened, the reason is printed with perror() and the
+// program exits with status 1.
+// Returns the number of values held by val afterwards.
+int Weights::Read(const char *fname) {
+  ifstream inpf(fname);
+  if (!inpf) {
+    perror ("ERROR");
+    exit(1);
+  }
+
+  for (float value; inpf >> value; ) {
+    val.push_back(value);
+  }
+
+  inpf.close();
+  return val.size();
+}
+
diff --git a/reranking/src/Tools.h b/reranking/src/Tools.h
new file mode 100644
index 000000000..73228d5e4
--- /dev/null
+++ b/reranking/src/Tools.h
@@ -0,0 +1,71 @@
+/*
+ * nbest: tool to process moses n-best lists
+ *
+ * File: Tools.h
+ * basic utility functions
+ *
+ * Created by Holger Schwenk, University of Le Mans, 05/16/2008
+ *
+ */
+
+
+#ifndef _TOOLS_H_
+#define _TOOLS_H_
+
+using namespace std;
+
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+class Weights { // the weights of the feature functions, used by Hypo::CalcGlobal()
+ vector<float> val; // weight values in file order; accessed directly by Hypo (friend)
+ public:
+ Weights() {};
+ ~Weights() {};
+ int Read(const char *); // append the values found in the named file, returns val.size()
+ friend class Hypo;
+};
+
+//******************************************************
+
+/*
+template<typename T>
+inline T Scan(const std::string &input)
+{
+ std::stringstream stream(input);
+ T ret;
+ stream >> ret;
+ return ret;
+}
+*/
+
+//******************************************************
+
+// Print msg, prefixed with "ERROR: ", on stderr and terminate the program
+// with exit status 1; the function does not return.
+// The message is taken as const char* because all callers pass string
+// literals, and converting a literal to char* is deprecated in C++03 and
+// ill-formed from C++11 on.
+inline void Error (const char *msg) {
+  cerr << "ERROR: " << msg << endl;
+  exit(1);
+}
+
+//******************************************************
+// From Moses code:
+
+
+/*
+ * Outputting debugging/verbose information to stderr.
+ * Use TRACE_ENABLE flag to redirect tracing output into oblivion
+ * so that you can output your own ad-hoc debugging info.
+ * However, if you use stderr directly, please delete calls to it once
+ * you finished debugging so that it won't clutter up.
+ * Also use TRACE_ENABLE to turn off output of any debugging info
+ * when compiling for a gui front-end so that running gui won't generate
+ * output on command line
+ * */
+#ifdef TRACE_ENABLE
+#define TRACE_ERR(str) std::cerr << str
+#else
+#define TRACE_ERR(str) {}
+#endif
+
+#endif
+