Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/mert
diff options
context:
space:
mode:
author: bhaddow <barry.haddow@gmail.com> 2011-11-14 22:32:36 +0400
committer: bhaddow <barry.haddow@gmail.com> 2011-11-14 22:32:36 +0400
commit: 4cf6e0320a90950ab2688861066c36e613c534fa (patch)
tree: b355207220c50a9b4982b954c7d86b6f83de2c04 /mert
parent: 4bb9ecb8eb7548e47b3563b3632a7801a8f13032 (diff)
pro extraction mainline and stub of feature data iterator
Diffstat (limited to 'mert')
-rw-r--r-- mert/FeatureDataIterator.cpp 40
-rw-r--r-- mert/FeatureDataIterator.h 94
-rw-r--r-- mert/Makefile.am 11
-rw-r--r-- mert/pro.cpp 85
4 files changed, 227 insertions, 3 deletions
diff --git a/mert/FeatureDataIterator.cpp b/mert/FeatureDataIterator.cpp
new file mode 100644
index 000000000..a581965c6
--- /dev/null
+++ b/mert/FeatureDataIterator.cpp
@@ -0,0 +1,40 @@
+// $Id$
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2011- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "FeatureDataIterator.h"
+
+using namespace std;
+
+// Stub constructor: `filename` is accepted but nothing is opened yet.
+// in_ is explicitly set to a null pointer so the member never holds an
+// indeterminate value while the real loading code is still missing.
+FeatureDataIterator::FeatureDataIterator(const string filename) : in_(0) {
+}
+
+// Advance step of the iterator. Stub: the body is intentionally empty, so
+// calling it changes nothing.
+void FeatureDataIterator::increment() {}
+
+// Equality test between two iterators.
+//
+// Stub: the original body had no return statement, which is undefined
+// behaviour for a function returning bool. Until reading is implemented every
+// iterator is reported as equal to every other one (rhs is not inspected), so
+// a loop of the form `it != FeatureDataIterator::end()` finishes immediately
+// instead of dereferencing an iterator that has no data.
+bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
+  return true;
+}
+
+// Returns the items at the current position.
+//
+// Stub: the original body had no return statement, which is undefined
+// behaviour for a function returning a reference. For now an empty vector is
+// returned; it is a function-local static so the reference stays valid after
+// the call returns.
+const vector<FeatureDataItem>& FeatureDataIterator::dereference() const {
+  static const vector<FeatureDataItem> noItems;
+  return noItems;
+}
diff --git a/mert/FeatureDataIterator.h b/mert/FeatureDataIterator.h
new file mode 100644
index 000000000..e87a6386d
--- /dev/null
+++ b/mert/FeatureDataIterator.h
@@ -0,0 +1,94 @@
+// $Id$
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2011- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef _FEATURE_DATA_ITERATOR_
+#define _FEATURE_DATA_ITERATOR_
+
+/**
+ * For loading from the feature data file.
+**/
+
+#include <fstream>
+#include <map>
+#include <vector>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+
+/**
+ * Minimal sparse vector.
+ *
+ * Only the interface is declared here; the member definitions are not part
+ * of this header. Each instance carries one id -> float map (fvector_), while
+ * the name -> id and id -> name tables (name2id_, id2name_) are static and
+ * therefore shared by all SparseVector objects.
+ *
+ * NOTE(review): std::string and size_t are used below, but the only standard
+ * headers included above are <fstream>, <map> and <vector> - confirm that
+ * <string> is always reachable through them.
+ **/
+class SparseVector {
+
+ public:
+  // Container types used by the members below.
+  typedef std::map<size_t,float> fvector_t;
+  typedef std::map<std::string, size_t> name2id_t;
+  typedef std::vector<std::string> id2name_t;
+
+  // Read-only operations.
+  float get(std::string name) const;
+  float get(size_t id) const;
+  size_t size() const;
+  void write(std::ostream& out, const std::string& sep = " ") const;
+
+  // Modifying operations.
+  void set(std::string name, float value);
+  void clear();
+  SparseVector& operator-=(const SparseVector& rhs);
+
+ private:
+  // Per-instance values, keyed by size_t id.
+  fvector_t fvector_;
+
+  // Name/id tables shared by every instance.
+  static name2id_t name2id_;
+  static id2name_t id2name_;
+};
+
+// Non-member subtraction; returns its result by value.
+SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
+
+class FeatureDataItem {  // Plain data holder pairing a dense float vector with a SparseVector.
+ public:
+ std::vector<float> dense;  // dense part, stored as floats
+ SparseVector sparse;  // sparse part
+};
+
+/**
+ * Forward-traversal iterator built on boost::iterator_facade. Dereferencing
+ * it yields a const std::vector<FeatureDataItem>.
+ *
+ * An instance is created from a file name, and end() supplies the iterator to
+ * compare against when looping.
+ **/
+class FeatureDataIterator :
+  public boost::iterator_facade<FeatureDataIterator,
+                                const std::vector<FeatureDataItem>,
+                                boost::forward_traversal_tag>
+{
+ public:
+  // Takes a file name (presumably the feature data file to read - confirm
+  // against the implementation).
+  FeatureDataIterator(const std::string filename);
+
+  // Sentinel iterator, constructed from an empty file name.
+  static FeatureDataIterator end() {
+    FeatureDataIterator sentinel("");
+    return sentinel;
+  }
+
+ private:
+  // Gives boost::iterator_facade access to the private hooks below.
+  friend class boost::iterator_core_access;
+
+  const std::vector<FeatureDataItem>& dereference() const;
+  bool equal(const FeatureDataIterator& rhs) const;
+  void increment();
+
+  // Pointer to an input file stream.
+  // NOTE(review): nothing in this class shows who allocates or releases it.
+  std::ifstream* in_;
+};
+
+#endif
+
+
diff --git a/mert/Makefile.am b/mert/Makefile.am
index 461b9fb74..3454a0205 100644
--- a/mert/Makefile.am
+++ b/mert/Makefile.am
@@ -1,5 +1,5 @@
lib_LTLIBRARIES = libmert.la
-bin_PROGRAMS = mert extractor evaluator
+bin_PROGRAMS = mert extractor evaluator pro
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
libmert_la_SOURCES = \
@@ -7,6 +7,7 @@ Util.cpp \
Timer.cpp \
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
+FeatureDataIterator.cpp \
Data.cpp \
BleuScorer.cpp \
Point.cpp \
@@ -29,9 +30,13 @@ CderScorer.cpp \
MergeScorer.cpp
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
-extractor_SOURCES = extractor.cpp
-evaluator_SOURCES = evaluator.cpp
+extractor_SOURCES = extractor.cpp
+evaluator_SOURCES = evaluator.cpp
+pro_SOURCES = pro.cpp
extractor_LDADD = libmert.la -lm -lz
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
evaluator_LDADD = libmert.la -lm -lz
+pro_LDADD = libmert.la @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
+pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la
+
diff --git a/mert/pro.cpp b/mert/pro.cpp
index e69de29bb..ac2503172 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -0,0 +1,85 @@
+// $Id$
+// vim:tabstop=2
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2011- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+
+/**
+ * This is part of the PRO implementation. It converts the features and scores
+ * files into a form suitable for input into the megam maxent trainer.
+ *
+ * For details of PRO, refer to Hopkins & May (EMNLP 2011)
+ **/
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include "FeatureDataIterator.h"
+
+using namespace std;
+
+namespace po = boost::program_options;
+
+/**
+ * Entry point of the PRO extraction tool.
+ *
+ * Parses the command line (--help/-h, --scfile/-S, --ffile/-F,
+ * --random-seed/-r), seeds rand() either from the given seed or from the
+ * system clock, and then walks the first feature file with a
+ * FeatureDataIterator.
+ *
+ * Returns 0 after printing the help text or on normal completion, and 1 when
+ * no feature file was given on the command line.
+ **/
+int main(int argc, char** argv)
+{
+  bool help;
+  vector<string> scoreFiles;
+  vector<string> featureFiles;
+  int seed;
+
+  po::options_description desc("Allowed options");
+  desc.add_options()
+    ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+    ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
+    ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+    ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
+    ;
+
+  po::options_description cmdline_options;
+  cmdline_options.add(desc);
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc,argv).
+            options(cmdline_options).run(), vm);
+  po::notify(vm);
+  if (help) {
+    cout << "Usage: " + string(argv[0]) + " [options]" << endl;
+    cout << desc << endl;
+    return 0;
+  }
+
+  // featureFiles[0] is read below; indexing an empty vector is undefined
+  // behaviour, so reject a command line without any --ffile argument.
+  if (featureFiles.empty()) {
+    cerr << "Error: no feature data file specified (use --ffile)" << endl;
+    return 1;
+  }
+
+  // `seed` is only written by the option parser when --random-seed was
+  // given, so consult the variables map before reading it.
+  if (vm.count("random-seed")) {
+    cerr << "Initialising random seed to " << seed << endl;
+    srand(seed);
+  } else {
+    cerr << "Initialising random seed from system clock" << endl;
+    srand(time(NULL));
+  }
+
+  // Only the first feature file is visited; the loop body is still a
+  // placeholder that just binds the current items.
+  FeatureDataIterator fi(featureFiles[0]);
+  for (; fi != FeatureDataIterator::end(); ++fi) {
+    const vector<FeatureDataItem>& featureData = *fi;
+  }
+
+  return 0;
+}
+