Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mert/FeatureStats.cpp 2
-rw-r--r-- mert/Makefile.am 6
-rw-r--r-- mert/ScoreStats.cpp 2
-rwxr-xr-x mert/mert.cpp 121
-rwxr-xr-x mert/regression-testing/tests/mert-basic/command 8
-rw-r--r-- mert/regression-testing/tests/mert-basic/data/INIT 2
-rw-r--r-- moses/src/ThreadPool.cpp 7
-rw-r--r-- moses/src/ThreadPool.h 3
-rw-r--r-- scripts/Makefile 4
-rwxr-xr-x scripts/training/mert-moses.pl 12
10 files changed, 124 insertions, 43 deletions
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 74f1ff8a9..118599fa6 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -21,7 +21,7 @@ FeatureStats::FeatureStats()
FeatureStats::~FeatureStats()
{
- delete array_;
+ delete[] array_;
};
FeatureStats::FeatureStats(const FeatureStats &stats)
diff --git a/mert/Makefile.am b/mert/Makefile.am
index d751f798d..f5ea46ce1 100644
--- a/mert/Makefile.am
+++ b/mert/Makefile.am
@@ -1,6 +1,6 @@
lib_LTLIBRARIES = libmert.la
bin_PROGRAMS = mert extractor evaluator
-AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE
+AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
libmert_la_SOURCES = \
Util.cpp \
@@ -27,10 +27,10 @@ TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp
-mert_SOURCES = mert.cpp
+mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
extractor_SOURCES = extractor.cpp
evaluator_SOURCES = evaluator.cpp
extractor_LDADD = libmert.la -lm -lz
-mert_LDADD = libmert.la -lm -lz
+mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
evaluator_LDADD = libmert.la -lm -lz
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index bbfc38743..d3b28de36 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -21,7 +21,7 @@ ScoreStats::ScoreStats()
ScoreStats::~ScoreStats()
{
- delete array_;
+ delete[] array_;
};
ScoreStats::ScoreStats(const ScoreStats &stats)
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 4cb8ea9d1..b0ea491bb 100755
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -23,6 +23,8 @@
#include "Timer.h"
#include "Util.h"
+#include "../moses/src/ThreadPool.h"
+
float min_interval = 1e-3;
@@ -42,6 +44,9 @@ void usage(void)
cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl;
cerr<<"[--ffile|-F] comma separated list of feature data files (default feature.data)"<<endl;
cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl;
+#ifdef WITH_THREADS
+ cerr<<"[--threads|-T] use multiple threads for random restart (default 1)"<<endl;
+#endif
cerr<<"[-v] verbose level"<<endl;
cerr<<"[--help|-h] print this message and exit"<<endl;
exit(1);
@@ -60,12 +65,46 @@ static struct option long_options[] = {
{"scfile",1,0,'S'},
{"ffile",1,0,'F'},
{"ifile",1,0,'i'},
+#ifdef WITH_THREADS
+ {"threads", required_argument,0,'T'},
+#endif
{"verbose",1,0,'v'},
{"help",no_argument,0,'h'},
{0, 0, 0, 0}
};
int option_index;
+/**
+ * Runs an optimisation, or a random restart.
+**/
+class OptimizationTask : public Moses::Task
+{
+ public:
+ OptimizationTask(Optimizer* optimizer, const Point& point) :
+ m_optimizer(optimizer), m_point(point) {}
+
+ bool DeleteAfterExecution() {
+ return false;
+ }
+
+ void Run() {
+ m_score = m_optimizer->Run(m_point);
+ }
+
+ statscore_t getScore() const {
+ return m_score;
+ }
+
+ const Point& getPoint() const {
+ return m_point;
+ }
+
+ private:
+ Optimizer* m_optimizer;
+ Point m_point;
+ statscore_t m_score;
+};
+
int main (int argc, char **argv)
{
@@ -83,6 +122,9 @@ int main (int argc, char **argv)
int nrandom=0;
int seed=0;
bool hasSeed = false;
+#ifdef WITH_THREADS
+ size_t threads=1;
+#endif
string type("powell");
string scorertype("BLEU");
string scorerconfig("");
@@ -140,6 +182,12 @@ int main (int argc, char **argv)
case 'v':
setverboselevel(strtol(optarg,NULL,10));
break;
+#ifdef WITH_THREADS
+ case 'T':
+ threads = strtol(optarg, NULL, 10);
+ if (threads < 1) threads = 1;
+ break;
+#endif
default:
usage();
}
@@ -266,41 +314,58 @@ int main (int argc, char **argv)
O->SetScorer(TheScorer);
O->SetFData(D.getFeatureData());
+
+#ifdef WITH_THREADS
+ cerr << "Creating a pool of " << threads << " threads" << endl;
+ Moses::ThreadPool pool(threads);
+#endif
+
+ vector<OptimizationTask*> tasks;
+
// run with specified starting points
- stringstream oss;
- statscore_t best=0, mean=0, var=0;
- Point bestP;
- for(int i=0;i<start_list.size();i++) {
- Point P(start_list[i], min, max);//Generate from the full feature set. Warning: must be done after Optimizer initialization
- statscore_t score=O->Run(P);
- oss.str("");
- oss << "Specified starting point number " << (1+i) << ", score: " << score;
- if (i==0 || score>best) {
- best=score;
- bestP=P;
- oss << " (new best)";
- }
- mean+=score;
- var+=(score*score);
- PrintUserTime(oss.str());
+ for(size_t i=0;i<start_list.size();i++) {
+ //Generate from the full feature set. Warning: must be done after Optimizer initialization
+ Point P(start_list[i], min, max);
+ OptimizationTask* task = new OptimizationTask(O,P);
+ tasks.push_back(task);
+#ifdef WITH_THREADS
+ pool.Submit(task);
+#else
+ task->Run();
+#endif
}
- // run with random starting points
- for(int i=0; i<ntry; i++) {
+ //run with random starting points
+ for (int i = 0; i < ntry; ++i) {
Point P(start_list[0], min, max);
P.Randomize(); // randomize within min and max as given to the constructor
- statscore_t score=O->Run(P);
- oss.str("");
- oss << "Randomized starting point number " << (1+i) << ", score: " << score;
- if(score>best) {
- best=score;
- bestP=P;
- oss << " (new best)";
+ OptimizationTask* task = new OptimizationTask(O,P);
+ tasks.push_back(task);
+#ifdef WITH_THREADS
+ pool.Submit(task);
+#else
+ task->Run();
+#endif
+ }
+
+ //wait for all threads to finish
+#ifdef WITH_THREADS
+ pool.Stop(true);
+#endif
+
+ //collect results
+ statscore_t best=0, mean=0, var=0;
+ Point bestP;
+ for (vector<OptimizationTask*>::const_iterator i = tasks.begin(); i != tasks.end(); ++i) {
+ statscore_t score = (*i)->getScore();
+ mean += score;
+ var += score*score;
+ if (score > best) {
+ bestP = (*i)->getPoint();
+ best = score;
}
- mean+=score;
- var+=(score*score);
- PrintUserTime(oss.str());
}
+
mean/=(float)ntry;
var/=(float)ntry;
var=sqrt(abs(var-mean*mean));
diff --git a/mert/regression-testing/tests/mert-basic/command b/mert/regression-testing/tests/mert-basic/command
index 06f71f1d6..f54cbb085 100755
--- a/mert/regression-testing/tests/mert-basic/command
+++ b/mert/regression-testing/tests/mert-basic/command
@@ -4,6 +4,10 @@ bin=$1; shift
testdir=$1; shift
cd $testdir
-$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
-$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
+cmd="$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000"
+#echo $cmd
+$cmd 2>&1 | grep -i "^Best"
+#echo $cmd
+cmd="$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000"
+$cmd 2>&1 | grep -i "^Best"
diff --git a/mert/regression-testing/tests/mert-basic/data/INIT b/mert/regression-testing/tests/mert-basic/data/INIT
index 1efdd15ea..e1122383a 100644
--- a/mert/regression-testing/tests/mert-basic/data/INIT
+++ b/mert/regression-testing/tests/mert-basic/data/INIT
@@ -1 +1,3 @@
0.4 0.15 0.15 0.15 0.15 0.15 0.15 0.5 -1 0.2 0.2 0.2 0.2 0.2
+0 0 0 0 0 0 0 0 0 0 0 0 0 0
+1 1 1 1 1 1 1 1 1 1 1 1 1 1
diff --git a/moses/src/ThreadPool.cpp b/moses/src/ThreadPool.cpp
index 97ae4a6e2..ca9bd2f60 100644
--- a/moses/src/ThreadPool.cpp
+++ b/moses/src/ThreadPool.cpp
@@ -56,13 +56,12 @@ void ThreadPool::Execute()
//Execute job
if (task) {
task->Run();
- delete task;
+ if (task->DeleteAfterExecution()) {
+ delete task;
+ }
}
m_threadAvailable.notify_all();
} while (!m_stopped);
-#ifdef BOOST_HAS_PTHREADS
- TRACE_ERR("Thread " << pthread_self() << " exiting" << endl);
-#endif
}
void ThreadPool::Submit( Task* task )
diff --git a/moses/src/ThreadPool.h b/moses/src/ThreadPool.h
index bcb3f8140..a76872027 100644
--- a/moses/src/ThreadPool.h
+++ b/moses/src/ThreadPool.h
@@ -36,7 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
-#include "Util.h"
+//#include "Util.h"
/**
@@ -54,6 +54,7 @@ class Task
{
public:
virtual void Run() = 0;
+ virtual bool DeleteAfterExecution() {return true;}
virtual ~Task() {}
};
diff --git a/scripts/Makefile b/scripts/Makefile
index 29a3f7265..8c749957b 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -8,8 +8,8 @@ DS?=$(shell date '+%Y%m%d')
# Set TARGETDIR to directory where you want the compiled scripts to be copied
# to.
# Set BINDIR to the directory where GIZA++ and other tools are installed.
-TARGETDIR=/opt/AO/sw/edinburgh-code/
-BINDIR=/opt/AO/sw/edinburgh-code/
+TARGETDIR=/home/bhaddow/work/moses.svn
+BINDIR=/opt/statmt/moses/bin/
MAIN_SCRIPTS_TARGET_DIR=$(TARGETDIR)
# MAIN_SCRIPTS_TARGET_DIR=$(shell echo `pwd`/temp)
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index c8c04cb90..15e1d0d00 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -10,6 +10,7 @@
# Excerpts from revision history
+# Sept 2011 multi-threaded mert (Barry Haddow)
# Jul 2011 simplifications (Ondrej Bojar)
# -- rely on moses' -show-weights instead of parsing moses.ini
# ... so moses is also run once *before* mert starts, checking
@@ -99,6 +100,7 @@ my $___RANDOM_DIRECTIONS = 0; # search in random directions only
my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008]
my $___PAIRWISE_RANKED_OPTIMIZER = 0; # use Hopkins&May[2011]
my $___RANDOM_RESTARTS = 20;
+my $__THREADS = 0;
# Parameter for effective reference length when computing BLEU score
# Default is to use shortest reference
@@ -180,7 +182,8 @@ GetOptions(
"range=s@" => \$___RANGES,
"prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
"maximum-iterations=i" => \$maximum_iterations,
- "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER
+ "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
+ "threads=i" => \$__THREADS
) or exit(1);
# the 4 required parameters can be supplied on the command line directly
@@ -258,6 +261,9 @@ Options:
--random-directions ... search only in random directions
--number-of-random-directions=int ... number of random directions
(also works with regular optimizer, default: 0)
+ --pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
+ --threads=NUMBER ... Use multi-threaded mert (must be compiled in).
+
";
exit 1;
}
@@ -716,6 +722,10 @@ while(1) {
$cmd = $cmd." --ifile run$run.$weights_in_file";
}
+ if ($__THREADS) {
+ $cmd = $cmd." --threads $__THREADS";
+ }
+
if ($___PAIRWISE_RANKED_OPTIMIZER) {
$cmd .= " --pro pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary pro.data";
}