diff options
-rw-r--r-- | mert/FeatureStats.cpp | 2 | ||||
-rw-r--r-- | mert/Makefile.am | 6 | ||||
-rw-r--r-- | mert/ScoreStats.cpp | 2 | ||||
-rwxr-xr-x | mert/mert.cpp | 121 | ||||
-rwxr-xr-x | mert/regression-testing/tests/mert-basic/command | 8 | ||||
-rw-r--r-- | mert/regression-testing/tests/mert-basic/data/INIT | 2 | ||||
-rw-r--r-- | moses/src/ThreadPool.cpp | 7 | ||||
-rw-r--r-- | moses/src/ThreadPool.h | 3 | ||||
-rw-r--r-- | scripts/Makefile | 4 | ||||
-rwxr-xr-x | scripts/training/mert-moses.pl | 12 |
10 files changed, 124 insertions, 43 deletions
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp index 74f1ff8a9..118599fa6 100644 --- a/mert/FeatureStats.cpp +++ b/mert/FeatureStats.cpp @@ -21,7 +21,7 @@ FeatureStats::FeatureStats() FeatureStats::~FeatureStats() { - delete array_; + delete[] array_; }; FeatureStats::FeatureStats(const FeatureStats &stats) diff --git a/mert/Makefile.am b/mert/Makefile.am index d751f798d..f5ea46ce1 100644 --- a/mert/Makefile.am +++ b/mert/Makefile.am @@ -1,6 +1,6 @@ lib_LTLIBRARIES = libmert.la bin_PROGRAMS = mert extractor evaluator -AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE +AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS) libmert_la_SOURCES = \ Util.cpp \ @@ -27,10 +27,10 @@ TERsrc/tools.cpp \ TerScorer.cpp \ CderScorer.cpp -mert_SOURCES = mert.cpp +mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp extractor_SOURCES = extractor.cpp evaluator_SOURCES = evaluator.cpp extractor_LDADD = libmert.la -lm -lz -mert_LDADD = libmert.la -lm -lz +mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) evaluator_LDADD = libmert.la -lm -lz diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp index bbfc38743..d3b28de36 100644 --- a/mert/ScoreStats.cpp +++ b/mert/ScoreStats.cpp @@ -21,7 +21,7 @@ ScoreStats::ScoreStats() ScoreStats::~ScoreStats() { - delete array_; + delete[] array_; }; ScoreStats::ScoreStats(const ScoreStats &stats) diff --git a/mert/mert.cpp b/mert/mert.cpp index 4cb8ea9d1..b0ea491bb 100755 --- a/mert/mert.cpp +++ b/mert/mert.cpp @@ -23,6 +23,8 @@ #include "Timer.h" #include "Util.h" +#include "../moses/src/ThreadPool.h" + float min_interval = 1e-3; @@ -42,6 +44,9 @@ void usage(void) cerr<<"[--scfile|-S] comma separated list of scorer data files (default score.data)"<<endl; cerr<<"[--ffile|-F] comma separated list of feature data files (default feature.data)"<<endl; cerr<<"[--ifile|-i] the starting point data file (default init.opt)"<<endl; +#ifdef WITH_THREADS + cerr<<"[--threads|-T] use multiple threads for random restart (default 1)"<<endl; +#endif cerr<<"[-v] verbose level"<<endl; cerr<<"[--help|-h] print this message and exit"<<endl; exit(1); @@ -60,12 +65,46 @@ static struct option long_options[] = { {"scfile",1,0,'S'}, {"ffile",1,0,'F'}, {"ifile",1,0,'i'}, +#ifdef WITH_THREADS + {"threads", required_argument,0,'T'}, +#endif {"verbose",1,0,'v'}, {"help",no_argument,0,'h'}, {0, 0, 0, 0} }; int option_index; +/** + * Runs an optimisation, or a random restart. +**/ +class OptimizationTask : public Moses::Task +{ + public: + OptimizationTask(Optimizer* optimizer, const Point& point) : + m_optimizer(optimizer), m_point(point) {} + + bool DeleteAfterExecution() { + return false; + } + + void Run() { + m_score = m_optimizer->Run(m_point); + } + + statscore_t getScore() const { + return m_score; + } + + const Point& getPoint() const { + return m_point; + } + + private: + Optimizer* m_optimizer; + Point m_point; + statscore_t m_score; +}; + int main (int argc, char **argv) { @@ -83,6 +122,9 @@ int main (int argc, char **argv) int nrandom=0; int seed=0; bool hasSeed = false; +#ifdef WITH_THREADS + size_t threads=1; +#endif string type("powell"); string scorertype("BLEU"); string scorerconfig(""); @@ -140,6 +182,12 @@ int main (int argc, char **argv) case 'v': setverboselevel(strtol(optarg,NULL,10)); break; +#ifdef WITH_THREADS + case 'T': + threads = strtol(optarg, NULL, 10); + if (threads < 1) threads = 1; + break; +#endif default: usage(); } @@ -266,41 +314,58 @@ int main (int argc, char **argv) O->SetScorer(TheScorer); O->SetFData(D.getFeatureData()); + +#ifdef WITH_THREADS + cerr << "Creating a pool of " << threads << " threads" << endl; + Moses::ThreadPool pool(threads); +#endif + + vector<OptimizationTask*> tasks; + // run with specified starting points - stringstream oss; - statscore_t best=0, mean=0, var=0; - Point bestP; - for(int i=0;i<start_list.size();i++) { - Point P(start_list[i], min, max);//Generate from the full feature set. Warning: must be done after Optimizer initialization - statscore_t score=O->Run(P); - oss.str(""); - oss << "Specified starting point number " << (1+i) << ", score: " << score; - if (i==0 || score>best) { - best=score; - bestP=P; - oss << " (new best)"; - } - mean+=score; - var+=(score*score); - PrintUserTime(oss.str()); + for(size_t i=0;i<start_list.size();i++) { + //Generate from the full feature set. Warning: must be done after Optimizer initialization + Point P(start_list[i], min, max); + OptimizationTask* task = new OptimizationTask(O,P); + tasks.push_back(task); +#ifdef WITH_THREADS + pool.Submit(task); +#else + task->Run(); +#endif } - // run with random starting points - for(int i=0; i<ntry; i++) { + //run with random starting points + for (int i = 0; i < ntry; ++i) { Point P(start_list[0], min, max); P.Randomize(); // randomize within min and max as given to the constructor - statscore_t score=O->Run(P); - oss.str(""); - oss << "Randomized starting point number " << (1+i) << ", score: " << score; - if(score>best) { - best=score; - bestP=P; - oss << " (new best)"; + OptimizationTask* task = new OptimizationTask(O,P); + tasks.push_back(task); +#ifdef WITH_THREADS + pool.Submit(task); +#else + task->Run(); +#endif + } + + //wait for all threads to finish +#ifdef WITH_THREADS + pool.Stop(true); +#endif + + //collect results + statscore_t best=0, mean=0, var=0; + Point bestP; + for (vector<OptimizationTask*>::const_iterator i = tasks.begin(); i != tasks.end(); ++i) { + statscore_t score = (*i)->getScore(); + mean += score; + var += score*score; + if (score > best) { + bestP = (*i)->getPoint(); + best = score; } - mean+=score; - var+=(score*score); - PrintUserTime(oss.str()); } + mean/=(float)ntry; var/=(float)ntry; var=sqrt(abs(var-mean*mean)); diff --git a/mert/regression-testing/tests/mert-basic/command b/mert/regression-testing/tests/mert-basic/command index 06f71f1d6..f54cbb085 100755 --- a/mert/regression-testing/tests/mert-basic/command +++ b/mert/regression-testing/tests/mert-basic/command @@ -4,6 +4,10 @@ bin=$1; shift testdir=$1; shift cd $testdir -$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best" -$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best" +cmd="$bin/mert --scfile data/SCORESTAT.txt --ffile data/FEATSTAT.txt --ifile data/INIT -d 14 -n 20 -r 1000" +#echo $cmd +$cmd 2>&1 | grep -i "^Best" +#echo $cmd +cmd="$bin/mert --scfile data/SCORESTAT.bin --ffile data/FEATSTAT.bin --ifile data/INIT -d 14 -n 20 -r 1000" +$cmd 2>&1 | grep -i "^Best" diff --git a/mert/regression-testing/tests/mert-basic/data/INIT b/mert/regression-testing/tests/mert-basic/data/INIT index 1efdd15ea..e1122383a 100644 --- a/mert/regression-testing/tests/mert-basic/data/INIT +++ b/mert/regression-testing/tests/mert-basic/data/INIT @@ -1 +1,3 @@ 0.4 0.15 0.15 0.15 0.15 0.15 0.15 0.5 -1 0.2 0.2 0.2 0.2 0.2 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 +1 1 1 1 1 1 1 1 1 1 1 1 1 1 diff --git a/moses/src/ThreadPool.cpp b/moses/src/ThreadPool.cpp index 97ae4a6e2..ca9bd2f60 100644 --- a/moses/src/ThreadPool.cpp +++ b/moses/src/ThreadPool.cpp @@ -56,13 +56,12 @@ void ThreadPool::Execute() //Execute job if (task) { task->Run(); - delete task; + if (task->DeleteAfterExecution()) { + delete task; + } } m_threadAvailable.notify_all(); } while (!m_stopped); -#ifdef BOOST_HAS_PTHREADS - TRACE_ERR("Thread " << pthread_self() << " exiting" << endl); -#endif } void ThreadPool::Submit( Task* task ) diff --git a/moses/src/ThreadPool.h b/moses/src/ThreadPool.h index bcb3f8140..a76872027 100644 --- a/moses/src/ThreadPool.h +++ b/moses/src/ThreadPool.h @@ -36,7 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #endif -#include "Util.h" +//#include "Util.h" /** @@ -54,6 +54,7 @@ class Task { public: virtual void Run() = 0; + virtual bool DeleteAfterExecution() {return true;} virtual ~Task() {} }; diff --git a/scripts/Makefile b/scripts/Makefile index 29a3f7265..8c749957b 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -8,8 +8,8 @@ DS?=$(shell date '+%Y%m%d') # Set TARGETDIR to directory where you want the compiled scripts to be copied # to. # Set BINDIR to the directory where GIZA++ and other tools are installed. -TARGETDIR=/opt/AO/sw/edinburgh-code/ -BINDIR=/opt/AO/sw/edinburgh-code/ +TARGETDIR=/home/bhaddow/work/moses.svn +BINDIR=/opt/statmt/moses/bin/ MAIN_SCRIPTS_TARGET_DIR=$(TARGETDIR) # MAIN_SCRIPTS_TARGET_DIR=$(shell echo `pwd`/temp) diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl index c8c04cb90..15e1d0d00 100755 --- a/scripts/training/mert-moses.pl +++ b/scripts/training/mert-moses.pl @@ -10,6 +10,7 @@ # Excerpts from revision history +# Sept 2011 multi-threaded mert (Barry Haddow) # Jul 2011 simplifications (Ondrej Bojar) # -- rely on moses' -show-weights instead of parsing moses.ini # ... so moses is also run once *before* mert starts, checking @@ -99,6 +100,7 @@ my $___RANDOM_DIRECTIONS = 0; # search in random directions only my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008] my $___PAIRWISE_RANKED_OPTIMIZER = 0; # use Hopkins&May[2011] my $___RANDOM_RESTARTS = 20; +my $__THREADS = 0; # Parameter for effective reference length when computing BLEU score # Default is to use shortest reference @@ -180,7 +182,8 @@ GetOptions( "range=s@" => \$___RANGES, "prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous) "maximum-iterations=i" => \$maximum_iterations, - "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER + "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER, + "threads=i" => \$__THREADS ) or exit(1); # the 4 required parameters can be supplied on the command line directly @@ -258,6 +261,9 @@ Options: --random-directions ... search only in random directions --number-of-random-directions=int ... number of random directions (also works with regular optimizer, default: 0) + --pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011) + --threads=NUMBER ... Use multi-threaded mert (must be compiled in). + "; exit 1; } @@ -716,6 +722,10 @@ while(1) { $cmd = $cmd." --ifile run$run.$weights_in_file"; } + if ($__THREADS) { + $cmd = $cmd." --threads $__THREADS"; + } + if ($___PAIRWISE_RANKED_OPTIMIZER) { $cmd .= " --pro pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary pro.data"; } |