From f37ad8b9bd10d6c3700de8ba7b87880aa546131e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Fri, 11 May 2012 17:02:52 +0100 Subject: regression test for extract(hiero) --- regression-testing/MosesRegressionTesting.pm | 2 +- regression-testing/run-test-extract-rules.perl | 89 ++++++++++++++++++++++++++ regression-testing/run-test-suite.perl | 14 ++-- 3 files changed, 98 insertions(+), 7 deletions(-) create mode 100755 regression-testing/run-test-extract-rules.perl (limited to 'regression-testing') diff --git a/regression-testing/MosesRegressionTesting.pm b/regression-testing/MosesRegressionTesting.pm index 9eb94a838..f4b3ce099 100644 --- a/regression-testing/MosesRegressionTesting.pm +++ b/regression-testing/MosesRegressionTesting.pm @@ -5,7 +5,7 @@ use strict; # if your tests need a new version of the test data, increment this # and make sure that a moses-regression-tests-vX.Y is available for # download from statmt.org (redpony AT umd dot edu for more info) -use constant TESTING_DATA_VERSION => '9'; +use constant TESTING_DATA_VERSION => '10'; # find the data directory in a few likely locations and make sure # that it is the correct version diff --git a/regression-testing/run-test-extract-rules.perl b/regression-testing/run-test-extract-rules.perl new file mode 100755 index 000000000..c53e6247d --- /dev/null +++ b/regression-testing/run-test-extract-rules.perl @@ -0,0 +1,89 @@ +#!/usr/bin/perl -w + +use strict; + +BEGIN { +use Cwd qw/ abs_path /; +use File::Basename; +my $script_dir = dirname(abs_path($0)); +print STDERR "script_dir=$script_dir\n"; +push @INC, $script_dir; +} + +use FindBin qw($Bin); +use MosesRegressionTesting; +use Getopt::Long; +use File::Temp qw ( tempfile ); +use POSIX qw ( strftime ); + +my $extractorExe; +my $test_name; +my $data_dir; +my $test_dir; +my $results_dir; + +GetOptions("extractor=s" => \$extractorExe, + "test=s" => \$test_name, + "data-dir=s"=> \$data_dir, + "test-dir=s"=> \$test_dir, + "results-dir=s"=> \$results_dir, + ) or exit 1; + +# output dir +unless (defined $results_dir) +{ + my $ts = get_timestamp($extractorExe); + $results_dir = "$data_dir/results/$test_name/$ts"; +} + +`mkdir -p $results_dir`; + +my $outPath = "$results_dir"; + +my $extractorArgs = `cat $test_dir/$test_name/args.txt`; +$_ = $extractorArgs; +s/(\$\w+)/$1/eeg; +$extractorArgs = $_; + +my $cmdMain = "$extractorExe $extractorArgs \n"; +`$cmdMain`; + +my $truthPath = "$test_dir/$test_name/truth/"; + + +if (-e $outPath) +{ + my $cmd = "diff --exclude=.DS_Store $outPath/ $truthPath/ | wc -l"; + my $numDiff = `$cmd`; + + if ($numDiff == 0) + { + # print STDERR "FAILURE. Ran $cmdMain\n"; + print STDERR "SUCCESS\n"; + exit 0; + } + else + { + print STDERR "FAILURE. Ran $cmdMain\n"; + exit 1; + } +} +else +{ + print STDERR "FAILURE. Output does not exists. Ran $cmdMain\n"; + exit 1; +} + +################################### +sub get_timestamp { + my ($file) = @_; + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) + = stat($file); + my $timestamp = strftime("%Y%m%d-%H%M%S", gmtime $mtime); + my $timestamp2 = strftime("%Y%m%d-%H%M%S", gmtime); + my $username = `whoami`; chomp $username; + return "moses.v$timestamp-$username-at-$timestamp2"; +} + + diff --git a/regression-testing/run-test-suite.perl b/regression-testing/run-test-suite.perl index 6e33a11d5..3e5348882 100755 --- a/regression-testing/run-test-suite.perl +++ b/regression-testing/run-test-suite.perl @@ -63,6 +63,7 @@ my $decoderPhrase = "$Bin/../moses-cmd/src/moses"; my $decoderChart = "$Bin/../moses-chart-cmd/src/moses_chart"; my $scoreExe = "$Bin/../scripts/training/phrase-extract/score"; my $extractorExe = "$Bin/../scripts/training/phrase-extract/extract"; +my $extractorSyntaxExe = "$Bin/../scripts/training/phrase-extract/extract-rules"; my $kenlmBinarizer = "$Bin/../kenlm/build_binary"; my $test_dir; my $BIN_TEST = $script_dir; @@ -97,29 +98,30 @@ my @failed; foreach my $test (@tests) { my $cmd; - my $model_type = substr($test, $[, 6); + my @tokens = split('\.', $test); + my $model_type = $tokens[0]; if ($model_type eq 'phrase') { $cmd .= "$BIN_TEST/run-single-test.perl $test_run --decoder=$decoderPhrase"; } - elsif ($model_type eq 'chart.') + elsif ($model_type eq 'chart') { $cmd .= "$BIN_TEST/run-single-test.perl $test_run --decoder=$decoderChart"; } - elsif ($model_type eq 'score.') + elsif ($model_type eq 'score') { $cmd .= "$BIN_TEST/run-test-scorer.perl $test_run --scorer=$scoreExe"; } - elsif ($model_type eq 'extrac') + elsif ($model_type eq 'extract') { $cmd .= "$BIN_TEST/run-test-extract.perl $test_run --extractor=$extractorExe"; } - elsif ($test =~ /^mert/) + elsif ($model_type eq "mert") { $cmd .= "$BIN_TEST/run-test-mert.perl $test_run"; } - elsif ($test =~ /^kenlmbin/) + elsif ($model_type eq "kenlmbin") { $cmd .= "$BIN_TEST/run-kenlm-binarizer.perl --binarizer=$kenlmBinarizer"; } -- cgit v1.2.3