Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230>2011-08-05 18:32:39 +0400
committerbgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230>2011-08-05 18:32:39 +0400
commiteda0f4e370f4e9d2c9e8cfa7a5c106a18caf7135 (patch)
tree17e4dd4cec04124bd3f8bf694552801f36be8f65 /regression-testing
parent30ca534b861ed7fa7f50ce98c27001eacbf7cba7 (diff)
An initial test suite for detokenizer.perl.
I realize this doesn't quite fit the paradigm if the existing moses test suite. On the other hand, it's self-contained, easy to run, easy to add tests to (just follow the pattern in the section titled 'Definitions of individual test cases'), and uses an established Perl testing framework. I don't think it will be infeasible to incorporate it into the existing test suite. Usage: run-test-detokenizer.t --results-dir <RESULTS-DIRECTORY> where <RESULTS-DIRECTORY> is an empty existing directory where the output can be written git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4121 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'regression-testing')
-rw-r--r--regression-testing/run-test-detokenizer.t161
1 files changed, 161 insertions, 0 deletions
diff --git a/regression-testing/run-test-detokenizer.t b/regression-testing/run-test-detokenizer.t
new file mode 100644
index 000000000..b44890ebb
--- /dev/null
+++ b/regression-testing/run-test-detokenizer.t
@@ -0,0 +1,161 @@
+#!/usr/bin/perl -w
+#
+# Detokenization tests.
+#
+
+use strict;
+# This is here to suppress (false) warnings about OLDOUT and OLDERR being used only once. Maybe there is a less brutish way to suppress that, but I don't know it.
+no warnings 'once';
+use utf8;
+
+use Cwd ('abs_path');
+use File::Spec::Functions;
+use File::Basename ('dirname');
+use IPC::Run3;
+use Getopt::Long;
+
+use MosesRegressionTesting;
+
+GetOptions("detokenizer=s" => \(my $detokenizer),
+ "results-dir=s"=> \(my $results_dir)
+ ) or exit 1;
+
+unless (defined $results_dir) {
+ print STDERR "Usage: run-test-detokenizer.t --results-dir <RESULTS-DIRECTORY> [--detokenizer <DETOKENIZER-SCRIPT>]\n";
+ exit 1;
+}
+
+die "ERROR: Results directory ".$results_dir." doesn't exist or is not a writable directory. Dying" unless (-d $results_dir && -w $results_dir);
+
+$detokenizer = catfile(dirname(dirname(abs_path($0))), "scripts", "tokenizer", "detokenizer.perl") unless $detokenizer;
+die "ERROR: Detokenizer script ".$detokenizer." does not exist. Dying" unless -f $detokenizer;
+
+
+use Test::More;
+
+######################################
+# Definitions of individual test cases
+######################################
+
+# A simple English test
+&runDetokenizerTest("TEST_ENGLISH_EASY", "en",
+<<'TOK',
+This sentence is really simple , so it should not be hard to detokenize .
+This one is no more difficult , but , hey , it is on a new line .
+TOK
+<<'EXP'
+This sentence is really simple, so it should not be hard to detokenize.
+This one is no more difficult, but, hey, it is on a new line.
+EXP
+);
+
+# A simple French test
+&runDetokenizerTest("TEST_FRENCH_EASY", "fr",
+<<'TOK',
+Ici une phrase simple .
+TOK
+<<'EXP'
+Ici une phrase simple.
+EXP
+);
+
+######################################
+# end of individual test cases
+######################################
+
+done_testing();
+
+
+############
+## Utilities
+############
+
+sub runDetokenizerTest {
+ my ($testName, $language, $tokenizedString, $expectedString) = @_;
+
+ my $testOutputDir = catfile($results_dir, $testName);
+ my $tokenizedFile = catfile($testOutputDir, "input.txt");
+ my $expectedFile = catfile($testOutputDir, "expected.txt");
+
+ # Fail if we can't make the test output directory
+ unless (mkdir($testOutputDir)) {
+ fail($testName.": Failed to create output directory ".$testOutputDir." [".$!."]");
+ exit;
+ }
+
+ open TOK, ">".$tokenizedFile;
+ binmode TOK, ":utf8";
+ print TOK $tokenizedString;
+ close TOK;
+
+ open TRUTH, ">".$expectedFile;
+ binmode TRUTH, ":utf8";
+ print TRUTH $expectedString;
+ close TRUTH;
+
+ &runTest($testName, $testOutputDir, $tokenizedFile, sub {
+ return [$detokenizer, "-l", $language];
+ }, sub {
+ &verifyIdentical($testName, $expectedFile, catfile($testOutputDir, "stdout.txt"))
+ }, 1);
+}
+
+# $stdinFile, if defined, is a file to send to the command via STDIN
+# $buildCommandRoutineReference is a reference to a zero-argument subroutine that returns the
+# command to run in the form of an array reference
+# $validationRoutineReference is a reference to a zero-argument subroutine that makes some calls
+# to ok() or similar to validate the contents of the output directory
+# $separateStdoutFromStderr is an optional boolean argument; if omitted or false, the command's
+# STDOUT and STDERR are mixed together in out output file called
+# stdout-and-stderr.txt; otherwise, they are printed to separate output
+# files called stdout.txt and stderr.txt, respectively
+sub runTest {
+ my ($testName, $outputDir, $stdinFile, $buildCommandRoutineReference, $validationRoutineReference, $separateStdoutFromStderr) = @_;
+
+ # Note: You may need to upgrade your version of the Perl module Test::Simple in order to get this 'subtest' thing to work. (Perl modules are installed/upgraded using CPAN; google 'how do I upgrade a perl module')
+ subtest $testName => sub {
+ my ($stdoutFile, $stderrFile);
+ if ($separateStdoutFromStderr) {
+ $stdoutFile = catfile($outputDir, "stdout.txt");
+ $stderrFile = catfile($outputDir, "stderr.txt");
+ } else {
+ $stdoutFile = catfile($outputDir, "stdout-and-stderr.txt");
+ $stderrFile = $stdoutFile;
+ }
+
+ my $commandRef = $buildCommandRoutineReference->();
+ my $exitStatus = &runVerbosely($commandRef, $stdinFile, $stdoutFile, $stderrFile);
+ return unless is($exitStatus, 0, $testName.": command exited with status 0");
+
+ $validationRoutineReference->();
+ };
+}
+
+# Announce that we're going to run the given command, then run it.
+# $stdinFile, if defined, is a file to send to the command via STDIN
+# $stdoutFile and $stderrFile, if defined, are file paths to which the command's standard output
+# and standard error, respectively, are written. They can be the same file.
+# The exit code of the command is returned.
+sub runVerbosely {
+ my ($commandRef, $stdinFile, $stdoutFile, $stderrFile) = @_;
+ my @command = @{$commandRef};
+ note("Executing command:\n @command\n");
+ note("standard input coming from: ".$stdinFile) if defined $stdinFile;
+ note("standard output going to: ".$stdoutFile) if defined $stdoutFile;
+ note("standard error going to: ".$stderrFile) if defined $stderrFile;
+ run3($commandRef, $stdinFile, $stdoutFile, $stderrFile);
+ return $?;
+}
+
+# Verify that the given output file is identical to the given reference file.
+sub verifyIdentical {
+ my ($testName, $referenceFile, $outputFile) = @_;
+
+ open(REF, $referenceFile) or return fail($testName.": Can't open reference file ".$referenceFile." [".$!."].");
+ open(OUT, $outputFile) or return fail($testName.": Can't open output file ".$outputFile." [".$!."].");
+ my @referenceFileAsArray = <REF>;
+ my @outputFileAsArray = <OUT>;
+ close(REF);
+ close(OUT);
+ is_deeply(\@outputFileAsArray, \@referenceFileAsArray, $testName.": Output file ".$outputFile." matches reference file ".$referenceFile.".");
+}