- factor out class DetokenizerTestCase

- create an array of all of the test cases before running any of them

- in the case of an expected failure, move the TODO block deeper, just around the validation of the results

I'm not 100% sure I like this change; I think it makes the code slightly more elegant, but it also makes it longer.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4128 1f5c12ca-751b-0410-a591-d2e778427230
author: bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-05 22:48:37 +0400
committer: bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-05 22:48:37 +0400
commit: 024bbe0bcc3f9133d15c45089934bbb9fd5ab606 (patch)
tree: d1add0fc9c4eb604f8d822fd6cbbd218cbf0b61d /regression-testing
parent: d521287a3f3b95df44a7368c3e8bee4f15fed47f (diff)
1 files changed, 104 insertions, 24 deletions
diff --git a/regression-testing/run-test-detokenizer.t b/regression-testing/run-test-detokenizer.t
index 166a962c1..1331b4c19 100644
--- a/regression-testing/run-test-detokenizer.t
+++ b/regression-testing/run-test-detokenizer.t
@@ -13,6 +13,7 @@ use File::Spec::Functions;
 use File::Basename ('dirname');
 use IPC::Run3;
 use Getopt::Long;
+use Test::More;
 
 GetOptions("detokenizer=s" => \(my $detokenizer),
            "results-dir=s"=> \(my $results_dir)
@@ -29,14 +30,14 @@ $detokenizer = catfile(dirname(dirname(abs_path($0))), "scripts", "tokenizer", "
 die "ERROR: Detokenizer script ".$detokenizer." does not exist. Dying" unless -f $detokenizer;
 
 
-use Test::More;
+my @testCases = ();
 
 ######################################
 # Definitions of individual test cases
 ######################################
 
 # A simple English test
-&runDetokenizerTest("TEST_ENGLISH_EASY", "en",
+&addDetokenizerTest("TEST_ENGLISH_EASY", "en",
 <<'TOK'
 This sentence is really simple , so it should not be hard to detokenize .
 This one is no more difficult , but , hey , it is on a new line .
@@ -49,7 +50,7 @@ EXP
 );
 
 # An English test involving double-quotes
-&runDetokenizerTest("TEST_ENGLISH_DOUBLEQUOTES", "en",
+&addDetokenizerTest("TEST_ENGLISH_DOUBLEQUOTES", "en",
 <<'TOK'
 This is a somewhat " less simple " test .
 TOK
@@ -60,7 +61,7 @@ EXP
 );
 
 # A simple French test
-&runDetokenizerTest("TEST_FRENCH_EASY", "fr",
+&addDetokenizerTest("TEST_FRENCH_EASY", "fr",
 <<'TOK'
 Voici une phrase simple .
 TOK
@@ -71,7 +72,7 @@ EXP
 );
 
 # A French test involving an apostrophe
-&runDetokenizerTest("TEST_FRENCH_APOSTROPHE", "fr",
+&addDetokenizerTest("TEST_FRENCH_APOSTROPHE", "fr",
 <<'TOK'
 Moi , j' ai une apostrophe .
 TOK
@@ -81,11 +82,10 @@ Moi, j'ai une apostrophe.
 EXP
 );
 
-TODO: {
-    local $TODO = "A bug is causing this to be detokenized wrong.";
-
-# A French test involving an apostrophe on the second-last word
-&runDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
+# A (failing) French test involving an apostrophe on the second-last word
+{
+my $testCase =
+&addDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
 <<'TOK'
 de musique rap issus de l' immigration
 TOK
@@ -95,60 +95,77 @@ de musique rap issus de l'immigration
 EXP
 );
 
+$testCase->setExpectedToFail("A bug is causing this to be detokenized wrong.");
 }
 
 ######################################
-# end of individual test cases
+# Now run those babies ...
 ######################################
 
-done_testing();
+plan tests => scalar(@testCases);
 
+foreach my $testCase (@testCases) {
+    &runDetokenizerTest($testCase);
+}
 
 ############
 ## Utilities
 ############
 
+# Creates a detokenizer test case, queues it in @testCases to be run later, and returns it (so the caller can, e.g., call setExpectedToFail on it).
+sub addDetokenizerTest {
+    my ($testName, $language, $tokenizedText, $rightAnswer) = @_;
+    # All four arguments go straight to the constructor; Class->new avoids ambiguous indirect-object syntax.
+    my $testCase = DetokenizerTestCase->new($testName, $language, $tokenizedText, $rightAnswer);
+    push(@testCases, $testCase);
+    return $testCase;
+}
+
 sub runDetokenizerTest {
-    my ($testName, $language, $tokenizedString, $expectedString) = @_;
+    my ($testCase) = @_;
 
-    my $testOutputDir = catfile($results_dir, $testName);
+    my $testOutputDir = catfile($results_dir, $testCase->getName());
     my $tokenizedFile = catfile($testOutputDir, "input.txt");
     my $expectedFile = catfile($testOutputDir, "expected.txt");
 
     # Fail if we can't make the test output directory
     unless (mkdir($testOutputDir)) {
-	fail($testName.": Failed to create output directory ".$testOutputDir." [".$!."]");
+	fail($testCase->getName().": Failed to create output directory ".$testOutputDir." [".$!."]");
 	exit;
     }
     
     open TOK, ">".$tokenizedFile;
     binmode TOK, ":utf8";
-    print TOK $tokenizedString;
+    print TOK $testCase->getTokenizedText();
     close TOK;
     
     open TRUTH, ">".$expectedFile;
     binmode TRUTH, ":utf8";
-    print TRUTH $expectedString;
+    print TRUTH $testCase->getRightAnswer();
     close TRUTH;
 
-    &runTest($testName, $testOutputDir, $tokenizedFile, sub {
-	return [$detokenizer, "-l", $language];
+    &runTest($testCase->getName(), $testOutputDir, $tokenizedFile, sub {
+	return [$detokenizer, "-l", $testCase->getLanguage()];
     }, sub {
-	&verifyIdentical($testName, $expectedFile, catfile($testOutputDir, "stdout.txt"))
-    }, 1);
+	&verifyIdentical($testCase->getName(), $expectedFile, catfile($testOutputDir, "stdout.txt"))
+    }, 1, $testCase->getFailureExplanation());
 }
 
 # $stdinFile, if defined, is a file to send to the command via STDIN
 # $buildCommandRoutineReference is a reference to a zero-argument subroutine that returns the
-#                               command to run in the form of an array reference
+#                               system command to run in the form of an array reference
 # $validationRoutineReference is a reference to a zero-argument subroutine that makes some calls
 #                             to ok() or similar to validate the contents of the output directory
 # $separateStdoutFromStderr is an optional boolean argument; if omitted or false, the command's
 #                           STDOUT and STDERR are mixed together in out output file called
 #                           stdout-and-stderr.txt; otherwise, they are printed to separate output
 #                           files called stdout.txt and stderr.txt, respectively
+# $failureExplanation is an explanation of why the test is expected to fail.  If the test is expected
+#                     to pass, then this should be left undefined.  Even in the case of a test that
+#                     is expected to fail, the system command is still expected to exit normally --
+#                     only the validation routine is expected to fail.
 sub runTest {
-    my ($testName, $outputDir, $stdinFile, $buildCommandRoutineReference, $validationRoutineReference, $separateStdoutFromStderr) = @_;
+    my ($testName, $outputDir, $stdinFile, $buildCommandRoutineReference, $validationRoutineReference, $separateStdoutFromStderr, $failureExplanation) = @_;
 
     # Note: You may need to upgrade your version of the Perl module Test::Simple in order to get this 'subtest' thing to work. (Perl modules are installed/upgraded using CPAN; google 'how do I upgrade a perl module')
     subtest $testName => sub {
@@ -165,7 +182,15 @@ sub runTest {
 	my $exitStatus = &runVerbosely($commandRef, $stdinFile, $stdoutFile, $stderrFile);
 	return unless is($exitStatus, 0, $testName.": command exited with status 0");
 
-	$validationRoutineReference->();
+	if (defined $failureExplanation) {
+	  TODO: {
+	      local $TODO = $failureExplanation;
+	      $validationRoutineReference->();
+	    }
+	} else {
+	    $validationRoutineReference->();
+
+	}
     };
 }
 
@@ -197,3 +222,58 @@ sub verifyIdentical {
     close(OUT);
     is_deeply(\@outputFileAsArray, \@referenceFileAsArray, $testName.": Output file ".$outputFile." matches reference file ".$referenceFile.".");
 }
+
+
+##%%%%%%%%%%%%%%%%%%%%%%%%%%%##
+## DetokenizerTestCase class ##
+
+package DetokenizerTestCase;
+# Holds one detokenizer test: its name, language, tokenized text and right answer.
+# Constructor.  Positional arguments: name, language, tokenized text, right answer.
+sub new {
+    my ($class, $name, $language, $tokenizedText, $rightAnswer) = @_;
+    my %fields = (
+        _name               => $name,
+        _language           => $language,
+        _tokenizedText      => $tokenizedText,
+        _rightAnswer        => $rightAnswer,
+        # Stays undef unless setExpectedToFail is called on the object.
+        _failureExplanation => undef
+    );
+    return bless \%fields, $class;
+}
+
+# Returns the name given to the constructor.
+sub getName {
+    return $_[0]->{_name};
+}
+
+# Returns the language given to the constructor.
+sub getLanguage {
+    return $_[0]->{_language};
+}
+
+# Returns the tokenized text given to the constructor.
+sub getTokenizedText {
+    return $_[0]->{_tokenizedText};
+}
+
+# Returns the right answer (expected output) given to the constructor.
+sub getRightAnswer {
+    return $_[0]->{_rightAnswer};
+}
+
+# Marks this test case as expected to fail and records the explanation.  (The detokenizer script
+# is still expected to exit normally; only its output is not expected to match the right answer,
+# because of a bug or unimplemented use case.)  A false or omitted explanation gets a default.
+sub setExpectedToFail {
+    my $self = shift;
+    $self->{_failureExplanation} = shift || "This test is expected to fail.";
+}
+
+# Returns the string explaining why this test is expected to fail.
+# Returns undef when the test is expected to pass, i.e. when setExpectedToFail
+# has never been called on this object.
+sub getFailureExplanation {
+    return $_[0]->{_failureExplanation};
+}
author	bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230>	2011-08-05 22:48:37 +0400
committer	bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230>	2011-08-05 22:48:37 +0400
commit	024bbe0bcc3f9133d15c45089934bbb9fd5ab606 (patch)
tree	d1add0fc9c4eb604f8d822fd6cbbd218cbf0b61d /regression-testing
parent	d521287a3f3b95df44a7368c3e8bee4f15fed47f (diff)