RunTests: add fuzzy compare to ignore minor floating-point differences

RunTests: support -o override on empty output files w/o references
RunTests: support -o override on predict files w/o references
RunTests: improve diagnostics / error messages
RunTests: no fatal rename error if -o and rename ref is missing
author: ariel faigon <ariel.git@yendor.com> 2010-12-12 13:27:38 +0300
committer: JohnLangford <jl@hunch.net> 2010-12-12 19:28:01 +0300
commit: 7fc221f9fa0e1a43760fa0011ba8284263b2673a (patch)
tree: ae2e8e9d19be582256396d40631241d911e9ce0e /test
parent: 7e2d01226e0fb0a4ce7fc42f7b8da97fb88da4cb (diff)
1 files changed, 117 insertions, 23 deletions
diff --git a/test/RunTests b/test/RunTests
index c4e29835..44d5f865 100755
--- a/test/RunTests
+++ b/test/RunTests
@@ -8,7 +8,9 @@
 # See __DATA__ below for how to add more tests
 #
 use Getopt::Std;
-use vars qw($opt_d $opt_c $opt_e $opt_o);
+use vars qw($opt_d $opt_c $opt_e $opt_f $opt_o);
+
+my $Epsilon = 1e-4;
 
 my $VW;
 
@@ -23,10 +25,11 @@ sub usage(@) {
         .  ..  \$PATH
 
     Options:
-        -d  print diff output on diff-failure
         -c  print commands before running them
-        -o  Overwrite reference file with new/different result
+        -d  print diff output on diff-failure
         -e  Abort on first diff error
+        -f  Ignore small (< $Epsilon) floating-point differences (fuzzy compare)
+        -o  Overwrite reference file with new/different result
 ";
 }
 
@@ -46,8 +49,8 @@ sub which_vw() {
         foreach my $dir ('.', '..', split(':', $ENV{PATH})) {
             my $exe = "$dir/vw";
             if (-x $exe) {
-            printf STDERR "Testing vw: %s\n", $exe;
-            return $exe;
+                printf STDERR "Testing vw: %s\n", $exe;
+                return $exe;
             }
         }
     }
@@ -56,7 +59,7 @@ sub which_vw() {
 
 sub init() {
     $0 =~ s{.*/}{};
-    getopts('cdeo') || usage();
+    getopts('cdefo') || usage();
     $VW = which_vw();
 }
 
@@ -127,34 +130,129 @@ sub next_test() {
     ($cmd, $out_ref, $err_ref, $pred_ref, $pred);
 }
 
+#
+# If the difference is small (least significant digits of numbers)
+# treat it as ok. It may be a result of 32 vs 64 bit calculations.
+#
+use Scalar::Util qw(looks_like_number);
+
+sub lenient_array_compare($$) {
+    # Compare two token lists (array refs), tolerating numeric noise.
+    # Returns 0 if every token pair is string-equal or both are numbers
+    # within $Epsilon of each other; returns 1 on any other difference.
+    my ($w1_ref, $w2_ref) = @_;
+    my (@w1) = @$w1_ref;
+    my (@w2) = @$w2_ref;
+
+    return 1 if ($#w1 != $#w2); # arrays not of same size
+    for my $i (0 .. $#w1) {
+        next if ($w1[$i] eq $w2[$i]);
+        # There's some difference, is it significant?
+        return 1 unless (looks_like_number($w1[$i]));
+        return 1 unless (looks_like_number($w2[$i]));
+        # Use the magnitude: the sign depends only on argument order,
+        # so a large negative delta must count as a real difference too.
+        my $delta = abs($w1[$i] - $w2[$i]);
+        return 1 if ($delta > $Epsilon);
+    }
+    return 0; # no meaningful difference
+}
+
+# Fuzzy file compare: returns 0 when $outfile and $reffile differ only by
+# numeric noise (as judged by lenient_array_compare), non-zero otherwise.
+sub diff_lenient_float($$) {
+    my ($outfile, $reffile) = @_;
+    my $diff_opts = '-N --minimal --side-by-side --suppress-common-lines';
+    my $tmpf = 'lenient-diff.tmp';
+    # Side-by-side output puts each changed pair on one line: "old | new"
+    system("diff $diff_opts $outfile $reffile >$tmpf");
+    my $status = $? >> 8;
+    if (-s $tmpf) {
+        $status = 0;    # assume innocent till proven guilty
+        open(my $sdiff, '<', $tmpf)
+            || die "$0: diff_lenient_float: $tmpf: $!\n";
+        while (my $pair = <$sdiff>) {
+            chomp $pair;
+            my ($line1, $line2) = split(/\s+\|\s+/, $pair);
+            # No '|' marker: line exists in one file only (e.g. '<' or '>')
+            $line2 = '' unless (defined $line2);
+            my (@w1) = split(' ', $line1);
+            my (@w2) = split(' ', $line2);
+            if (lenient_array_compare(\@w1, \@w2) != 0) {
+                $status = 1;
+                last;
+            }
+        }
+        close $sdiff;
+    }
+    unlink($tmpf);    # don't leave the scratch file behind
+    $status;
+}
+
 sub diff($$) {
     my ($outfile, $reffile) = @_;
-    system("diff $outfile $reffile >diff.tmp");
-    my $status = $? >> 8;
+    my $status = 0;
+
+    # Special case, empty file w/o reference is not considered a failure.
+    # This is a most common case with stdout.
+    unless (-e $reffile) {
+        if (-s $outfile > 0) {
+            warn "$0: test $TestNo: stdout ref: $reffile: $!\n";
+            exit 1 if ($opt_e);
+            return 2 unless ($opt_o);
+        } else {
+            # Empty output without a ref is not considered a failure
+            if ($opt_o) {
+                print STDERR
+                  "$0: test $TestNo: -o: creating empty reference $reffile\n";
+                system("touch $reffile");
+            } else {
+                print STDERR
+                  "$0: test $TestNo: empty output with no reference: ignored.\n"
+            }
+            return 0;
+        }
+    }
+
+    # Actually run the diff
+    system("diff -N $outfile $reffile >diff.tmp");
+    $status = $? >> 8;
     if (-s 'diff.tmp') {
-        # There's a difference
         if ($opt_d) {
             system("cat diff.tmp")
         }
+        # There's a difference, but is it meaningful?
+        if ($opt_f && -e $outfile && -e $reffile &&
+            diff_lenient_float($outfile, $reffile) == 0) {
+
+            print STDERR "$0: test $TestNo: minor (<$Epsilon) precision differences ignored\n";
+            $status = 0;
+        }
         if ($opt_o) {
             print STDERR "-o: overwriting reference:\n";
 
-            print STDERR "\t$reffile -> $reffile.prev\n";
-            rename($reffile, "$reffile.prev") ||
-                die "FATAL: rename($reffile, $reffile.prev): $!\n";
-
+            if (-e $reffile) {
+                print STDERR "\t$reffile -> $reffile.prev\n";
+                rename($reffile, "$reffile.prev") ||
+                    die "FATAL: rename($reffile, $reffile.prev): $!\n";
+            }
             print STDERR "\t$outfile -> $reffile\n";
             rename($outfile, $reffile) ||
                 die "FATAL: rename($outfile, $reffile): $!\n";
+
+            $status = 0;
         }
     }
     $status;
 }
 
 sub run_tests() {
-
-    print STDERR "If 'FAILED' - rerun with -d to see diff output\n"
-	unless ($opt_d);
+    print STDERR "$0: '-d' to see diff output\n"
+        unless ($opt_d);
+    print STDERR "$0: '-o' to force overwrite references\n"
+        unless ($opt_o);
+    print STDERR "$0: '-e' to abort on first failure\n"
+        unless ($opt_e);
 
     my ($cmd, $out_ref, $err_ref, $pred_ref);
     my ($outf, $errf, $predf);
@@ -182,12 +280,6 @@ sub run_tests() {
 
         # command succeded
         # -- compare stdout
-        unless (-e $out_ref) {
-            warn "$0: test $TestNo: stdout ref: $out_ref: $!\n";
-            exit $status if ($opt_e);
-            next;
-        }
-
         $status = diff($outf, $out_ref);
         if ($status) {
             printf STDERR "%s: test %d: FAILED: stdout(%s) != ref(%s):\n",
@@ -199,7 +291,9 @@ sub run_tests() {
 
         # -- compare stderr
         unless (-e $err_ref) {
-            die "$0: test $TestNo: FAILED: stderr ref: $err_ref: $!\n";
+            print STDERR "$0: test $TestNo: FAILED: stderr ref: $err_ref: $!\n";
+            exit 1 if ($opt_e);
+            next;
         }
         $status = diff($errf, $err_ref);
         if ($status) {
author	ariel faigon <ariel.git@yendor.com>	2010-12-12 13:27:38 +0300
committer	JohnLangford <jl@hunch.net>	2010-12-12 19:28:01 +0300
commit	7fc221f9fa0e1a43760fa0011ba8284263b2673a (patch)
tree	ae2e8e9d19be582256396d40631241d911e9ce0e /test
parent	7e2d01226e0fb0a4ce7fc42f7b8da97fb88da4cb (diff)