put a couple of common functions into a utils Perl module; fixed a bit of broken format-checking

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@652 1f5c12ca-751b-0410-a591-d2e778427230
author: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-11 20:35:50 +0400
committer: eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-11 20:35:50 +0400
commit: b340381eb48db8c0669fa5e41f2edf1998170936 (patch)
tree: 8969ee7c57e4a31328650e3f82b422f7dc79a015 /regression-testing
parent: 1fbdbbde2109df4508eeb88236db8293354007bc (diff)
11 files changed, 115 insertions, 136 deletions
diff --git a/regression-testing/tests/basic-surface-binptable/filter-stderr b/regression-testing/tests/basic-surface-binptable/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/basic-surface-binptable/filter-stderr
+++ b/regression-testing/tests/basic-surface-binptable/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/basic-surface-only/filter-stderr b/regression-testing/tests/basic-surface-only/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/basic-surface-only/filter-stderr
+++ b/regression-testing/tests/basic-surface-only/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/confusionNet-surface-only/filter-stderr b/regression-testing/tests/confusionNet-surface-only/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/confusionNet-surface-only/filter-stderr
+++ b/regression-testing/tests/confusionNet-surface-only/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/multi-factor-binptable/filter-stderr b/regression-testing/tests/multi-factor-binptable/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/multi-factor-binptable/filter-stderr
+++ b/regression-testing/tests/multi-factor-binptable/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl b/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
index 5c70081ff..e764eec84 100755
--- a/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
+++ b/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
@@ -3,6 +3,7 @@
 #add_empties_to_phrase_table: go through an old-style pharaoh phrase table (no empty target sources) and add one such line for each single-word source phrase in the table,
 #complete with factors (note the number and type of factors are hardcoded here);
 #also add deletion-cost factors as necessary to all lines
+#Evan Herbst 7 / 11 / 06
 
 #usage: aetpt INPUT_PTABLE OUTPUT_PTABLE
 
diff --git a/regression-testing/tests/multi-factor-drop/filter-stderr b/regression-testing/tests/multi-factor-drop/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/multi-factor-drop/filter-stderr
+++ b/regression-testing/tests/multi-factor-drop/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/multi-factor/filter-stderr b/regression-testing/tests/multi-factor/filter-stderr
index ec3d82c38..8e6b67bc3 100755
--- a/regression-testing/tests/multi-factor/filter-stderr
+++ b/regression-testing/tests/multi-factor/filter-stderr
@@ -1,31 +1,22 @@
 #!/usr/bin/perl
+
+BEGIN {push @INC, "tests/perllib";} #expect we'll be called from regression-testing/
+use RegTestUtils;
+
 $x=0;
 while (<>) {
   chomp;
 
   if (/^Finished loading LanguageModels/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "LMLOAD_TIME ~ $time\n";
   }
   if (/^Finished loading phrase tables/) {
-    my $time = get_time($_);
+    my $time = RegTestUtils::readTime($_);
     print "PTLOAD_TIME ~ $time\n";
   }
   next unless /^BEST HYPO:/;
-  my $pscore = "FORMAT ERROR";
-  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [total score=-14.39] <<...>>
-  if (/\[1+\]\s*\[[^\]]*=(-?\d+\.\d+)\]/) {
-    $pscore = $1;
-  }
+  my $pscore = RegTestUtils::readHypoScore($_);
   $x++;
   print "SCORE_$x = $pscore\n";
 }
-
-sub get_time {
-  my $time = shift;
-  my $res = "FORMAT ERROR";
-  if ($time =~ /\[(\d+\.\d+)\]\s*seconds*/) {
-    $res = $1;
-  }
-  return $res;
-}
diff --git a/regression-testing/tests/perllib/RegTestUtils.pm b/regression-testing/tests/perllib/RegTestUtils.pm
new file mode 100644
index 000000000..f6fe1999c
--- /dev/null
+++ b/regression-testing/tests/perllib/RegTestUtils.pm
@@ -0,0 +1,31 @@
+#RegTestUtils.pm: for moses regression testing
+#Evan Herbst, 8 / 11 / 06
+
+use strict;
+
+package RegTestUtils;
+return 1;
+
+###############################################################
+
+#arguments: chomped line of output that gives the best hypo and various scores
+#return: the total hypothesis score captured from a "[0.12, <score>]" field,
+# as a decimal string such as "-14.39", or "FORMAT ERROR" if no such field is found
+sub readHypoScore
+{
+	my $line = shift;
+	#the 0.12 is hardcoded in Hypothesis.cpp because some parsing scripts still
+	#expect a comma-separated list of scores -- EVH; match it literally (escaped dot)
+	if($line =~ /\[0\.12, (-?\d+\.\d+)\]/) {return $1;}
+	return "FORMAT ERROR";
+}
+
+#arguments: a chomped output line that ends with a time like "[1.23] seconds"
+#return: the bracketed number as a string for comparison with the expected time,
+# or "FORMAT ERROR" when the line does not end in that form
+sub readTime
+{
+	my ($text) = @_;
+	return $1 if $text =~ /\[(\d+\.\d+)\]\s*seconds$/;
+	return "FORMAT ERROR";
+}
diff --git a/regression-testing/tests/ptable-filtering/filter-stderr b/regression-testing/tests/ptable-filtering/filter-stderr
index 95a4735dd..d7ea52114 100755
--- a/regression-testing/tests/ptable-filtering/filter-stderr
+++ b/regression-testing/tests/ptable-filtering/filter-stderr
@@ -3,29 +3,9 @@ $x=0;
 while (<>) {
   chomp;
 
-  if (/^\[.* ; 2-2\]$/o) {
-    my @lines;
-    my $done = 0;
-    while (!$done) {
-      $x = <>;
-      if ($x =~ /^\s*$/o) { $done = 1; } else {
-        chomp $x;
-        $x =~ s/^\s+//o;
-        push @lines, $x;
-      }
-    }
-    my $c = 0;
-    foreach my $x (sort @lines) {
-      $c++;
-      print "TRANSLATION_OPTION_$c=$x\n";
-    }
-  }
-
   next unless /^BEST HYPO:/;
   s/^BEST HYPO:\s*//;
   s/\s*\[111+.*$//;
   $x++;
   print "TRANSLATION_$x = $_\n";
-
 }
-
diff --git a/regression-testing/tests/ptable-filtering/filter-stdout b/regression-testing/tests/ptable-filtering/filter-stdout
index a0421ef93..1343c46d9 100755
--- a/regression-testing/tests/ptable-filtering/filter-stdout
+++ b/regression-testing/tests/ptable-filtering/filter-stdout
@@ -1,2 +1,23 @@
 #!/usr/bin/perl
 $x=0;
+while (<>) {
+  chomp;
+
+  if (/^\[.* ; 2-2\]$/o) {  # header line of the form "[... ; 2-2]" opens a block of option lines
+    my @lines;
+    my $done = 0;
+    while (!$done) {
+      $x = <>;  # note: reuses the file-level $x as the line buffer
+      if ($x =~ /^\s*$/o) { $done = 1; } else {  # a blank line (or EOF, where $x is undef) ends the block
+        chomp $x;
+        $x =~ s/^\s+//o;  # drop leading whitespace
+        push @lines, $x;
+      }
+    }
+    my $c = 0;
+    foreach my $x (sort @lines) {  # default string sort; this lexical $x shadows the outer one
+      $c++;
+      print "TRANSLATION_OPTION_$c=$x\n";
+    }
+  }
+}
diff --git a/regression-testing/tests/ptable-filtering/truth/results.dat b/regression-testing/tests/ptable-filtering/truth/results.dat
index 8ad5240a1..ae8bda18b 100644
--- a/regression-testing/tests/ptable-filtering/truth/results.dat
+++ b/regression-testing/tests/ptable-filtering/truth/results.dat
@@ -1,22 +1,22 @@
-TRANSLATION_OPTION_1=form of , pC=-0.57, c=-0.81
-TRANSLATION_OPTION_2=from , pC=-0.60, c=-0.78
-TRANSLATION_OPTION_3=money , pC=-0.38, c=-0.68
-TRANSLATION_OPTION_4=money transfers , pC=-0.29, c=-0.80
-TRANSLATION_OPTION_5=of transfers , pC=-0.32, c=-0.80
-TRANSLATION_OPTION_6=payments , pC=-0.49, c=-0.82
-TRANSLATION_OPTION_7=providing , pC=-0.54, c=-0.87
-TRANSLATION_OPTION_8=remittance , pC=-0.06, c=-0.63
-TRANSLATION_OPTION_9=remittance of , pC=-0.15, c=-0.73
-TRANSLATION_OPTION_10=remittances , , pC=-0.20, c=-0.79
-TRANSLATION_OPTION_11=remittances , pC=-0.04, c=-0.61
-TRANSLATION_OPTION_12=remittances , to , pC=-0.16, c=-0.86
-TRANSLATION_OPTION_13=remittances from , pC=-0.15, c=-0.75
-TRANSLATION_OPTION_14=represents , pC=-0.55, c=-0.85
-TRANSLATION_OPTION_15=sending money , pC=-0.33, c=-0.84
-TRANSLATION_OPTION_16=sent , pC=-0.49, c=-0.83
-TRANSLATION_OPTION_17=transfer , pC=-0.37, c=-0.72
-TRANSLATION_OPTION_18=transfer of money , pC=-0.35, c=-0.80
-TRANSLATION_OPTION_19=transfers , pC=-0.33, c=-0.69
-TRANSLATION_OPTION_20=transfers from , pC=-0.36, c=-0.76
+TRANSLATION_OPTION_1=form of , pC=-0.570639, c=-0.814663
+TRANSLATION_OPTION_2=from , pC=-0.604413, c=-0.783405
+TRANSLATION_OPTION_3=money , pC=-0.380378, c=-0.679572
+TRANSLATION_OPTION_4=money transfers , pC=-0.290805, c=-0.797336
+TRANSLATION_OPTION_5=of transfers , pC=-0.323035, c=-0.804664
+TRANSLATION_OPTION_6=payments , pC=-0.493163, c=-0.815826
+TRANSLATION_OPTION_7=providing , pC=-0.536236, c=-0.865767
+TRANSLATION_OPTION_8=remittance , pC=-0.058577, c=-0.62658
+TRANSLATION_OPTION_9=remittance of , pC=-0.153561, c=-0.725668
+TRANSLATION_OPTION_10=remittances , , pC=-0.201788, c=-0.793385
+TRANSLATION_OPTION_11=remittances , pC=-0.0431147, c=-0.611117
+TRANSLATION_OPTION_12=remittances , to , pC=-0.161446, c=-0.863898
+TRANSLATION_OPTION_13=remittances from , pC=-0.154321, c=-0.752515
+TRANSLATION_OPTION_14=represents , pC=-0.551077, c=-0.85218
+TRANSLATION_OPTION_15=sending money , pC=-0.332853, c=-0.835438
+TRANSLATION_OPTION_16=sent , pC=-0.490528, c=-0.832855
+TRANSLATION_OPTION_17=transfer , pC=-0.374108, c=-0.716859
+TRANSLATION_OPTION_18=transfer of money , pC=-0.35242, c=-0.801209
+TRANSLATION_OPTION_19=transfers , pC=-0.332958, c=-0.692228
+TRANSLATION_OPTION_20=transfers from , pC=-0.35855, c=-0.759244
 TRANSLATION_1 = from the west .
 TOTAL_WALLTIME ~ 14
author	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-11 20:35:50 +0400
committer	eherbst <eherbst@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-11 20:35:50 +0400
commit	b340381eb48db8c0669fa5e41f2edf1998170936 (patch)
tree	8969ee7c57e4a31328650e3f82b422f7dc79a015 /regression-testing
parent	1fbdbbde2109df4508eeb88236db8293354007bc (diff)