check in regression tests to CVS

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@557 1f5c12ca-751b-0410-a591-d2e778427230
author: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-08 19:54:30 +0400
committer: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-08 19:54:30 +0400
commit: e9fd44ef132674335d88f627504431a5e98bac34 (patch)
tree: 5e665c350f1551c965a7003fb38eea28dd1f54bd /regression-testing
parent: 264f045a6bff3e7004e5860d997c36fc86d55662 (diff)
43 files changed, 1269 insertions, 0 deletions
diff --git a/regression-testing/MosesRegressionTesting.pm b/regression-testing/MosesRegressionTesting.pm
new file mode 100644
index 000000000..c332a3dcc
--- /dev/null
+++ b/regression-testing/MosesRegressionTesting.pm
@@ -0,0 +1,75 @@
+package MosesRegressionTesting;
+
+use strict;
+
+# if your tests need a new version of the test data, increment this
+# and make sure that a moses-regression-tests-vX.Y is available for
+# download from statmt.org (redpony AT umd dot edu for more info)
+use constant TESTING_DATA_VERSION => '0.1';
+
+# Locate the regression-test data directory.  An explicitly supplied
+# $data_dir is tried first, then a directory named after the data version
+# under the test script root, /export/ws06osmt/regression-testing, /tmp
+# and /var/tmp.  A candidate is accepted only if it has models/ and lm/.
+# Returns the first acceptable path; otherwise prints installation
+# instructions to STDERR and exits with status 1.
+sub find_data_directory
+{
+  my ($test_script_root, $data_dir) = @_;
+  my $data_version = TESTING_DATA_VERSION;
+  my $versioned_name = "moses-reg-test-data-$data_version";
+  my @candidates = defined $data_dir ? ($data_dir) : ();
+  push @candidates, map { "$_/$versioned_name" }
+    ($test_script_root, '/export/ws06osmt/regression-testing', '/tmp', '/var/tmp');
+  # the first usable candidate wins
+  CANDIDATE: foreach my $dir (@candidates) {
+    next CANDIDATE unless -d $dir;
+    foreach my $required (qw( models lm )) {
+      next if -d "$dir/$required";
+      print STDERR "Found $dir but it is malformed: missing subdir $required/\n";
+      next CANDIDATE;
+    }
+    return $dir;
+  }
+  # nothing usable was found: explain how to install the data
+  print STDERR<<EOT;
+
+You do not appear to have the regression testing data installed.  You may
+either specify a non-standard location when running the test suite with
+the --data-dir option, or, you may install it in any one of the following
+standard locations: $test_script_root, /tmp, or /var/tmp with these
+commands:
+
+  cd <DESIRED_INSTALLATION_DIRECTORY>
+  wget http://www.statmt.org/moses/reg-testing/moses-regression-tests-v$data_version.tar
+  tar xf moses-regression-tests-v$data_version.tar
+  rm moses-regression-tests-v$data_version.tar
+
+EOT
+  exit 1;
+}
+
+# Write a machine-specific copy of a test's moses.ini to a temporary file.
+# Every ${LM_PATH} is replaced by "$data_dir/lm" and every ${MODEL_PATH}
+# by "$data_dir/models".  Returns the name of the temporary .ini file; it
+# is not removed automatically (UNLINK => 0), so the caller must delete it.
+sub get_localized_moses_ini
+{
+  use File::Temp;
+  my ($moses_ini, $data_dir) = @_;
+  my ($LM_PATH, $MODEL_PATH) = ("$data_dir/lm", "$data_dir/models");
+  my $local_moses_ini = File::Temp->new( UNLINK => 0, SUFFIX => '.ini' );
+  open(my $in, '<', $moses_ini) or die "Couldn't read $moses_ini: $!";
+  while (my $line = <$in>) {
+    $line =~ s/\$\{LM_PATH\}/$LM_PATH/g;
+    $line =~ s/\$\{MODEL_PATH\}/$MODEL_PATH/g;
+    print $local_moses_ini $line;
+  }
+  close $in;
+  # flush explicitly so the file is complete before its name is handed out
+  close $local_moses_ini or die "Couldn't write $local_moses_ini: $!";
+  return $local_moses_ini->filename;
+}
+
+1;
+
diff --git a/regression-testing/compare-results.pl b/regression-testing/compare-results.pl
new file mode 100755
index 000000000..a05bc7411
--- /dev/null
+++ b/regression-testing/compare-results.pl
@@ -0,0 +1,82 @@
+#!/usr/bin/perl -w
+
+use strict;
+# usage: compare-results.pl RESULTS_DIR TRUTH_DIR   (exit status 1 = failures)
+my ($results, $truth) = @ARGV;
+my ($report, $pass, $fail) = compare_results("$results/results.dat", "$truth/results.dat");
+print $report;
+if (open(my $summary, '>', "$results/Summary")) {
+  print $summary $report;
+  close $summary or warn "Could not finish writing $results/Summary: $!\n";
+} else { warn "Could not write $results/Summary: $!\n"; }
+if ($fail > 0) {
+  print <<EOT;
+
+There were failures in this test run.  Please analyze the results carefully.
+
+EOT
+  exit 1;
+}
+exit 0;
+
+# Compare a test run's results file against the reference ("truth") file and
+# return ($report, $pass, $fail).  Keys the truth file marks with '=' must
+# match exactly and are counted as pass/fail; all other keys are compared
+# numerically and only reported, never counted.  Keys missing from the test
+# results, or present only there, count as failures.
+sub compare_results {
+  my ($testf, $truthf) = @_;
+  my ($test, $truth) = (read_results($testf), read_results($truthf));
+  my $ct1 = delete $truth->{'COMPARISON_TYPE'};
+  delete $test->{'COMPARISON_TYPE'};  # bookkeeping entry, not a test result
+  my ($pass, $fail, $report) = (0, 0, '');
+  foreach my $k (sort keys %$truth) {
+    $report .= "test-name=$k\tresult=";
+    if (!exists $test->{$k}) {
+      $report .= "missing from test results\n";
+      $fail++;
+      next;
+    }
+    # keep a literal "0" (e.g. a zero wall time); only undef becomes ''
+    my ($truthv, $testv) = map { defined $_ ? $_ : '' } ($truth->{$k}, delete $test->{$k});
+    if ($ct1->{$k} eq '=') {
+      if ($truthv eq $testv) {
+        $report .= "pass\n";
+        $pass++;
+      } else {
+        $report .= "fail\n\tTRUTH=$truthv\n\t TEST=$testv\n";
+        $fail++;
+      }
+    } else { # numeric difference
+      my $diff = $testv - $truthv;
+      if ($diff == 0) { $report .= "identical\n"; next; }
+      $report .= "BASELINE=$truthv, TEST=$testv\t  DELTA=$diff";
+      if ($truthv != 0) {
+        $report .= sprintf "\t PCT CHANGE=%4.2f", ($diff/$truthv)*100;
+      }
+      $report .= "\n";
+    }
+  }
+  foreach my $k (sort keys %$test) {
+    $fail++;
+    $report .= "test-name=$k\tfound in TEST but not in TRUTH.\n";
+  }
+  $report .= "\nTESTS PASSED=$pass\nTESTS FAILED=$fail\n";
+  return $report, $pass, $fail;
+}
+
+# Parse "KEY = VALUE" / "KEY ~ VALUE" lines of a results file into a hash ref;
+# the 'COMPARISON_TYPE' entry maps each KEY to its operator ('=' or '~').
+sub read_results {
+  my ($file) = @_;
+  open(my $in, '<', $file) or die "Could not open $file: $!";
+  my %res;
+  while (my $line = <$in>) {
+    next unless $line =~ /^([A-Za-z0-9_]+)\s*([=~])\s*(.+)$/;
+    $res{$1} = $3;
+    $res{'COMPARISON_TYPE'}->{$1} = $2;
+  }
+  close $in;
+  return \%res;
+}
+
diff --git a/regression-testing/run-single-test.pl b/regression-testing/run-single-test.pl
new file mode 100755
index 000000000..0e9c84fe2
--- /dev/null
+++ b/regression-testing/run-single-test.pl
@@ -0,0 +1,135 @@
+#!/usr/bin/perl -w
+
+use strict;
+my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
+use MosesRegressionTesting;
+use Getopt::Long;
+use File::Temp qw ( tempfile );
+use POSIX qw ( strftime );
+my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN );
+# Run one regression test.  Exit status: 0 pass, 1 comparison failed, 2 decoder killed by a signal, 3 decoder failed.
+my ($decoder, $test_name);
+my $test_dir = "$script_dir/tests";
+my $data_dir;
+my $BIN_TEST = $script_dir;
+my $results_dir;
+# --test-dir must be accepted: run-test-suite passes it on every invocation
+GetOptions("decoder=s" => \$decoder,
+           "test=s"    => \$test_name,
+           "data-dir=s"=> \$data_dir,
+           "results-dir=s"=> \$results_dir,
+           "test-dir=s"=> \$test_dir,
+          );
+die "Please specify a decoder with --decoder\n" unless $decoder;
+die "Please specify a test to run with --test\n" unless $test_name;
+
+die "Please specify the location of the data directory with --data-dir\n" unless $data_dir;
+
+die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
+
+$test_dir .= "/$test_name";
+die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
+
+#### get place to put results
+unless (defined $results_dir) { $results_dir = "$data_dir/results"; }
+if (!-d $results_dir) {
+  print STDERR "[WARNING] Results directory not found.\n";
+  mkdir ($results_dir) || die "Failed to create $results_dir";
+}
+$results_dir .= "/$test_name";
+if (!-d $results_dir) {
+  print STDERR "[WARNING] Results directory for test=$test_name could not be found.\n";
+  mkdir ($results_dir) || die "Failed to create $results_dir";
+}
+##########
+
+my $conf = "$test_dir/moses.ini";
+my $input = "$test_dir/to-translate";
+
+die "Cannot locate executable called $decoder\n" unless (-x $decoder);
+die "Cannot find $conf\n" unless (-f $conf);
+die "Cannot locate input at $input" unless (-f $input);
+# copy of moses.ini with ${LM_PATH}/${MODEL_PATH} filled in from --data-dir
+my $local_moses_ini = MosesRegressionTesting::get_localized_moses_ini($conf, $data_dir);
+
+my $ts = get_timestamp($decoder);
+my $results = "$results_dir/$ts";
+mkdir($results) || die "Failed to create results directory: $results\n";
+
+my $truth = "$test_dir/truth";
+if (!-d $truth) {
+  die "Could not find truth/ in $test_dir!\n";
+}
+
+print "RESULTS AVAILABLE IN: $results\n\n";
+
+my ($o, $elapsed, $ec, $sig) = exec_moses($decoder, $local_moses_ini, $input, $results);
+my $error = ($sig || $ec > 0);
+if ($error) {
+  # report the crash on STDERR and, when possible, in the Summary file
+  my $crash = "MOSES CRASHED.\n\texit_code=$ec\n\tsignal=$sig\n"
+            . "FAILURE, for debugging, local moses.ini=$local_moses_ini\n";
+  print STDERR $crash;
+  if (open(my $summary, '>', "$results/Summary")) { print $summary $crash; close $summary; }
+  else { warn "Could not write $results/Summary: $!\n"; }
+  exit 2 if $sig;
+  exit 3;
+}
+# the per-test filters turn the raw decoder output into results.dat entries
+($o, $ec, $sig) = run_command("$test_dir/filter-stdout $results/run.stdout > $results/results.dat");
+warn "filter-stdout failed!" if ($ec > 0 || $sig);
+($o, $ec, $sig) = run_command("$test_dir/filter-stderr $results/run.stderr >> $results/results.dat");
+warn "filter-stderr failed!" if ($ec > 0 || $sig);
+
+open(my $dat, '>>', "$results/results.dat") or die "Could not append to $results/results.dat: $!\n";
+print $dat "TOTAL_WALLTIME ~ $elapsed\n";
+close $dat;
+
+run_command("gzip $results/run.stdout");
+run_command("gzip $results/run.stderr");
+
+($o, $ec, $sig) = run_command("$BIN_TEST/compare-results.pl $results $truth");
+print $o;
+if ($ec || $sig) {   # a comparison killed by a signal is a failure too
+  print STDERR "FAILURE, for debugging, local moses.ini=$local_moses_ini\n";
+  exit 1;
+}
+
+unlink $local_moses_ini or warn "Couldn't remove $local_moses_ini\n";
+exit 0;
+# Run the decoder on $input with config $conf, saving stdout/stderr under
+# $results; returns (output, elapsed seconds, exit code, signal).
+sub exec_moses {
+  my ($decoder, $conf, $input, $results) = @_;
+  my $began = time;
+  my @status = run_command("$decoder -f $conf -i $input 1> $results/run.stdout 2> $results/run.stderr");
+  return ($status[0], time - $began, @status[1, 2]);
+}
+# Run $cmd with backticks, capturing its standard output.  Returns
+# (output, exit code, signal), where signal is 0 when the command was not
+# killed by a signal and otherwise whatever sig_name() yields for it.
+sub run_command {
+  my ($cmd) = @_;
+  my $captured = `$cmd`;
+  my $status = $?;   # save at once; low 7 bits = signal, high byte = exit code
+  my $killed_by = $status & 127;
+  $killed_by = sig_name($killed_by) if $killed_by;
+  return $captured, $status >> 8, $killed_by;
+}
+# Signal number -> "SIGNAME" via perl's own table (@SIGS is misaligned for signals 1-5 on Linux).
+sub sig_name {
+  use Config; my ($sig, @names) = (shift, split(' ', $Config{sig_name}));
+  return 'SIG' . (defined $names[$sig] ? $names[$sig] : $sig);
+}
+# Build the per-run results directory name from the mtime of $file (the
+# caller passes the decoder binary), the current user and the current time,
+# both timestamps in UTC.
+sub get_timestamp {
+  my ($file) = @_;
+  my $mtime = (stat($file))[9];
+  my $fmt = "%Y%m%d-%H%M%S";
+  my ($built, $now) = (strftime($fmt, gmtime $mtime), strftime($fmt, gmtime));
+  my $username = `whoami`; chomp $username;
+  return "moses.v$built-$username-at-$now";
+}
+
diff --git a/regression-testing/run-test-suite b/regression-testing/run-test-suite
new file mode 100755
index 000000000..e6061dee5
--- /dev/null
+++ b/regression-testing/run-test-suite
@@ -0,0 +1,93 @@
+#!/usr/bin/perl -w
+
+use strict;
+my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
+use Getopt::Long;
+
+############################################################
+my @tests = qw (
+  basic-surface-only
+  ptable-filtering
+  multi-factor
+  multi-factor-drop
+  confusionNet-surface-only
+  basic-surface-binptable
+  multi-factor-binptable
+               );
+############################################################
+
+use MosesRegressionTesting;
+use File::Temp qw ( tempfile );
+use POSIX qw ( strftime );
+
+my $decoder;
+my $test_dir;
+my $BIN_TEST = $script_dir;
+my $data_dir;
+
+GetOptions("decoder=s" => \$decoder,
+           "data-dir=s" => \$data_dir,
+          );
+# find_data_directory exits with installation instructions if nothing usable is found
+$data_dir = MosesRegressionTesting::find_data_directory($BIN_TEST, $data_dir);
+# command prefix shared by every test; "--test=NAME" is appended per test
+my $test_run = "$BIN_TEST/run-single-test.pl --data-dir=$data_dir";
+$test_dir = $script_dir . "/tests";
+$test_run .= " --test-dir=$test_dir" if $test_dir;
+
+print "Data directory: $data_dir\n";
+
+die "Please specify a decoder to test with --decoder\n" unless $decoder;
+
+die "Cannot locate executable called $decoder\n" unless (-x $decoder);
+
+$test_run .= " --decoder=$decoder";
+
+print "Running tests: @tests\n\n";
+# header row and ruler of the results table
+print "TEST NAME               STATUS     PATH TO RESULTS\n";
+my $lb = "---------------------------------------------------------------------------------------------------------\n";
+print $lb;
+
+my $fail = 0;   # number of tests whose status was FAIL
+my @failed;     # names of those tests, listed in the final summary
+foreach my $test (@tests) {
+  my $cmd = "$test_run --test=$test";
+  my ($res, $output, $results_path) = do_test($cmd);   # (status, captured output, results dir)
+  format STDOUT =
+@<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+$test,                   $res,      $results_path
+.
+  write;   # prints one table row using the STDOUT format declared just above
+  if ($res eq 'FAIL') {
+    print "$lb$output$lb";   # show the failed test's captured output between two rulers
+    $fail++;
+    push @failed, $test;
+  } else {
+# on success only the timing line is shown; it looks like:
+# TOTAL_WALLTIME  result=BASELINE=11, TEST=12       DELTA=1        PCT CHANGE=9.09
+    if ($output =~ /TOTAL_WALLTIME\s+result\s*=\s*([^\n]+)/o) {
+      print "\t\tTiming statistics: $1\n";
+    }
+  }
+}
+# Summary: whole-number pass/fail percentages, then the names of the failures.
+my $total = scalar @tests;
+my $fail_percentage = int(100 * $fail / $total);
+my $pass_percentage = int(100 * ($total-$fail) / $total);
+print "\n$pass_percentage% of the tests passed.\n";
+print "$fail_percentage% of the tests failed.\n";
+if ($fail > 0) { print "\nPLEASE INVESTIGATE THESE FAILED TESTS: @failed\n"; }
+
+# Run one test command with stderr folded into stdout.  Returns (status,
+# output, results_dir): status is 'PASS' or 'FAIL'; results_dir comes from the
+# "RESULTS AVAILABLE IN:" line, which is then blanked out of the output.
+sub do_test {
+  my ($test) = @_;
+  my $o = `$test 2>&1`;
+  my $res = ($? != 0) ? 'FAIL' : 'PASS';  # $? is -1 if the command never started
+  $o = '' unless defined $o;
+  my $od = '';
+  if ($o =~ /RESULTS AVAILABLE IN: (.*)$/m) { $od = $1; $o =~ s/^RESULTS AVAIL.*$//mo; }
+  return ($res, $o, $od);
+}
+
diff --git a/regression-testing/tests/basic-surface-binptable/filter-stderr b/regression-testing/tests/basic-surface-binptable/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/basic-surface-binptable/filter-stderr
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+$x=0;
+while (my $line = <>) {
+  chomp $line;
+
+  # load-time lines carry the elapsed time as "[N.NN] seconds"
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+
+  # everything else is ignored except the BEST HYPO lines, which look like:
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  next unless $line =~ /^BEST HYPO:/;
+  # the score is the last number of the bracketed list after the [1...1] field
+  my $matched = $line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/;
+  my $pscore = $matched ? $1 : "FORMAT ERROR";
+  $x++;
+  print "SCORE_$x = $pscore\n";
+}
+
+# Pull the elapsed time out of a log line such as "... [12.00] seconds".
+# Returns the number as written, or the string "FORMAT ERROR" when the
+# line contains no bracketed decimal followed by "second".
+sub get_time {
+  my ($text) = @_;
+  return $1 if $text =~ /\[(\d+\.\d+)\]\s*seconds*/;
+  return "FORMAT ERROR";
+}
diff --git a/regression-testing/tests/basic-surface-binptable/filter-stdout b/regression-testing/tests/basic-surface-binptable/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/basic-surface-binptable/filter-stdout
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+# Number each input line: line N is printed as TRANSLATION_N=<text>.
+$x = 0;
+while (my $line = <>) {
+  chomp $line;
+  print "TRANSLATION_" . ++$x . "=$line\n";
+}
diff --git a/regression-testing/tests/basic-surface-binptable/moses.ini b/regression-testing/tests/basic-surface-binptable/moses.ini
new file mode 100644
index 000000000..d51f2bd3d
--- /dev/null
+++ b/regression-testing/tests/basic-surface-binptable/moses.ini
@@ -0,0 +1,52 @@
+# Moses configuration file
+# automatic exodus from pharaoh.ini Wed Jul 12 18:24:14 EDT 2006
+
+###########################
+### PHARAOH CONFIG FILE ###
+###########################
+
+# phrase table f, n, p(n|f)
+[ttable-file]
+0 0 5 ${MODEL_PATH}/basic-surface-binptable/phrase-table.gz
+
+# language model
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+
+# limit on how many phrase translations e for each phrase f are loaded
+[ttable-limit]
+#ttable element load limit 0 = all elements loaded
+20
+
+# distortion (reordering) weight
+[weight-d]
+0.141806519223522
+
+# language model weight
+[weight-l]
+0.142658800199951
+
+# translation model weight (phrase translation, lexical weighting)
+[weight-t]
+0.00402447059454402
+0.0685647475075862
+0.294089113124688
+0.0328320356515851
+-0.0426081987467227
+
+# word penalty
+[weight-w]
+-0.273416114951401
+
+[distortion-limit]
+4
+
+[beam-threshold]
+0.03
+
+[input-factors]
+0
+
+[mapping]
+T 0
+
diff --git a/regression-testing/tests/basic-surface-binptable/to-translate b/regression-testing/tests/basic-surface-binptable/to-translate
new file mode 100644
index 000000000..a505702d1
--- /dev/null
+++ b/regression-testing/tests/basic-surface-binptable/to-translate
@@ -0,0 +1,5 @@
+ich frage sie also , herr pr�sident : stellen die unterschiedlichen arbeitskosten somit nicht auch eine beschr�nkung des freien wettbewerbs in der europ�ischen union dar ?
+schaut man sich die f�lligkeitspl�ne der ausf�hrung des haushalts f�r die rubriken 2 , 3 , 4 und 7 an , stellt man fest , dass nur durchschnittlich 8 % aller verpflichtungen durch zahlungen gedeckt sind .
+vor drei jahren haben wir mit unserer besch�ftigungsinitiative begonnen , indem wir kleinen und mittleren unternehmen halfen , chancenkapital zu bekommen .
+das parlament will das auf zweierlei weise tun .
+nur dann werden die europ�ischen institutionen auch ihrem auftrag gerecht .
diff --git a/regression-testing/tests/basic-surface-binptable/truth/results.dat b/regression-testing/tests/basic-surface-binptable/truth/results.dat
new file mode 100644
index 000000000..30b87afaa
--- /dev/null
+++ b/regression-testing/tests/basic-surface-binptable/truth/results.dat
@@ -0,0 +1,13 @@
+TRANSLATION_1=i ask you , therefore , mr president , the different labour costs are therefore not a restriction of free competition in the european union ? 
+TRANSLATION_2=if we look at the f�lligkeitspl�ne the implementation of the budget for the categories 2 , 3 , 4 and 7 to , we see that only an average of 8 % of commitments by payments are met . 
+TRANSLATION_3=three years ago our employment strategy , we started by small and medium-sized enterprises halfen , chancenkapital to obtain . 
+TRANSLATION_4=parliament wants the in two ways . 
+TRANSLATION_5=only then will the european institutions to its mandate . 
+LMLOAD_TIME ~ 10.00
+PTLOAD_TIME ~ 10.00
+SCORE_1 = -14.84
+SCORE_2 = -33.76
+SCORE_3 = -40.24
+SCORE_4 = -6.00
+SCORE_5 = -7.02
+TOTAL_WALLTIME ~ 23
diff --git a/regression-testing/tests/basic-surface-only/filter-stderr b/regression-testing/tests/basic-surface-only/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/basic-surface-only/filter-stderr
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+$x=0;
+while (my $line = <>) {
+  chomp $line;
+
+  # load-time lines carry the elapsed time as "[N.NN] seconds"
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+
+  # everything else is ignored except the BEST HYPO lines, which look like:
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  next unless $line =~ /^BEST HYPO:/;
+  # the score is the last number of the bracketed list after the [1...1] field
+  my $matched = $line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/;
+  my $pscore = $matched ? $1 : "FORMAT ERROR";
+  $x++;
+  print "SCORE_$x = $pscore\n";
+}
+
+# Pull the elapsed time out of a log line such as "... [12.00] seconds".
+# Returns the number as written, or the string "FORMAT ERROR" when the
+# line contains no bracketed decimal followed by "second".
+sub get_time {
+  my ($text) = @_;
+  return $1 if $text =~ /\[(\d+\.\d+)\]\s*seconds*/;
+  return "FORMAT ERROR";
+}
diff --git a/regression-testing/tests/basic-surface-only/filter-stdout b/regression-testing/tests/basic-surface-only/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/basic-surface-only/filter-stdout
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+# Number each input line: line N is printed as TRANSLATION_N=<text>.
+$x = 0;
+while (my $line = <>) {
+  chomp $line;
+  print "TRANSLATION_" . ++$x . "=$line\n";
+}
diff --git a/regression-testing/tests/basic-surface-only/moses.ini b/regression-testing/tests/basic-surface-only/moses.ini
new file mode 100644
index 000000000..384954ece
--- /dev/null
+++ b/regression-testing/tests/basic-surface-only/moses.ini
@@ -0,0 +1,45 @@
+# moses.ini for regression test
+
+[ttable-file]
+0 0 5 ${MODEL_PATH}/basic-surface-only/phrase-table.gz
+
+# language model
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+# limit on how many phrase translations e for each phrase f are loaded
+[ttable-limit]
+#ttable element load limit 0 = all elements loaded
+20
+
+# distortion (reordering) weight
+[weight-d]
+0.141806519223522
+
+# language model weight
+[weight-l]
+0.142658800199951
+
+# translation model weight (phrase translation, lexical weighting)
+[weight-t]
+0.00402447059454402
+0.0685647475075862
+0.294089113124688
+0.0328320356515851
+-0.0426081987467227
+
+# word penalty
+[weight-w]
+-0.273416114951401
+
+[distortion-limit]
+4
+
+[beam-threshold]
+0.03
+
+[input-factors]
+0
+
+[mapping]
+T 0
+
diff --git a/regression-testing/tests/basic-surface-only/to-translate b/regression-testing/tests/basic-surface-only/to-translate
new file mode 100644
index 000000000..a505702d1
--- /dev/null
+++ b/regression-testing/tests/basic-surface-only/to-translate
@@ -0,0 +1,5 @@
+ich frage sie also , herr pr�sident : stellen die unterschiedlichen arbeitskosten somit nicht auch eine beschr�nkung des freien wettbewerbs in der europ�ischen union dar ?
+schaut man sich die f�lligkeitspl�ne der ausf�hrung des haushalts f�r die rubriken 2 , 3 , 4 und 7 an , stellt man fest , dass nur durchschnittlich 8 % aller verpflichtungen durch zahlungen gedeckt sind .
+vor drei jahren haben wir mit unserer besch�ftigungsinitiative begonnen , indem wir kleinen und mittleren unternehmen halfen , chancenkapital zu bekommen .
+das parlament will das auf zweierlei weise tun .
+nur dann werden die europ�ischen institutionen auch ihrem auftrag gerecht .
diff --git a/regression-testing/tests/basic-surface-only/truth/results.dat b/regression-testing/tests/basic-surface-only/truth/results.dat
new file mode 100644
index 000000000..e41fb463f
--- /dev/null
+++ b/regression-testing/tests/basic-surface-only/truth/results.dat
@@ -0,0 +1,13 @@
+TRANSLATION_1=i ask you , therefore , mr president , the different labour costs are therefore not a restriction of free competition in the european union ? 
+TRANSLATION_2=if we look at the f�lligkeitspl�ne the implementation of the budget for the categories 2 , 3 , 4 and 7 to , we see that only an average of 8 % of commitments by payments are met . 
+TRANSLATION_3=three years ago our employment strategy , we started by small and medium-sized enterprises halfen , chancenkapital to obtain . 
+TRANSLATION_4=parliament wants the in two ways . 
+TRANSLATION_5=only then will the european institutions to its mandate . 
+LMLOAD_TIME ~ 8.00
+PTLOAD_TIME ~ 9.00
+SCORE_1 = -14.84
+SCORE_2 = -33.76
+SCORE_3 = -40.24
+SCORE_4 = -6.00
+SCORE_5 = -7.02
+TOTAL_WALLTIME ~ 28
diff --git a/regression-testing/tests/confusionNet-surface-only/filter-stderr b/regression-testing/tests/confusionNet-surface-only/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/filter-stderr
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+$x=0;
+while (my $line = <>) {
+  chomp $line;
+
+  # load-time lines carry the elapsed time as "[N.NN] seconds"
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+
+  # everything else is ignored except the BEST HYPO lines, which look like:
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  next unless $line =~ /^BEST HYPO:/;
+  # the score is the last number of the bracketed list after the [1...1] field
+  my $matched = $line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/;
+  my $pscore = $matched ? $1 : "FORMAT ERROR";
+  $x++;
+  print "SCORE_$x = $pscore\n";
+}
+
+# Pull the elapsed time out of a log line such as "... [12.00] seconds".
+# Returns the number as written, or the string "FORMAT ERROR" when the
+# line contains no bracketed decimal followed by "second".
+sub get_time {
+  my ($text) = @_;
+  return $1 if $text =~ /\[(\d+\.\d+)\]\s*seconds*/;
+  return "FORMAT ERROR";
+}
diff --git a/regression-testing/tests/confusionNet-surface-only/filter-stdout b/regression-testing/tests/confusionNet-surface-only/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/filter-stdout
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+# Number each input line: line N is printed as TRANSLATION_N=<text>.
+$x = 0;
+while (my $line = <>) {
+  chomp $line;
+  print "TRANSLATION_" . ++$x . "=$line\n";
+}
diff --git a/regression-testing/tests/confusionNet-surface-only/moses.ini b/regression-testing/tests/confusionNet-surface-only/moses.ini
new file mode 100644
index 000000000..e9db4481d
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/moses.ini
@@ -0,0 +1,56 @@
+# Moses configuration file
+# automatic exodus from pharaoh.ini Wed Jul 12 18:24:14 EDT 2006
+
+###########################
+### PHARAOH CONFIG FILE ###
+###########################
+
+# phrase table f, n, p(n|f)
+[ttable-file]
+0 0 5 ${MODEL_PATH}/confusionNet-surface-only/phrase-table.0-0
+
+# language model
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+# limit on how many phrase translations e for each phrase f are loaded
+[ttable-limit]
+#ttable element load limit 0 = all elements loaded
+20
+
+# distortion (reordering) weight
+[weight-d]
+0.141806519223522
+
+# language model weight
+[weight-l]
+0.142658800199951
+
+# translation model weight (phrase translation, lexical weighting)
+[weight-t]
+0.00402447059454402
+0.0685647475075862
+0.294089113124688
+0.0328320356515851
+-0.0426081987467227
+
+# word penalty
+[weight-w]
+-0.273416114951401
+
+[distortion-limit]
+4
+
+[beam-threshold]
+0.03
+
+[input-factors]
+0
+
+[mapping]
+T 0
+
+[inputtype]
+1
+
+[weight-i]
+1.0
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate b/regression-testing/tests/confusionNet-surface-only/to-translate
new file mode 100644
index 000000000..d08755644
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/to-translate
@@ -0,0 +1,15 @@
+damit|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0
+arbeitsplan|NN 1.0
+
+damit|PROADV 1.0 dies|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0
+arbeitsplan|NN 1.0
+
+damit|PROADV 1.0 dies|PROADV 0.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 dies|PROADV 0.0 das|DT 0.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0 dies|PROADV 0.0
+arbeitsplan|NN 1.0 dies|PROADV 0.0
+
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate.new b/regression-testing/tests/confusionNet-surface-only/to-translate.new
new file mode 100644
index 000000000..008278bbb
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/to-translate.new
@@ -0,0 +1,10 @@
+damit|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0
+arbeitsplan|NN 1.0
+
+damit|PROADV 1.0 dies|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0
+arbeitsplan|NN 1.0
+
diff --git a/regression-testing/tests/confusionNet-surface-only/to-translate.orig b/regression-testing/tests/confusionNet-surface-only/to-translate.orig
new file mode 100644
index 000000000..f3fda26da
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/to-translate.orig
@@ -0,0 +1,10 @@
+damit|PROADV 0.0
+ist|VSFIN 0.0 war|VSFIN 0.0 sei|VSFIN 0.0
+der|ART 0.0 die|ART 0.0 das|ART 0.0
+arbeitsplan|NN 0.0
+
+damit|PROADV 0.0 dies|PROADV 0.0
+ist|VSFIN 0.0 war|VSFIN 0.0 sei|VSFIN 0.0 ist|VVFIN 0.0 war|VVFIN 0.0 sei|VVFIN 0.0
+der|ART 0.0 die|ART 0.0 das|ART 0.0 der|DT 0.0 die|DT 0.0 das|DT 0.0
+arbeitsplan|NN 0.0
+
diff --git a/regression-testing/tests/confusionNet-surface-only/truth/results.dat b/regression-testing/tests/confusionNet-surface-only/truth/results.dat
new file mode 100644
index 000000000..fd8aac0c6
--- /dev/null
+++ b/regression-testing/tests/confusionNet-surface-only/truth/results.dat
@@ -0,0 +1,9 @@
+TRANSLATION_1=that is the order of business 
+TRANSLATION_2=this is the order of business 
+TRANSLATION_3=that is the order of business 
+LMLOAD_TIME ~ 10.00
+PTLOAD_TIME ~ 10.00
+SCORE_1 = -2.97
+SCORE_2 = -2.56
+SCORE_3 = -2.97
+TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor-binptable/filter-stderr b/regression-testing/tests/multi-factor-binptable/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/multi-factor-binptable/filter-stderr
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+$x=0;
+while (my $line = <>) {
+  chomp $line;
+
+  # load-time lines carry the elapsed time as "[N.NN] seconds"
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ " . get_time($line) . "\n";
+  }
+
+  # everything else is ignored except the BEST HYPO lines, which look like:
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  next unless $line =~ /^BEST HYPO:/;
+  # the score is the last number of the bracketed list after the [1...1] field
+  my $matched = $line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/;
+  my $pscore = $matched ? $1 : "FORMAT ERROR";
+  $x++;
+  print "SCORE_$x = $pscore\n";
+}
+
+# Pull the elapsed time out of a log line such as "... [12.00] seconds".
+# Returns the number as written, or the string "FORMAT ERROR" when the
+# line contains no bracketed decimal followed by "second".
+sub get_time {
+  my ($text) = @_;
+  return $1 if $text =~ /\[(\d+\.\d+)\]\s*seconds*/;
+  return "FORMAT ERROR";
+}
diff --git a/regression-testing/tests/multi-factor-binptable/filter-stdout b/regression-testing/tests/multi-factor-binptable/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/multi-factor-binptable/filter-stdout
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+# Number each input line: line N is printed as TRANSLATION_N=<text>.
+$x = 0;
+while (my $line = <>) {
+  chomp $line;
+  print "TRANSLATION_" . ++$x . "=$line\n";
+}
diff --git a/regression-testing/tests/multi-factor-binptable/moses.ini b/regression-testing/tests/multi-factor-binptable/moses.ini
new file mode 100644
index 000000000..db43d33b9
--- /dev/null
+++ b/regression-testing/tests/multi-factor-binptable/moses.ini
@@ -0,0 +1,63 @@
+#########################
+### MOSES CONFIG FILE ###
+#########################
+
+# input factors
+[input-factors]
+0
+1
+
+# mapping steps
+[mapping]
+T 0
+G 0
+T 1
+
+# translation tables: source-factors, target-factors, number of scores, file 
+[ttable-file]
+0 0 5 ${MODEL_PATH}/multi-factor-binptable/phrase-table.0-0.gz
+1 1 5 ${MODEL_PATH}/multi-factor-binptable/phrase-table.1-1.gz
+
+# generation models: source-factors, target-factors
+[generation-file]
+0 1 2 ${MODEL_PATH}/multi-factor-binptable/generation.0-1.gz
+
+# language models: 0, factors, type, file
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+0
+
+# distortion (reordering) weight
+[weight-d]
+0.6
+
+# language model weights
+[weight-l]
+0.5
+
+# translation model weights
+[weight-t]
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+
+# generation model weights
+[weight-generation]
+0.3
+0
+
+# word penalty
+[weight-w]
+-1
diff --git a/regression-testing/tests/multi-factor-binptable/to-translate b/regression-testing/tests/multi-factor-binptable/to-translate
new file mode 100644
index 000000000..b3283a754
--- /dev/null
+++ b/regression-testing/tests/multi-factor-binptable/to-translate
@@ -0,0 +1 @@
+es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor-binptable/truth/results.dat b/regression-testing/tests/multi-factor-binptable/truth/results.dat
new file mode 100644
index 000000000..d7637123d
--- /dev/null
+++ b/regression-testing/tests/multi-factor-binptable/truth/results.dat
@@ -0,0 +1,5 @@
+TRANSLATION_1=there are various different opinions . 
+LMLOAD_TIME ~ 10.00
+PTLOAD_TIME ~ 10.00
+SCORE_1 = -13.73
+TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl b/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
new file mode 100755
index 000000000..5c70081ff
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/dropize_phrase_table.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+
+# dropize_phrase_table.pl: read an old-style pharaoh phrase table (one with no
+# empty-target entries) and write a copy in which
+#   * every input line gets one extra score, "1", appended, and
+#   * the first line of each new single-word source phrase is preceded by a
+#     deletion line (empty target side) for that phrase.
+# The scores on the deletion line are hardcoded below; its last score is
+# $DELETIONCOST.
+#
+# usage: dropize_phrase_table.pl INPUT_PTABLE OUTPUT_PTABLE
+
+use strict;
+use warnings;
+
+my $DELETIONCOST = 2.718; # weight for an individual deletion
+
+die "usage: $0 INPUT_PTABLE OUTPUT_PTABLE\n" unless @ARGV >= 2;
+my ($inputFile, $outputFile) = @ARGV;
+
+# Three-argument open with lexical handles: the file names are used literally,
+# so a name such as ">foo" or "cmd |" cannot change how the file is opened.
+open(my $in, '<', $inputFile) or die "couldn't open '$inputFile' for read\n";
+open(my $out, '>', $outputFile) or die "couldn't open '$outputFile' for write\n";
+
+my $lastSrcPhrase = '';
+while (my $line = <$in>)
+{
+    # chomp, not chop: a last line without a trailing newline must keep its
+    # final character.
+    chomp($line);
+    my @tokens = split(/\|\|\|/, $line);
+    my $srcPhrase = defined $tokens[0] ? $tokens[0] : '';
+    # new source phrase of a single word; add deletion line
+    if ($srcPhrase ne $lastSrcPhrase && $srcPhrase =~ /^\s*\S+\s*$/)
+    {
+        print $out "$srcPhrase |||  ||| 1 1 1 1 2.718 $DELETIONCOST\n";
+    }
+    print $out "$line 1\n";
+    $lastSrcPhrase = $srcPhrase;
+}
+close($in);
+# Buffered write errors only surface when the output handle is closed.
+close($out) or die "couldn't finish writing '$outputFile': $!\n";
diff --git a/regression-testing/tests/multi-factor-drop/filter-stderr b/regression-testing/tests/multi-factor-drop/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/filter-stderr
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+# Reduce a decoder log (STDIN or files named on the command line; stderr,
+# judging by this script's name) to lines of the form used in
+# truth/results.dat:
+#   LMLOAD_TIME ~ <t>    for a "Finished loading LanguageModels" line
+#   PTLOAD_TIME ~ <t>    for a "Finished loading phrase tables" line
+#   SCORE_<n> = <score>  for the n-th "BEST HYPO:" line (its last score)
+# A value that cannot be parsed is printed as "FORMAT ERROR".
+use strict;
+use warnings;
+
+my $hypo_count = 0;
+while (my $line = <>) {
+  chomp $line;
+
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ ", get_time($line), "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ ", get_time($line), "\n";
+  }
+  next unless $line =~ /^BEST HYPO:/;
+
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  # The score reported is the last entry of the bracketed list that follows
+  # the bracketed run of 1s (presumably the coverage vector).
+  my $pscore = "FORMAT ERROR";
+  if ($line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/) {
+    $pscore = $1;
+  }
+  $hypo_count++;
+  print "SCORE_$hypo_count = $pscore\n";
+}
+
+# get_time(LINE): return the number found in "[N.N] second(s)" within LINE,
+# or "FORMAT ERROR" if LINE has no such timing.
+sub get_time {
+  my ($text) = @_;
+  return "FORMAT ERROR" unless $text =~ /\[(\d+\.\d+)\]\s*seconds?/;
+  return $1;
+}
diff --git a/regression-testing/tests/multi-factor-drop/filter-stdout b/regression-testing/tests/multi-factor-drop/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/filter-stdout
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+# Number each input line (STDIN or files named on the command line), starting
+# at 1, and print it as "TRANSLATION_<n>=<line>".
+use strict;
+use warnings;
+
+my $sentence_no = 0;
+while (my $line = <>) {
+  chomp $line;
+  $sentence_no++;
+  print "TRANSLATION_$sentence_no=$line\n";
+}
diff --git a/regression-testing/tests/multi-factor-drop/moses.ini b/regression-testing/tests/multi-factor-drop/moses.ini
new file mode 100644
index 000000000..526d59ee0
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/moses.ini
@@ -0,0 +1,71 @@
+#########################
+### MOSES CONFIG FILE ###
+#########################
+
+# input factors
+[input-factors]
+0
+1
+
+# mapping steps
+[mapping]
+T 0
+G 0
+T 1
+
+# translation tables: source-factors, target-factors, number of scores, file 
+[ttable-file]
+0 0 6 ${MODEL_PATH}/multi-factor-drop/phrase-table.0-0.gz
+1 1 5 ${MODEL_PATH}/multi-factor-drop/phrase-table.1-1.gz
+
+# generation models: source-factors, target-factors
+[generation-file]
+0 1 ${MODEL_PATH}/multi-factor-drop/generation.0-1.gz
+
+# language models: 0, factors, type, file
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+0
+
+# distortion (reordering) weight
+[weight-d]
+0.6
+
+# language model weights
+[weight-l]
+0.5
+
+# translation model weights
+[weight-t]
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+
+# generation model weights
+[weight-generation]
+0.3
+0
+
+# word penalty
+[weight-w]
+-1
+
+[phrase-drop-allowed]
+yes
+
+##word-drop penalty
+#[weight-e]
+#2.718
diff --git a/regression-testing/tests/multi-factor-drop/to-translate b/regression-testing/tests/multi-factor-drop/to-translate
new file mode 100644
index 000000000..b3283a754
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/to-translate
@@ -0,0 +1 @@
+es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor-drop/truth/results.dat b/regression-testing/tests/multi-factor-drop/truth/results.dat
new file mode 100644
index 000000000..00928862a
--- /dev/null
+++ b/regression-testing/tests/multi-factor-drop/truth/results.dat
@@ -0,0 +1,5 @@
+TRANSLATION_1=there are different opinions 
+LMLOAD_TIME ~ 10.00
+PTLOAD_TIME ~ 11.00
+SCORE_1 = -12.67
+TOTAL_WALLTIME ~ 11
diff --git a/regression-testing/tests/multi-factor/filter-stderr b/regression-testing/tests/multi-factor/filter-stderr
new file mode 100755
index 000000000..aab962ac7
--- /dev/null
+++ b/regression-testing/tests/multi-factor/filter-stderr
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+# Reduce a decoder log (STDIN or files named on the command line; stderr,
+# judging by this script's name) to lines of the form used in
+# truth/results.dat:
+#   LMLOAD_TIME ~ <t>    for a "Finished loading LanguageModels" line
+#   PTLOAD_TIME ~ <t>    for a "Finished loading phrase tables" line
+#   SCORE_<n> = <score>  for the n-th "BEST HYPO:" line (its last score)
+# A value that cannot be parsed is printed as "FORMAT ERROR".
+use strict;
+use warnings;
+
+my $hypo_count = 0;
+while (my $line = <>) {
+  chomp $line;
+
+  if ($line =~ /^Finished loading LanguageModels/) {
+    print "LMLOAD_TIME ~ ", get_time($line), "\n";
+  }
+  if ($line =~ /^Finished loading phrase tables/) {
+    print "PTLOAD_TIME ~ ", get_time($line), "\n";
+  }
+  next unless $line =~ /^BEST HYPO:/;
+
+  # BEST HYPO: only then will the european institutions to your task . [11111111111]  [-3.65,0.00,-12.34,-8.00,-10.00,0.00,-14.39]
+  # The score reported is the last entry of the bracketed list that follows
+  # the bracketed run of 1s (presumably the coverage vector).
+  my $pscore = "FORMAT ERROR";
+  if ($line =~ /\[1+\]\s*\[[^\]]*,\s*(-?\d+\.\d+)\]/) {
+    $pscore = $1;
+  }
+  $hypo_count++;
+  print "SCORE_$hypo_count = $pscore\n";
+}
+
+# get_time(LINE): return the number found in "[N.N] second(s)" within LINE,
+# or "FORMAT ERROR" if LINE has no such timing.
+sub get_time {
+  my ($text) = @_;
+  return "FORMAT ERROR" unless $text =~ /\[(\d+\.\d+)\]\s*seconds?/;
+  return $1;
+}
diff --git a/regression-testing/tests/multi-factor/filter-stdout b/regression-testing/tests/multi-factor/filter-stdout
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/multi-factor/filter-stdout
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+# Number each input line (STDIN or files named on the command line), starting
+# at 1, and print it as "TRANSLATION_<n>=<line>".
+use strict;
+use warnings;
+
+my $sentence_no = 0;
+while (my $line = <>) {
+  chomp $line;
+  $sentence_no++;
+  print "TRANSLATION_$sentence_no=$line\n";
+}
diff --git a/regression-testing/tests/multi-factor/moses.ini b/regression-testing/tests/multi-factor/moses.ini
new file mode 100644
index 000000000..e8611a2cf
--- /dev/null
+++ b/regression-testing/tests/multi-factor/moses.ini
@@ -0,0 +1,63 @@
+#########################
+### MOSES CONFIG FILE ###
+#########################
+
+# input factors
+[input-factors]
+0
+1
+
+# mapping steps
+[mapping]
+T 0
+G 0
+T 1
+
+# translation tables: source-factors, target-factors, number of scores, file 
+[ttable-file]
+0 0 5 ${MODEL_PATH}/multi-factor/phrase-table.0-0.gz
+1 1 5 ${MODEL_PATH}/multi-factor/phrase-table.1-1.gz
+
+# generation models: source-factors, target-factors, number of scores, file
+[generation-file]
+0 1 2 ${MODEL_PATH}/multi-factor/generation.0-1.gz
+
+# language models: 0, factors, type, file
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+0
+
+# distortion (reordering) weight
+[weight-d]
+0.6
+
+# language model weights
+[weight-l]
+0.5
+
+# translation model weights
+[weight-t]
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+
+# generation model weights
+[weight-generation]
+0.3
+0
+
+# word penalty
+[weight-w]
+-1
diff --git a/regression-testing/tests/multi-factor/moses2.ini b/regression-testing/tests/multi-factor/moses2.ini
new file mode 100644
index 000000000..a98e71483
--- /dev/null
+++ b/regression-testing/tests/multi-factor/moses2.ini
@@ -0,0 +1,63 @@
+#########################
+### MOSES CONFIG FILE ###
+#########################
+
+# input factors
+[input-factors]
+0
+1
+
+# mapping steps
+[mapping]
+T 0
+G 0
+T 1
+
+# translation tables: source-factors, target-factors, number of scores, file 
+[ttable-file]
+0 0 5 /export/ws06osmt/regression-testing/models/multi-factor/phrase-table.0-0.gz
+1 1 5 /export/ws06osmt/regression-testing/models/multi-factor/phrase-table.1-1.gz
+
+# generation models: source-factors, target-factors, number of scores, file
+[generation-file]
+0 1 2 /export/ws06osmt/regression-testing/models/multi-factor/generation.0-1.gz
+
+# language models: 0, factors, type, file
+[lmodel-file]
+0 0 3 /export/ws06osmt/models/lm/europarl.en.srilm.gz
+
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+0
+
+# distortion (reordering) weight
+[weight-d]
+0.6
+
+# language model weights
+[weight-l]
+0.5
+
+# translation model weights
+[weight-t]
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+0.2
+
+# generation model weights
+[weight-generation]
+0.3
+0
+
+# word penalty
+[weight-w]
+-1
diff --git a/regression-testing/tests/multi-factor/to-translate b/regression-testing/tests/multi-factor/to-translate
new file mode 100644
index 000000000..b3283a754
--- /dev/null
+++ b/regression-testing/tests/multi-factor/to-translate
@@ -0,0 +1 @@
+es|PPER gibt|VVFIN verschiedene|ADJ andere|ADJ meinungen|NN .|PE
diff --git a/regression-testing/tests/multi-factor/truth/results.dat b/regression-testing/tests/multi-factor/truth/results.dat
new file mode 100644
index 000000000..3f07889b2
--- /dev/null
+++ b/regression-testing/tests/multi-factor/truth/results.dat
@@ -0,0 +1,5 @@
+TRANSLATION_1=there are various different opinions . 
+LMLOAD_TIME ~ 10.00
+PTLOAD_TIME ~ 11.00
+SCORE_1 = -13.73
+TOTAL_WALLTIME ~ 12
diff --git a/regression-testing/tests/ptable-filtering/filter-stderr b/regression-testing/tests/ptable-filtering/filter-stderr
new file mode 100755
index 000000000..d4945a2d2
--- /dev/null
+++ b/regression-testing/tests/ptable-filtering/filter-stderr
@@ -0,0 +1,20 @@
+#!/usr/bin/perl
+# Print the text of every "BEST HYPO:" line of the input (STDIN or files named
+# on the command line) as "TRANSLATION_<n> = <text>", numbered from 1.
+use strict;
+use warnings;
+
+my $hypo_count = 0;
+while (my $line = <>) {
+  chomp $line;
+
+  next unless $line =~ /^BEST HYPO:/;
+  $line =~ s/^BEST HYPO:\s*//;
+  # Cut the line at the bracketed run of 1s (presumably the coverage vector,
+  # one 1 per input word -- TODO confirm) so only the translation remains.
+  # Any number of 1s is accepted: a pattern demanding three or more would
+  # leave "[1]" or "[11]" and everything after it in the output.
+  $line =~ s/\s*\[1+\].*$//;
+  $hypo_count++;
+  print "TRANSLATION_$hypo_count = $line\n";
+}
diff --git a/regression-testing/tests/ptable-filtering/filter-stdout b/regression-testing/tests/ptable-filtering/filter-stdout
new file mode 100755
index 000000000..6a5527bb2
--- /dev/null
+++ b/regression-testing/tests/ptable-filtering/filter-stdout
@@ -0,0 +1,40 @@
+#!/usr/bin/perl
+# For every input line of the form "[... ; 2-2]", collect the lines that
+# follow it up to the next blank line (or end of input), strip their leading
+# whitespace, sort them as strings and print them as
+# "TRANSLATION_OPTION_<n>=<line>", numbering from 1 within each group.
+# All other input is discarded.
+use strict;
+use warnings;
+
+my @options;          # lines of the group currently being collected
+my $in_group = 0;     # true between a "[... ; 2-2]" header and the blank line
+
+while (my $line = <>) {
+  chomp $line;
+  if (!$in_group) {
+    $in_group = 1 if $line =~ /^\[.* ; 2-2\]$/;
+    next;
+  }
+  if ($line =~ /^\s*$/) {
+    print_options(@options);
+    @options = ();
+    $in_group = 0;
+    next;
+  }
+  $line =~ s/^\s+//;
+  push @options, $line;
+}
+# Input ended inside a group: still report what was collected, without
+# reading from the (already exhausted) input again.
+print_options(@options) if $in_group;
+
+# print_options(LIST): print LIST in string-sorted order, one numbered
+# "TRANSLATION_OPTION_<n>=<item>" line per element.
+sub print_options {
+  my $rank = 0;
+  foreach my $option (sort @_) {
+    $rank++;
+    print "TRANSLATION_OPTION_$rank=$option\n";
+  }
+}
diff --git a/regression-testing/tests/ptable-filtering/moses.ini b/regression-testing/tests/ptable-filtering/moses.ini
new file mode 100644
index 000000000..360c4389f
--- /dev/null
+++ b/regression-testing/tests/ptable-filtering/moses.ini
@@ -0,0 +1,59 @@
+# reg testing
+
+[ttable-file]
+0 0 6 ${MODEL_PATH}/ptable-filtering/phrase-table.gz
+
+# language model
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+
+# distortion (reordering) type
+[distortion-type]
+distance
+
+# distortion (reordering) weight
+[weight-d]
+0.01
+
+# language model weight
+[weight-l]
+0.05
+
+# translation model weight (phrase translation, lexical weighting)
+[weight-t]
+0.019268
+0.025749
+0.018038
+0.008511
+0.013410
+-0.624610
+
+# word penalty
+[weight-w]
+-0.108496
+
+[distortion-limit]
+6
+
+[stack]
+100
+
+[lmodel-floor]
+-50
+
+[beam-threshold]
+0.03
+
+[input-factors]
+0
+
+[mapping]
+T 0
+
+[verbose]
+3
+
diff --git a/regression-testing/tests/ptable-filtering/to-translate b/regression-testing/tests/ptable-filtering/to-translate
new file mode 100644
index 000000000..3ece67fa6
--- /dev/null
+++ b/regression-testing/tests/ptable-filtering/to-translate
@@ -0,0 +1 @@
+西 联 汇款 .
diff --git a/regression-testing/tests/ptable-filtering/truth/results.dat b/regression-testing/tests/ptable-filtering/truth/results.dat
new file mode 100644
index 000000000..ae8bda18b
--- /dev/null
+++ b/regression-testing/tests/ptable-filtering/truth/results.dat
@@ -0,0 +1,22 @@
+TRANSLATION_OPTION_1=form of , pC=-0.570639, c=-0.814663
+TRANSLATION_OPTION_2=from , pC=-0.604413, c=-0.783405
+TRANSLATION_OPTION_3=money , pC=-0.380378, c=-0.679572
+TRANSLATION_OPTION_4=money transfers , pC=-0.290805, c=-0.797336
+TRANSLATION_OPTION_5=of transfers , pC=-0.323035, c=-0.804664
+TRANSLATION_OPTION_6=payments , pC=-0.493163, c=-0.815826
+TRANSLATION_OPTION_7=providing , pC=-0.536236, c=-0.865767
+TRANSLATION_OPTION_8=remittance , pC=-0.058577, c=-0.62658
+TRANSLATION_OPTION_9=remittance of , pC=-0.153561, c=-0.725668
+TRANSLATION_OPTION_10=remittances , , pC=-0.201788, c=-0.793385
+TRANSLATION_OPTION_11=remittances , pC=-0.0431147, c=-0.611117
+TRANSLATION_OPTION_12=remittances , to , pC=-0.161446, c=-0.863898
+TRANSLATION_OPTION_13=remittances from , pC=-0.154321, c=-0.752515
+TRANSLATION_OPTION_14=represents , pC=-0.551077, c=-0.85218
+TRANSLATION_OPTION_15=sending money , pC=-0.332853, c=-0.835438
+TRANSLATION_OPTION_16=sent , pC=-0.490528, c=-0.832855
+TRANSLATION_OPTION_17=transfer , pC=-0.374108, c=-0.716859
+TRANSLATION_OPTION_18=transfer of money , pC=-0.35242, c=-0.801209
+TRANSLATION_OPTION_19=transfers , pC=-0.332958, c=-0.692228
+TRANSLATION_OPTION_20=transfers from , pC=-0.35855, c=-0.759244
+TRANSLATION_1 = from the west .
+TOTAL_WALLTIME ~ 14
author	redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-08 19:54:30 +0400
committer	redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230>	2006-08-08 19:54:30 +0400
commit	e9fd44ef132674335d88f627504431a5e98bac34 (patch)
tree	5e665c350f1551c965a7003fb38eea28dd1f54bd /regression-testing
parent	264f045a6bff3e7004e5860d997c36fc86d55662 (diff)