re-add tests in the old format

author: Hieu Hoang <hieuhoang@gmail.com> 2013-07-20 18:27:45 +0400
committer: Hieu Hoang <hieuhoang@gmail.com> 2013-07-20 18:27:45 +0400
commit: a6bec142d7256cc06286e5d6bcbee21cf58e72d4 (patch)
tree: d41161451a0e2596768797a6b669e7014ffacb42
parent: 8f00dfeb10b9061ac7685df32758aa489f6d32bd (diff)
13 files changed, 219 insertions, 0 deletions
diff --git a/tests/phrase.compresspt.oldformat/filter-nbest.pl b/tests/phrase.compresspt.oldformat/filter-nbest.pl
new file mode 100755
index 0000000..4d34fe0
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/filter-nbest.pl
@@ -0,0 +1,15 @@
+#!/usr/bin/perl
+# Relabels each n-best line read from <> as a "TRANSLATION_<code>_NBEST_<rank>=..." record on stdout.
+use strict;
+# $x is the 1-based rank within a run of consecutive lines sharing the same code; $oldcode is the previous line's code.
+my $x=0;
+my $oldcode = "";
+while (<>) {
+  chomp;
+  my ($code, $trans, $featscores, $globscores, $align1, $align2 ) = split(/[\s]*\|\|\|[\s]*/,$_);  # fields are "|||"-separated; surrounding whitespace is dropped
+  $x = 0 if $oldcode ne $code;  # first field changed: restart the rank
+  $x++;
+  chomp($code);  # NOTE(review): appears redundant, since $_ was already chomped before the split
+  print "TRANSLATION_${code}_NBEST_${x}=$trans ||| $featscores ||| $globscores ||| $align1 ||| $align2\n";
+  $oldcode = $code;
+}
diff --git a/tests/phrase.compresspt.oldformat/filter-stderr.pl b/tests/phrase.compresspt.oldformat/filter-stderr.pl
new file mode 100755
index 0000000..a359091
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/filter-stderr.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+# Scans the text read from <> and prints LMLOAD_TIME, PTLOAD_TIME and SCORE_<n> records for the matching lines.
+BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
+use RegTestUtils;
+# $x numbers the SCORE_ records; it is incremented after printing, so numbering starts at 0.
+$x=0;
+while (<>) {
+  chomp;
+  # Timing lines: the value is whatever RegTestUtils::readTime returns for the line (helper not visible here).
+  if (/^Finished loading LanguageModels/) {
+    my $time = RegTestUtils::readTime($_);
+    print "LMLOAD_TIME ~ $time\n";
+  }
+  if (/^Finished loading phrase tables/) {
+    my $time = RegTestUtils::readTime($_);
+    print "PTLOAD_TIME ~ $time\n";
+  }
+  next unless /^BEST TRANSLATION:/;  # every other line is ignored from here on
+  my $pscore = RegTestUtils::readHypoScore($_);  # presumably extracts the hypothesis score; verify in RegTestUtils
+  print "SCORE_$x = $pscore\n";
+  $x++;
+}
diff --git a/tests/phrase.compresspt.oldformat/filter-stdout.pl b/tests/phrase.compresspt.oldformat/filter-stdout.pl
new file mode 100755
index 0000000..3f2ec42
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/filter-stdout.pl
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+$x=0;  # line counter; incremented after printing, so records are numbered from 0
+while (<>) {
+  chomp;
+  print "TRANSLATION_$x=$_\n";  # emit the input line unchanged behind a numbered key
+  $x++;
+}
diff --git a/tests/phrase.compresspt.oldformat/moses.ini b/tests/phrase.compresspt.oldformat/moses.ini
new file mode 100644
index 0000000..899e2c1
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/moses.ini
@@ -0,0 +1,72 @@
+#########################
+### MOSES CONFIG FILE ###
+#########################
+
+# input factors
+[input-factors]
+0
+1
+2
+3
+
+# mapping steps
+[mapping]
+0 T 0
+
+# translation tables: table type (hierarchical (0), textual (0), binary (1), compact (12)), source-factors, target-factors, number of scores, file
+# OLD FORMAT is still handled for backward compatibility
+# OLD FORMAT translation tables: source-factors, target-factors, number of scores, file
+# OLD FORMAT: a binary table type (1) is assumed
+[ttable-file]
+12 0 0 5 ${TEST_PATH}/phrase-table.minphr
+
+# no generation models, no generation-file section
+
+# language models: type(srilm/irstlm), factors, order, file
+[lmodel-file]
+1 0 3 ${MODEL_PATH}/ptable-with-alignment/mini.irstlm.en.3g.lm
+
+# limit on how many phrase translations e for each phrase f are loaded
+# 0 = all elements loaded
+[ttable-limit]
+20
+
+# distortion (reordering) weight
+[weight-d]
+0.6
+
+# language model weights
+[weight-l]
+0.5000
+
+
+# translation model weights
+[weight-t]
+0.20
+0.20
+0.20
+0.20
+0.20
+
+# no generation models, no weight-generation section
+
+# word penalty
+[weight-w]
+-1
+
+[distortion-limit]
+6
+
+[n-best-list]
+nbest
+5
+
+
+[print-alignment-info-in-n-best]
+true
+
+[include-segmentation-in-n-best]
+true
+
+
+
diff --git a/tests/phrase.compresspt.oldformat/phrase-table.minphr b/tests/phrase.compresspt.oldformat/phrase-table.minphr
new file mode 100644
index 0000000..b6ad6ac
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/phrase-table.minphr
diff --git a/tests/phrase.compresspt.oldformat/reordering-table.minlexr b/tests/phrase.compresspt.oldformat/reordering-table.minlexr
new file mode 100644
index 0000000..6f22336
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/reordering-table.minlexr
diff --git a/tests/phrase.compresspt.oldformat/to-translate.txt b/tests/phrase.compresspt.oldformat/to-translate.txt
new file mode 100644
index 0000000..7c24b74
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/to-translate.txt
@@ -0,0 +1 @@
+beide|PIAT|2|NK Versäumnisse|NN|3|SB haben|VAFIN|0|ROOT terroristische|ADJA|5|NK Gruppen|NN|8|MO in|APPR|8|MO Pakistan|NE|6|NK gestärkt|VVPP|3|OC .|$.|3|PUNC
diff --git a/tests/phrase.compresspt.oldformat/truth/results.txt b/tests/phrase.compresspt.oldformat/truth/results.txt
new file mode 100644
index 0000000..c35cf21
--- /dev/null
+++ b/tests/phrase.compresspt.oldformat/truth/results.txt
@@ -0,0 +1,8 @@
+TRANSLATION_0=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . 
+SCORE_0 = -549.862
+TRANSLATION_0_NBEST_1=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -112.888 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -5.01064 -2.17433 -1.73026 -6.17638 2.99969 ||| -549.862 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 
+TRANSLATION_0_NBEST_2=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -112.888 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -6.00156 -2.17433 -1.82688 -6.17638 3.99959 ||| -549.88 ||| 0=0 1=1 2=2 3=3 4=4 5=5 6=6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 
+TRANSLATION_0_NBEST_3=beide Versäumnisse made terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -116.003 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -1.37305 -2.17433 -1.73026 -2.3922 2.99969 ||| -549.936 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 
+TRANSLATION_0_NBEST_4=beide Versäumnisse made terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -116.003 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -2.36397 -2.17433 -1.82688 -2.3922 3.99959 ||| -549.953 ||| 0=0 1=1 2=2 3=3 4=4 5=5 6=6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 
+TRANSLATION_0_NBEST_5=beide Versäumnisse , terroristische Gruppen in Pakistan . gestärkt ||| IRSTLM0= -110.706 Distortion0= -3 WordPenalty0= -9 PhraseDictionaryCompact0= -5.01064 -2.17433 -1.73026 -6.17638 2.99969 ||| -550.572 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 8=7 7=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 8-7 7-8 
+TOTAL_WALLTIME ~ 0
diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl
new file mode 100755
index 0000000..2f6e176
--- /dev/null
+++ b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+# Scans the text read from <> and prints LMLOAD_TIME, PTLOAD_TIME and SCORE_<n> records for the matching lines.
+BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
+use RegTestUtils;
+# $x numbers the SCORE_ records; it is incremented before printing, so numbering starts at 1.
+$x=0;
+while (<>) {
+  chomp;
+  # Timing lines: the value is whatever RegTestUtils::readTime returns for the line (helper not visible here).
+  if (/^Finished loading LanguageModels/) {
+    my $time = RegTestUtils::readTime($_);
+    print "LMLOAD_TIME ~ $time\n";
+  }
+  if (/^Finished loading phrase tables/) {
+    my $time = RegTestUtils::readTime($_);
+    print "PTLOAD_TIME ~ $time\n";
+  }
+  next unless /^BEST TRANSLATION:/;  # every other line is ignored from here on
+  my $pscore = RegTestUtils::readHypoScore($_);  # presumably extracts the hypothesis score; verify in RegTestUtils
+  $x++;
+  print "SCORE_$x = $pscore\n";
+}
diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl
new file mode 100755
index 0000000..476ddf6
--- /dev/null
+++ b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl
@@ -0,0 +1,7 @@
+#!/usr/bin/perl
+$x=0;  # line counter; incremented before printing, so records are numbered from 1
+while (<>) {
+  chomp;
+  $x++;
+  print "TRANSLATION_$x=$_\n";  # emit the input line unchanged behind a numbered key
+}
diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini b/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini
new file mode 100644
index 0000000..fa0dd72
--- /dev/null
+++ b/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini
@@ -0,0 +1,43 @@
+[input-factors]
+0
+1
+
+[mapping]
+T 0
+G 0
+T 1
+
+[inputtype]
+1
+
+[beam-threshold]
+0.0003
+
+[distortion-limit]
+10
+
+[stack]
+1000
+
+[verbose]
+2
+
+[feature]
+KENLM factor=0 order=3 num-features=1 lazyken=0 path=${LM_PATH}/europarl.en.srilm.gz
+Generation input-factor=0 output-factor=1 num-features=2 path=${MODEL_PATH}/multi-factor-binptable/generation.0-1.gz
+Distortion
+WordPenalty
+UnknownWordPenalty
+PhraseDictionaryBinary input-factor=0 output-factor=0 path=${MODEL_PATH}/multi-factor-binptable/phrase-table.0-0.gz num-features=5 table-limit=20 
+PhraseDictionaryBinary input-factor=1 output-factor=1 path=${MODEL_PATH}/multi-factor-binptable/phrase-table.1-1.gz num-features=5 table-limit=0 
+InputFeature num-features=2 num-input-features=1 real-word-count=1
+
+[weight]
+WordPenalty0= 1
+KENLM0= 1
+Generation0= 1 1
+Distortion0= 1
+PhraseDictionaryBinary0= 1 1 1 1 1
+PhraseDictionaryBinary1= 1 1 1 1 1
+InputFeature0= 1 -1
+
diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt b/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt
new file mode 100644
index 0000000..d087556
--- /dev/null
+++ b/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt
@@ -0,0 +1,15 @@
+damit|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0
+arbeitsplan|NN 1.0
+
+damit|PROADV 1.0 dies|PROADV 1.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0
+arbeitsplan|NN 1.0
+
+damit|PROADV 1.0 dies|PROADV 0.0
+ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 dies|PROADV 0.0 das|DT 0.0
+der|ART 1.0 die|ART 1.0 das|ART 1.0 dies|PROADV 0.0
+arbeitsplan|NN 1.0 dies|PROADV 0.0
+
diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt b/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt
new file mode 100644
index 0000000..df0d491
--- /dev/null
+++ b/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt
@@ -0,0 +1,7 @@
+TRANSLATION_1=that is the order of business 
+TRANSLATION_2=this is the order of business 
+TRANSLATION_3=that is the order of business 
+SCORE_1 = -93.671
+SCORE_2 = -85.300
+SCORE_3 = -93.671
+TOTAL_WALLTIME ~ 3
author	Hieu Hoang <hieuhoang@gmail.com>	2013-07-20 18:27:45 +0400
committer	Hieu Hoang <hieuhoang@gmail.com>	2013-07-20 18:27:45 +0400
commit	a6bec142d7256cc06286e5d6bcbee21cf58e72d4 (patch)
tree	d41161451a0e2596768797a6b669e7014ffacb42
parent	8f00dfeb10b9061ac7685df32758aa489f6d32bd (diff)