diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-20 18:27:45 +0400 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2013-07-20 18:27:45 +0400 |
commit | a6bec142d7256cc06286e5d6bcbee21cf58e72d4 (patch) | |
tree | d41161451a0e2596768797a6b669e7014ffacb42 | |
parent | 8f00dfeb10b9061ac7685df32758aa489f6d32bd (diff) |
re-add tests in the old format
-rwxr-xr-x | tests/phrase.compresspt.oldformat/filter-nbest.pl | 15 | ||||
-rwxr-xr-x | tests/phrase.compresspt.oldformat/filter-stderr.pl | 22 | ||||
-rwxr-xr-x | tests/phrase.compresspt.oldformat/filter-stdout.pl | 7 | ||||
-rw-r--r-- | tests/phrase.compresspt.oldformat/moses.ini | 72 | ||||
-rw-r--r-- | tests/phrase.compresspt.oldformat/phrase-table.minphr | bin | 0 -> 59719 bytes | |||
-rw-r--r-- | tests/phrase.compresspt.oldformat/reordering-table.minlexr | bin | 0 -> 6985076 bytes | |||
-rw-r--r-- | tests/phrase.compresspt.oldformat/to-translate.txt | 1 | ||||
-rw-r--r-- | tests/phrase.compresspt.oldformat/truth/results.txt | 8 | ||||
-rwxr-xr-x | tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl | 22 | ||||
-rwxr-xr-x | tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl | 7 | ||||
-rw-r--r-- | tests/phrase.confusionNet-multi-factor.oldformat/moses.ini | 43 | ||||
-rw-r--r-- | tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt | 15 | ||||
-rw-r--r-- | tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt | 7 |
13 files changed, 219 insertions, 0 deletions
diff --git a/tests/phrase.compresspt.oldformat/filter-nbest.pl b/tests/phrase.compresspt.oldformat/filter-nbest.pl new file mode 100755 index 0000000..4d34fe0 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/filter-nbest.pl @@ -0,0 +1,15 @@ +#!/usr/bin/perl + +use strict; + +my $x=0; +my $oldcode = ""; +while (<>) { + chomp; + my ($code, $trans, $featscores, $globscores, $align1, $align2 ) = split(/[\s]*\|\|\|[\s]*/,$_); + $x = 0 if $oldcode ne $code; + $x++; + chomp($code); + print "TRANSLATION_${code}_NBEST_${x}=$trans ||| $featscores ||| $globscores ||| $align1 ||| $align2\n"; + $oldcode = $code; +} diff --git a/tests/phrase.compresspt.oldformat/filter-stderr.pl b/tests/phrase.compresspt.oldformat/filter-stderr.pl new file mode 100755 index 0000000..a359091 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/filter-stderr.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; } +use RegTestUtils; + +$x=0; +while (<>) { + chomp; + + if (/^Finished loading LanguageModels/) { + my $time = RegTestUtils::readTime($_); + print "LMLOAD_TIME ~ $time\n"; + } + if (/^Finished loading phrase tables/) { + my $time = RegTestUtils::readTime($_); + print "PTLOAD_TIME ~ $time\n"; + } + next unless /^BEST TRANSLATION:/; + my $pscore = RegTestUtils::readHypoScore($_); + print "SCORE_$x = $pscore\n"; + $x++; +} diff --git a/tests/phrase.compresspt.oldformat/filter-stdout.pl b/tests/phrase.compresspt.oldformat/filter-stdout.pl new file mode 100755 index 0000000..3f2ec42 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/filter-stdout.pl @@ -0,0 +1,7 @@ +#!/usr/bin/perl +$x=0; +while (<>) { + chomp; + print "TRANSLATION_$x=$_\n"; + $x++; +} diff --git a/tests/phrase.compresspt.oldformat/moses.ini b/tests/phrase.compresspt.oldformat/moses.ini new file mode 100644 index 0000000..899e2c1 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/moses.ini @@ -0,0 +1,72 @@ 
+######################### +### MOSES CONFIG FILE ### +######################### + +# input factors +[input-factors] +0 +1 +2 +3 + +# mapping steps +[mapping] +0 T 0 + +# translation tables: table type (hierarchical(0), textual (0), binary (1)), source-factors, target-factors, number of scores, file +# OLD FORMAT is still handled for back-compatibility +# OLD FORMAT translation tables: source-factors, target-factors, number of scores, file +# OLD FORMAT a binary table type (1) is assumed +[ttable-file] +12 0 0 5 ${TEST_PATH}/phrase-table.minphr + +# no generation models, no generation-file section + +# language models: type(srilm/irstlm), factors, order, file +[lmodel-file] +1 0 3 ${MODEL_PATH}/ptable-with-alignment/mini.irstlm.en.3g.lm + +# limit on how many phrase translations e for each phrase f are loaded +# 0 = all elements loaded +[ttable-limit] +20 + +# distortion (reordering) weight +[weight-d] +0.6 + +# language model weights +[weight-l] +0.5000 + + +# translation model weights +[weight-t] +0.20 +0.20 +0.20 +0.20 +0.20 + +# no generation models, no weight-generation section + +# word penalty +[weight-w] +-1 + +[distortion-limit] +6 + +[n-best-list] +nbest +5 + + +[print-alignment-info-in-n-best] +true + +[include-segmentation-in-n-best] +true + + + diff --git a/tests/phrase.compresspt.oldformat/phrase-table.minphr b/tests/phrase.compresspt.oldformat/phrase-table.minphr Binary files differ new file mode 100644 index 0000000..b6ad6ac --- /dev/null +++ b/tests/phrase.compresspt.oldformat/phrase-table.minphr diff --git a/tests/phrase.compresspt.oldformat/reordering-table.minlexr b/tests/phrase.compresspt.oldformat/reordering-table.minlexr Binary files differ new file mode 100644 index 0000000..6f22336 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/reordering-table.minlexr diff --git a/tests/phrase.compresspt.oldformat/to-translate.txt b/tests/phrase.compresspt.oldformat/to-translate.txt new file mode 100644 index 0000000..7c24b74 --- /dev/null +++ 
b/tests/phrase.compresspt.oldformat/to-translate.txt @@ -0,0 +1 @@ +beide|PIAT|2|NK Versäumnisse|NN|3|SB haben|VAFIN|0|ROOT terroristische|ADJA|5|NK Gruppen|NN|8|MO in|APPR|8|MO Pakistan|NE|6|NK gestärkt|VVPP|3|OC .|$.|3|PUNC diff --git a/tests/phrase.compresspt.oldformat/truth/results.txt b/tests/phrase.compresspt.oldformat/truth/results.txt new file mode 100644 index 0000000..c35cf21 --- /dev/null +++ b/tests/phrase.compresspt.oldformat/truth/results.txt @@ -0,0 +1,8 @@ +TRANSLATION_0=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . +SCORE_0 = -549.862 +TRANSLATION_0_NBEST_1=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -112.888 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -5.01064 -2.17433 -1.73026 -6.17638 2.99969 ||| -549.862 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 +TRANSLATION_0_NBEST_2=beide Versäumnisse , terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -112.888 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -6.00156 -2.17433 -1.82688 -6.17638 3.99959 ||| -549.88 ||| 0=0 1=1 2=2 3=3 4=4 5=5 6=6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 +TRANSLATION_0_NBEST_3=beide Versäumnisse made terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -116.003 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -1.37305 -2.17433 -1.73026 -2.3922 2.99969 ||| -549.936 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 +TRANSLATION_0_NBEST_4=beide Versäumnisse made terroristische Gruppen in Pakistan gestärkt . ||| IRSTLM0= -116.003 Distortion0= 0 WordPenalty0= -9 PhraseDictionaryCompact0= -2.36397 -2.17433 -1.82688 -2.3922 3.99959 ||| -549.953 ||| 0=0 1=1 2=2 3=3 4=4 5=5 6=6 7=7 8=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 +TRANSLATION_0_NBEST_5=beide Versäumnisse , terroristische Gruppen in Pakistan . 
gestärkt ||| IRSTLM0= -110.706 Distortion0= -3 WordPenalty0= -9 PhraseDictionaryCompact0= -5.01064 -2.17433 -1.73026 -6.17638 2.99969 ||| -550.572 ||| 0=0 1=1 2=2 3=3 4=4 5-6=5-6 8=7 7=8 ||| 0-0 1-1 2-2 3-3 4-4 5-5 6-6 8-7 7-8 +TOTAL_WALLTIME ~ 0 diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl new file mode 100755 index 0000000..2f6e176 --- /dev/null +++ b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stderr.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; } +use RegTestUtils; + +$x=0; +while (<>) { + chomp; + + if (/^Finished loading LanguageModels/) { + my $time = RegTestUtils::readTime($_); + print "LMLOAD_TIME ~ $time\n"; + } + if (/^Finished loading phrase tables/) { + my $time = RegTestUtils::readTime($_); + print "PTLOAD_TIME ~ $time\n"; + } + next unless /^BEST TRANSLATION:/; + my $pscore = RegTestUtils::readHypoScore($_); + $x++; + print "SCORE_$x = $pscore\n"; +} diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl new file mode 100755 index 0000000..476ddf6 --- /dev/null +++ b/tests/phrase.confusionNet-multi-factor.oldformat/filter-stdout.pl @@ -0,0 +1,7 @@ +#!/usr/bin/perl +$x=0; +while (<>) { + chomp; + $x++; + print "TRANSLATION_$x=$_\n"; +} diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini b/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini new file mode 100644 index 0000000..fa0dd72 --- /dev/null +++ b/tests/phrase.confusionNet-multi-factor.oldformat/moses.ini @@ -0,0 +1,43 @@ +[input-factors] +0 +1 + +[mapping] +T 0 +G 0 +T 1 + +[inputtype] +1 + +[beam-threshold] +0.0003 + +[distortion-limit] +10 + +[stack] +1000 + +[verbose] +2 + +[feature] +KENLM factor=0 order=3 num-features=1 lazyken=0 
path=${LM_PATH}/europarl.en.srilm.gz +Generation input-factor=0 output-factor=1 num-features=2 path=${MODEL_PATH}/multi-factor-binptable/generation.0-1.gz +Distortion +WordPenalty +UnknownWordPenalty +PhraseDictionaryBinary input-factor=0 output-factor=0 path=${MODEL_PATH}/multi-factor-binptable/phrase-table.0-0.gz num-features=5 table-limit=20 +PhraseDictionaryBinary input-factor=1 output-factor=1 path=${MODEL_PATH}/multi-factor-binptable/phrase-table.1-1.gz num-features=5 table-limit=0 +InputFeature num-features=2 num-input-features=1 real-word-count=1 + +[weight] +WordPenalty0= 1 +KENLM0= 1 +Generation0= 1 1 +Distortion0= 1 +PhraseDictionaryBinary0= 1 1 1 1 1 +PhraseDictionaryBinary1= 1 1 1 1 1 +InputFeature0= 1 -1 + diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt b/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt new file mode 100644 index 0000000..d087556 --- /dev/null +++ b/tests/phrase.confusionNet-multi-factor.oldformat/to-translate.txt @@ -0,0 +1,15 @@ +damit|PROADV 1.0 +ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 +der|ART 1.0 die|ART 1.0 das|ART 1.0 +arbeitsplan|NN 1.0 + +damit|PROADV 1.0 dies|PROADV 1.0 +ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 ist|VVFIN 1.0 war|VVFIN 1.0 sei|VVFIN 1.0 +der|ART 1.0 die|ART 1.0 das|ART 1.0 der|DT 1.0 die|DT 1.0 das|DT 1.0 +arbeitsplan|NN 1.0 + +damit|PROADV 1.0 dies|PROADV 0.0 +ist|VSFIN 1.0 war|VSFIN 1.0 sei|VSFIN 1.0 dies|PROADV 0.0 das|DT 0.0 +der|ART 1.0 die|ART 1.0 das|ART 1.0 dies|PROADV 0.0 +arbeitsplan|NN 1.0 dies|PROADV 0.0 + diff --git a/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt b/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt new file mode 100644 index 0000000..df0d491 --- /dev/null +++ b/tests/phrase.confusionNet-multi-factor.oldformat/truth/results.txt @@ -0,0 +1,7 @@ +TRANSLATION_1=that is the order of business +TRANSLATION_2=this is the order of business +TRANSLATION_3=that is the order of business +SCORE_1 = -93.671 
+SCORE_2 = -85.300 +SCORE_3 = -93.671 +TOTAL_WALLTIME ~ 3 |