From 5b8323b60b7321ac34b635cabef184be708cdd1d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 15 May 2013 12:26:54 +0100 Subject: add test for sparse feature PhraseLengthFeature with nbest --- .../filter-nbest.pl | 12 ++++++ .../filter-stderr.pl | 22 +++++++++++ .../filter-stdout.pl | 7 ++++ tests/phrase.phrase-length-feature.nbest/moses.ini | 46 ++++++++++++++++++++++ .../to-translate.txt | 2 + .../truth/results.txt | 34 ++++++++++++++++ tests/phrase.phrase-length-feature.nbest/weights | 7 ++++ 7 files changed, 130 insertions(+) create mode 100755 tests/phrase.phrase-length-feature.nbest/filter-nbest.pl create mode 100755 tests/phrase.phrase-length-feature.nbest/filter-stderr.pl create mode 100755 tests/phrase.phrase-length-feature.nbest/filter-stdout.pl create mode 100644 tests/phrase.phrase-length-feature.nbest/moses.ini create mode 100644 tests/phrase.phrase-length-feature.nbest/to-translate.txt create mode 100644 tests/phrase.phrase-length-feature.nbest/truth/results.txt create mode 100644 tests/phrase.phrase-length-feature.nbest/weights (limited to 'tests') diff --git a/tests/phrase.phrase-length-feature.nbest/filter-nbest.pl b/tests/phrase.phrase-length-feature.nbest/filter-nbest.pl new file mode 100755 index 0000000..705fcee --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/filter-nbest.pl @@ -0,0 +1,12 @@ +#!/usr/bin/perl +$x=0; +$oldcode = ""; +while (<>) { + chomp; + ($code,$trans,$featscores,$globscores) = split(/[\s]*\|\|\|[\s]*/,$_); + $x = 0 if $oldcode ne $code; + $x++; + chomp($code); + print "TRANSLATION_${code}_NBEST_${x}=$trans ||| $featscores\n"; + $oldcode = $code; +} diff --git a/tests/phrase.phrase-length-feature.nbest/filter-stderr.pl b/tests/phrase.phrase-length-feature.nbest/filter-stderr.pl new file mode 100755 index 0000000..2f6e176 --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/filter-stderr.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; } +use RegTestUtils; + +$x=0; +while (<>) { + chomp; + + if (/^Finished loading LanguageModels/) { + my $time = RegTestUtils::readTime($_); + print "LMLOAD_TIME ~ $time\n"; + } + if (/^Finished loading phrase tables/) { + my $time = RegTestUtils::readTime($_); + print "PTLOAD_TIME ~ $time\n"; + } + next unless /^BEST TRANSLATION:/; + my $pscore = RegTestUtils::readHypoScore($_); + $x++; + print "SCORE_$x = $pscore\n"; +} diff --git a/tests/phrase.phrase-length-feature.nbest/filter-stdout.pl b/tests/phrase.phrase-length-feature.nbest/filter-stdout.pl new file mode 100755 index 0000000..476ddf6 --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/filter-stdout.pl @@ -0,0 +1,7 @@ +#!/usr/bin/perl +$x=0; +while (<>) { + chomp; + $x++; + print "TRANSLATION_$x=$_\n"; +} diff --git a/tests/phrase.phrase-length-feature.nbest/moses.ini b/tests/phrase.phrase-length-feature.nbest/moses.ini new file mode 100644 index 0000000..5437359 --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/moses.ini @@ -0,0 +1,46 @@ +# moses.ini for regression test + +# limit on how many phrase translations e for each phrase f are loaded +[ttable-limit] +#ttable element load limit 0 = all elements loaded +20 + +[distortion-limit] +4 + +[beam-threshold] +0.03 + +[input-factors] +0 + +[mapping] +T 0 + + +[verbose] +2 + +[feature] +WordPenalty +Distortion +KENLM lazyken=0 order=3 factor=0 path=${LM_PATH}/europarl.en.srilm.gz +PhraseLengthFeature +PhraseDictionaryMemory num-features=5 input-factor=0 output-factor=0 path=${MODEL_PATH}/ptable-with-alignment/phrase-table.gz +UnknownWordPenalty + +[weight] +WordPenalty0= -0.273416114951401 +KENLM0= 0.142658800199951 +Distortion0= 0.141806519223522 +PhraseDictionaryMemory0= 0.00402447059454402 0.0685647475075862 0.294089113124688 0.0328320356515851 -0.0426081987467227 + + +[weight-file] +${TEST_PATH}/weights + +[n-best-list] +nbest +100 + + diff --git a/tests/phrase.phrase-length-feature.nbest/to-translate.txt b/tests/phrase.phrase-length-feature.nbest/to-translate.txt new file mode 100644 index 0000000..78d7ca1 --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/to-translate.txt @@ -0,0 +1,2 @@ +vor allem aber die Anwalt +Hand hat Mehrheit diff --git a/tests/phrase.phrase-length-feature.nbest/truth/results.txt b/tests/phrase.phrase-length-feature.nbest/truth/results.txt new file mode 100644 index 0000000..05f3b14 --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/truth/results.txt @@ -0,0 +1,34 @@ +TRANSLATION_1=especially the lawyers +TRANSLATION_2=country majority +SCORE_1 = 7.298 +SCORE_2 = -4.648 +TRANSLATION_0_NBEST_1=especially the lawyers ||| Distortion0= 0 KENLM0= -34.6013 WordPenalty0= -3 PhraseLengthFeature0_1,1= 1 PhraseLengthFeature0_4,2= 1 PhraseLengthFeature0_s4= 1 PhraseLengthFeature0_t2= 1 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_t1= 1 PhraseDictionaryMemory0= -0.693147 -1.47855 -0.693147 -9.42962 1.99979 +TRANSLATION_0_NBEST_2=especially the lawyers ' ||| Distortion0= 0 KENLM0= -36.962 WordPenalty0= -4 PhraseLengthFeature0_1,1= 0 PhraseLengthFeature0_4,2= 1 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_s4= 1 PhraseLengthFeature0_t2= 2 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_t1= 0 PhraseDictionaryMemory0= 0 -3.80582 -0.693147 -9.42962 1.99979 +TRANSLATION_0_NBEST_3=especially lawyers but the ||| Distortion0= -5 KENLM0= -39.2873 WordPenalty0= -4 PhraseLengthFeature0_s2= 2 PhraseLengthFeature0_2,1= 1 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 1 PhraseLengthFeature0_t2= 1 PhraseLengthFeature0_2,2= 1 PhraseDictionaryMemory0= -2.07944 -2.03816 -1.38629 -6.09741 2.99969 +TRANSLATION_0_NBEST_4=especially lawyers ' but the ||| Distortion0= -5 KENLM0= -43.5229 WordPenalty0= -5 PhraseLengthFeature0_s2= 2 PhraseLengthFeature0_2,1= 1 PhraseLengthFeature0_t1= 1 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 0 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 2 PhraseLengthFeature0_2,2= 1 PhraseDictionaryMemory0= -1.38629 -4.36544 -1.38629 -6.09741 2.99969 +TRANSLATION_0_NBEST_5=especially lawyers the ||| Distortion0= -5 KENLM0= -35.606 WordPenalty0= -3 PhraseLengthFeature0_s2= 2 PhraseLengthFeature0_2,1= 2 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 1 PhraseLengthFeature0_t2= 0 PhraseLengthFeature0_2,2= 0 PhraseDictionaryMemory0= -6.68461 -1.47854 -1.38629 -9.42962 2.99969 +TRANSLATION_0_NBEST_6=especially lawyers ' the ||| Distortion0= -5 KENLM0= -38.3181 WordPenalty0= -4 PhraseLengthFeature0_s2= 2 PhraseLengthFeature0_2,1= 2 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 0 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 1 PhraseLengthFeature0_2,2= 0 PhraseDictionaryMemory0= -5.99146 -3.80582 -1.38629 -9.42962 2.99969 +TRANSLATION_1_NBEST_1=country majority ||| Distortion0= 0 KENLM0= -32.6072 WordPenalty0= -2 PhraseLengthFeature0_2,1= 1 PhraseLengthFeature0_s2= 1 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 1 PhraseDictionaryMemory0= -2.07944 0 0 -5.83773 1.99979 +TRANSLATION_1_NBEST_2=majority country ||| Distortion0= -5 KENLM0= -29.1587 WordPenalty0= -2 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 1 PhraseLengthFeature0_1,1= 1 PhraseLengthFeature0_2,1= 1 PhraseLengthFeature0_s2= 1 PhraseDictionaryMemory0= -2.07944 0 0 -5.83773 1.99979 +TRANSLATION_1_NBEST_3=country has majority ||| Distortion0= 0 KENLM0= -38.629 WordPenalty0= -3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_4=majority country has ||| Distortion0= -5 KENLM0= -36.1339 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_5=country , majority ||| Distortion0= 0 KENLM0= -37.5186 WordPenalty0= -3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_6=country majority has ||| Distortion0= -3 KENLM0= -38.744 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_7=has majority country ||| Distortion0= -4 KENLM0= -38.121 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_8=majority has country ||| Distortion0= -6 KENLM0= -36.8304 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_9=country majority , ||| Distortion0= -3 KENLM0= -36.6777 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_10=majority country , ||| Distortion0= -5 KENLM0= -34.7049 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_11=has country majority ||| Distortion0= -4 KENLM0= -40.6093 WordPenalty0= -3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -2.69848 -0.405465 -0.451985 -2.43142 2.99969 +TRANSLATION_1_NBEST_12=, majority country ||| Distortion0= -4 KENLM0= -36.4979 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_13=majority , country ||| Distortion0= -6 KENLM0= -35.4181 WordPenalty0= -3 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_14=, country majority ||| Distortion0= -4 KENLM0= -39.3217 WordPenalty0= -3 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 3 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 3 PhraseDictionaryMemory0= -6.12249 -1.79176 -1.70475 -6.13557 2.99969 +TRANSLATION_1_NBEST_15=country far , majority ||| Distortion0= 0 KENLM0= -47.4628 WordPenalty0= -4 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TRANSLATION_1_NBEST_16=far , majority country ||| Distortion0= -4 KENLM0= -44.5635 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TRANSLATION_1_NBEST_17=country has discarded majority ||| Distortion0= 0 KENLM0= -50.4891 WordPenalty0= -4 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.07944 -6.31627 -2.3979 -2.43142 2.99969 +TRANSLATION_1_NBEST_18=majority country far , ||| Distortion0= -5 KENLM0= -44.6491 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TRANSLATION_1_NBEST_19=country majority far , ||| Distortion0= -3 KENLM0= -47.8651 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TRANSLATION_1_NBEST_20=far , country majority ||| Distortion0= -4 KENLM0= -47.3873 WordPenalty0= -4 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TRANSLATION_1_NBEST_21=majority country has discarded ||| Distortion0= -5 KENLM0= -47.994 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.07944 -6.31627 -2.3979 -2.43142 2.99969 +TRANSLATION_1_NBEST_22=country majority has discarded ||| Distortion0= -3 KENLM0= -50.604 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_2,1= 0 PhraseLengthFeature0_s2= 0 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.07944 -6.31627 -2.3979 -2.43142 2.99969 +TRANSLATION_1_NBEST_23=majority far , country ||| Distortion0= -6 KENLM0= -46.6055 WordPenalty0= -4 PhraseLengthFeature0_t1= 2 PhraseLengthFeature0_s1= 3 PhraseLengthFeature0_1,1= 2 PhraseLengthFeature0_1,2= 1 PhraseLengthFeature0_t2= 1 PhraseDictionaryMemory0= -2.77259 -7.70257 -2.3979 -6.13557 2.99969 +TOTAL_WALLTIME ~ 3 diff --git a/tests/phrase.phrase-length-feature.nbest/weights b/tests/phrase.phrase-length-feature.nbest/weights new file mode 100644 index 0000000..a02a9df --- /dev/null +++ b/tests/phrase.phrase-length-feature.nbest/weights @@ -0,0 +1,7 @@ +PhraseLengthFeature0_4,2 4.5 +PhraseLengthFeature0_t2 2.3 +PhraseLengthFeature0_s4 5.343 +PhraseLengthFeature0_s1 -2.55 +PhraseLengthFeature0_t1 -0.232 +PhraseLengthFeature0_1,1 2.756 + -- cgit v1.2.3