diff options
-rw-r--r-- | Makefile.am | 4 | ||||
-rw-r--r-- | irstlm/README | 7 | ||||
l--------- | irstlm/mkinstalldirs | 1 | ||||
-rwxr-xr-x | irstlm/regenerate-makefiles.sh | 15 | ||||
-rw-r--r-- | irstlm/src/Makefile.am | 10 | ||||
-rw-r--r-- | moses/src/Makefile.am | 8 | ||||
-rwxr-xr-x | scripts/generic/multi-bleu.perl | 47 | ||||
-rwxr-xr-x | scripts/training/cmert-0.5/bleu.py | 17 | ||||
-rwxr-xr-x | scripts/training/cmert-0.5/score-nbest.py | 12 |
9 files changed, 77 insertions, 44 deletions
diff --git a/Makefile.am b/Makefile.am index 3fe48e8c4..b2a065f6f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,8 +1,6 @@ # not a GNU package. You can remove this line, if # have all needed files, that a GNU package needs -#AUTOMAKE_OPTIONS = foreign 1.4 - AUTOMAKE_OPTIONS = foreign # order is important here: build moses before moses-cmd -SUBDIRS = moses/src moses-cmd/src +SUBDIRS = moses/src moses-cmd/src diff --git a/irstlm/README b/irstlm/README index 17a9b920f..ed8935d05 100644 --- a/irstlm/README +++ b/irstlm/README @@ -1,10 +1,7 @@ To build: - aclocal - autoconf - automake - - ./configure --with-prefix=PATH TO INSTALL (probably `pwd`) + ./regenerate-makefiles.sh + ./configure --with-prefix=PATH-TO-INSTALL (probably `pwd`) make make install diff --git a/irstlm/mkinstalldirs b/irstlm/mkinstalldirs new file mode 120000 index 000000000..8e772be8f --- /dev/null +++ b/irstlm/mkinstalldirs @@ -0,0 +1 @@ +../mkinstalldirs
\ No newline at end of file diff --git a/irstlm/regenerate-makefiles.sh b/irstlm/regenerate-makefiles.sh new file mode 100755 index 000000000..ae40a7a32 --- /dev/null +++ b/irstlm/regenerate-makefiles.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +echo "Calling aclocal..." +aclocal +echo "Calling autoconf..." +autoconf +echo "Calling automake..." +automake + +echo +echo "You should now be able to configure and build:" +echo " ./configure --prefix=/path/to/irstlm" +echo " make -j 4" +echo + diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am index a520680e7..c9b7c6129 100644 --- a/irstlm/src/Makefile.am +++ b/irstlm/src/Makefile.am @@ -9,11 +9,17 @@ libirstlm_a_SOURCES = \ ngramcache.cpp library_includedir=$(includedir) -library_include_HEADERS = dictionary.h lmtable.h n_gram.h +library_include_HEADERS = \ + dictionary.h \ + htable.h \ + lmtable.h \ + mempool.h \ + n_gram.h \ + ngramcache.h bin_PROGRAMS = compile-lm quantize-lm -AM_LDFLAGS=-L . +AM_LDFLAGS=-L. LIBS=-lirstlm compile_lm_SOURCES = compile-lm.cpp diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index c2ff4a2df..cff2e7f35 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -1,6 +1,6 @@ lib_LIBRARIES = libmoses.a AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -libmoses_a_SOURCES = \ +libmoses_a_SOURCES_TMP = \ ConfusionNet.cpp \ DecodeStep.cpp \ DecodeStep_Generation.cpp \ @@ -59,14 +59,14 @@ libmoses_a_SOURCES = \ if INTERNAL_LM -libmoses_a_SOURCES += NGramCollection.cpp LanguageModel_Internal.cpp +libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) NGramCollection.cpp LanguageModel_Internal.cpp endif if SRI_LM -libmoses_a_SOURCES += LanguageModel_SRI.cpp +libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) LanguageModel_SRI.cpp endif if IRST_LM -libmoses_a_SOURCES += LanguageModel_IRST.cpp +libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) LanguageModel_IRST.cpp endif diff --git a/scripts/generic/multi-bleu.perl b/scripts/generic/multi-bleu.perl index 9f00e349f..fa7c70bda 100755 --- a/scripts/generic/multi-bleu.perl +++ b/scripts/generic/multi-bleu.perl @@ -17,8 +17,6 @@ while(-e "$stem$ref") { } &add_to_ref($stem,\@REF) if -e $stem; -die "No reference sentences found!" if 0 == scalar @REF; - sub add_to_ref { my ($file,$REF) = @_; my $s=0; @@ -45,7 +43,7 @@ while(<STDIN>) { if (abs($length_translation_this_sentence-$length) < $closest_diff) { $closest_diff = abs($length_translation_this_sentence-$length); $closest_length = $length; -# print "$i: closest diff = abs($length_translation_this_sentence-$length)<BR>\n"; +# print "$s: closest diff = abs($length_translation_this_sentence-$length)<BR>\n"; } for(my $n=1;$n<=4;$n++) { my %REF_NGRAM_N = (); @@ -96,24 +94,41 @@ while(<STDIN>) { $s++; } my $brevity_penalty = 1; -if ($length_translation<$length_reference) { - $brevity_penalty = exp(1-$length_reference/$length_translation); +my $bleu = 0; + +my @bleu=(); + +for(my $n=1;$n<=4;$n++) { + if (defined ($TOTAL[$n])){ + $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0; +# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n"; + }else{ + $bleu[$n]=0; + } } -my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) + - my_log( $CORRECT[2]/$TOTAL[2] ) + - my_log( $CORRECT[3]/$TOTAL[3] ) + - my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4); -printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, %i sents, %i refs)\n", +if ($length_reference==0){ + printf "BLEU = 0, 0/0/0/0 (BP=0, ration=0, hyp_len=0, ref_len=0)\n"; + exit(1); +} + +if ($length_translation<$length_reference) { + $brevity_penalty = exp(1-$length_reference/$length_translation); +} +$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) + + my_log( $bleu[2] ) + + my_log( $bleu[3] ) + + my_log( $bleu[4] ) ) / 4) ; +printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, hyp_len=%d, ref_len=%d)\n", 100*$bleu, - 100*$CORRECT[1]/$TOTAL[1], - 100*$CORRECT[2]/$TOTAL[2], - 100*$CORRECT[3]/$TOTAL[3], - 100*$CORRECT[4]/$TOTAL[4], + 100*$bleu[1], + 100*$bleu[2], + 100*$bleu[3], + 100*$bleu[4], $brevity_penalty, $length_translation / $length_reference, - scalar @REF, - scalar @{$REF[0]}; + $length_translation, + $length_reference; sub my_log { return -9999999999 unless $_[0]; diff --git a/scripts/training/cmert-0.5/bleu.py b/scripts/training/cmert-0.5/bleu.py index 6a8dd42b0..2fcf16b5f 100755 --- a/scripts/training/cmert-0.5/bleu.py +++ b/scripts/training/cmert-0.5/bleu.py @@ -93,15 +93,14 @@ def cook_test(test, (reflens, refmaxcounts), n=4): result["reflen"] = min(reflens) elif eff_ref_len == "average": result["reflen"] = float(sum(reflens))/len(reflens) - - # Original: - '''min_diff = None - for reflen in reflens: - if min_diff is None or abs(reflen-len(test)) < min_diff: - min_diff = abs(reflen-len(test)) - result['reflen'] = reflen''' - - result["guess"] = [len(test)-k+1 for k in xrange(1,n+1)] + elif eff_ref_len == "closest": + min_diff = None + for reflen in reflens: + if min_diff is None or abs(reflen-len(test)) < min_diff: + min_diff = abs(reflen-len(test)) + result['reflen'] = reflen + + result["guess"] = [max(len(test)-k+1,0) for k in xrange(1,n+1)] result['correct'] = [0]*n counts = count_ngrams(test, n) diff --git a/scripts/training/cmert-0.5/score-nbest.py b/scripts/training/cmert-0.5/score-nbest.py index c89c994a8..4f99e5cdc 100755 --- a/scripts/training/cmert-0.5/score-nbest.py +++ b/scripts/training/cmert-0.5/score-nbest.py @@ -19,19 +19,19 @@ def process(sentnum, testsents): candsfile.write("%d %d\n" % (cur_sentnum, len(testsents))) for (sent,vector) in testsents: comps = bleu.cook_test(sent, cookedrefs[sentnum]) + if comps['testlen'] != comps['guess'][0]: sys.stderr.write("ERROR: test length != guessed 1-grams\n") - featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]), - " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]), - comps['reflen'])) - + featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]), + " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]), + comps['reflen'])) if __name__ == "__main__": import psyco psyco.full() import getopt - (opts,args) = getopt.getopt(sys.argv[1:], "casn", []) + (opts,args) = getopt.getopt(sys.argv[1:], "casen", []) for (opt,parm) in opts: if opt == "-c": @@ -40,6 +40,8 @@ if __name__ == "__main__": bleu.eff_ref_len = "average" if opt == "-s": bleu.eff_ref_len = "shortest" + if opt == "-e": + bleu.eff_ref_len = "closest" if opt == "-n": bleu.nonorm = 1 |