Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile.am 4
-rw-r--r-- irstlm/README 7
l--------- irstlm/mkinstalldirs 1
-rwxr-xr-x irstlm/regenerate-makefiles.sh 15
-rw-r--r-- irstlm/src/Makefile.am 10
-rw-r--r-- moses/src/Makefile.am 8
-rwxr-xr-x scripts/generic/multi-bleu.perl 47
-rwxr-xr-x scripts/training/cmert-0.5/bleu.py 17
-rwxr-xr-x scripts/training/cmert-0.5/score-nbest.py 12
9 files changed, 77 insertions, 44 deletions
diff --git a/Makefile.am b/Makefile.am
index 3fe48e8c4..b2a065f6f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,8 +1,6 @@
# not a GNU package. You can remove this line, if
# have all needed files, that a GNU package needs
-#AUTOMAKE_OPTIONS = foreign 1.4
-
AUTOMAKE_OPTIONS = foreign
# order is important here: build moses before moses-cmd
-SUBDIRS = moses/src moses-cmd/src
+SUBDIRS = moses/src moses-cmd/src
diff --git a/irstlm/README b/irstlm/README
index 17a9b920f..ed8935d05 100644
--- a/irstlm/README
+++ b/irstlm/README
@@ -1,10 +1,7 @@
To build:
- aclocal
- autoconf
- automake
-
- ./configure --with-prefix=PATH TO INSTALL (probably `pwd`)
+ ./regenerate-makefiles.sh
+ ./configure --with-prefix=PATH-TO-INSTALL (probably `pwd`)
make
make install
diff --git a/irstlm/mkinstalldirs b/irstlm/mkinstalldirs
new file mode 120000
index 000000000..8e772be8f
--- /dev/null
+++ b/irstlm/mkinstalldirs
@@ -0,0 +1 @@
+../mkinstalldirs
\ No newline at end of file
diff --git a/irstlm/regenerate-makefiles.sh b/irstlm/regenerate-makefiles.sh
new file mode 100755
index 000000000..ae40a7a32
--- /dev/null
+++ b/irstlm/regenerate-makefiles.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+echo "Calling aclocal..."
+aclocal
+echo "Calling autoconf..."
+autoconf
+echo "Calling automake..."
+automake
+
+echo
+echo "You should now be able to configure and build:"
+echo " ./configure --prefix=/path/to/irstlm"
+echo " make -j 4"
+echo
+
diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am
index a520680e7..c9b7c6129 100644
--- a/irstlm/src/Makefile.am
+++ b/irstlm/src/Makefile.am
@@ -9,11 +9,17 @@ libirstlm_a_SOURCES = \
ngramcache.cpp
library_includedir=$(includedir)
-library_include_HEADERS = dictionary.h lmtable.h n_gram.h
+library_include_HEADERS = \
+ dictionary.h \
+ htable.h \
+ lmtable.h \
+ mempool.h \
+ n_gram.h \
+ ngramcache.h
bin_PROGRAMS = compile-lm quantize-lm
-AM_LDFLAGS=-L .
+AM_LDFLAGS=-L.
LIBS=-lirstlm
compile_lm_SOURCES = compile-lm.cpp
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index c2ff4a2df..cff2e7f35 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -1,6 +1,6 @@
lib_LIBRARIES = libmoses.a
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
-libmoses_a_SOURCES = \
+libmoses_a_SOURCES_TMP = \
ConfusionNet.cpp \
DecodeStep.cpp \
DecodeStep_Generation.cpp \
@@ -59,14 +59,14 @@ libmoses_a_SOURCES = \
if INTERNAL_LM
-libmoses_a_SOURCES += NGramCollection.cpp LanguageModel_Internal.cpp
+libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) NGramCollection.cpp LanguageModel_Internal.cpp
endif
if SRI_LM
-libmoses_a_SOURCES += LanguageModel_SRI.cpp
+libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) LanguageModel_SRI.cpp
endif
if IRST_LM
-libmoses_a_SOURCES += LanguageModel_IRST.cpp
+libmoses_a_SOURCES = $(libmoses_a_SOURCES_TMP) LanguageModel_IRST.cpp
endif
diff --git a/scripts/generic/multi-bleu.perl b/scripts/generic/multi-bleu.perl
index 9f00e349f..fa7c70bda 100755
--- a/scripts/generic/multi-bleu.perl
+++ b/scripts/generic/multi-bleu.perl
@@ -17,8 +17,6 @@ while(-e "$stem$ref") {
}
&add_to_ref($stem,\@REF) if -e $stem;
-die "No reference sentences found!" if 0 == scalar @REF;
-
sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
@@ -45,7 +43,7 @@ while(<STDIN>) {
if (abs($length_translation_this_sentence-$length) < $closest_diff) {
$closest_diff = abs($length_translation_this_sentence-$length);
$closest_length = $length;
-# print "$i: closest diff = abs($length_translation_this_sentence-$length)<BR>\n";
+# print "$s: closest diff = abs($length_translation_this_sentence-$length)<BR>\n";
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
@@ -96,24 +94,41 @@ while(<STDIN>) {
$s++;
}
my $brevity_penalty = 1;
-if ($length_translation<$length_reference) {
- $brevity_penalty = exp(1-$length_reference/$length_translation);
+my $bleu = 0;
+
+my @bleu=();
+
+for(my $n=1;$n<=4;$n++) {
+ if (defined ($TOTAL[$n])){
+ $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
+# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
+ }else{
+ $bleu[$n]=0;
+ }
}
-my $bleu = $brevity_penalty * exp((my_log( $CORRECT[1]/$TOTAL[1] ) +
- my_log( $CORRECT[2]/$TOTAL[2] ) +
- my_log( $CORRECT[3]/$TOTAL[3] ) +
- my_log( $CORRECT[4]/$TOTAL[4] ) ) / 4);
-printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, %i sents, %i refs)\n",
+if ($length_reference==0){
+ printf "BLEU = 0, 0/0/0/0 (BP=0, ration=0, hyp_len=0, ref_len=0)\n";
+ exit(1);
+}
+
+if ($length_translation<$length_reference) {
+ $brevity_penalty = exp(1-$length_reference/$length_translation);
+}
+$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
+ my_log( $bleu[2] ) +
+ my_log( $bleu[3] ) +
+ my_log( $bleu[4] ) ) / 4) ;
+printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ration=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
- 100*$CORRECT[1]/$TOTAL[1],
- 100*$CORRECT[2]/$TOTAL[2],
- 100*$CORRECT[3]/$TOTAL[3],
- 100*$CORRECT[4]/$TOTAL[4],
+ 100*$bleu[1],
+ 100*$bleu[2],
+ 100*$bleu[3],
+ 100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
- scalar @REF,
- scalar @{$REF[0]};
+ $length_translation,
+ $length_reference;
sub my_log {
return -9999999999 unless $_[0];
diff --git a/scripts/training/cmert-0.5/bleu.py b/scripts/training/cmert-0.5/bleu.py
index 6a8dd42b0..2fcf16b5f 100755
--- a/scripts/training/cmert-0.5/bleu.py
+++ b/scripts/training/cmert-0.5/bleu.py
@@ -93,15 +93,14 @@ def cook_test(test, (reflens, refmaxcounts), n=4):
result["reflen"] = min(reflens)
elif eff_ref_len == "average":
result["reflen"] = float(sum(reflens))/len(reflens)
-
- # Original:
- '''min_diff = None
- for reflen in reflens:
- if min_diff is None or abs(reflen-len(test)) < min_diff:
- min_diff = abs(reflen-len(test))
- result['reflen'] = reflen'''
-
- result["guess"] = [len(test)-k+1 for k in xrange(1,n+1)]
+ elif eff_ref_len == "closest":
+ min_diff = None
+ for reflen in reflens:
+ if min_diff is None or abs(reflen-len(test)) < min_diff:
+ min_diff = abs(reflen-len(test))
+ result['reflen'] = reflen
+
+ result["guess"] = [max(len(test)-k+1,0) for k in xrange(1,n+1)]
result['correct'] = [0]*n
counts = count_ngrams(test, n)
diff --git a/scripts/training/cmert-0.5/score-nbest.py b/scripts/training/cmert-0.5/score-nbest.py
index c89c994a8..4f99e5cdc 100755
--- a/scripts/training/cmert-0.5/score-nbest.py
+++ b/scripts/training/cmert-0.5/score-nbest.py
@@ -19,19 +19,19 @@ def process(sentnum, testsents):
candsfile.write("%d %d\n" % (cur_sentnum, len(testsents)))
for (sent,vector) in testsents:
comps = bleu.cook_test(sent, cookedrefs[sentnum])
+
if comps['testlen'] != comps['guess'][0]:
sys.stderr.write("ERROR: test length != guessed 1-grams\n")
- featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]),
- " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]),
- comps['reflen']))
-
+ featsfile.write("%s %s %d\n" % (" ".join([str(v) for v in vector]),
+ " ".join(["%d %d" % (c,g) for (c,g) in zip(comps['correct'], comps['guess'])]),
+ comps['reflen']))
if __name__ == "__main__":
import psyco
psyco.full()
import getopt
- (opts,args) = getopt.getopt(sys.argv[1:], "casn", [])
+ (opts,args) = getopt.getopt(sys.argv[1:], "casen", [])
for (opt,parm) in opts:
if opt == "-c":
@@ -40,6 +40,8 @@ if __name__ == "__main__":
bleu.eff_ref_len = "average"
if opt == "-s":
bleu.eff_ref_len = "shortest"
+ if opt == "-e":
+ bleu.eff_ref_len = "closest"
if opt == "-n":
bleu.nonorm = 1