diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-12-19 03:05:05 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-12-19 03:05:05 +0300 |
commit | 3e2e479ea0ad08172eec8ca674e09a6bc1af2de1 (patch) | |
tree | ca1889d3f3200692eb8b3e79205ce65607274f43 /scripts | |
parent | 36ff6fce0b6a82dd7237f888f04f54fe0636bb50 (diff) |
optimize.pl changed to MERT
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/optimize.pl | 71 |
1 files changed, 49 insertions, 22 deletions
diff --git a/scripts/optimize.pl b/scripts/optimize.pl index 14fd1db0..93587259 100755 --- a/scripts/optimize.pl +++ b/scripts/optimize.pl @@ -25,7 +25,7 @@ my $DEV = "dev"; GetOptions( "w|working-dir=s" => \$WORK, - "a|amunn-bin-dir=s" => \$AMUNN_DIR, + "a|amunmt-bin-dir=s" => \$AMUNN_DIR, "m|moses-bin-dir=s" => \$MOSES_DIR, "s|scorer=s" => \$SCORER, "i|maximum-iterations=i" => \$MAX_IT, @@ -35,24 +35,27 @@ GetOptions( "o|decoder-opts=s" => \$DECODER_OPTS, ); -my $AMUNN = "$AMUNN_DIR/amunn"; +my $AMUNN = "$AMUNN_DIR/amun"; my $MIRA = "$MOSES_DIR/kbmira"; +my $MERT = "$MOSES_DIR/mert"; my $EVAL = "$MOSES_DIR/evaluator"; my $EXTR = "$MOSES_DIR/extractor"; my $DEV_SRC = "$DEV.$SRC"; my $DEV_TRG = "$DEV.$TRG"; -my $CONFIG = "--sctype $SCORER --filter /work/wmt16/tools/scripts/cleanBPE"; +my $CONFIG = "--sctype $SCORER"; $WORK = File::Spec->rel2abs($WORK); execute("mkdir -p $WORK"); execute("$AMUNN $DECODER_OPTS --show-weights > $WORK/run1.dense"); +dense2init("$WORK/run1.dense", "$WORK/run1.initopt"); + execute("rm -rf $WORK/progress.txt"); for my $i (1 .. $MAX_IT) { unless(-s "$WORK/run$i.out") { - execute("cat $DEV_SRC | $AMUNN $DECODER_OPTS --load-weights $WORK/run$i.dense --n-best > $WORK/run$i.out"); + execute("cat $DEV_SRC | $AMUNN $DECODER_OPTS --load-weights $WORK/run$i.dense --n-best | perl -pe 's/\@\@ //g' > $WORK/run$i.out"); } execute("$EVAL $CONFIG --reference $DEV_TRG -n $WORK/run$i.out | tee -a $WORK/progress.txt"); @@ -60,11 +63,13 @@ for my $i (1 .. $MAX_IT) { unless(-s "$WORK/run$j.dense") { execute("$EXTR $CONFIG --reference $DEV_TRG -n $WORK/run$i.out -S $WORK/run$i.scores.dat -F $WORK/run$i.features.dat"); - my $SCORES = join(" ", map { "$WORK/run$_.scores.dat" } (1 .. $i)); - my $FEATURES = join(" ", map { "$WORK/run$_.features.dat" } (1 .. $i)); + my $SCORES = join(",", map { "$WORK/run$_.scores.dat" } (1 .. $i)); + my $FEATURES = join(",", map { "$WORK/run$_.features.dat" } (1 .. 
$i)); - execute("$MIRA --sctype $SCORER -S $SCORES -F $FEATURES -d $WORK/run$i.dense -o $WORK/run$j.dense 2> $WORK/mira.run$i.log"); - normalizeWeights("$WORK/run$j.dense"); + execute("$MERT --sctype $SCORER --scfile $SCORES --ffile $FEATURES --ifile $WORK/run$i.initopt -d 9 -n 20 -m 20 --threads 20 2> $WORK/mert.run$i.log"); + + log2dense("$WORK/mert.run$i.log", "$WORK/run$j.dense"); + dense2init("$WORK/run$j.dense", "$WORK/run$j.initopt"); } execute("cp $WORK/run$j.dense $WORK/weights.txt") } @@ -81,29 +86,51 @@ sub execute { } } -sub normalizeWeights { - my $path = shift; - my ($temp_h, $temp) = tempfile(); - open(OLD, "<", $path) or die "can't open $path: $!"; +sub log2dense { + my $log = shift; + my $dense = shift; + + open(OLD, "<", $log) or die "can't open $log: $!"; + open(NEW, ">", $dense) or die "can't open $dense: $!"; my @weights; - my $sum = 0; - while (<OLD>) { + while(<OLD>) { chomp; - if (/^(F\d+) (.+)$/) { - push(@weights, [$1, $2]); - $sum += abs($2); + if (/^Best point: (.*?) =>/) { + @weights = split(/\s/, $1); } } - close(OLD) or die "can't close $path: $!"; + close(OLD) or die "can't close $log: $!"; + my $i = 0; foreach(@weights) { - print $temp_h $_->[0], "= ", $_->[1]/$sum, "\n"; + print NEW "F$i= ", $_, "\n"; + $i++; } - close($temp_h); - execute("mv $temp $path"); - #rename($temp, $path) or die "can't rename $temp to $path: $!"; + close(NEW); } +sub dense2init { + my $dense = shift; + my $init = shift; + + open(OLD, "<", $dense) or die "can't open $dense: $!"; + open(NEW, ">", $init) or die "can't open $init: $!"; + + my @weights; + while(<OLD>) { + chomp; + if (/^F\d+= (\S*)$/) { + push(@weights, $1); + } + } + close(OLD) or die "can't close $dense: $!"; + print NEW join(" ", @weights), "\n"; + print NEW "0 " x scalar @weights, "\n"; + print NEW "1 " x scalar @weights, "\n"; + close(NEW); +} + + sub logMessage { my $message = shift; my $time = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime()); |