Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2016-04-22 20:46:30 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>, 2016-04-22 20:46:30 +0300
commit: afb0f1a8adf9827436f8ffd7f1c0cedb46063edf (patch)
tree: 70a7defabb9eaf0d90173172ea9109599900cded /scripts
parent: 2f5705aac57ce10147cd3c71531e604c745304fd (diff)
working parameter optimization, first version
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/optimize.pl 104
1 file changed, 84 insertions, 20 deletions
diff --git a/scripts/optimize.pl b/scripts/optimize.pl
index d6122b8a..82e600ee 100755
--- a/scripts/optimize.pl
+++ b/scripts/optimize.pl
@@ -1,17 +1,18 @@
-#!/bin/env perl
+#!/usr/bin/env perl
use strict;
+use POSIX;
+use File::Temp qw/ tempfile tempdir /;
+
+my $PID = $$;
+$SIG{TERM} = $SIG{INT} = $SIG{QUIT} = sub { die; };
use Getopt::Long;
-my $AMUNN = "/home/marcinj/Badania/amunn/build/bin/amunn";
-my $MOSES = "/data/smt/mosesMaster/bin";
+my $AMUNN_DIR = "";
+my $MOSES_DIR = "";
my $DECODER_OPTS = "";
-my $MIRA = "$MOSES/kbmira";
-my $EVAL = "$MOSES/evaluator";
-my $EXTR = "$MOSES/extractor";
-
my $time = time();
my $WORK = "tuning.$time";
my $SCORER = "BLEU";
@@ -22,29 +23,92 @@ my ($SRC, $TRG) = ("ru", "en");
my $DEV = "dev";
GetOptions(
- "working-dir=s" => \$WORK,
- "scorer=s" => \$SCORER,
- "maximum-iterations=i" => \$MAX_IT,
- "dev" => \$DEV,
+ "w|working-dir=s" => \$WORK,
+ "a|amunn-bin-dir=s" => \$AMUNN_DIR,
+ "m|moses-bin-dir=s" => \$MOSES_DIR,
+ "s|scorer=s" => \$SCORER,
+ "i|maximum-iterations=i" => \$MAX_IT,
+ "d|dev=s" => \$DEV,
"f=s" => \$SRC,
"e=s" => \$TRG,
- "decoder-opts=s" => \$DECODER_OPTS,
+ "o|decoder-opts=s" => \$DECODER_OPTS,
);
-system("mkdir -p $WORK");
+my $AMUNN = "$AMUNN_DIR/bin";
+my $MIRA = "$MOSES_DIR/kbmira";
+my $EVAL = "$MOSES_DIR/evaluator";
+my $EXTR = "$MOSES_DIR/extractor";
my $DEV_SRC = "$DEV.$SRC";
my $DEV_TRG = "$DEV.$TRG";
my $CONFIG = "--sctype $SCORER --filter /work/wmt16/tools/scripts/cleanBPE";
-system("$AMUNN $DECODER_OPTS --show-weights > $WORK/run1.dense");
-
+execute("mkdir -p $WORK");
+execute("$AMUNN $DECODER_OPTS --show-weights > $WORK/run1.dense");
+execute("rm -rf $WORK/progress.txt");
for my $i (1 .. $MAX_IT) {
- system("cat $DEV_SRC | $AMUNN $DECODER_OPTS --weights-file $WORK/run1.dense --n-best > $WORK/run$i.out");
- system("$EVAL $CONFIG --reference $DEV_TRG -n $WORK/run$i.out | tee -a $WORK/progress.txt");
- system("$EXTR $CONFIG --reference $DEV_TRG -n $WORK/run$i.out -S $WORK/run$i.scores.dat -F $WORK/run$i.features.dat");
+ unless(-s "$WORK/run$i.out") {
+ execute("cat $DEV_SRC | $AMUNN $DECODER_OPTS --load-weights $WORK/run$i.dense --n-best > $WORK/run$i.out");
+ }
+ execute("$EVAL $CONFIG --reference $DEV_TRG -n $WORK/run$i.out | tee -a $WORK/progress.txt");
+
my $j = $i + 1;
- system("$MIRA --sctype $SCORER -S $WORK/run$i.scores.dat -F $WORK/run$i.features.dat -d $WORK/run$i.dense -o $WORK/run$j.dense");
- system("cp $WORK/run$i.dense $WORK/weights.txt")
+ unless(-s "$WORK/run$j.dense") {
+ execute("$EXTR $CONFIG --reference $DEV_TRG -n $WORK/run$i.out -S $WORK/run$i.scores.dat -F $WORK/run$i.features.dat");
+
+ my $SCORES = join(" ", map { "$WORK/run$_.scores.dat" } (1 .. $i));
+ my $FEATURES = join(" ", map { "$WORK/run$_.features.dat" } (1 .. $i));
+
+ execute("$MIRA --sctype $SCORER -S $SCORES -F $FEATURES -d $WORK/run$i.dense -o $WORK/run$j.dense 2> $WORK/mira.run$i.log");
+ normalizeWeights("$WORK/run$j.dense");
+ }
+ execute("cp $WORK/run$j.dense $WORK/weights.txt")
+}
+
+sub execute {
+ my $command = shift;
+ logMessage("Executing:\t$command");
+ my $ret = system($command);
+ if($ret != 0) {
+ logMessage("Command '$command' finished with return status $ret");
+ logMessage("Aborting and killing parent process");
+ kill(2, $PID);
+ die;
+ }
+}
+
+sub normalizeWeights {
+ my $path = shift;
+ my ($temp_h, $temp) = tempfile();
+ open(OLD, "<", $path) or die "can't open $path: $!";
+
+ my @weights;
+ my $sum = 0;
+ while (<OLD>) {
+ chomp;
+ if (/^(F\d+) (.+)$/) {
+ push(@weights, [$1, $2]);
+ $sum += abs($2);
+ }
+ }
+ close(OLD) or die "can't close $path: $!";
+ foreach(@weights) {
+ print $temp_h $_->[0], "= ", $_->[1]/$sum, "\n";
+ }
+ close($temp_h);
+ rename($temp, $path) or die "can't rename $temp to $path: $!";
+}
+
+sub logMessage {
+ my $message = shift;
+ my $time = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime());
+ my $log_message = $time."\t$message\n";
+ print STDERR $log_message;
+}
+
+sub wc {
+ my $path = shift;
+ my $lineCount = `wc -l < '$path'` + 0;
+ return $lineCount;
}