1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
#!/usr/bin/env perl
use strict;
use POSIX;
use File::Temp qw/ tempfile tempdir /;
# Remember the id of this process; execute() signals it when a command fails.
my $PID = $$;
# TERM, INT and QUIT all share one handler that simply dies, so the script
# unwinds through Perl's normal die path instead of being killed outright.
@SIG{qw(TERM INT QUIT)} = (sub { die; }) x 3;
use Getopt::Long;
use File::Spec;
# Iterative weight tuning driver.
#
# Each iteration decodes the dev source with the amunn binary using the
# current dense weights, scores the n-best output with the Moses evaluator,
# extracts score/feature data with the Moses extractor, and runs kbmira over
# the data of all iterations so far to produce the weights for the next
# iteration. Every external command goes through execute(), which aborts the
# script on the first failure.

# Defaults; every one of them can be overridden on the command line.
my $AMUNN_DIR = "";
my $MOSES_DIR = "";
my $DECODER_OPTS = "";
my $time = time();
my $WORK = "tuning.$time";
my $SCORER = "BLEU";
my $MAX_IT = 10;
my ($SRC, $TRG) = ("ru", "en");
my $DEV = "dev";
# Script handed to the evaluator/extractor via --filter. The default is the
# path that used to be hard-coded here, so existing invocations are unchanged.
my $FILTER = "/work/wmt16/tools/scripts/cleanBPE";

# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed a warning by then). Stop instead of silently running a
# long tuning job with default values.
GetOptions(
    "w|working-dir=s" => \$WORK,
    "a|amunn-bin-dir=s" => \$AMUNN_DIR,
    "m|moses-bin-dir=s" => \$MOSES_DIR,
    "s|scorer=s" => \$SCORER,
    "i|maximum-iterations=i" => \$MAX_IT,
    "d|dev=s" => \$DEV,
    "f=s" => \$SRC,
    "e=s" => \$TRG,
    "o|decoder-opts=s" => \$DECODER_OPTS,
    "filter=s" => \$FILTER,
) or die "Error in command line arguments\n"
    . "Usage: $0 [-w working-dir] [-a amunn-bin-dir] [-m moses-bin-dir] [-s scorer]\n"
    . "       [-i maximum-iterations] [-d dev] [-f src] [-e trg] [-o decoder-opts]\n"
    . "       [--filter script]\n";

# Tool locations derived from the two binary directories.
my $AMUNN = "$AMUNN_DIR/amunn";
my $MIRA = "$MOSES_DIR/kbmira";
my $EVAL = "$MOSES_DIR/evaluator";
my $EXTR = "$MOSES_DIR/extractor";

# Dev set files are "<dev prefix>.<language>".
my $DEV_SRC = "$DEV.$SRC";
my $DEV_TRG = "$DEV.$TRG";

# Options shared by the evaluator and the extractor.
my $CONFIG = "--sctype $SCORER --filter $FILTER";

$WORK = File::Spec->rel2abs($WORK);
execute("mkdir -p $WORK");

# Initial weights come from the decoder itself; the progress log starts fresh.
execute("$AMUNN $DECODER_OPTS --show-weights > $WORK/run1.dense");
execute("rm -rf $WORK/progress.txt");

for my $i (1 .. $MAX_IT) {
    # Decode only when there is no non-empty n-best output for this
    # iteration yet, so an interrupted run can be resumed in the same
    # working directory.
    unless (-s "$WORK/run$i.out") {
        execute("cat $DEV_SRC | $AMUNN $DECODER_OPTS --load-weights $WORK/run$i.dense --n-best > $WORK/run$i.out");
    }

    # Score this iteration; tee -a appends the result to the progress log.
    execute("$EVAL $CONFIG --reference $DEV_TRG -n $WORK/run$i.out | tee -a $WORK/progress.txt");

    my $j = $i + 1;
    # Likewise skip optimisation when the next weight file already exists.
    unless (-s "$WORK/run$j.dense") {
        execute("$EXTR $CONFIG --reference $DEV_TRG -n $WORK/run$i.out -S $WORK/run$i.scores.dat -F $WORK/run$i.features.dat");

        # kbmira is given the score/feature files of every iteration so far.
        my $SCORES = join(" ", map { "$WORK/run$_.scores.dat" } (1 .. $i));
        my $FEATURES = join(" ", map { "$WORK/run$_.features.dat" } (1 .. $i));

        execute("$MIRA --sctype $SCORER -S $SCORES -F $FEATURES -d $WORK/run$i.dense -o $WORK/run$j.dense 2> $WORK/mira.run$i.log");
        normalizeWeights("$WORK/run$j.dense");
    }

    # Keep weights.txt pointing at the most recent weights.
    execute("cp $WORK/run$j.dense $WORK/weights.txt");
}
# Run a shell command, logging it first, and abort the script if it fails.
#
# $command is passed to system() as a single string, so pipes and
# redirections inside it work.
#
# When system() returns a non-zero value the raw value is logged, followed by
# a decoded explanation (the raw value is a wait status, not an exit code).
# Then signal 2 is sent to the process recorded in $PID and this sub dies.
sub execute {
    my $command = shift;
    logMessage("Executing:\t$command");
    my $ret = system($command);
    # Save $! right away: it is only meaningful for the -1 case and the
    # logging calls below may overwrite it.
    my $os_error = "$!";
    if ($ret != 0) {
        logMessage("Command '$command' finished with return status $ret");
        if ($ret == -1) {
            logMessage("Command could not be started: $os_error");
        }
        elsif ($ret & 127) {
            # Low seven bits of the wait status hold the terminating signal.
            logMessage("Command was terminated by signal " . ($ret & 127));
        }
        else {
            # The exit code lives in the high byte of the wait status.
            logMessage("Command exited with code " . ($ret >> 8));
        }
        logMessage("Aborting and killing parent process");
        kill(2, $PID);
        die;
    }
}
# Rescale the weights in a dense weight file so that their absolute values
# sum to 1, rewriting the file in place.
#
# Input lines of the form "F<digits> <value>" are read from $path; every
# other line is dropped. Each kept weight is written back as
# "F<digits>= <value / sum of absolute values>".
#
# Dies when the file cannot be read, when it contains no weight lines, when
# the weights sum to zero, or when the replacement file cannot be written.
# The checks run before the original file is touched, so a bad input never
# gets replaced by an empty or partial file.
sub normalizeWeights {
    my $path = shift;

    open(my $old_h, "<", $path) or die "can't open $path: $!";
    my @weights;
    my $sum = 0;
    while (my $line = <$old_h>) {
        chomp $line;
        if ($line =~ /^(F\d+) (.+)$/) {
            push(@weights, [$1, $2]);
            $sum += abs($2);
        }
    }
    close($old_h) or die "can't close $path: $!";

    die "can't normalize $path: no weight lines found\n" unless @weights;
    die "can't normalize $path: weights sum to zero\n" if $sum == 0;

    # Create the temporary file only once the input is known to be usable,
    # so a failed validation leaves nothing behind.
    my ($temp_h, $temp) = tempfile();
    foreach my $weight (@weights) {
        print $temp_h $weight->[0], "= ", $weight->[1]/$sum, "\n"
            or die "can't write to $temp: $!";
    }
    # Buffered write errors only surface at close time.
    close($temp_h) or die "can't close $temp: $!";

    # mv rather than rename(): the temporary file may be on another filesystem.
    execute("mv $temp $path");
}
# Write one timestamped line to STDERR.
#
# The line is "<MM/DD/YYYY HH:MM:SS>\t<message>\n", using local time.
sub logMessage {
    my ($text) = @_;
    my $stamp = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime);
    print STDERR sprintf("%s\t%s\n", $stamp, $text);
}
# Return the line count of the file at $path as reported by `wc -l`.
#
# The count is obtained through the shell; adding 0 turns the command's
# textual output into a plain number.
sub wc {
    my ($path) = @_;
    my $raw_count = `wc -l < '$path'`;
    return $raw_count + 0;
}
|