Welcome to mirror list, hosted at ThFree Co, Russian Federation.

weight-scan.pl « analysis « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 7283483e95f082b3b0655eacc98f99632f29a4f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/env perl 
# runs Moses many times changing the values of one weight, all others fixed
# nbest lists are always produced to allow for comparison of real and
# 'projected' BLEU (BLEU estimated from n-best lists collected at a neighbouring
# node)
# usage: weight-scan.pl <input> <moses> <moses.ini> tm_2 --range=0.0,0.1,1.0

use strict;
use warnings;
use Getopt::Long;
use FindBin qw($RealBin);
use File::Basename;
use File::Path;
# Guess the root of the scripts tree from the location of this script.
# The helper scripts used below live in "generic/" under that root, so the
# script's own subdirectory has to be stripped.  The script may be installed
# under either training/ or analysis/; both are handled.  The SCRIPTS_ROOTDIR
# environment variable, when set, overrides the guess.
my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/(?:training|analysis)$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});

my $prec = 3; # used in the sprintf format that builds per-value file names
my $jobs = 0; # non-zero: run through moses-parallel.pl with this many jobs
my $workdir = "weight-scan"; # directory where all outputs are written
my $range = "0.0,0.1,1.0"; # start,step,stop of the scanned weight
my $input_type = 0; # passed to the decoder as -inputtype
my $normalize = 0; # divide all weights by the sum of their absolute values
my $nbestsize = 100; # size of the n-best lists to produce
my $decoderflags = ""; # extra flags passed verbatim to the decoder
my $moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl";
my $qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl";
my $queue_flags = "-hard";  # extra parameters for parallelizer
GetOptions(
  "jobs=i" => \$jobs,
  "range=s" => \$range,
  "working-dir=s" => \$workdir,
  "normalize!" => \$normalize,
  "nbest=i" => \$nbestsize,
  "decoderflags=s" => \$decoderflags,
) or exit 1;

# Positional arguments left in @ARGV after option parsing:
# input file, decoder binary, moses config and the weight to scan.
my $inf = shift;
my $decoder = shift;
my $config = shift;
my $weightspec = shift;

# All four positional arguments are mandatory; print the usage (listing every
# option accepted by GetOptions together with its default) and fail otherwise.
if (!defined $inf || ! defined $decoder || !defined $config || !defined $weightspec) {
  print STDERR "usage: $0 <input> <moses> <moses.ini> tm_2 --range=0.0,0.1,1.0
Options:
  --working-dir=weight-scan
  --jobs=0
  --range=0.0,0.1,1.0
  --[no]normalize
  --nbest=100
  --decoderflags=''
";
  exit 1;
}

print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";

# The parallelizer and the qsub wrapper are needed only for parallel runs.
# $jobs defaults to 0 and is therefore always defined, so its value (not its
# definedness) decides whether the helpers must be present.
die "Not executable: $moses_parallel_cmd" if $jobs && ! -x $moses_parallel_cmd;
die "Not executable: $qsubwrapper" if $jobs && ! -x $qsubwrapper;
die "Not executable: $decoder" if ! -x $decoder;

# The script later changes into the working directory, so every path given on
# the command line is converted to an absolute one first.
my $inf_abs = ensure_full_path($inf);
die "File not found: $inf (interpreted as $inf_abs)."
  if ! -e $inf_abs;
$inf = $inf_abs;

my $decoder_abs = ensure_full_path($decoder);
die "File not executable: $decoder (interpreted as $decoder_abs)."
  if ! -x $decoder_abs;
$decoder = $decoder_abs;

my $config_abs = ensure_full_path($config);
die "File not found: $config (interpreted as $config_abs)."
  if ! -e $config_abs;
$config = $config_abs;


# Parse --range into its three numeric components.
my ($startvalue, $step, $stopvalue) = split /,/, $range;
die "Bad range: $range; expected start,step,stop"
  if !defined $startvalue || !defined $step || !defined $stopvalue;
foreach my $part ($startvalue, $step, $stopvalue) {
  # reject anything that is not a plain decimal/scientific number
  die "Bad range: $range; '$part' is not a number\n"
    if $part !~ /^\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\s*$/;
}


my $featlist = get_featlist_from_moses($config);

# $weightidx is within features of the name $weightname
# $weightindex is global
# The index is the trailing "_<digits>" of the spec, if present; everything
# before it is the feature name, so names containing underscores still work.
my ($weightname, $weightidx);
if ($weightspec =~ /^(.+)_(\d+)$/) {
  ($weightname, $weightidx) = ($1, $2);
} else {
  $weightname = $weightspec;
}
my $weightindex;

# scan the weights, find the one we'll test and remember values of all of the
# given name
my $only_one_expected = 0;
if (!defined $weightidx) {
  $only_one_expected = 1;
  $weightidx = 0;
}
my @weightvalues = ();
my $idx = 0; # number of features named $weightname seen so far
for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
  my $name = $featlist->{"names"}->[$i];
  if ($name eq $weightname) {
    push @weightvalues, $featlist->{"values"}->[$i];
    $weightindex = $i if $idx == $weightidx; # remember the global index of the weight
    $idx++;
  }
}

die "You specified only '$weightspec' but there are $idx features of the given name.\nUse e.g.: ${weightspec}_0\n"
  if $only_one_expected && $idx > 1;
die "Failed to find weights of the name '$weightname' in moses config."
  if $idx == 0;
# the name exists but the requested index is past the last such feature
die "Index $weightidx is out of range: there are only $idx features named '$weightname'.\n"
  if !defined $weightindex;



# Store the current directory and create the working directory (if needed).
my $cwd = `pawd 2>/dev/null`; 
if(!$cwd){$cwd = `pwd`;}
chomp($cwd);

mkpath($workdir);
{
# open local scope

#chdir to the working directory
chdir($workdir) or die "Can't chdir to $workdir";

## MAIN LOOP
# A zero or negative step would never reach the stop value.
die "Bad range: $range; the step must be a positive number\n"
  if !($step > 0);

# Compute the number of steps up front and derive every value from the loop
# counter.  Accumulating "$weightvalue += $step" collects floating-point error
# and can overshoot the stop value, silently dropping the last point (e.g.
# 0.1+0.1+0.1 > 0.3).  The small epsilon absorbs the rounding error of the
# division; an empty range (stop < start) yields no iterations.
my $span = $stopvalue - $startvalue;
my $laststep = $span < 0 ? -1 : int($span / $step + 1e-9);
for my $stepnr (0 .. $laststep) {
  my $weightvalue = $startvalue + $stepnr * $step;
  run_decoder($featlist, $weightvalue);
}


#chdir back to the original directory # useless, just to remind we were not there
chdir($cwd) or warn "Can't chdir back to $cwd";
} # end of local scope

# Run the decoder once with the scanned weight set to $weightvalue.
#
# Arguments:
#   $featlist    - hashref with "names" and "values" array refs, as returned
#                  by get_featlist_from_moses
#   $weightvalue - value to use for the weight at global index $weightindex
#
# Writes "weights.<value>", "out.<value>" and the n-best list
# "best<nbestsize>.<value>" into the current directory.
# Returns the n-best list file name; dies if the decoder fails or the weights
# file cannot be written.
sub run_decoder {
    my ($featlist, $weightvalue) = @_;
    # NOTE(review): "%${prec}f" makes $prec the minimum field WIDTH, not the
    # number of decimals, so names carry sprintf's default 6 decimals.  Kept
    # as is because changing it would rename every output file.
    my $filebase = sprintf("%${prec}f", $weightvalue);
    my $nbestfilename = "best$nbestsize.$filebase";
    my $filename = "out.$filebase";
    
    # user-supplied parameters
    print STDERR "params = $decoderflags\n";

    # parameters to set all model weights (to override moses.ini)
    my @vals = @{$featlist->{"values"}};
    $vals[$weightindex] = $weightvalue; # set the one we're scanning
    if ($normalize) {
      print STDERR "Normalizing lambdas: @vals\n";
      my $totlambda = 0;
      $totlambda += abs($_) foreach @vals;
      # fail with a clear message instead of a bare division-by-zero error
      die "Cannot normalize lambdas: all weights are zero\n" if $totlambda == 0;
      $_ /= $totlambda foreach @vals;
    }
    # moses now does not seem accept "-tm X -tm Y" but needs "-tm X Y"
    my %model_weights;
    my @names = @{$featlist->{"names"}};
    for my $i (0 .. $#names) {
      my $name = $names[$i];
      $model_weights{$name} = "-$name" if !defined $model_weights{$name};
      $model_weights{$name} .= sprintf " %.6f", $vals[$i];
    }
    # sort the flag groups so that the command line is reproducible
    my $decoder_config = join(" ", map { $model_weights{$_} } sort keys %model_weights);
    print STDERR "DECODER_CFG = $decoder_config\n";

    # write the weights for future use
    my $weightsfn = "weights.$filebase";
    open(my $outf, ">", $weightsfn) or die "Can't write weights to $weightsfn: $!";
    print {$outf} join(" ", map { sprintf("%.6f", $_) } @vals)."\n";
    close($outf) or die "Can't close $weightsfn: $!";

    # run the decoder, either through the parallelizer or directly
    my $decoder_cmd;

    if ($jobs) {
      $decoder_cmd = "$moses_parallel_cmd -config $config -inputtype $input_type -qsub-prefix scan$weightvalue -queue-parameters \"$queue_flags\" -decoder-parameters \"$decoderflags $decoder_config\" -n-best-list \"$nbestfilename $nbestsize\" -input-file $inf -jobs $jobs -decoder $decoder > $filename";
    } else {
      $decoder_cmd = "$decoder $decoderflags  -config $config -inputtype $input_type $decoder_config -n-best-list $nbestfilename $nbestsize -input-file $inf > $filename";
    }

    safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";

    return $nbestfilename;
}

# Return the features known to moses and their initial values.
#
# Argument: path of the moses config file.
# The list is read from ./features.list in the current directory; when that
# file does not exist yet, it is created by running the decoder with
# -show-weights.  Each non-empty line is expected to hold three
# space-separated fields: long name, feature name, value.
#
# Returns a hashref {"names" => [...], "values" => [...]} with one entry per
# line, in file order.  Prints all problems found and exits with status 1 if
# any line is malformed; dies if the file cannot be produced or read.
sub get_featlist_from_moses {
  my $configfn = shift;
  my $featlistfn = "./features.list";
  if (-e $featlistfn) {
    print STDERR "Using cached features list: $featlistfn\n";
  } else {
    print STDERR "Asking moses for feature names and values from $configfn\n";
    my $cmd = "$decoder $decoderflags -config $configfn  -inputtype $input_type -show-weights > $featlistfn";
    if (!safesystem($cmd)) {
      # do not leave a partial file behind: it would be used as the cache
      # on the next run
      unlink $featlistfn;
      die "Failed to run moses with the config $configfn";
    }
  }

  # read feature list
  my @names = ();
  my @startvalues = ();
  open(my $ini, "<", $featlistfn) or die "Can't read $featlistfn: $!";
  my $nr = 0;
  my @errs = ();
  while (my $line = <$ini>) {
    $nr++;
    chomp $line;
    next if $line =~ /^\s*$/; # tolerate blank lines
    my ($longname, $feature, $value) = split / /, $line;
    if (!defined $feature || !defined $value) {
      push @errs, "$featlistfn:$nr:Malformed line, expected 3 fields: $line\n";
      next;
    }
    # accept plain decimals and scientific notation with a signed exponent
    # (e.g. 1e-05)
    push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
      if $value !~ /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
    #push @errs, "$featlistfn:$nr:Unknown feature '$feature', please add it to \@ABBR_FULL_MAP\n"
    #  if !defined $ABBR2FULL{$feature};
    push @names, $feature;
    push @startvalues, $value;
  }
  close $ini;
  if (scalar @errs) {
    print STDERR join("", @errs);
    exit 1;
  }
  return {"names"=>\@names, "values"=>\@startvalues};
}

# Run a command via system() and report how it ended on STDERR.
#
# Arguments are passed to system() unchanged (a single shell string or a
# program plus argument list).
# Returns true if the command exited with status 0 and false for any other
# exit status.  Exits the whole script with status 1 if the command could not
# be started or was terminated by a signal.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  my $status = $?; # keep the wait status before anything can overwrite it
  if ($status == -1) {
      print STDERR "Failed to execute: @_\n  $!\n";
      exit(1);
  }
  elsif ($status & 127) {
      # the command is passed as a %s argument, not spliced into the format,
      # so a '%' in the command cannot corrupt the message
      printf STDERR "Execution of: %s\n  died with signal %d, %s coredump\n",
          "@_", ($status & 127),  ($status & 128) ? 'with' : 'without';
      exit(1);
  }
  else {
    my $exitcode = $status >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return ! $exitcode;
  }
}

# Turn a possibly relative path into an absolute one.
#
# The first "/nfsmnt" found in the path is removed.  A path that is absolute
# after that is returned as is, without further normalization.  Otherwise the
# current directory (as printed by pawd, or by pwd when pawd prints nothing)
# is prepended and the result is tidied up: "/./" and repeated slashes are
# collapsed, "dir/.." pairs are resolved and trailing slashes are dropped.
sub ensure_full_path {
    my ($path) = @_;

    $path =~ s/\/nfsmnt//;
    if ($path =~ /^\//) {
        return $path;
    }

    # working directory: pawd first, pwd as the fallback
    my $here = `pawd 2>/dev/null`;
    $here = `pwd` unless $here;
    chomp($here);

    $path = join("/", $here, $path);
    $path =~ s/[\r\n]//g;
    $path =~ s/\/\.\//\//g;
    $path =~ s/\/+/\//g;

    # resolve "name/../" pairs; give up after a bounded number of passes
    my $passes = 0;
    while ($path =~ /\/\.\.\// && $passes++ < 10) {
        $path =~ s/\/+/\//g;
        $path =~ s/\/[^\/]+\/\.\.\//\//g;
    }

    $path =~ s/\/[^\/]+\/\.\.$//;   # a trailing "name/.."
    $path =~ s/\/+$//;              # trailing slashes
    $path =~ s/\/nfsmnt//;          # "/nfsmnt" introduced by the cwd
    return $path;
}