diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-04-22 23:33:25 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-04-22 23:33:25 +0300 |
commit | b2734ec72c2c7728953ca13ab36ac6e353cc16f9 (patch) | |
tree | 8b72e6f21d80123218b2857aa8b6698b3122e0b5 /scripts | |
parent | afb0f1a8adf9827436f8ffd7f1c0cedb46063edf (diff) |
rescorer wrapper
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/rescore.pl | 75 | ||||
-rwxr-xr-x | scripts/wrapper.pl | 36 |
2 files changed, 111 insertions, 0 deletions
diff --git a/scripts/rescore.pl b/scripts/rescore.pl
new file mode 100755
index 00000000..82445cc5
--- /dev/null
+++ b/scripts/rescore.pl
@@ -0,0 +1,122 @@
+#!/usr/bin/env perl
+
+# Rescore an n-best list with one or more external models.
+#
+# Usage:
+#   rescore.pl -r RESCORER -i INPUT -n NBEST -w WEIGHTS -s SRC_VOCAB \
+#              -t TRG_VOCAB -m MODEL -f FEATURE [-m MODEL -f FEATURE ...]
+#
+# Steps:
+#   1. Find the feature that precedes the first -f feature in WEIGHTS.
+#   2. Copy NBEST to a temporary file, dropping existing values of the -f features.
+#   3. Run RESCORER once per model; each run rewrites the temporary n-best list.
+#   4. Move the new feature scores directly behind the feature from step 1 and
+#      print the result to STDOUT.
+
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Temp qw(tempfile);
+
+my ($RESCORER, $INPUT, $NBEST, $WEIGHTS, $VSRC, $VTRG);
+my (@MODELS, @FEATURES);
+
+GetOptions(
+    "i|input=s"    => \$INPUT,
+    "n|n-best=s"   => \$NBEST,
+    "f|features=s" => \@FEATURES,
+    "m|models=s"   => \@MODELS,
+    "s|source=s"   => \$VSRC,
+    "t|target=s"   => \$VTRG,
+    "r|rescorer=s" => \$RESCORER,
+    "w|weights=s"  => \$WEIGHTS,
+) or die "Invalid command line options\n";
+
+my %REQUIRED = (
+    input => $INPUT, "n-best" => $NBEST, source => $VSRC, target => $VTRG,
+    rescorer => $RESCORER, weights => $WEIGHTS,
+);
+foreach my $name (sort keys %REQUIRED) {
+    die "Missing required option --$name\n" unless defined $REQUIRED{$name};
+}
+die "Need at least one --features and one for every --models\n"
+    unless @FEATURES && @FEATURES >= @MODELS;
+
+# Step 1: the first whitespace-separated token of each weights line is taken
+# as a feature name ("Name="). Remember the one just before the first new feature.
+my $BEFORE;
+my $FOUND = 0;
+open(my $weights_fh, "<", $WEIGHTS)
+    or die "Could not open weights file '$WEIGHTS': $!\n";
+while (my $line = <$weights_fh>) {
+    my ($current) = split(' ', $line);
+    next unless defined $current;
+    if ($current eq "$FEATURES[0]=") {
+        $FOUND = 1;
+        last;
+    }
+    $BEFORE = $current;
+}
+close($weights_fh);
+
+if ($FOUND && defined $BEFORE) {
+    print STDERR "Found $FEATURES[0] after $BEFORE\n";
+}
+else {
+    # Nothing to anchor on: keep the order produced by the rescorer.
+    print STDERR "Could not find a feature before $FEATURES[0] in $WEIGHTS; not reordering\n";
+    undef $BEFORE;
+}
+
+# Step 2: UNLINK makes File::Temp remove the temporary files at exit.
+my ($NBEST_TEMP_HANDLE, $NBEST_TEMP_FILE1) = tempfile(UNLINK => 1);
+my ($SCRATCH_HANDLE, $NBEST_TEMP_FILE2) = tempfile(UNLINK => 1);
+close($SCRATCH_HANDLE);
+
+open(my $nbest_in, "<", $NBEST)
+    or die "Could not open n-best list '$NBEST': $!\n";
+while (my $line = <$nbest_in>) {
+    chomp $line;
+    foreach my $name (@FEATURES) {
+        # \Q..\E: the name is literal text; (?<!\S): do not match a name suffix.
+        $line =~ s/(?<!\S)\Q$name\E= \S+ //g;
+    }
+    print {$NBEST_TEMP_HANDLE} $line, "\n";
+}
+close($nbest_in);
+close($NBEST_TEMP_HANDLE) or die "Could not write '$NBEST_TEMP_FILE1': $!\n";
+
+# Step 3: the output of one run is the n-best input of the next.
+foreach my $i (0 .. $#MODELS) {
+    my $cmd = "$RESCORER -i $INPUT -m $MODELS[$i] -s $VSRC -t $VTRG"
+            . " -f $FEATURES[$i] -n $NBEST_TEMP_FILE1 > $NBEST_TEMP_FILE2";
+    system($cmd) == 0 or die "Rescorer failed (status $?): $cmd\n";
+    rename($NBEST_TEMP_FILE2, $NBEST_TEMP_FILE1)
+        or die "Could not rename '$NBEST_TEMP_FILE2': $!\n";
+}
+
+# Step 4: build the patterns from regex source plus quoted names. (Quoting the
+# whole pattern would turn \S+ into literal text and nothing would ever match.)
+my $NEW = join(" ", map { quotemeta($_) . '= \S+' } @FEATURES);
+my $PATTERN1 = qr/(?<!\S)($NEW)(?:\s+|\z)/;
+# The anchor feature plus all of its values, i.e. the following tokens that
+# are neither another "Name=" nor the "|||" field separator.
+my $PATTERN2 = defined $BEFORE
+    ? qr/((?<!\S)\Q$BEFORE\E(?: (?!\S*=|\|\|\|)\S+)+)/
+    : undef;
+
+open(my $rescored, "<", $NBEST_TEMP_FILE1)
+    or die "Could not open '$NBEST_TEMP_FILE1': $!\n";
+while (my $line = <$rescored>) {
+    chomp $line;
+    if (defined $PATTERN2 && $line =~ $PATTERN2) {
+        # Work on a copy so a line is only changed when both steps succeed.
+        my $moved = $line;
+        if ($moved =~ s/$PATTERN1//) {
+            my $feat = $1;
+            $line = $moved if $moved =~ s/$PATTERN2/$1 $feat/;
+        }
+    }
+    print "$line\n";
+}
+close($rescored);
diff --git a/scripts/wrapper.pl b/scripts/wrapper.pl
new file mode 100755
index 00000000..07592ef6
--- /dev/null
+++ b/scripts/wrapper.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+# Wrapper around the Moses binary: forwards all arguments to Moses and then
+# replaces the n-best list named on the command line with a rescored version.
+# With -show-weights, Moses is exec'ed and no rescoring happens.
+
+use strict;
+use warnings;
+
+my $MOSES = "/work/mosesdecoder/bin/moses";
+my $RESCORER = "/work/amunn/build/bin/rescorer";
+my $RESCORER_WRAPPER = "/work/amunn/scripts/rescore.pl";
+
+my $NMT = "/work/wmt16/work/mjd.en-ru.penn/work.en-ru/nmt.ru-en";
+
+my $MODELS = join(" ", map { "-m $NMT/$_" } qw(model.iter510000.npz model.iter540000.npz model.iter570000.npz));
+my ($SVCB, $TVCB) = map { "$NMT/$_" } qw(vocab.ru vocab.en);
+my $FEATURES = join(" ", map { "-f $_" } qw(N0 N1 N2));
+
+# The arguments are re-joined into a shell command below, so wrap the value
+# that follows weight-overwrite in single quotes. Stopping one short of the
+# end avoids inventing an empty '' argument when the option is the last one.
+foreach my $i (0 .. $#ARGV - 1) {
+    if ($ARGV[$i] =~ /weight-overwrite/) {
+        $ARGV[$i + 1] = "'" . $ARGV[$i + 1] . "'";
+    }
+}
+
+my $opts = join(" ", @ARGV);
+
+if ($opts =~ /-show-weights/) {
+    exec("$MOSES $opts") or die "Could not exec $MOSES: $!\n";
+}
+
+print STDERR "OPTS: $opts\n";
+system("$MOSES $opts") == 0 or die "Moses failed (status $?)\n";
+
+my ($nbest) = $opts =~ /-n-best-list (run.*?.best100.out)/;
+my ($input) = $opts =~ /-input-file (\S+)/;
+die "No -n-best-list run*.best100.out in: $opts\n" unless defined $nbest;
+die "No -input-file in: $opts\n" unless defined $input;
+
+# Rescore into a sibling file first: redirecting straight into $nbest would
+# make the shell truncate the list before rescore.pl gets to read it.
+my $rescored = "$nbest.rescored.$$";
+my $cmd = "$RESCORER_WRAPPER -r $RESCORER $MODELS $FEATURES -s $SVCB -t $TVCB"
+        . " -n $nbest -i $input -w features.list > $rescored";
+if (system($cmd) != 0) {
+    my $status = $?;
+    unlink($rescored);
+    die "Rescoring failed (status $status): $cmd\n";
+}
+rename($rescored, $nbest) or die "Could not replace '$nbest': $!\n";
\ No newline at end of file |