Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <junczys@amu.edu.pl>2016-04-22 23:33:25 +0300
committerMarcin Junczys-Dowmunt <junczys@amu.edu.pl>2016-04-22 23:33:25 +0300
commitb2734ec72c2c7728953ca13ab36ac6e353cc16f9 (patch)
tree8b72e6f21d80123218b2857aa8b6698b3122e0b5 /scripts
parentafb0f1a8adf9827436f8ffd7f1c0cedb46063edf (diff)
rescorer wrapper
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/rescore.pl75
-rwxr-xr-xscripts/wrapper.pl36
2 files changed, 111 insertions, 0 deletions
diff --git a/scripts/rescore.pl b/scripts/rescore.pl
new file mode 100755
index 00000000..82445cc5
--- /dev/null
+++ b/scripts/rescore.pl
@@ -0,0 +1,75 @@
+#!/usr/bin/env perl
+# Re-scores an n-best list: runs the rescorer once per model (-m, paired by
+# position with the feature names given via -f) and prints the result to STDOUT
+# with the new scores moved behind the weights-file feature that precedes them.
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Temp qw(tempfile);
+
+my ($RESCORER, $INPUT, $NBEST, $WEIGHTS, $VSRC, $VTRG, @MODELS, @FEATURES);
+
+GetOptions(
+    "i|input=s"    => \$INPUT,    "n|n-best=s"  => \$NBEST,
+    "f|features=s" => \@FEATURES, "m|models=s"  => \@MODELS,
+    "s|source=s"   => \$VSRC,     "t|target=s"  => \$VTRG,
+    "r|rescorer=s" => \$RESCORER, "w|weights=s" => \$WEIGHTS
+) or die "Invalid command line options\n";
+
+# Fail early instead of interpolating undef into file names and shell commands.
+die "Options -i -n -w -r -s -t are all required\n" if grep { !defined($_) } ($INPUT, $NBEST, $WEIGHTS, $RESCORER, $VSRC, $VTRG);
+die "Need at least one -m and a -f for every -m\n" if !@MODELS or @FEATURES < @MODELS;
+
+# $BEFORE: weights-file entry (name plus "=") listed just before the first new
+# feature, or the last entry of the file when that feature is not listed.
+my $BEFORE;
+open(my $WEIGHTS_IN, "<", $WEIGHTS) or die "Could not open $WEIGHTS: $!";
+while (my $line = <$WEIGHTS_IN>) {
+    my ($CURRENT) = split(" ", $line);
+    next unless defined $CURRENT;    # blank line
+    if (defined $BEFORE and $CURRENT eq "$FEATURES[0]=") {
+        print STDERR "Found $FEATURES[0] after $BEFORE\n";
+        last;
+    }
+    $BEFORE = $CURRENT;
+}
+close($WEIGHTS_IN);
+die "No feature names found in $WEIGHTS\n" unless defined $BEFORE;
+
+# Copy the n-best list to a temp file, dropping old values of the new features.
+my ($NBEST_TEMP_HANDLE, $NBEST_TEMP_FILE1) = tempfile(UNLINK => 1);
+my (undef, $NBEST_TEMP_FILE2) = tempfile(UNLINK => 1);
+my $STALE = join("|", map { quotemeta($_) } @FEATURES);
+open(my $NBEST_IN, "<", $NBEST) or die "Could not open $NBEST: $!";
+while (my $line = <$NBEST_IN>) {
+    chomp $line;
+    $line =~ s/(?<!\S)(?:$STALE)= \S+ //g;
+    print $NBEST_TEMP_HANDLE $line, "\n";
+}
+close($NBEST_IN);
+close($NBEST_TEMP_HANDLE) or die "Could not write $NBEST_TEMP_FILE1: $!";
+
+# Chain the runs: the output of one rescorer run is the input of the next.
+foreach my $i (0 .. $#MODELS) {
+    system("$RESCORER -i $INPUT -m $MODELS[$i] -s $VSRC -t $VTRG -f $FEATURES[$i] -n $NBEST_TEMP_FILE1 > $NBEST_TEMP_FILE2") == 0
+        or die "Rescorer failed for $MODELS[$i] (status $?)\n";
+    rename($NBEST_TEMP_FILE2, $NBEST_TEMP_FILE1) or die "Could not rename $NBEST_TEMP_FILE2: $!";
+}
+
+# Escape only the names: quotemeta over a whole pattern also escapes \S+ and
+# then nothing matches.  The anchor feature may carry several values.
+my $NEW = join(" ", map { quotemeta("$_=") . " \\S+" } @FEATURES[0 .. $#MODELS]);
+my $ANCHOR = quotemeta($BEFORE) . "(?: [^\\s=|]+)+ ";
+open(my $RESCORED, "<", $NBEST_TEMP_FILE1) or die "Could not open $NBEST_TEMP_FILE1: $!";
+while (my $line = <$RESCORED>) {
+    chomp $line;
+    my $moved = $line;
+    if ($moved =~ s/(?<!\S)($NEW) ?//) {
+        my $FEAT = $1;
+        # Commit only if re-insertion works, so a score is never dropped.
+        $line = $moved if $moved =~ s/(?<!\S)($ANCHOR)/$1$FEAT /;
+    }
+    print "$line\n";
+}
+close($RESCORED);
+
diff --git a/scripts/wrapper.pl b/scripts/wrapper.pl
new file mode 100755
index 00000000..07592ef6
--- /dev/null
+++ b/scripts/wrapper.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl
+# Wrapper around the Moses decoder: runs Moses with the given arguments and
+# then replaces the n-best list it names with the output of rescore.pl.
+use strict;
+use warnings;
+
+my $MOSES = "/work/mosesdecoder/bin/moses";
+my $RESCORER = "/work/amunn/build/bin/rescorer";
+my $RESCORER_WRAPPER = "/work/amunn/scripts/rescore.pl";
+my $NMT = "/work/wmt16/work/mjd.en-ru.penn/work.en-ru/nmt.ru-en";
+
+my $MODELS = join(" ", map { "-m $NMT/$_" } qw(model.iter510000.npz model.iter540000.npz model.iter570000.npz));
+my ($SVCB, $TVCB) = map { "$NMT/$_" } qw(vocab.ru vocab.en);
+my $FEATURES = join(" ", map { "-f $_" } qw(N0 N1 N2));
+
+# Re-quote the value that follows a weight-overwrite option: @ARGV is joined
+# into a single shell command string below.  A trailing option has no value.
+for my $i (0 .. $#ARGV - 1) {
+    $ARGV[$i+1] = "'" . $ARGV[$i+1] . "'" if $ARGV[$i] =~ /weight-overwrite/;
+}
+my $opts = join(" ", @ARGV);
+
+if ($opts =~ /-show-weights/) {
+    exec("$MOSES $opts") or die "Could not exec $MOSES: $!\n";
+}
+my ($nbest) = $opts =~ /-n-best-list (run.*?\.best100\.out)/ or die "No -n-best-list run*.best100.out in: $opts\n";
+my ($input) = $opts =~ /-input-file (\S+)/ or die "No -input-file in: $opts\n";
+print STDERR "OPTS: $opts\n";
+system("$MOSES $opts") == 0 or die "$MOSES failed (status $?)\n";
+# Rescore into a scratch file: redirecting straight into $nbest would let the
+# shell truncate the list before rescore.pl has read it.
+system("$RESCORER_WRAPPER -r $RESCORER $MODELS $FEATURES -s $SVCB -t $TVCB -n $nbest -i $input -w features.list > $nbest.rescored") == 0
+    or die "$RESCORER_WRAPPER failed (status $?)\n";
+rename("$nbest.rescored", $nbest) or die "Could not replace $nbest: $!\n";
+
+ \ No newline at end of file