Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2016-04-22 23:33:25 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2016-04-22 23:33:25 +0300
commit: b2734ec72c2c7728953ca13ab36ac6e353cc16f9 (patch)
tree: 8b72e6f21d80123218b2857aa8b6698b3122e0b5 /scripts/rescore.pl
parent: afb0f1a8adf9827436f8ffd7f1c0cedb46063edf (diff)
rescorer wrapper
Diffstat (limited to 'scripts/rescore.pl')
-rwxr-xr-xscripts/rescore.pl75
1 files changed, 75 insertions, 0 deletions
diff --git a/scripts/rescore.pl b/scripts/rescore.pl
new file mode 100755
index 00000000..82445cc5
--- /dev/null
+++ b/scripts/rescore.pl
@@ -0,0 +1,93 @@
+#!/usr/bin/env perl
+# Rescorer wrapper: removes the features named by -f from an n-best list,
+# runs the external rescorer once per model (passing the matching feature
+# name), then moves the -f feature scores so they directly follow the
+# weights-file entry that precedes the first -f feature. Output goes to STDOUT.
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Temp qw(tempfile);
+
+my ($RESCORER, $INPUT, $NBEST, $WEIGHTS, $VSRC, $VTRG);
+my (@MODELS, @FEATURES);
+
+GetOptions(
+    "i|input=s"    => \$INPUT,
+    "n|n-best=s"   => \$NBEST,
+    "f|features=s" => \@FEATURES,
+    "m|models=s"   => \@MODELS,
+    "s|source=s"   => \$VSRC,
+    "t|target=s"   => \$VTRG,
+    "r|rescorer=s" => \$RESCORER,
+    "w|weights=s"  => \$WEIGHTS
+) or die "Invalid command line options\n";
+
+# Fail early with a clear message instead of interpolating undef into a command.
+my %REQUIRED = (input => $INPUT, "n-best" => $NBEST, source => $VSRC,
+                target => $VTRG, rescorer => $RESCORER, weights => $WEIGHTS);
+foreach my $opt (sort keys %REQUIRED) {
+    die "Missing required option --$opt\n" unless defined $REQUIRED{$opt};
+}
+die "Need at least one --features name per --models entry\n"
+    if !@FEATURES || @FEATURES < @MODELS;
+
+# Find the weights-file entry that precedes the first -f feature; the feature
+# scores are re-inserted right after it. Stays "LM1=" if nothing precedes it.
+my $BEFORE = "LM1=";
+open(my $W, "<", $WEIGHTS) or die "Could not open weights file '$WEIGHTS': $!\n";
+while (my $ENTRY = <$W>) {
+    my ($CURRENT) = split(/\s/, $ENTRY);
+    next unless defined $CURRENT && length $CURRENT;
+    if ($CURRENT eq "$FEATURES[0]=") {
+        print STDERR "Found $FEATURES[0] after $BEFORE\n";
+        last;
+    }
+    $BEFORE = $CURRENT;
+}
+close($W);
+
+# Strip the existing scores of the -f features from the n-best list. The
+# lookbehind keeps a name like "M1" from matching inside "LM1=". Both temp
+# files are removed at exit (UNLINK) instead of being left behind.
+my ($NBEST_TEMP_HANDLE, $NBEST_TEMP_FILE1) = tempfile(UNLINK => 1);
+my (undef, $NBEST_TEMP_FILE2) = tempfile(UNLINK => 1);
+open(my $NBEST_IN, "<", $NBEST) or die "Could not open n-best list '$NBEST': $!\n";
+while (<$NBEST_IN>) {
+    chomp;
+    foreach my $name (@FEATURES) {
+        s/(?<!\S)\Q$name\E= \S+ //g;
+    }
+    print $NBEST_TEMP_HANDLE $_, "\n";
+}
+close($NBEST_IN);
+close($NBEST_TEMP_HANDLE) or die "Could not write '$NBEST_TEMP_FILE1': $!\n";
+
+# The command goes through the shell because of the output redirection; abort
+# on failure rather than renaming a partial result over the previous one.
+foreach my $i (0 .. $#MODELS) {
+    my $CMD = "$RESCORER -i $INPUT -m $MODELS[$i] -s $VSRC -t $VTRG -f $FEATURES[$i]"
+            . " -n $NBEST_TEMP_FILE1 > $NBEST_TEMP_FILE2";
+    system($CMD) == 0 or die "Rescorer failed (status $?): $CMD\n";
+    rename($NBEST_TEMP_FILE2, $NBEST_TEMP_FILE1)
+        or die "Could not rename '$NBEST_TEMP_FILE2': $!\n";
+}
+
+open($NBEST_TEMP_HANDLE, "<", $NBEST_TEMP_FILE1)
+    or die "Could not open '$NBEST_TEMP_FILE1': $!\n";
+
+# Quote only the literal names: applying quotemeta to the whole pattern would
+# also escape \S+ and the pattern could never match a real score.
+my $PATTERN1 = join(" ", map { quotemeta($_) . "= \\S+" } @FEATURES);
+my $PATTERN2 = quotemeta($BEFORE) . " \\S+";
+
+while (<$NBEST_TEMP_HANDLE>) {
+    chomp;
+    # Work on a copy so the scores are never dropped when re-insertion fails.
+    my $MOVED = $_;
+    if ($MOVED =~ s/(?<!\S)($PATTERN1) ?//) {
+        my $FEAT = $1;
+        $_ = $MOVED if $MOVED =~ s/(?<!\S)($PATTERN2)(?!\S)/$1 $FEAT/;
+    }
+    print "$_\n";
+}
+close($NBEST_TEMP_HANDLE);