diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-04-22 23:33:25 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-04-22 23:33:25 +0300 |
commit | b2734ec72c2c7728953ca13ab36ac6e353cc16f9 (patch) | |
tree | 8b72e6f21d80123218b2857aa8b6698b3122e0b5 /scripts/rescore.pl | |
parent | afb0f1a8adf9827436f8ffd7f1c0cedb46063edf (diff) |
rescorer wrapper
Diffstat (limited to 'scripts/rescore.pl')
-rwxr-xr-x | scripts/rescore.pl | 75 |
1 files changed, 75 insertions, 0 deletions
#!/usr/bin/env perl

# scripts/rescore.pl -- wrapper around an external n-best rescorer.
#
# Options (all taken from the command line):
#   -i, --input     input file handed to the rescorer via -i
#   -n, --n-best    n-best list to rescore
#   -f, --features  feature name; repeat once per model
#   -m, --models    model file; repeat once per feature
#   -s, --source    handed to the rescorer via -s
#   -t, --target    handed to the rescorer via -t
#   -r, --rescorer  rescorer command (may carry its own arguments)
#   -w, --weights   weights file; the first whitespace-separated token of each
#                   line is a feature name including its trailing '='
#
# Processing steps:
#   1. Scan the weights file for the feature name listed directly before
#      "<first feature>=".
#   2. Copy the n-best list to a temp file, removing any "<feature>= <value> "
#      entries for the features about to be (re)computed.
#   3. Run the rescorer once per model; each run reads the current temp file
#      and its standard output becomes the input of the next run.
#   4. Print the result to STDOUT, moving the new feature scores to directly
#      behind the score of the feature found in step 1.
#
# NOTE(review): step 4 assumes the rescorer emits the new scores contiguously,
# in --features order, separated by single spaces, and that the preceding
# feature carries a single value -- confirm against the rescorer's output.

use strict;
use warnings;
use Getopt::Long;
use File::Temp qw(tempfile);

# Quote a string for safe use as a single word in a POSIX shell command.
sub shell_quote {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
}

my $RESCORER;
my $INPUT;
my $NBEST;
my $WEIGHTS;

my @MODELS;
my ($VSRC, $VTRG);
my @FEATURES;

GetOptions(
    "i|input=s"    => \$INPUT,
    "n|n-best=s"   => \$NBEST,
    "f|features=s" => \@FEATURES,
    "m|models=s"   => \@MODELS,
    "s|source=s"   => \$VSRC,
    "t|target=s"   => \$VTRG,
    "r|rescorer=s" => \$RESCORER,
    "w|weights=s"  => \$WEIGHTS
) or die "Invalid command line options\n";

# Every scalar option is interpolated into the rescorer command or opened as
# a file below, so refuse to continue when one of them is missing.
my %REQUIRED = (
    "input"    => $INPUT,
    "n-best"   => $NBEST,
    "source"   => $VSRC,
    "target"   => $VTRG,
    "rescorer" => $RESCORER,
    "weights"  => $WEIGHTS,
);
my @MISSING = grep { !defined $REQUIRED{$_} } sort keys %REQUIRED;
die "Missing required option(s): " . join(", ", map { "--$_" } @MISSING) . "\n"
    if @MISSING;
die "At least one --features name is required\n" unless @FEATURES;
die "Number of --models (" . scalar(@MODELS)
    . ") must match number of --features (" . scalar(@FEATURES) . ")\n"
    if @MODELS != @FEATURES;

# --- Step 1: find the feature listed right before the first new feature -----

my $BEFORE = "LM1=";    # fallback when the weights file has no usable first line
open(my $WEIGHTS_FH, "<", $WEIGHTS)
    or die "Could not open weights file '$WEIGHTS': $!\n";

my $FIRST = <$WEIGHTS_FH>;
if (defined $FIRST) {
    my ($NAME) = split(/\s/, $FIRST);
    $BEFORE = $NAME if defined $NAME && length $NAME;
}

while (my $LINE = <$WEIGHTS_FH>) {
    my ($CURRENT) = split(/\s/, $LINE);
    next unless defined $CURRENT && length $CURRENT;    # blank/indented line
    print STDERR "$CURRENT\n";
    if ($CURRENT eq "$FEATURES[0]=") {
        print STDERR "Found $FEATURES[0] after $BEFORE\n";
        last;
    }
    $BEFORE = $CURRENT;
}
close($WEIGHTS_FH);

# --- Step 2: strip stale scores of the features to be recomputed ------------

# UNLINK => 1 removes the temp files when the script exits.
my ($NBEST_TEMP_HANDLE, $NBEST_TEMP_FILE1) = tempfile(UNLINK => 1);
my (undef, $NBEST_TEMP_FILE2) = tempfile(UNLINK => 1);

open(my $NBEST_IN, "<", $NBEST)
    or die "Could not open n-best list '$NBEST': $!\n";
while (my $LINE = <$NBEST_IN>) {
    chomp $LINE;
    foreach my $NAME (@FEATURES) {
        # \Q..\E: the feature name is literal text, not a regex.
        $LINE =~ s/\Q$NAME\E= \S+ //g;
    }
    print {$NBEST_TEMP_HANDLE} $LINE, "\n";
}
close($NBEST_IN);
close($NBEST_TEMP_HANDLE)
    or die "Could not write '$NBEST_TEMP_FILE1': $!\n";

# --- Step 3: run the rescorer once per model --------------------------------

foreach my $i (0 .. $#MODELS) {
    # $RESCORER is left unquoted on purpose so it may carry its own arguments;
    # every path/name argument is quoted to survive spaces and metacharacters.
    my $COMMAND = join(" ",
        $RESCORER,
        "-i", shell_quote($INPUT),
        "-m", shell_quote($MODELS[$i]),
        "-s", shell_quote($VSRC),
        "-t", shell_quote($VTRG),
        "-f", shell_quote($FEATURES[$i]),
        "-n", shell_quote($NBEST_TEMP_FILE1),
        ">",  shell_quote($NBEST_TEMP_FILE2));

    system($COMMAND) == 0
        or die "Rescorer failed for model '$MODELS[$i]' (status $?): $COMMAND\n";

    # The output of this run is the input of the next one.
    rename($NBEST_TEMP_FILE2, $NBEST_TEMP_FILE1)
        or die "Could not rename '$NBEST_TEMP_FILE2' to '$NBEST_TEMP_FILE1': $!\n";
}

# --- Step 4: move the new scores behind the preceding feature ---------------

open(my $RESCORED_FH, "<", $NBEST_TEMP_FILE1)
    or die "Could not open '$NBEST_TEMP_FILE1': $!\n";

# Only the literal feature names are escaped; the surrounding regex syntax
# must stay live. (?<!\S) anchors a name at line start or after whitespace.
my $NEW_SCORES = join(" ", map { '(?<!\S)' . quotemeta("$_=") . ' \S+' } @FEATURES);
my $PATTERN1   = qr/$NEW_SCORES/;
my $PATTERN2   = qr/(?<!\S)\Q$BEFORE\E \S+/;

while (my $LINE = <$RESCORED_FH>) {
    chomp $LINE;
    if ($LINE =~ $PATTERN2) {
        # Work on a copy so the scores are never dropped when they cannot be
        # re-inserted; in that case the line is printed unchanged.
        my $CANDIDATE = $LINE;
        if ($CANDIDATE =~ s/($PATTERN1) ?//) {
            my $FEAT = $1;
            $LINE = $CANDIDATE
                if $CANDIDATE =~ s/($PATTERN2 )/${1}${FEAT} /;
        }
    }
    print "$LINE\n";
}
close($RESCORED_FH);