Welcome to mirror list, hosted at ThFree Co, Russian Federation.

rescore.pl « scripts - github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c8269b8f66f531eace2042683ebd84bde1d3f1f9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env perl

use strict;
use Getopt::Long;
use File::Temp qw(tempfile);
use POSIX;

# Tell File::Temp to retain every temporary file it creates, regardless of
# any automatic-removal settings.
$File::Temp::KEEP_ALL = 1;

# Our own process id; execute() sends it a signal when a command fails.
my $PID = $$;
# Convert termination signals into a Perl exception.
$SIG{TERM} = $SIG{INT} = $SIG{QUIT} = sub { die; };

# Command-line settings, filled in by GetOptions below.
my $RESCORER;    # -r: rescorer command to run once per model
my $INPUT;       # -i: passed to the rescorer as its -i argument
my $NBEST;       # -n: n-best list to rescore
my $WEIGHTS;     # -w: weights file, used to locate the insertion point

my @MODELS;      # -m: one entry per rescoring pass (repeatable)
my ($VSRC, $VTRG);    # -s / -t: passed to the rescorer as -s / -t
my @FEATURES;    # -f: feature name for each model (repeatable)

GetOptions(
    "i|input=s" => \$INPUT,
    "n|n-best=s" => \$NBEST,
    "f|features=s" => \@FEATURES,
    "m|models=s" => \@MODELS,
    "s|source=s" => \$VSRC,
    "t|target=s" => \$VTRG,
    "r|rescorer=s" => \$RESCORER,
    "w|weights=s" => \$WEIGHTS
);

# Fail early with a clear message instead of a bare "Could not open" or a
# malformed rescorer command line later on.
my @MISSING;
push @MISSING, "-w/--weights" unless defined $WEIGHTS;
push @MISSING, "-n/--n-best" unless defined $NBEST;
if (@MODELS) {
    # These values are only interpolated into the rescorer command, so they
    # are only needed when at least one model is given.
    push @MISSING, "-r/--rescorer" unless defined $RESCORER;
    push @MISSING, "-i/--input" unless defined $INPUT;
    push @MISSING, "-s/--source" unless defined $VSRC;
    push @MISSING, "-t/--target" unless defined $VTRG;
}
die "Missing required option(s): @MISSING\n" if @MISSING;
die "Need one -f/--features name per -m/--models entry\n"
    if @FEATURES < @MODELS;

# Scan the weights file for the name (first whitespace-separated field of a
# line) that immediately precedes "$FEATURES[0]=". That name is stored in
# $BEFORE. If the feature is never found, $BEFORE ends up as the last name in
# the file; if the file has no usable first line, the default below is kept.
my $BEFORE = "LM1=";
open(my $WEIGHTS_HANDLE, "<", $WEIGHTS)
    or die "Could not open weights file '$WEIGHTS': $!";
my $FIRST = <$WEIGHTS_HANDLE>;
if (defined $FIRST) {
    chomp($FIRST);
    my ($NAME) = split(/\s/, $FIRST);
    # Keep the default when the first line is blank or starts with whitespace.
    $BEFORE = $NAME if defined $NAME && length $NAME;
}
while (my $LINE = <$WEIGHTS_HANDLE>) {
    my ($CURRENT) = split(/\s/, $LINE);
    # A blank or indented line carries no name; skipping it keeps $BEFORE
    # from being overwritten with an empty or undefined value.
    next unless defined $CURRENT && length $CURRENT;
    print STDERR "$CURRENT\n";
    if ($CURRENT eq "$FEATURES[0]=") {
        print STDERR "Found $FEATURES[0] after $BEFORE\n";
        last;
    }
    $BEFORE = $CURRENT;
}
close($WEIGHTS_HANDLE);

# Regex source strings that are interpolated into matches further down.
# Names are wrapped in \Q...\E so that any regex metacharacter in a feature
# name is matched literally rather than interpreted.
#   $PATTERN1: every requested feature, in order, each as "NAME= VALUE".
#   $PATTERN2: the anchor feature $BEFORE followed by one value.
my $PATTERN1 = join(" ", map { "\\b\Q$_\E= \\S+" } @FEATURES);
my $PATTERN2 = "\\b\Q$BEFORE\E \\S+";

# Echo both patterns for diagnostics.
print STDERR $PATTERN1, "\n";
print STDERR $PATTERN2, "\n";

# Copy the n-best list into a temporary file, removing any existing
# "NAME= VALUE " entry for each requested feature. A second temporary file
# name is reserved for the rescorer output.
my ($NBEST_TEMP_HANDLE, $NBEST_TEMP_FILE1) = tempfile();
my (undef, $NBEST_TEMP_FILE2) = tempfile();
open(my $NBEST_HANDLE, "<", $NBEST)
    or die "Could not open n-best list '$NBEST': $!";
while (my $LINE = <$NBEST_HANDLE>) {
    chomp($LINE);
    foreach my $name (@FEATURES) {
        # \b keeps a name from matching the tail of a longer feature name
        # (e.g. "M1" inside "LM1="); \Q...\E matches the name literally.
        $LINE =~ s/\b\Q$name\E= \S+ //g;
    }
    print $NBEST_TEMP_HANDLE $LINE, "\n";
}
close($NBEST_HANDLE);
# Buffered write errors only surface at close, so check it.
close($NBEST_TEMP_HANDLE)
    or die "Could not write '$NBEST_TEMP_FILE1': $!";

# Run the rescorer once per model. Each pass reads $NBEST_TEMP_FILE1 and
# writes $NBEST_TEMP_FILE2, which then replaces $NBEST_TEMP_FILE1 so the next
# pass sees the output of the previous one.
# NOTE(review): the values are interpolated into a shell command unquoted, so
# paths containing whitespace or shell metacharacters will not work.
foreach my $i (0 .. $#MODELS) {
    execute("$RESCORER -i $INPUT -m $MODELS[$i] -s $VSRC -t $VTRG -f $FEATURES[$i] -n $NBEST_TEMP_FILE1 > $NBEST_TEMP_FILE2");
    # A failed rename would silently leave stale data for the next pass.
    rename($NBEST_TEMP_FILE2, $NBEST_TEMP_FILE1)
        or die "Could not rename '$NBEST_TEMP_FILE2' to '$NBEST_TEMP_FILE1': $!";
}

# Read the rescored n-best list and, on every line containing the anchor
# feature, move the block of new feature scores so that it directly follows
# the anchor's first value. Each line is then printed to STDOUT.
# NOTE(review): $PATTERN2 covers only one value after the anchor name, so an
# anchor feature with several values would get the insertion after its first
# value -- confirm this is intended.
open(my $RESCORED_HANDLE, "<", $NBEST_TEMP_FILE1)
    or die "Could not open rescored n-best list '$NBEST_TEMP_FILE1': $!";

while (my $LINE = <$RESCORED_HANDLE>) {
    chomp($LINE);
    if ($LINE =~ /$PATTERN2/) {
        # Work on a copy and commit only when the re-insertion succeeds;
        # otherwise the scores would be cut out and never written back.
        my $MOVED = $LINE;
        if ($MOVED =~ s/($PATTERN1)//) {
            my $FEAT = $1;
            $LINE = $MOVED if $MOVED =~ s/($PATTERN2 )/$1$FEAT /;
        }
    }
    print "$LINE\n";
}
close($RESCORED_HANDLE);

# Run a shell command, logging it first. On any failure, log the reason,
# send SIGINT to the process recorded in $PID and die.
#
#   $command - command line handed to system() as a single string.
#
# Returns nothing on success; does not return on failure.
sub execute {
    my $command = shift;
    logMessage("Executing:\t$command");
    my $ret = system($command);
    return if $ret == 0;

    # system() returns the raw wait status: -1 means the command could not
    # be started, the low 7 bits hold a terminating signal, and the exit
    # code is in the high byte. Decode it before $! can be overwritten.
    my $reason;
    if ($ret == -1) {
        $reason = "could not be started: $!";
    }
    elsif ($ret & 127) {
        $reason = "was killed by signal " . ($ret & 127);
    }
    else {
        $reason = "finished with return status " . ($ret >> 8);
    }

    logMessage("Command '$command' $reason");
    logMessage("Aborting and killing parent process");
    kill('INT', $PID);
    die;
}

# Write one timestamped line to STDERR in the form
# "MM/DD/YYYY HH:MM:SS<TAB>message".
#
#   $text - the message to log.
sub logMessage {
    my ($text) = @_;
    my $stamp = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime());
    print STDERR "$stamp\t$text\n";
}