Welcome to mirror list, hosted at ThFree Co, Russian Federation.

eval_chunking.pl « train-sets « test - github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 78f7b92c04df0723dafc30aa75940b6fe7d23430 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl -w
use strict;

# Usage: eval_chunking.pl RDICT_FILE TRUTH_FILE [PREDICTION_FILE ...]
#
# Scores chunking predictions against a truth file and prints chunk-level
# precision / recall / F and token-level accuracy, both unlabeled (span only)
# and labeled (span and chunk type).
#
#   RDICT_FILE  one "LABEL ID" pair per line; maps numeric class ids back to
#               B-/I-/O style labels.
#   TRUTH_FILE  one token per line with the class id as first field; sentences
#               are separated by blank lines.  May be .gz or .bz2 compressed.
#   Predictions are read by runit() through <>, i.e. from the remaining
#   command-line files or from STDIN.
my $usage = "usage: $0 rdict-file truth-file [prediction-file ...]\n";
my $rdictFile = shift or die $usage;
my $truthFile = shift or die $usage;

# id -> label.  "B-O" and "I-O" are both collapsed to the plain outside tag.
my %rdict = ();
open my $rdict_fh, '<', $rdictFile or die "cannot open $rdictFile: $!\n";
while (my $line = <$rdict_fh>) {
    my ($label, $id) = split ' ', $line;
    next if not defined $id;    # blank or malformed line: nothing to record
    if ($label =~ /^[BI]-O$/) { $label = 'O'; }
    $rdict{$id} = $label;
}
close $rdict_fh or die "cannot close $rdictFile: $!\n";

# Chunk-level counters, accumulated by runit():
my $np = 0;     # denominator of precision
my $nt = 0;     # denominator of recall
my $ni = 0;     # chunks whose span matches
my $nil = 0;    # chunks whose span and label match

# Token-level counters, accumulated by runit():
my $nc = 0;     # tokens whose first tag character matches
my $ncl = 0;    # tokens whose full tag matches
my $na = 0;     # all tokens

# Labels of the sentence currently being read; consumed by runit().
my @truth = ();

# The list form of the pipe open passes the filename straight to the
# decompressor, so it is never interpreted by a shell.
my $truth_fh;
if ($truthFile =~ /\.gz$/) {
    open $truth_fh, '-|', 'zcat', $truthFile or die "cannot run zcat on $truthFile: $!\n";
}
elsif ($truthFile =~ /\.bz2$/) {
    open $truth_fh, '-|', 'bzcat', $truthFile or die "cannot run bzcat on $truthFile: $!\n";
}
else {
    open $truth_fh, '<', $truthFile or die "cannot open $truthFile: $!\n";
}
while (my $line = <$truth_fh>) {
    # A blank line closes the current sentence.
    if ($line =~ /^[\s]*$/) { runit(); @truth = (); next; }
    my ($id) = split ' ', $line;
    if (not defined $rdict{$id}) { die "unknown class id '$id' in $truthFile\n"; }
    push @truth, $rdict{$id};
}
# Score a final sentence that is not followed by a blank line.
if (@truth) { runit(); @truth = (); }
close $truth_fh;

# Each denominator falls back to 1 so that empty input yields 0 instead of
# dying; the F-score needs the same protection when both of its inputs are 0.
my $p  = $ni  / (($np > 0) ? $np : 1);
my $r  = $ni  / (($nt > 0) ? $nt : 1);
my $f  = ($p + $r > 0) ? 2 * $p * $r / ($p + $r) : 0;
my $acc = $nc  / (($na > 0) ? $na : 1);
my $pl = $nil / (($np > 0) ? $np : 1);
my $rl = $nil / (($nt > 0) ? $nt : 1);
my $fl = ($pl + $rl > 0) ? 2 * $pl * $rl / ($pl + $rl) : 0;
my $accl = $ncl / (($na > 0) ? $na : 1);

# Convert fractions to percentages truncated to one decimal place.
my $as_percent = sub { return int($_[0] * 1000) / 10; };
($p,  $r,  $f,  $acc)  = map { $as_percent->($_) } ($p,  $r,  $f,  $acc);
($pl, $rl, $fl, $accl) = map { $as_percent->($_) } ($pl, $rl, $fl, $accl);

print "unlabeled: p=$p\tr=$r\tf=$f\tacc=$acc\n";
print "  labeled: p=$pl\tr=$rl\tf=$fl\tacc=$accl\n";

# runit()
#
# Scores one sentence.  Takes no arguments: it uses the file-level @truth
# (labels of the current sentence) and %rdict (class id -> label), reads one
# prediction per truth token from <> followed by one separator line, and adds
# the results to the file-level counters:
#
#   $na   tokens seen
#   $ncl  tokens whose predicted tag equals the truth tag
#   $nc   tokens whose first tag character (B / I / O) equals the truth's
#   $np   predicted chunks  (the main script divides $ni by this for precision)
#   $nt   truth chunks      (the main script divides $ni by this for recall)
#   $ni   chunks with the same span in truth and prediction
#   $nil  chunks with the same span and the same label
#
# Dies if the predictions run out early, contain an unknown class id, or are
# not followed by a blank line where the truth sentence ends.
sub runit {
    my $N = scalar @truth;
    my @pred = ();
    for my $n (0 .. $N - 1) {
        # Read into a lexical so the caller's $_ is left alone.
        my $line = <>;
        if (not defined $line) { die "predictions ended after $n of $N tokens\n"; }
        chomp $line;
        my $id = int($line);    # the class id is the leading number on the line
        if (not defined $rdict{$id}) { die "unknown predicted class id '$id'\n"; }
        push @pred, $rdict{$id};
    }

    # The sentence separator; end of input is accepted in its place.
    my $sep = <>;
    if (defined $sep and $sep !~ /^\s*$/) {
        die "expected a blank line after $N predictions, got: $sep";
    }

    $na += $N;
    for my $n (0 .. $N - 1) {
        if ($pred[$n] eq $truth[$n]) { $ncl++; }
        if (substr($pred[$n],0,1) eq substr($truth[$n],0,1)) { $nc++; }
    }

    my %truth_chunks = chunksof(@truth);
    my %pred_chunks  = chunksof(@pred);

    # Precision is measured against what was predicted and recall against
    # what is true, so each count must go to the matching denominator.
    $np += scalar keys %pred_chunks;
    $nt += scalar keys %truth_chunks;
    foreach my $span (keys %truth_chunks) {
        next if not exists $pred_chunks{$span};
        $ni++;
        if ($pred_chunks{$span} eq $truth_chunks{$span}) { $nil++; }
    }
}

# chunksof(@labels)
#
# Extracts the chunks from a sequence of B-/I-/O style labels.  A chunk opens
# at a "B-TYPE" label and extends over the immediately following "I-TYPE"
# labels of the same TYPE.  Labels that do not start with "B-" never open a
# chunk, and "B-O" is skipped as well.
#
# Returns a flat hash (as a list) mapping "START END" to TYPE, where START is
# the index of the B- label and END is one past the last label of the chunk.
sub chunksof {
    my @labels = @_;
    my $count = scalar @labels;
    my %type_of = ();

    my $start = 0;
    while ($start < $count) {
        my ($type) = $labels[$start] =~ /^B-(.+)$/;

        # Not a chunk opener (or the outside tag): move on to the next label.
        if (not defined $type or $type eq 'O') {
            $start++;
            next;
        }

        # Absorb the run of continuation labels of the same type.
        my $end = $start + 1;
        $end++ while $end < $count and $labels[$end] eq "I-$type";

        $type_of{"$start $end"} = $type;
        $start = $end;
    }
    return (%type_of);
}