blob: 2c830f6b6b302cb7c3cd7d80c2cc2f8a9fec6f94 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/usr/bin/env perl
#
# Splits hyphen-joined (or, with --slash, slash-joined) words that appear as
# leaves in whitespace-tokenized <tree label="..."> markup.  Reads lines from
# STDIN and writes one transformed line per input line to STDOUT.
#
# A word token such as "foo-bar" that directly follows a token containing
# label="POS" is replaced by one <tree> element per part:
#
#   <tree label="POS"> foo </tree> <tree label="HYP"> @-@ </tree> <tree label="POS"> bar </tree>
#
# Options:
#   --slash       split on "/" instead of "-"; the separator is labelled SLASH
#                 instead of HYP
#   --mark-split  prefix the label in the preceding token with "HYP-"
#                 (or "SLASH-")
#   --binarize    group the parts into nested, left-branching
#                 <tree label="@POS"> nodes
#
# NOTE(review): STDIN is read without an encoding layer, so \p{IsAlnum} is
# applied to raw bytes; confirm whether the input should be decoded as UTF-8.

use strict;
use warnings;
use Getopt::Long "GetOptions";

my $MARK_HYP = 0;
my $BINARIZE = 0;
my $SLASH    = 0;

GetOptions(
    'binarize'   => \$BINARIZE,
    'mark-split' => \$MARK_HYP,
    'slash'      => \$SLASH,
) or die "Usage: $0 [--binarize] [--mark-split] [--slash] < IN > OUT\n";

# Separator character and the strings derived from it (fixed for the run).
my $punc      = $SLASH ? "/" : "-";
my $mark      = $SLASH ? "SLASH-" : "HYP-";    # prefix added by --mark-split
my $punc_pos  = $SLASH ? "SLASH" : "HYP";      # label of the separator leaf
my $separator = '@' . $punc . '@';             # separator as written to output

# A separator between two alphanumeric characters.  The right-hand character
# is only looked at, not consumed, so that it can also serve as the left-hand
# side of the next separator ("a-b-c" splits at both hyphens).
my $split_re = qr/([\p{IsAlnum}])\Q$punc\E(?=[\p{IsAlnum}])/;

# Transform one input line (without its line terminator) and return the
# resulting line, tokens joined by single spaces.
sub split_line {
    my ($line) = @_;

    my @out;
    for my $token (split ' ', $line) {

        # Markup tokens (starting with "<" or ending with ">") pass through.
        if ($token =~ /^</ || $token =~ />$/) {
            push @out, $token;
            next;
        }

        # Insert " @-@ " (or " @/@ ") at every separator inside the word.
        my $spaced = $token;
        my $splits = ($spaced =~ s/$split_re/$1 $separator /g);

        # The label for the new leaves comes from the token emitted just
        # before this word.  Check the match explicitly: a stale $1 from the
        # substitution above must never be used as a label.
        my $pos;
        if ($splits && @out && $out[-1] =~ /label="([^"]+)"/) {
            $pos = $1;
        }

        # Nothing to split, or no label to attach to the parts: keep as is.
        if (!defined $pos) {
            push @out, $token;
            next;
        }

        my @parts = split ' ', $spaced;

        if ($MARK_HYP) {
            $out[-1] =~ s/label="/label="$mark/;
        }

        if ($BINARIZE) {
            # One opening intermediate node for every part beyond the second;
            # each is closed again just before the part it makes room for.
            my $inner_label = '@' . ($MARK_HYP ? $mark : '') . $pos;
            push @out, (qq{<tree label="$inner_label">}) x (@parts - 2);
        }

        for my $i (0 .. $#parts) {
            push @out, '</tree>' if $BINARIZE && $i >= 2;
            my $label = $parts[$i] eq $separator ? $punc_pos : $pos;
            push @out, qq{<tree label="$label"> $parts[$i] </tree>};
        }
    }

    return join(' ', @out);
}

while (my $line = <STDIN>) {
    # chomp, not chop: a final line without a newline must keep its last
    # character.
    chomp $line;
    print split_line($line), "\n";
}
|