blob: de888a6f342adccd7846fcdfb2bb8d7d67bdd608 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
#!/usr/bin/perl -w
use strict;
# Extract the text of every <seg ...>...</seg> element read from STDIN and
# print it to STDOUT, one segment per output line, with runs of whitespace
# collapsed to a single space and leading/trailing spaces removed.
# The script takes no command-line arguments.
die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")
    unless scalar @ARGV == 0;

while (my $line = <STDIN>) {
    # chomp, not chop: chop would delete the last character of a final line
    # that has no trailing newline (typically the '>' of '</seg>'), which
    # made the last segment of such a file disappear.
    chomp($line);

    # A segment may span several physical lines. While an opening <seg ...>
    # tag has been seen but no matching </seg> follows it, keep appending
    # input lines. This single loop also covers the case where the opening
    # tag sits alone at the end of a line.
    # NOTE: continuation lines are appended with no separator, so text that
    # is wrapped in the middle of a segment is joined directly.
    while ($line =~ /<seg[^>]+>\s*(.*)\s*$/i &&
           $line !~ /<seg[^>]+>\s*(.*)\s*<\/seg>/i) {
        my $next_line = <STDIN>;

        # End of input inside an open segment: stop instead of appending
        # undef over and over.
        if (!defined $next_line) {
            warn "WARNING: unterminated <seg> at end of input\n";
            last;
        }

        chomp($next_line);
        $line .= $next_line;
    }

    # Emit the segment body, if the (possibly joined) line holds a complete one.
    if ($line =~ /<seg[^>]+>\s*(.*)\s*<\/seg>/i) {
        my $input = $1;
        $input =~ s/\s+/ /g;    # collapse whitespace runs (tabs, CR, ...)
        $input =~ s/^ //g;      # strip the leading space, if any
        $input =~ s/ $//g;      # strip the trailing space, if any
        print $input."\n";
    }
}
|