blob: e94b91744089d5b166916fb5f23c5322ddb32d4f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
#!/usr/bin/env perl
use warnings;
use strict;
# Clean up and escape a single line of text for Moses.
#
# Argument: the raw input line, with or without a trailing line terminator.
# Returns:  the normalized, escaped line WITHOUT a trailing newline.
#
# Steps, in order:
#   1. strip ASCII control characters (this also removes "\n" / "\r", so no
#      separate chop/chomp is needed and an unterminated last line keeps its
#      final character),
#   2. squeeze whitespace runs to one space and trim both ends,
#   3. replace characters that are special to Moses with entities,
#   4. turn escaped "<tag translation="..."> text </tag>" markup back into
#      literal XML so it can still act as an XML instruction.
sub _escape_line {
    my ($line) = @_;

    # avoid general madness
    $line =~ s/[\000-\037]//g;
    $line =~ s/\s+/ /g;
    $line =~ s/^ //;
    $line =~ s/ $//;

    # special characters in moses
    # '&' must be handled first, otherwise the '&' introduced by the
    # following replacements would be escaped a second time.
    $line =~ s/&/&amp;/g;     # escape escape
    $line =~ s/\|/&#124;/g;   # factor separator
    $line =~ s/</&lt;/g;      # xml
    $line =~ s/>/&gt;/g;      # xml
    $line =~ s/'/&apos;/g;    # xml
    $line =~ s/"/&quot;/g;    # xml
    $line =~ s/\[/&#91;/g;    # syntax non-terminal
    $line =~ s/\]/&#93;/g;    # syntax non-terminal

    # restore xml instructions: only the tag delimiters and the quotes around
    # the translation attribute are un-escaped; the attribute value and the
    # enclosed text stay escaped.
    $line =~ s{&lt;(\S+) translation=&quot;(.+?)&quot;&gt; (.+?) &lt;/(\S+)&gt;}
              {<$1 translation="$2"> $3 </$4>}g;

    return $line;
}

# Filter: read lines from STDIN, write the escaped form to STDOUT, one output
# line per input line.
while (my $line = <STDIN>) {
    print _escape_line($line), "\n";
}
|