diff options
Diffstat (limited to 'example/preprocess.pl')
-rwxr-xr-x | example/preprocess.pl | 16 |
1 files changed, 16 insertions, 0 deletions
#!/usr/bin/env perl
#
# Text preprocessor: reads lines from the files named on the command line
# (or from STDIN when none are given) and prints a tokenized, lowercased
# version of each line to STDOUT.
#
# Blank lines and lines starting with "CANTO " are dropped. On every other
# line, punctuation marks are split off into their own space-separated tokens,
# ASCII letters are lowercased, and runs of whitespace are collapsed to a
# single space.

use strict;
use warnings;

# normalize_line($line)
#
# Returns the normalized form of $line, terminated by "\n", or undef (empty
# list in list context) when the line is one that must be skipped.
sub normalize_line {
    my ($line) = @_;

    # Skip empty / whitespace-only lines.
    return if $line =~ /^\s*$/;

    # Skip "CANTO ..." heading lines. This test is case-sensitive and runs
    # before lowercasing, so only upper-case headings are dropped.
    return if $line =~ /^CANTO /;

    # Surround each punctuation mark, and each run of one or more dashes,
    # with spaces so that it becomes a separate token.
    $line =~ s/([,.!?:;()"]|-+)/ $1 /g;

    # Lowercase ASCII letters only; all other bytes are left untouched.
    $line =~ tr/A-Z/a-z/;

    # Single quotes are deliberately not split off (too much trouble), so
    # apostrophes stay attached to their words.

    # Splitting on whitespace discards leading and trailing whitespace
    # (including the newline) and collapses inner runs, so no explicit
    # padding or chomp is required.
    return join(' ', split(' ', $line)) . "\n";
}

# Run the filter only when executed as a script, so that the file can also
# be loaded (e.g. by a test) without consuming input.
unless (caller) {
    while (my $line = <>) {
        my $normalized = normalize_line($line);
        print $normalized if defined $normalized;
    }
}