#!/usr/bin/perl -w
#
# detruecase.perl -- re-capitalise tokenised text, one sentence per line.
#
# Usage:
#   detruecase.perl [--in FILE] [--headline SRCFILE] [-b|--unbuffered] < in > out
#
#   --in FILE           read sentences from FILE instead of STDIN
#   --headline SRCFILE  markup file used to decide which sentences are headlines
#   -b, --unbuffered    flush STDOUT after every write
#
# NOTE(review): the copy of this script that was reviewed had been flattened
# onto one line, had lost every "<...>" span (readline operands, regex bodies)
# and ended in the middle of process().  Everything marked NOTE(review) below
# was reconstructed from the surviving context and must be confirmed against
# a pristine copy before this file is trusted.

use strict;
use warnings;
use Getopt::Long "GetOptions";

my ($SRC, $INFILE, $UNBUFFERED);

# Tokens after which the next word starts a new sentence.
my %SENTENCE_END = ("." => 1, ":" => 1, "?" => 1, "!" => 1);

# Tokens that may precede the first real word of a sentence; they leave the
# "at sentence start" state untouched.
# NOTE(review): the last four keys had been entity-decoded into a syntax error
# ("""=>1) plus duplicate "'" / "[" keys; the escaped-token spellings are
# restored here on that evidence -- confirm.
my %DELAYED_SENTENCE_START = (
    "("      => 1,
    "["      => 1,
    "\""     => 1,
    "'"      => 1,
    "&quot;" => 1,
    "&apos;" => 1,
    "&#91;"  => 1,
    "&#93;"  => 1,
);

# lowercase even in headline
my %ALWAYS_LOWER;
foreach ("a","after","against","al-.+","and","any","as","at","be","because","between","by","during","el-.+","for","from","his","in","is","its","last","not","of","off","on","than","the","their","this","to","was","were","which","will","with") {
    $ALWAYS_LOWER{$_} = 1;
}

# Some keys ("al-.+", "el-.+") are patterns, which a plain hash lookup can
# never match, so all keys are compiled into one anchored alternation.
my $ALWAYS_LOWER_RE = do {
    my $alternatives = join('|', sort keys %ALWAYS_LOWER);
    qr/\A(?:$alternatives)\z/;
};

# One flag per segment of the --headline file: true if it is a headline.
my @HEADLINE;

# Run as a program only when executed directly, so the subs can be loaded
# and tested without touching STDIN/STDOUT.
main() unless caller;

# Parse options, load headline flags, then detruecase every input line.
sub main {
    binmode(STDIN,  ":utf8");
    binmode(STDOUT, ":utf8");

    die("detruecase.perl < in > out")
        unless GetOptions('headline=s'   => \$SRC,
                          'in=s'         => \$INFILE,
                          'b|unbuffered' => \$UNBUFFERED);
    if (defined($UNBUFFERED) && $UNBUFFERED) { $| = 1; }

    # find out about the headlines
    @HEADLINE = read_headline_flags($SRC) if defined($SRC);

    my $sentence = 0;
    if (defined($INFILE)) {
        open(my $in_fh, '<', $INFILE)
            or die("ERROR: could not open file '$INFILE': $!");
        binmode($in_fh, ":utf8");
        while (my $input_line = <$in_fh>) {
            process($input_line, $sentence++);
        }
        close($in_fh);
    }
    else {
        while (my $input_line = <STDIN>) {
            process($input_line, $sentence++);
        }
    }
    return;
}

# Scan the headline markup file and return one boolean per segment line.
#   $path - file named by --headline
# Dies if the file cannot be opened (the damaged original did not check).
sub read_headline_flags {
    my ($path) = @_;

    open(my $src_fh, '<', $path)
        or die("ERROR: could not open file '$path': $!");

    my @flags;
    my $headline_flag = 0;
    while (my $markup_line = <$src_fh>) {
        # NOTE(review): opening-tag pattern was stripped; "<hl>" is inferred
        # from the surviving closing-tag pattern /<.hl>/ -- confirm.
        $headline_flag = 1 if $markup_line =~ /<hl>/;
        $headline_flag = 0 if $markup_line =~ /<.hl>/;
        # NOTE(review): only "/^" of this pattern survived; "<seg" is an
        # assumption about the markup format -- confirm.
        next unless $markup_line =~ /^<seg/;
        push @flags, $headline_flag;
    }
    close($src_fh);
    return @flags;
}

# Return $line with sentence-initial words capitalised.
#   $line        - one tokenised sentence (trailing newline allowed)
#   $is_headline - if true, also capitalise every word not matched by
#                  %ALWAYS_LOWER
# Surrounding whitespace is trimmed and tokens are re-joined with one space.
# NOTE(review): the source was cut off just after "my $sentence_start = 1;",
# so the loop bodies here are reconstructed from the tables declared above
# and the comments that survived -- confirm.
sub detruecase_line {
    my ($line, $is_headline) = @_;

    chomp($line);
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    my @WORD = split(/\s+/, $line);

    # uppercase at sentence start
    my $sentence_start = 1;
    for my $word (@WORD) {
        $word = ucfirst($word) if $sentence_start;
        if (defined($SENTENCE_END{$word})) {
            $sentence_start = 1;
        }
        elsif (!defined($DELAYED_SENTENCE_START{$word})) {
            $sentence_start = 0;
        }
    }

    # uppercase headlines
    if ($is_headline) {
        for my $word (@WORD) {
            $word = ucfirst($word) unless $word =~ $ALWAYS_LOWER_RE;
        }
    }

    return join(' ', @WORD);
}

# Detruecase one input line and print it to STDOUT.
#   $_[0] - the raw input line
#   $_[1] - zero-based sentence number, used to look up the headline flag
sub process {
    my ($line, $sentence_index) = @_;

    my $is_headline = defined($SRC)
                   && defined($sentence_index)
                   && $HEADLINE[$sentence_index];

    print detruecase_line($line, $is_headline), "\n";
    return;
}

1;