diff options
author | Barry Haddow <barry.haddow@gmail.com> | 2019-11-05 18:58:07 +0300 |
---|---|---|
committer | Barry Haddow <barry.haddow@gmail.com> | 2019-11-05 18:58:07 +0300 |
commit | 56b2bad9073b6e50db692d9b0003ed805684849c (patch) | |
tree | 2d674c09b5dded11b2eac253a20be46be1109fbe | |
parent | 3910cd6c4625eefa57600159e66f9a86122750fa (diff) |
fix abbrev rule
-rwxr-xr-x | scripts/ems/support/split-sentences.perl | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index 0279a0b88..1dfb36d26 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -209,11 +209,14 @@ sub preprocess {
         my $starting_punct = $2;
         if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) {
             # Not breaking;
-        } elsif ($words[$i] =~ /(\.)[$sentence_start\-]+(\.+)$/) {
+            #print "NBP1 $words[$i] $words[$i+1]\n";
+        } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
             # Not breaking - upper case acronym
+            #print "NBP2 $words[$i] $words[$i+1]\n";
         } elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[0-9$sentence_start])/) {
             # The next word has a bunch of initial quotes, maybe a
             # space, then either upper case or a number
+            #print "MAYBE $words[$i] $words[$i+1]\n";
             $words[$i] = $words[$i]."\n" unless ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 2 && !$starting_punct && ($words[$i+1] =~ /^[0-9]+/));
             # We always add a return for these, unless we have a
             # numeric non-breaker and a number start.
```