Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Barry Haddow <barry.haddow@gmail.com> 2019-09-30 18:53:06 +0300
committer: Barry Haddow <barry.haddow@gmail.com> 2019-09-30 18:53:06 +0300
commit: 9bffde57ba1a6e03a101b8826220609bbb49743a (patch)
tree: e3195f82c1c9f2e5b62d34ea5452462dfa26ff73 /scripts/ems/support
parent: 257d7e5e663a6bba97a21b191a04a9684f5e8436 (diff)
revert 05788925
Diffstat (limited to 'scripts/ems/support')
-rwxr-xr-x scripts/ems/support/split-sentences.perl 2
1 file changed, 1 insertion, 1 deletion
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index 8062e2942..49d599430 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -202,7 +202,7 @@ sub preprocess {
my $starting_punct = $2;
if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) {
# Not breaking;
- } elsif ($words[$i] =~ /(\.?)[\p{IsUpper}\-]+(\.+)$/) {
+ } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
# Not breaking - upper case acronym
} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
# The next word has a bunch of initial quotes, maybe a