Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: alvations <alvations@gmail.com> 2019-09-04 09:16:41 +0300
committer: GitHub <noreply@github.com> 2019-09-04 09:16:41 +0300
commit: 05788925812f0d3265e355565cbb1701a0ad7510 (patch)
tree: 47f3e6942d9aef6201c9f77d13468f0e71de88be
parent: 9f08d77b0ddfa80764d2bdd21b1b17090a367fcc (diff)
The dot before an acronym should be optional. [alvations-patch-regexes]
-rwxr-xr-x scripts/ems/support/split-sentences.perl 2
1 file changed, 1 insertion, 1 deletion
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index a1cfb0d37..d4e0161bf 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -191,7 +191,7 @@ sub preprocess {
my $starting_punct = $2;
if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) {
# Not breaking;
- } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
+ } elsif ($words[$i] =~ /(\.?)[\p{IsUpper}\-]+(\.+)$/) {
# Not breaking - upper case acronym
} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
# The next word has a bunch of initial quotes, maybe a