Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: William Waites <wwaites@inf.ed.ac.uk>, 2020-02-26 17:08:26 +0300
committer: William Waites <wwaites@inf.ed.ac.uk>, 2020-02-26 17:08:26 +0300
commit: 696a5d98339b1b1e41480bed904e80355c49b947 (patch)
tree: 9a520199abee44cba7553f0ba4d83729879d340b /scripts/ems/support
parent: 22923ddcf049c694ac75158ead1613ea77ed18b1 (diff)
flag to turn off sentence splitter from emitting <P>
Diffstat (limited to 'scripts/ems/support')
-rwxr-xr-x scripts/ems/support/split-sentences.perl | 5
1 file changed, 4 insertions, 1 deletion
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index 240195c7c..b6e9e2456 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -23,6 +23,7 @@ my $is_cjk = 0;
my $QUIET = 0;
my $HELP = 0;
my $LIST_ITEM = 0;
+my $NOP = 0;
while (@ARGV) {
$_ = shift;
@@ -31,6 +32,7 @@ while (@ARGV) {
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
/^-i$/ && ($LIST_ITEM = 1, next);
+ /^-n$/ && ($NOP = 1, next);
/^-b$/ && ($|++, next); # no output buffering
}
@@ -40,6 +42,7 @@ if ($HELP) {
print "-b: no output buffering (for use in bidirectional pipes)\n";
print "-p: use a custom prefix file, overriding the installed one\n";
print "-i: avoid splitting on list items (e.g. 1. This is the first)\n";
+ print "-n: do not emit <P> after paragraphs\n";
exit;
}
if (!$QUIET) {
@@ -89,7 +92,7 @@ while (<STDIN>) {
if (/^<.+>$/ || /^\s*$/) {
# Time to process this block; we've hit a blank or <p>
&do_it_for($text, $_);
- print "<P>\n" if (/^\s*$/ && $text); ## If we have text followed by <P>
+ print "<P>\n" if $NOP == 0 && (/^\s*$/ && $text); ## If we have text followed by <P>
$text = "";
}
else {