Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bojar <bojar@1f5c12ca-751b-0410-a591-d2e778427230> 2008-02-22 17:50:43 +0300
committer: bojar <bojar@1f5c12ca-751b-0410-a591-d2e778427230> 2008-02-22 17:50:43 +0300
commit: 6af3140978d271f2222971b73ca5a363e8d68bd7 (patch)
tree: 713679b72d7d4eb5d7db96351b7c085e48bcf9bc /scripts/recaser
parent: 8b3d44b2e224684c8db2c14e862a5ef5699c19bc (diff)
added optional sentence uppercasing (use -u)
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1566 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'scripts/recaser')
-rwxr-xr-x scripts/recaser/detokenizer.perl 10
1 files changed, 9 insertions, 1 deletions
diff --git a/scripts/recaser/detokenizer.perl b/scripts/recaser/detokenizer.perl
index 60d8732c9..0ab8c1588 100755
--- a/scripts/recaser/detokenizer.perl
+++ b/scripts/recaser/detokenizer.perl
@@ -2,6 +2,7 @@
# Sample De-Tokenizer
# written by Josh Schroeder, based on code by Philipp Koehn
+# further modifications by Ondrej Bojar
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
@@ -10,16 +11,21 @@ use strict;
my $language = "en";
my $QUIET = 0;
my $HELP = 0;
+my $UPPERCASE_SENT = 0;
while (@ARGV) {
$_ = shift;
/^-l$/ && ($language = shift, next);
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
+ /^-u$/ && ($UPPERCASE_SENT = 1, next);
}
if ($HELP) {
- print "Usage ./detokenizer.perl (-l [en|de|...]) < tokenizedfile > detokenizedfile\n";
+ print "Usage ./detokenizer.perl (-l [en|fr|cs]) < tokenizedfile > detokenizedfile\n";
+ print "Options:\n";
+ print " -u ... uppercase the first char in the final sentence.\n";
+ print " -q ... don't report detokenizer revision.\n";
exit;
}
@@ -117,6 +123,8 @@ sub detokenize {
#add trailing break
$text .= "\n" unless $text =~ /\n$/;
+ $text = ucfirst($text) if $UPPERCASE_SENT;
+
return $text;
}