Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Barry Haddow <barry.haddow@gmail.com> 2012-01-18 12:55:12 +0400
committer Barry Haddow <barry.haddow@gmail.com> 2012-01-18 12:55:12 +0400
commit 83bb28680923e7b4f524aeb68729a388d7787abb (patch)
tree bdcd0f3a460e2e98bc792d0c9f433d58e0ea4052 /scripts/tokenizer
parent fcbaafadbc253f8748ca2f66ecfbf7b54f1935b4 (diff)
Option to disable buffering (from Tom Hoar)
Diffstat (limited to 'scripts/tokenizer')
-rwxr-xr-x scripts/tokenizer/detokenizer.perl 9
-rwxr-xr-x scripts/tokenizer/tokenizer.perl 5
2 files changed, 9 insertions, 5 deletions
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index ac88834b0..d8d6e5da7 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -1,13 +1,10 @@
#!/usr/bin/perl -w
-# $Id$
+# $Id: detokenizer.perl 4134 2011-08-08 15:30:54Z bgottesman $
# Sample De-Tokenizer
# written by Josh Schroeder, based on code by Philipp Koehn
# further modifications by Ondrej Bojar
-# This added by Herve Saint-Amand for compatibility with translate.cgi
-$|++;
-
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use strict;
@@ -20,6 +17,7 @@ my $UPPERCASE_SENT = 0;
while (@ARGV) {
$_ = shift;
+ /^-b$/ && ($| = 1, next);
/^-l$/ && ($language = shift, next);
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
@@ -31,6 +29,7 @@ if ($HELP) {
print "Options:\n";
print " -u ... uppercase the first char in the final sentence.\n";
print " -q ... don't report detokenizer revision.\n";
+ print " -b ... disable Perl buffering.\n";
exit;
}
@@ -38,7 +37,7 @@ die "No built-in rules for language $language, claim en for default behaviour."
if $language !~ /^(cs|en|fr|it)$/;
if (!$QUIET) {
- print STDERR "Detokenizer Version ".'$Revision$'."\n";
+ print STDERR "Detokenizer Version ".'$Revision: 4134 $'."\n";
print STDERR "Language: $language\n";
}
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index 54be11644..84fdc3462 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -23,6 +23,7 @@ my $AGGRESSIVE = 0;
while (@ARGV) {
$_ = shift;
+ /^-b$/ && ($| = 1, next);
/^-l$/ && ($language = shift, next);
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
@@ -31,6 +32,10 @@ while (@ARGV) {
if ($HELP) {
print "Usage ./tokenizer.perl (-l [en|de|...]) < textfile > tokenizedfile\n";
+ print "Options:\n";
+ print " -q ... quiet.\n";
+ print " -a ... aggressive hyphen splitting.\n";
+ print " -b ... disable Perl buffering.\n";
exit;
}
if (!$QUIET) {