Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/recaser/train-truecaser.perl | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/scripts/recaser/train-truecaser.perl b/scripts/recaser/train-truecaser.perl
index 4f600a640..94ddbf2fa 100755
--- a/scripts/recaser/train-truecaser.perl
+++ b/scripts/recaser/train-truecaser.perl
@@ -44,6 +44,12 @@ while(<CORPUS>) {
$firstWordOfSentence = 1;
}
+ if ($currentWord !~ /[\p{Ll}\p{Lu}\p{Lt}]/) {
+ # skip words with nothing to case
+ $firstWordOfSentence = 0;
+ next;
+ }
+
my $currentWordWeight = 0;
if (! $firstWordOfSentence) {
$currentWordWeight = 1;