Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: alvations <alvations@gmail.com> 2018-01-19 08:29:53 +0300
committer: GitHub <noreply@github.com> 2018-01-19 08:29:53 +0300
commit: 194964c017d8acb56918bab94f4d7cdd60b9c9b7 (patch)
tree: 366316cc2dcd51a39518b6381654e10789bcbf1e
parent: 3a0631a05b7f53a7f387ca8ddca432f5ddb22029 (diff)
Korean words has spaces =) [branch: patch-detokenizer-ko]
-rwxr-xr-x scripts/tokenizer/detokenizer.perl 2
1 file changed, 1 insertion, 1 deletion
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 881b93dd1..41299baf2 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -106,7 +106,7 @@ sub detokenize {
my $prependSpace = " ";
for ($i=0;$i<(scalar(@words));$i++) {
if (&startsWithCJKChar($words[$i])) {
- if ($i > 0 && &endsWithCJKChar($words[$i-1])) {
+ if (($i > 0 && &endsWithCJKChar($words[$i-1])) && ($language ne "ko")) {
# perform left shift if this is a second consecutive CJK (Chinese/Japanese/Korean) word
$text=$text.$words[$i];
} else {