diff options
author | alvations <alvations@gmail.com> | 2018-01-19 08:29:53 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-19 08:29:53 +0300 |
commit | 194964c017d8acb56918bab94f4d7cdd60b9c9b7 (patch) | |
tree | 366316cc2dcd51a39518b6381654e10789bcbf1e | |
parent | 3a0631a05b7f53a7f387ca8ddca432f5ddb22029 (diff) |
Korean words have spaces =) (branch: patch-detokenizer-ko)
-rwxr-xr-x | scripts/tokenizer/detokenizer.perl | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 881b93dd1..41299baf2 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -106,7 +106,7 @@ sub detokenize {
     my $prependSpace = " ";
     for ($i=0;$i<(scalar(@words));$i++) {
         if (&startsWithCJKChar($words[$i])) {
-            if ($i > 0 && &endsWithCJKChar($words[$i-1])) {
+            if (($i > 0 && &endsWithCJKChar($words[$i-1])) && ($language ne "ko")) {
                 # perform left shift if this is a second consecutive CJK (Chinese/Japanese/Korean) word
                 $text=$text.$words[$i];
             } else {
```