Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regression-testing/run-test-detokenizer.t7
-rwxr-xr-xscripts/tokenizer/detokenizer.perl2
2 files changed, 2 insertions, 7 deletions
diff --git a/regression-testing/run-test-detokenizer.t b/regression-testing/run-test-detokenizer.t
index f9cc3423a..9d677b43e 100644
--- a/regression-testing/run-test-detokenizer.t
+++ b/regression-testing/run-test-detokenizer.t
@@ -82,9 +82,7 @@ Moi, j'ai une apostrophe.
EXP
);
-# A (failing) French test involving an apostrophe on the second-last word
-{
-my $testCase =
+# A French test involving an apostrophe on the second-last word
&addDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
<<'TOK'
de musique rap issus de l' immigration
@@ -95,9 +93,6 @@ de musique rap issus de l'immigration
EXP
);
-$testCase->setExpectedToFail("A bug is causing this to be detokenized wrong.");
-}
-
# A German test involving non-ASCII characters
# Note: We don't specify a language because the detokenizer errors if you pass in a language for which it has no special rules, of which German is an example.
&addDetokenizerTest("TEST_GERMAN_NONASCII", undef,
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 0b8e5af73..f049b8080 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -92,7 +92,7 @@ sub detokenize {
#left-shift floats in Czech
$text=$text.$words[$i];
$prependSpace = " ";
- } elsif ((($language eq "fr") ||($language eq "it")) && ($i<(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
+ } elsif ((($language eq "fr") ||($language eq "it")) && ($i<=(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
#right-shift the contraction for French and Italian
$text = $text.$prependSpace.$words[$i];
$prependSpace = "";