Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-05 20:23:47 +0400
committer bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230> 2011-08-05 20:23:47 +0400
commit 76c3ef4dbafcc958569a39173dd8dcedfcfe5486 (patch)
tree 9b830939c532dadc87b22d27ef49e661367fc900 /regression-testing
parent f8a99e5d6dc668d50efd2df62f579158e60b4bdf (diff)
a few more detokenization tests, including a TODO one that exposes a bug
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4124 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'regression-testing')
-rw-r--r-- regression-testing/run-test-detokenizer.t 39
1 files changed, 37 insertions, 2 deletions
diff --git a/regression-testing/run-test-detokenizer.t b/regression-testing/run-test-detokenizer.t
index b44890ebb..d4e7afa1d 100644
--- a/regression-testing/run-test-detokenizer.t
+++ b/regression-testing/run-test-detokenizer.t
@@ -49,16 +49,51 @@ This one is no more difficult, but, hey, it is on a new line.
EXP
);
+# An English test involving double-quotes
+&runDetokenizerTest("TEST_ENGLISH_DOUBLEQUOTES", "en",
+<<'TOK',
+This is a somewhat " less simple " test .
+TOK
+<<'EXP'
+This is a somewhat "less simple" test.
+EXP
+);
+
# A simple French test
&runDetokenizerTest("TEST_FRENCH_EASY", "fr",
<<'TOK',
-Ici une phrase simple .
+Voici une phrase simple .
TOK
<<'EXP'
-Ici une phrase simple.
+Voici une phrase simple.
EXP
);
+# A French test involving an apostrophe
+&runDetokenizerTest("TEST_FRENCH_APOSTROPHE", "fr",
+<<'TOK',
+Moi , j' ai une apostrophe .
+TOK
+<<'EXP'
+Moi, j'ai une apostrophe.
+EXP
+);
+
+TODO: {
+ local $TODO = "A bug is causing this to be detokenized wrong.";
+
+# A French test involving an apostrophe on the second-last word
+&runDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
+<<'TOK',
+de musique rap issus de l' immigration
+TOK
+<<'EXP'
+de musique rap issus de l'immigration
+EXP
+);
+
+}
+
######################################
# end of individual test cases
######################################