diff options
author | John Bauer <horatio@gmail.com> | 2022-09-07 19:39:01 +0300 |
---|---|---|
committer | John Bauer <horatio@gmail.com> | 2022-09-07 19:39:01 +0300 |
commit | a7ff934f4a2b57e7b0cbeabfb84077814391b8e3 (patch) | |
tree | 4306b02c72690e83da482cc213a6fb93161d2677 | |
parent | c2941d7de6a4f1c22a3dabf0632852f9cd92bf8b (diff) |
Separate the langid test into two separate test scripts
-rw-r--r-- | stanza/tests/langid/test_langid.py | 36 | ||||
-rw-r--r-- | stanza/tests/langid/test_multilingual.py | 43 |
2 files changed, 44 insertions, 35 deletions
diff --git a/stanza/tests/langid/test_langid.py b/stanza/tests/langid/test_langid.py index 7dba9e40..a6ab671c 100644 --- a/stanza/tests/langid/test_langid.py +++ b/stanza/tests/langid/test_langid.py @@ -6,8 +6,7 @@ import pytest from stanza.models.common.doc import Document from stanza.pipeline.core import Pipeline -from stanza.pipeline.multilingual import MultilingualPipeline -from stanza.tests import * +from stanza.tests import TEST_MODELS_DIR #pytestmark = pytest.mark.skip @@ -595,36 +594,3 @@ def test_lang_subset_unlikely_language(): predictions = model(text_tensor) assert predictions[0, en_idx] < 0, "If this test fails, then regardless of how unlikely it was, the model is predicting the input string is possibly English. Update the test by picking a different combination of languages & input" -def test_multilingual_pipeline(): - """ - Basic test of multilingual pipeline - """ - english_text = "This is an English sentence." - english_deps_gold = "\n".join(( - "('This', 5, 'nsubj')", - "('is', 5, 'cop')", - "('an', 5, 'det')", - "('English', 5, 'amod')", - "('sentence', 0, 'root')", - "('.', 5, 'punct')" - )) - - french_text = "C'est une phrase française." - french_deps_gold = "\n".join(( - "(\"C'\", 4, 'nsubj')", - "('est', 4, 'cop')", - "('une', 4, 'det')", - "('phrase', 0, 'root')", - "('française', 4, 'amod')", - "('.', 4, 'punct')" - )) - - nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR) - docs = [english_text, french_text] - docs = nlp(docs) - - assert docs[0].lang == "en" - assert docs[0].sentences[0].dependencies_string() == english_deps_gold - assert docs[1].lang == "fr" - assert docs[1].sentences[0].dependencies_string() == french_deps_gold - diff --git a/stanza/tests/langid/test_multilingual.py b/stanza/tests/langid/test_multilingual.py new file mode 100644 index 00000000..0428ddf4 --- /dev/null +++ b/stanza/tests/langid/test_multilingual.py @@ -0,0 +1,43 @@ +""" +Tests specifically for the MultilingualPipeline +""" + +import pytest + +from stanza.pipeline.multilingual import MultilingualPipeline + +from stanza.tests import TEST_MODELS_DIR + +def test_multilingual_pipeline(): + """ + Basic test of multilingual pipeline + """ + english_text = "This is an English sentence." + english_deps_gold = "\n".join(( + "('This', 5, 'nsubj')", + "('is', 5, 'cop')", + "('an', 5, 'det')", + "('English', 5, 'amod')", + "('sentence', 0, 'root')", + "('.', 5, 'punct')" + )) + + french_text = "C'est une phrase française." + french_deps_gold = "\n".join(( + "(\"C'\", 4, 'nsubj')", + "('est', 4, 'cop')", + "('une', 4, 'det')", + "('phrase', 0, 'root')", + "('française', 4, 'amod')", + "('.', 4, 'punct')" + )) + + nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR) + docs = [english_text, french_text] + docs = nlp(docs) + + assert docs[0].lang == "en" + assert docs[0].sentences[0].dependencies_string() == english_deps_gold + assert docs[1].lang == "fr" + assert docs[1].sentences[0].dependencies_string() == french_deps_gold + |