Separate the langid test into two separate test scripts

author: John Bauer <horatio@gmail.com> 2022-09-07 19:39:01 +0300
committer: John Bauer <horatio@gmail.com> 2022-09-07 19:39:01 +0300
commit: a7ff934f4a2b57e7b0cbeabfb84077814391b8e3 (patch)
tree: 4306b02c72690e83da482cc213a6fb93161d2677
parent: c2941d7de6a4f1c22a3dabf0632852f9cd92bf8b (diff)
2 files changed, 44 insertions, 35 deletions
diff --git a/stanza/tests/langid/test_langid.py b/stanza/tests/langid/test_langid.py
index 7dba9e40..a6ab671c 100644
--- a/stanza/tests/langid/test_langid.py
+++ b/stanza/tests/langid/test_langid.py
@@ -6,8 +6,7 @@ import pytest
 
 from stanza.models.common.doc import Document
 from stanza.pipeline.core import Pipeline
-from stanza.pipeline.multilingual import MultilingualPipeline
-from stanza.tests import *
+from stanza.tests import TEST_MODELS_DIR
 
 #pytestmark = pytest.mark.skip
 
@@ -595,36 +594,3 @@ def test_lang_subset_unlikely_language():
     predictions = model(text_tensor)
     assert predictions[0, en_idx] < 0, "If this test fails, then regardless of how unlikely it was, the model is predicting the input string is possibly English.  Update the test by picking a different combination of languages & input"
 
-def test_multilingual_pipeline():
-    """
-    Basic test of multilingual pipeline
-    """
-    english_text = "This is an English sentence."
-    english_deps_gold = "\n".join((
-        "('This', 5, 'nsubj')",
-        "('is', 5, 'cop')",
-        "('an', 5, 'det')",
-        "('English', 5, 'amod')",
-        "('sentence', 0, 'root')",
-        "('.', 5, 'punct')"
-    ))
-
-    french_text = "C'est une phrase française."
-    french_deps_gold = "\n".join((
-        "(\"C'\", 4, 'nsubj')",
-        "('est', 4, 'cop')",
-        "('une', 4, 'det')",
-        "('phrase', 0, 'root')",
-        "('française', 4, 'amod')",
-        "('.', 4, 'punct')"
-    ))
-
-    nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR)
-    docs = [english_text, french_text]
-    docs = nlp(docs)
-
-    assert docs[0].lang == "en"
-    assert docs[0].sentences[0].dependencies_string() == english_deps_gold
-    assert docs[1].lang == "fr"
-    assert docs[1].sentences[0].dependencies_string() == french_deps_gold
-
diff --git a/stanza/tests/langid/test_multilingual.py b/stanza/tests/langid/test_multilingual.py
new file mode 100644
index 00000000..0428ddf4
--- /dev/null
+++ b/stanza/tests/langid/test_multilingual.py
@@ -0,0 +1,43 @@
+"""
+Tests specifically for the MultilingualPipeline
+"""
+
+import pytest
+
+from stanza.pipeline.multilingual import MultilingualPipeline
+
+from stanza.tests import TEST_MODELS_DIR
+
+def test_multilingual_pipeline():
+    """
+    Basic test of multilingual pipeline
+    """
+    english_text = "This is an English sentence."
+    english_deps_gold = "\n".join((
+        "('This', 5, 'nsubj')",
+        "('is', 5, 'cop')",
+        "('an', 5, 'det')",
+        "('English', 5, 'amod')",
+        "('sentence', 0, 'root')",
+        "('.', 5, 'punct')"
+    ))
+
+    french_text = "C'est une phrase française."
+    french_deps_gold = "\n".join((
+        "(\"C'\", 4, 'nsubj')",
+        "('est', 4, 'cop')",
+        "('une', 4, 'det')",
+        "('phrase', 0, 'root')",
+        "('française', 4, 'amod')",
+        "('.', 4, 'punct')"
+    ))
+
+    nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR)
+    docs = [english_text, french_text]
+    docs = nlp(docs)
+
+    assert docs[0].lang == "en"
+    assert docs[0].sentences[0].dependencies_string() == english_deps_gold
+    assert docs[1].lang == "fr"
+    assert docs[1].sentences[0].dependencies_string() == french_deps_gold
+
author	John Bauer <horatio@gmail.com>	2022-09-07 19:39:01 +0300
committer	John Bauer <horatio@gmail.com>	2022-09-07 19:39:01 +0300
commit	a7ff934f4a2b57e7b0cbeabfb84077814391b8e3 (patch)
tree	4306b02c72690e83da482cc213a6fb93161d2677
parent	c2941d7de6a4f1c22a3dabf0632852f9cd92bf8b (diff)