Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Bauer <horatio@gmail.com> 2022-09-07 19:39:01 +0300
committer John Bauer <horatio@gmail.com> 2022-09-07 19:39:01 +0300
commit a7ff934f4a2b57e7b0cbeabfb84077814391b8e3 (patch)
tree 4306b02c72690e83da482cc213a6fb93161d2677
parent c2941d7de6a4f1c22a3dabf0632852f9cd92bf8b (diff)
Separate the langid test into two separate test scripts
-rw-r--r-- stanza/tests/langid/test_langid.py 36
-rw-r--r-- stanza/tests/langid/test_multilingual.py 43
2 files changed, 44 insertions, 35 deletions
diff --git a/stanza/tests/langid/test_langid.py b/stanza/tests/langid/test_langid.py
index 7dba9e40..a6ab671c 100644
--- a/stanza/tests/langid/test_langid.py
+++ b/stanza/tests/langid/test_langid.py
@@ -6,8 +6,7 @@ import pytest
from stanza.models.common.doc import Document
from stanza.pipeline.core import Pipeline
-from stanza.pipeline.multilingual import MultilingualPipeline
-from stanza.tests import *
+from stanza.tests import TEST_MODELS_DIR
#pytestmark = pytest.mark.skip
@@ -595,36 +594,3 @@ def test_lang_subset_unlikely_language():
predictions = model(text_tensor)
assert predictions[0, en_idx] < 0, "If this test fails, then regardless of how unlikely it was, the model is predicting the input string is possibly English. Update the test by picking a different combination of languages & input"
-def test_multilingual_pipeline():
- """
- Basic test of multilingual pipeline
- """
- english_text = "This is an English sentence."
- english_deps_gold = "\n".join((
- "('This', 5, 'nsubj')",
- "('is', 5, 'cop')",
- "('an', 5, 'det')",
- "('English', 5, 'amod')",
- "('sentence', 0, 'root')",
- "('.', 5, 'punct')"
- ))
-
- french_text = "C'est une phrase française."
- french_deps_gold = "\n".join((
- "(\"C'\", 4, 'nsubj')",
- "('est', 4, 'cop')",
- "('une', 4, 'det')",
- "('phrase', 0, 'root')",
- "('française', 4, 'amod')",
- "('.', 4, 'punct')"
- ))
-
- nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR)
- docs = [english_text, french_text]
- docs = nlp(docs)
-
- assert docs[0].lang == "en"
- assert docs[0].sentences[0].dependencies_string() == english_deps_gold
- assert docs[1].lang == "fr"
- assert docs[1].sentences[0].dependencies_string() == french_deps_gold
-
diff --git a/stanza/tests/langid/test_multilingual.py b/stanza/tests/langid/test_multilingual.py
new file mode 100644
index 00000000..0428ddf4
--- /dev/null
+++ b/stanza/tests/langid/test_multilingual.py
@@ -0,0 +1,43 @@
+"""
+Tests specifically for the MultilingualPipeline
+"""
+
+import pytest
+
+from stanza.pipeline.multilingual import MultilingualPipeline
+
+from stanza.tests import TEST_MODELS_DIR
+
+def test_multilingual_pipeline():
+ """
+ Basic test of multilingual pipeline
+ """
+ english_text = "This is an English sentence."
+ english_deps_gold = "\n".join((
+ "('This', 5, 'nsubj')",
+ "('is', 5, 'cop')",
+ "('an', 5, 'det')",
+ "('English', 5, 'amod')",
+ "('sentence', 0, 'root')",
+ "('.', 5, 'punct')"
+ ))
+
+ french_text = "C'est une phrase française."
+ french_deps_gold = "\n".join((
+ "(\"C'\", 4, 'nsubj')",
+ "('est', 4, 'cop')",
+ "('une', 4, 'det')",
+ "('phrase', 0, 'root')",
+ "('française', 4, 'amod')",
+ "('.', 4, 'punct')"
+ ))
+
+ nlp = MultilingualPipeline(model_dir=TEST_MODELS_DIR)
+ docs = [english_text, french_text]
+ docs = nlp(docs)
+
+ assert docs[0].lang == "en"
+ assert docs[0].sentences[0].dependencies_string() == english_deps_gold
+ assert docs[1].lang == "fr"
+ assert docs[1].sentences[0].dependencies_string() == french_deps_gold
+