Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-09-08 23:06:30 +0300
committer: John Bauer <horatio@gmail.com> 2022-09-08 23:06:30 +0300
commit: 2db43c834bc8adbb8b096cf135f0fab8b8d886cb (patch)
tree: 6217a1d62b3ce800e2421bf95705e68e5d20f89d
parent: 80b96bad477f6c74cf03d6d6d8dc057d1af88c8e (diff)
Add a method to get the constituents known by a conparser, as requested in #1066
-rw-r--r-- stanza/pipeline/constituency_processor.py 9
-rw-r--r-- stanza/tests/pipeline/test_pipeline_constituency_processor.py 4
2 files changed, 13 insertions, 0 deletions
diff --git a/stanza/pipeline/constituency_processor.py b/stanza/pipeline/constituency_processor.py
index 0ead83ba..b70a7b2d 100644
--- a/stanza/pipeline/constituency_processor.py
+++ b/stanza/pipeline/constituency_processor.py
@@ -66,3 +66,12 @@ class ConstituencyProcessor(UDProcessor):
trees = trainer.parse_tagged_words(self._model.model, words, self._batch_size)
document.set(CONSTITUENCY, trees, to_sentence=True)
return document
+
+ def get_constituents(self):
+ """
+ Return a set of the constituents known by this model
+
+ For a pipeline, this can be queried with
+ pipeline.processors["constituency"].get_constituents()
+ """
+ return set(self._model.model.constituents)
diff --git a/stanza/tests/pipeline/test_pipeline_constituency_processor.py b/stanza/tests/pipeline/test_pipeline_constituency_processor.py
index 0cc01d0f..77a83b48 100644
--- a/stanza/tests/pipeline/test_pipeline_constituency_processor.py
+++ b/stanza/tests/pipeline/test_pipeline_constituency_processor.py
@@ -35,3 +35,7 @@ def test_sorted_two_batch():
pipe = stanza.Pipeline("en", model_dir=TEST_MODELS_DIR, processors="tokenize,pos,constituency", constituency_batch_size=2)
doc = pipe(TEST_TEXT)
check_results(doc)
+
+def test_get_constituents():
+ pipe = stanza.Pipeline("en", processors="tokenize,pos,constituency")
+ assert "SBAR" in pipe.processors["constituency"].get_constituents()