Add a method to get the constituents known by a constituency parser, as requested in #1066

author: John Bauer <horatio@gmail.com> 2022-09-08 23:06:30 +0300
committer: John Bauer <horatio@gmail.com> 2022-09-08 23:06:30 +0300
commit: 2db43c834bc8adbb8b096cf135f0fab8b8d886cb (patch)
tree: 6217a1d62b3ce800e2421bf95705e68e5d20f89d
parent: 80b96bad477f6c74cf03d6d6d8dc057d1af88c8e (diff)
2 files changed, 13 insertions, 0 deletions
diff --git a/stanza/pipeline/constituency_processor.py b/stanza/pipeline/constituency_processor.py
index 0ead83ba..b70a7b2d 100644
--- a/stanza/pipeline/constituency_processor.py
+++ b/stanza/pipeline/constituency_processor.py
@@ -66,3 +66,12 @@ class ConstituencyProcessor(UDProcessor):
         trees = trainer.parse_tagged_words(self._model.model, words, self._batch_size)
         document.set(CONSTITUENCY, trees, to_sentence=True)
         return document
+
+    def get_constituents(self):
+        """
+        Return a new set of the constituent labels known by this model
+
+        For a pipeline, this can be queried with
+          pipeline.processors["constituency"].get_constituents()
+        """
+        return set(self._model.model.constituents)
diff --git a/stanza/tests/pipeline/test_pipeline_constituency_processor.py b/stanza/tests/pipeline/test_pipeline_constituency_processor.py
index 0cc01d0f..77a83b48 100644
--- a/stanza/tests/pipeline/test_pipeline_constituency_processor.py
+++ b/stanza/tests/pipeline/test_pipeline_constituency_processor.py
@@ -35,3 +35,7 @@ def test_sorted_two_batch():
     pipe = stanza.Pipeline("en", model_dir=TEST_MODELS_DIR, processors="tokenize,pos,constituency", constituency_batch_size=2)
     doc = pipe(TEST_TEXT)
     check_results(doc)
+
+def test_get_constituents():
+    pipe = stanza.Pipeline("en", model_dir=TEST_MODELS_DIR, processors="tokenize,pos,constituency")  # load from the test models dir, like the other tests in this file
+    assert "SBAR" in pipe.processors["constituency"].get_constituents()
author	John Bauer <horatio@gmail.com>	2022-09-08 23:06:30 +0300
committer	John Bauer <horatio@gmail.com>	2022-09-08 23:06:30 +0300
commit	2db43c834bc8adbb8b096cf135f0fab8b8d886cb (patch)
tree	6217a1d62b3ce800e2421bf95705e68e5d20f89d
parent	80b96bad477f6c74cf03d6d6d8dc057d1af88c8e (diff)