Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'stanza/models/constituency/parse_tree.py')
-rw-r--r-- stanza/models/constituency/parse_tree.py 17
1 file changed, 17 insertions, 0 deletions
diff --git a/stanza/models/constituency/parse_tree.py b/stanza/models/constituency/parse_tree.py
index 7db70caf..bca64f44 100644
--- a/stanza/models/constituency/parse_tree.py
+++ b/stanza/models/constituency/parse_tree.py
@@ -327,6 +327,23 @@ class Tree(StanzaObject):
tree.visit_preorder(preterminal = lambda x: tags.add(x.label))
return sorted(tags)
+
+ @staticmethod
+ def get_common_tags(trees, num_tags=5):
+ """
+ Tallies the label of every node that visit_preorder hands to the preterminal callback, over one Tree or an iterable of Trees, and returns the num_tags most frequent labels as a sorted list
+ """
+ if num_tags == 0:
+ return set()  # NOTE(review): this path yields an empty set, whereas the final return produces a list
+
+ if isinstance(trees, Tree):
+ trees = [trees]  # wrap a lone Tree so the loop below handles both call styles
+
+ tags = Counter()
+ for tree in trees:
+ tree.visit_preorder(preterminal = lambda x: tags.update([x.label]))  # add one to the count of each reported label
+ return sorted(x[0] for x in tags.most_common()[:num_tags])  # most_common() is ordered by descending count; keep the first num_tags labels, then sort them
+
@staticmethod
def get_unique_words(trees):
"""