blob: feee74fa13978b96a3d34bfc15574accb142b5bc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
import pytest
from stanza.models.constituency import tree_reader
from stanza.tests import *
# Module-level marks: pytest applies every mark listed in `pytestmark` to all
# tests in this file, so each test here is tagged `pipeline` and `travis`.
pytestmark = [pytest.mark.pipeline, pytest.mark.travis]
def test_simple():
    """
    Two preterminal trees written in one string should both be read
    """
    text = "(VB Unban) (NNP Opal)"
    parsed = tree_reader.read_trees(text)
    assert len(parsed) == 2

    # (tag, word) expected for each tree, in the order they appear in the text
    expected = [('VB', 'Unban'), ('NNP', 'Opal')]
    for index, (tag, word) in enumerate(expected):
        tree = parsed[index]
        assert tree.is_preterminal()
        assert tree.label == tag
        assert tree.children[0].label == word
def test_newlines():
    """
    Separating the trees with newlines should still yield two trees
    """
    parsed = tree_reader.read_trees("(VB Unban)\n\n(NNP Opal)")
    assert len(parsed) == 2
def test_complicated():
    """
    Read one nested tree and check the top levels of its structure
    """
    text="( (SBARQ (WHNP (WP Who)) (SQ (VP (VBZ sits) (PP (IN in) (NP (DT this) (NN seat))))) (. ?)))"
    parsed = tree_reader.read_trees(text)
    assert len(parsed) == 1

    # the outermost bracket has no label in the text; the test expects ROOT
    root = parsed[0]
    assert not root.is_leaf()
    assert not root.is_preterminal()
    assert root.label == 'ROOT'
    assert len(root.children) == 1

    sbarq = root.children[0]
    assert sbarq.label == 'SBARQ'
    assert len(sbarq.children) == 3
    assert [child.label for child in sbarq.children] == ['WHNP', 'SQ', '.']
    # deeper levels of the tree are not checked here
def test_one_word():
    """
    Trees made of a single node should be read as leaves

    Probably of little importance for the parsing use case
    """
    text="(FOO) (BAR)"
    parsed = tree_reader.read_trees(text)
    assert len(parsed) == 2

    foo = parsed[0]
    assert foo.is_leaf()
    assert foo.label == 'FOO'

    bar = parsed[1]
    assert bar.is_leaf()
    assert bar.label == 'BAR'
|