Welcome to mirror list, hosted at ThFree Co, Russian Federation.

test_pipeline_constituency_processor.py « pipeline « tests « stanza - github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0cc01d0fe42b5032e8fcbed8655c4f63d716e9f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

import pytest
import stanza

from stanza.tests import *

# Module-level marks: pytest applies the `pipeline` and `travis` markers to
# every test defined in this file, so they can be selected with `-m`.
pytestmark = [pytest.mark.pipeline, pytest.mark.travis]

# Raw input fed to the pipeline: three short sentences separated by two spaces.
TEST_TEXT = "This is a test.  Another sentence.  Are these sorted?"

# Expected leaf tokens for each sentence of TEST_TEXT, in document order.
TEST_TOKENS = [
    ["This", "is", "a", "test", "."],
    ["Another", "sentence", "."],
    ["Are", "these", "sorted", "?"],
]

def check_results(doc):
    """Assert that a processed document matches TEST_TOKENS.

    The document must contain exactly as many sentences as TEST_TOKENS has
    entries, and the ``leaf_labels()`` of each sentence's ``constituency``
    must equal the corresponding expected token list, in the same order.
    """
    sentences = doc.sentences
    assert len(sentences) == len(TEST_TOKENS)
    for parsed, expected_tokens in zip(sentences, TEST_TOKENS):
        leaves = parsed.constituency.leaf_labels()
        assert leaves == expected_tokens

def test_sorted_big_batch():
    """Parse TEST_TEXT without setting constituency_batch_size and verify the output.

    NOTE(review): presumably the default batch size is large enough to hold
    all three sentences in one batch -- confirm against the processor defaults.
    """
    nlp = stanza.Pipeline(
        "en",
        model_dir=TEST_MODELS_DIR,
        processors="tokenize,pos,constituency",
    )
    check_results(nlp(TEST_TEXT))

def test_illegal_batch_size():
    """A non-numeric constituency_batch_size raises ValueError only when the
    constituency processor is part of the pipeline.
    """
    shared_kwargs = {
        "model_dir": TEST_MODELS_DIR,
        "constituency_batch_size": "zzz",
    }

    # Without the constituency processor the bad value is expected to be
    # tolerated: this call sits outside pytest.raises, so any exception here
    # fails the test.
    stanza.Pipeline("en", processors="tokenize,pos", **shared_kwargs)

    # With the constituency processor requested, construction must fail.
    with pytest.raises(ValueError):
        stanza.Pipeline("en", processors="tokenize,pos,constituency", **shared_kwargs)

def test_sorted_one_batch():
    """Parse TEST_TEXT with constituency_batch_size=1 and verify the output."""
    nlp = stanza.Pipeline(
        "en",
        model_dir=TEST_MODELS_DIR,
        processors="tokenize,pos,constituency",
        constituency_batch_size=1,
    )
    check_results(nlp(TEST_TEXT))

def test_sorted_two_batch():
    """Parse TEST_TEXT with constituency_batch_size=2 and verify the output."""
    nlp = stanza.Pipeline(
        "en",
        model_dir=TEST_MODELS_DIR,
        processors="tokenize,pos,constituency",
        constituency_batch_size=2,
    )
    check_results(nlp(TEST_TEXT))