Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_server_start.py')
-rw-r--r--tests/test_server_start.py79
1 files changed, 40 insertions, 39 deletions
diff --git a/tests/test_server_start.py b/tests/test_server_start.py
index 9eb01375..96061fcf 100644
--- a/tests/test_server_start.py
+++ b/tests/test_server_start.py
@@ -31,12 +31,12 @@ root(ROOT-0, lives-3)
compound(Smith-2, Joe-1)
nsubj(lives-3, Smith-2)
case(California-5, in-4)
-nmod:in(lives-3, California-5)
+obl:in(lives-3, California-5)
punct(lives-3, .-6)
Extracted the following NER entity mentions:
-Joe Smith PERSON
-California STATE_OR_PROVINCE
+Joe Smith PERSON PERSON:0.9972202689478088
+California STATE_OR_PROVINCE LOCATION:0.9990868267002156
"""
# results with an example properties file
@@ -61,35 +61,37 @@ Sentence #1 (10 tokens):
Angela Merkel ist seit 2005 Bundeskanzlerin der Bundesrepublik Deutschland.
Tokens:
-[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=NE Lemma=angela NamedEntityTag=PERSON]
-[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=NE Lemma=merkel NamedEntityTag=PERSON]
-[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=VAFIN Lemma=ist NamedEntityTag=O]
-[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=APPR Lemma=seit NamedEntityTag=O]
-[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=CARD Lemma=2005 NamedEntityTag=O]
-[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NN Lemma=bundeskanzlerin NamedEntityTag=O]
-[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=ART Lemma=der NamedEntityTag=O]
-[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=NN Lemma=bundesrepublik NamedEntityTag=LOCATION]
-[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=NE Lemma=deutschland NamedEntityTag=LOCATION]
-[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=$. Lemma=. NamedEntityTag=O]
-
-Constituency parse:
-(ROOT
- (S
- (MPN (NE Angela) (NE Merkel))
- (VAFIN ist)
- (PP (APPR seit) (CARD 2005) (NN Bundeskanzlerin)
- (NP (ART der) (NN Bundesrepublik) (NE Deutschland)))
- ($. .)))
+[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=PROPN Lemma=angela NamedEntityTag=PERSON]
+[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=PROPN Lemma=merkel NamedEntityTag=PERSON]
+[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=AUX Lemma=ist NamedEntityTag=O]
+[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=ADP Lemma=seit NamedEntityTag=O]
+[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=NUM Lemma=2005 NamedEntityTag=O]
+[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NOUN Lemma=bundeskanzlerin NamedEntityTag=O]
+[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=DET Lemma=der NamedEntityTag=O]
+[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=PROPN Lemma=bundesrepublik NamedEntityTag=LOCATION]
+[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=PROPN Lemma=deutschland NamedEntityTag=LOCATION]
+[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=PUNCT Lemma=. NamedEntityTag=O]
+Dependency Parse (enhanced plus plus dependencies):
+root(ROOT-0, Bundeskanzlerin-6)
+nsubj(Bundeskanzlerin-6, Angela-1)
+flat(Angela-1, Merkel-2)
+cop(Bundeskanzlerin-6, ist-3)
+case(2005-5, seit-4)
+nmod:seit(Bundeskanzlerin-6, 2005-5)
+det(Bundesrepublik-8, der-7)
+nmod(Bundeskanzlerin-6, Bundesrepublik-8)
+appos(Bundesrepublik-8, Deutschland-9)
+punct(Bundeskanzlerin-6, .-10)
Extracted the following NER entity mentions:
-Angela Merkel PERSON
-Bundesrepublik Deutschland LOCATION
+Angela Merkel PERSON PERSON:0.9999981583355767
+Bundesrepublik Deutschland LOCATION LOCATION:0.968290232887181
"""
GERMAN_SMALL_PROPS = {'annotators': 'tokenize,ssplit,pos', 'tokenize.language': 'de',
- 'pos.model': 'edu/stanford/nlp/models/pos-tagger/german/german-hgc.tagger'}
+ 'pos.model': 'edu/stanford/nlp/models/pos-tagger/german-ud.tagger'}
# results with custom Python dictionary set properties
GERMAN_SMALL_PROPS_GOLD = """
@@ -97,16 +99,16 @@ Sentence #1 (10 tokens):
Angela Merkel ist seit 2005 Bundeskanzlerin der Bundesrepublik Deutschland.
Tokens:
-[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=NE]
-[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=NE]
-[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=VAFIN]
-[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=APPR]
-[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=CARD]
-[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NN]
-[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=ART]
-[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=NN]
-[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=NE]
-[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=$.]
+[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=PROPN]
+[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=PROPN]
+[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=AUX]
+[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=ADP]
+[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=NUM]
+[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NOUN]
+[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=DET]
+[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=PROPN]
+[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=PROPN]
+[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=PUNCT]
"""
# results with custom Python dictionary set properties and annotators=tokenize,ssplit
@@ -151,15 +153,14 @@ def annotate_and_time(client, text, properties={}):
end = time.time()
return {'annotation': ann, 'start_time': start, 'end_time': end}
-
def test_preload():
""" Test that the default annotators load fully immediately upon server start """
with corenlp.CoreNLPClient(server_id='test_server_start_preload') as client:
# wait for annotators to load
time.sleep(140)
results = annotate_and_time(client, EN_DOC)
- assert results['annotation'].strip() == EN_PRELOAD_GOLD.strip()
- assert results['end_time'] - results['start_time'] < 1.5
+ compare_ignoring_whitespace(results['annotation'], EN_PRELOAD_GOLD)
+ assert results['end_time'] - results['start_time'] < 3
def test_props_file():
@@ -173,7 +174,7 @@ def test_lang_start():
""" Test starting the server with a Stanford CoreNLP language name """
with corenlp.CoreNLPClient(properties='german', server_id='test_server_start_lang_name') as client:
ann = client.annotate(GERMAN_DOC, output_format='text')
- assert ann.strip() == GERMAN_FULL_PROPS_GOLD.strip()
+ compare_ignoring_whitespace(ann, GERMAN_FULL_PROPS_GOLD)
def test_python_dict():