diff options
Diffstat (limited to 'tests/test_server_start.py')
-rw-r--r-- | tests/test_server_start.py | 79 |
1 file changed, 40 insertions, 39 deletions
diff --git a/tests/test_server_start.py b/tests/test_server_start.py index 9eb01375..96061fcf 100644 --- a/tests/test_server_start.py +++ b/tests/test_server_start.py @@ -31,12 +31,12 @@ root(ROOT-0, lives-3) compound(Smith-2, Joe-1) nsubj(lives-3, Smith-2) case(California-5, in-4) -nmod:in(lives-3, California-5) +obl:in(lives-3, California-5) punct(lives-3, .-6) Extracted the following NER entity mentions: -Joe Smith PERSON -California STATE_OR_PROVINCE +Joe Smith PERSON PERSON:0.9972202689478088 +California STATE_OR_PROVINCE LOCATION:0.9990868267002156 """ # results with an example properties file @@ -61,35 +61,37 @@ Sentence #1 (10 tokens): Angela Merkel ist seit 2005 Bundeskanzlerin der Bundesrepublik Deutschland. Tokens: -[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=NE Lemma=angela NamedEntityTag=PERSON] -[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=NE Lemma=merkel NamedEntityTag=PERSON] -[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=VAFIN Lemma=ist NamedEntityTag=O] -[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=APPR Lemma=seit NamedEntityTag=O] -[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=CARD Lemma=2005 NamedEntityTag=O] -[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NN Lemma=bundeskanzlerin NamedEntityTag=O] -[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=ART Lemma=der NamedEntityTag=O] -[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=NN Lemma=bundesrepublik NamedEntityTag=LOCATION] -[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=NE Lemma=deutschland NamedEntityTag=LOCATION] -[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=$. Lemma=. 
NamedEntityTag=O] - -Constituency parse: -(ROOT - (S - (MPN (NE Angela) (NE Merkel)) - (VAFIN ist) - (PP (APPR seit) (CARD 2005) (NN Bundeskanzlerin) - (NP (ART der) (NN Bundesrepublik) (NE Deutschland))) - ($. .))) +[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=PROPN Lemma=angela NamedEntityTag=PERSON] +[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=PROPN Lemma=merkel NamedEntityTag=PERSON] +[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=AUX Lemma=ist NamedEntityTag=O] +[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=ADP Lemma=seit NamedEntityTag=O] +[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=NUM Lemma=2005 NamedEntityTag=O] +[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NOUN Lemma=bundeskanzlerin NamedEntityTag=O] +[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=DET Lemma=der NamedEntityTag=O] +[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=PROPN Lemma=bundesrepublik NamedEntityTag=LOCATION] +[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=PROPN Lemma=deutschland NamedEntityTag=LOCATION] +[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=PUNCT Lemma=. 
NamedEntityTag=O] +Dependency Parse (enhanced plus plus dependencies): +root(ROOT-0, Bundeskanzlerin-6) +nsubj(Bundeskanzlerin-6, Angela-1) +flat(Angela-1, Merkel-2) +cop(Bundeskanzlerin-6, ist-3) +case(2005-5, seit-4) +nmod:seit(Bundeskanzlerin-6, 2005-5) +det(Bundesrepublik-8, der-7) +nmod(Bundeskanzlerin-6, Bundesrepublik-8) +appos(Bundesrepublik-8, Deutschland-9) +punct(Bundeskanzlerin-6, .-10) Extracted the following NER entity mentions: -Angela Merkel PERSON -Bundesrepublik Deutschland LOCATION +Angela Merkel PERSON PERSON:0.9999981583355767 +Bundesrepublik Deutschland LOCATION LOCATION:0.968290232887181 """ GERMAN_SMALL_PROPS = {'annotators': 'tokenize,ssplit,pos', 'tokenize.language': 'de', - 'pos.model': 'edu/stanford/nlp/models/pos-tagger/german/german-hgc.tagger'} + 'pos.model': 'edu/stanford/nlp/models/pos-tagger/german-ud.tagger'} # results with custom Python dictionary set properties GERMAN_SMALL_PROPS_GOLD = """ @@ -97,16 +99,16 @@ Sentence #1 (10 tokens): Angela Merkel ist seit 2005 Bundeskanzlerin der Bundesrepublik Deutschland. Tokens: -[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=NE] -[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=NE] -[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=VAFIN] -[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=APPR] -[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=CARD] -[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NN] -[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=ART] -[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=NN] -[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=NE] -[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=$.] 
+[Text=Angela CharacterOffsetBegin=0 CharacterOffsetEnd=6 PartOfSpeech=PROPN] +[Text=Merkel CharacterOffsetBegin=7 CharacterOffsetEnd=13 PartOfSpeech=PROPN] +[Text=ist CharacterOffsetBegin=14 CharacterOffsetEnd=17 PartOfSpeech=AUX] +[Text=seit CharacterOffsetBegin=18 CharacterOffsetEnd=22 PartOfSpeech=ADP] +[Text=2005 CharacterOffsetBegin=23 CharacterOffsetEnd=27 PartOfSpeech=NUM] +[Text=Bundeskanzlerin CharacterOffsetBegin=28 CharacterOffsetEnd=43 PartOfSpeech=NOUN] +[Text=der CharacterOffsetBegin=44 CharacterOffsetEnd=47 PartOfSpeech=DET] +[Text=Bundesrepublik CharacterOffsetBegin=48 CharacterOffsetEnd=62 PartOfSpeech=PROPN] +[Text=Deutschland CharacterOffsetBegin=63 CharacterOffsetEnd=74 PartOfSpeech=PROPN] +[Text=. CharacterOffsetBegin=74 CharacterOffsetEnd=75 PartOfSpeech=PUNCT] """ # results with custom Python dictionary set properties and annotators=tokenize,ssplit @@ -151,15 +153,14 @@ def annotate_and_time(client, text, properties={}): end = time.time() return {'annotation': ann, 'start_time': start, 'end_time': end} - def test_preload(): """ Test that the default annotators load fully immediately upon server start """ with corenlp.CoreNLPClient(server_id='test_server_start_preload') as client: # wait for annotators to load time.sleep(140) results = annotate_and_time(client, EN_DOC) - assert results['annotation'].strip() == EN_PRELOAD_GOLD.strip() - assert results['end_time'] - results['start_time'] < 1.5 + compare_ignoring_whitespace(results['annotation'], EN_PRELOAD_GOLD) + assert results['end_time'] - results['start_time'] < 3 def test_props_file(): @@ -173,7 +174,7 @@ def test_lang_start(): """ Test starting the server with a Stanford CoreNLP language name """ with corenlp.CoreNLPClient(properties='german', server_id='test_server_start_lang_name') as client: ann = client.annotate(GERMAN_DOC, output_format='text') - assert ann.strip() == GERMAN_FULL_PROPS_GOLD.strip() + compare_ignoring_whitespace(ann, GERMAN_FULL_PROPS_GOLD) def test_python_dict(): |