Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/demo
diff options
context:
space:
mode:
author J38 <jebolton@stanford.edu> 2019-01-12 02:37:09 +0300
committer J38 <jebolton@stanford.edu> 2019-01-12 02:37:09 +0300
commit a5f254279d799258555a8ee8ea2f8cd3cd143e1e (patch)
tree cc405c2539a442e4e5d4062d15ecc0549925eb9b /demo
parent d8d835fde3df3a29486746fd6d919c637eb25925 (diff)
add demo
Diffstat (limited to 'demo')
-rw-r--r-- demo/pipeline_demo.py 76
1 files changed, 76 insertions, 0 deletions
diff --git a/demo/pipeline_demo.py b/demo/pipeline_demo.py
new file mode 100644
index 00000000..178f6050
--- /dev/null
+++ b/demo/pipeline_demo.py
@@ -0,0 +1,76 @@
+from datetime import datetime
+from stanfordnlp.pipeline import Document, Pipeline
+
+# example documents: one raw-text Document per language, each handed to a Pipeline.process() call further down
+english_doc = Document('Barack Obama was born in Hawaii. He was elected president in 2008.')
+french_doc = Document('Emmanuel Macron est né à Amiens. Il a été élu président en 2017.')
+
+# example configs: a comma-separated 'processors' list plus '<processor>.model_path' / '<processor>.pretrain_path' file entries
+english_config = {
+ 'processors': 'tokenize,pos,lemma,depparse',
+ 'tokenize.model_path': 'saved_models/tokenize/en_ewt_tokenizer.pt',
+ 'lemma.model_path': 'saved_models/lemma/en_ewt_lemmatizer.pt',
+ 'pos.pretrain_path': 'saved_models/pos/en_ewt_tagger.pretrain.pt',  # only pos and depparse carry a pretrain_path entry
+ 'pos.model_path': 'saved_models/pos/en_ewt_tagger.pt',
+ 'depparse.pretrain_path': 'saved_models/depparse/en_ewt_parser.pretrain.pt',
+ 'depparse.model_path': 'saved_models/depparse/en_ewt_parser.pt'
+}
+
+french_config = {
+ 'processors': 'tokenize,mwt,pos,lemma,depparse',  # same processors as english_config plus 'mwt'
+ 'tokenize.model_path': 'saved_models/tokenize/fr_gsd_tokenizer.pt',
+ 'mwt.model_path': 'saved_models/mwt/fr_gsd_mwt_expander.pt',  # model file for the extra 'mwt' processor
+ 'lemma.model_path': 'saved_models/lemma/fr_gsd_lemmatizer.pt',
+ 'pos.pretrain_path': 'saved_models/pos/fr_gsd_tagger.pretrain.pt',
+ 'pos.model_path': 'saved_models/pos/fr_gsd_tagger.pt',
+ 'depparse.pretrain_path': 'saved_models/depparse/fr_gsd_parser.pretrain.pt',
+ 'depparse.model_path': 'saved_models/depparse/fr_gsd_parser.pt'
+}
+
+print('---')
+print('load pipeline')
+print('\tstart: '+str(datetime.now()))
+
+# english example: build the pipeline from english_config, then run it on english_doc
+english_pipeline = Pipeline(config=english_config)
+english_pipeline.process(english_doc)  # NOTE(review): runs between the start/end timestamps, so they bracket load + processing
+
+print('\tend: '+str(datetime.now()))
+
+# Print what is read back from english_doc after processing: first-sentence tokens, then its parse.
+print('---', 'english example', '---', sep='\n')
+print('tokens of first English sentence')
+for token in english_doc.sentences[0].tokens:
+    # one token per line: word, lemma and pos separated by tabs
+    print('\t'.join((token.word, token.lemma, token.pos)))
+print('---', 'dependency parse of first English sentence', sep='\n')
+for edge in english_doc.sentences[0].dependencies:
+    # edge[0] and edge[2] expose .word; edge[1] is printed as-is
+    print((edge[0].word, edge[1], edge[2].word))
+# Reuse the already-built English pipeline on a second document and print its annotations.
+print('---', 'run on a second english document', sep='\n')
+second_english_doc = Document('I am a sentence.')
+english_pipeline.process(second_english_doc)
+print('---', 'tokens of second English document', sep='\n')
+for tok in second_english_doc.sentences[0].tokens:
+    print(tok.word + '\t' + tok.lemma + '\t' + tok.pos)
+print('---', 'dependency parse of second English document', sep='\n')
+# Iterate second_english_doc here: looping over english_doc would re-print the
+# first document's parse under the "second English document" heading.
+for dep_edge in second_english_doc.sentences[0].dependencies:
+    print((dep_edge[0].word, dep_edge[1], dep_edge[2].word))
+
+# french example: a separate pipeline is built from french_config
+# and run on french_doc before anything is printed
+french_pipeline = Pipeline(config=french_config)
+french_pipeline.process(french_doc)
+
+print('---', 'french example', '---', sep='\n')
+print('tokens of first French sentence')
+for token in french_doc.sentences[0].tokens:
+    # one token per line: word, lemma and pos separated by tabs
+    print('\t'.join((token.word, token.lemma, token.pos)))
+print('---', 'dependency parse of first French sentence', sep='\n')
+for edge in french_doc.sentences[0].dependencies:
+    # edge[0] and edge[2] expose .word; edge[1] is printed as-is
+    print((edge[0].word, edge[1], edge[2].word))