diff options
author | J38 <jebolton@stanford.edu> | 2019-01-20 11:10:41 +0300 |
---|---|---|
committer | J38 <jebolton@stanford.edu> | 2019-01-20 11:10:41 +0300 |
commit | 8cf208768591e8d336b6be4c9a84ff4024a73dff (patch) | |
tree | 6018e3ca4f18f4101b74ac818c4462ba3b95802b /demo | |
parent | d84c4518e5834c004cf685d3f6088001cdda56f0 (diff) |
update and simplify basic demo
Diffstat (limited to 'demo')
-rw-r--r-- | demo/pipeline_demo.py | 122 |
1 file changed, 47 insertions, 75 deletions
diff --git a/demo/pipeline_demo.py b/demo/pipeline_demo.py index 178f6050..43f86463 100644 --- a/demo/pipeline_demo.py +++ b/demo/pipeline_demo.py @@ -1,76 +1,48 @@ -from datetime import datetime -from stanfordnlp.pipeline import Document, Pipeline +""" +basic demo script +""" + +import argparse +import os + +from pathlib import Path +from stanfordnlp import Document, Pipeline +from stanfordnlp.utils.resources import build_default_config + + +if __name__ == '__main__': + # get arguments + # determine home directory + home_dir = str(Path.home()) + parser = argparse.ArgumentParser() + parser.add_argument('-d', '--models_dir', help='location of models files | default: ~/stanfordnlp_data', + default=home_dir+'/stanfordnlp_data') + args = parser.parse_args() + # download the models + if not os.path.exists(args.models_dir+'/en_ewt_models'): + download('en_ewt') + # set up a pipeline + print('---') + print('Building pipeline...') + print('with config: ') + pipeline_config = build_default_config('en_ewt', args.models_dir) + print(pipeline_config) + print('') + pipeline = Pipeline(config=pipeline_config) + # set up document + doc = Document('Barack Obama was born in Hawaii. He was elected president in 2008.') + # run pipeline on the document + pipeline.process(doc) + # access nlp annotations + print('') + print('---') + print('tokens of first sentence: ') + for tok in doc.sentences[0].tokens: + print(tok.word + '\t' + tok.lemma + '\t' + tok.pos) + print('') + print('---') + print('dependency parse of first sentence: ') + for dep_edge in doc.sentences[0].dependencies: + print((dep_edge[0].word, dep_edge[1], dep_edge[2].word)) + print('') -# example documents -english_doc = Document('Barack Obama was born in Hawaii. He was elected president in 2008.') -french_doc = Document('Emmanuel Macron est né à Amiens. 
Il a été élu président en 2017.') - -# example configs -english_config = { - 'processors': 'tokenize,pos,lemma,depparse', - 'tokenize.model_path': 'saved_models/tokenize/en_ewt_tokenizer.pt', - 'lemma.model_path': 'saved_models/lemma/en_ewt_lemmatizer.pt', - 'pos.pretrain_path': 'saved_models/pos/en_ewt_tagger.pretrain.pt', - 'pos.model_path': 'saved_models/pos/en_ewt_tagger.pt', - 'depparse.pretrain_path': 'saved_models/depparse/en_ewt_parser.pretrain.pt', - 'depparse.model_path': 'saved_models/depparse/en_ewt_parser.pt' -} - -french_config = { - 'processors': 'tokenize,mwt,pos,lemma,depparse', - 'tokenize.model_path': 'saved_models/tokenize/fr_gsd_tokenizer.pt', - 'mwt.model_path': 'saved_models/mwt/fr_gsd_mwt_expander.pt', - 'lemma.model_path': 'saved_models/lemma/fr_gsd_lemmatizer.pt', - 'pos.pretrain_path': 'saved_models/pos/fr_gsd_tagger.pretrain.pt', - 'pos.model_path': 'saved_models/pos/fr_gsd_tagger.pt', - 'depparse.pretrain_path': 'saved_models/depparse/fr_gsd_parser.pretrain.pt', - 'depparse.model_path': 'saved_models/depparse/fr_gsd_parser.pt' -} - -print('---') -print('load pipeline') -print('\tstart: '+str(datetime.now())) - -# english example -english_pipeline = Pipeline(config=english_config) -english_pipeline.process(english_doc) - -print('\tend: '+str(datetime.now())) - -print('---') -print('english example') -print('---') -print('tokens of first English sentence') -for tok in english_doc.sentences[0].tokens: - print(tok.word + '\t' + tok.lemma + '\t' + tok.pos) -print('---') -print('dependency parse of first English sentence') -for dep_edge in english_doc.sentences[0].dependencies: - print((dep_edge[0].word, dep_edge[1], dep_edge[2].word)) -print('---') -print('run on a second english document') -second_english_doc = Document('I am a sentence.') -english_pipeline.process(second_english_doc) -print('---') -print('tokens of second English document') -for tok in second_english_doc.sentences[0].tokens: - print(tok.word + '\t' + tok.lemma + '\t' + 
tok.pos) -print('---') -print('dependency parse of second English document') -for dep_edge in english_doc.sentences[0].dependencies: - print((dep_edge[0].word, dep_edge[1], dep_edge[2].word)) - -# french example -french_pipeline = Pipeline(config=french_config) -french_pipeline.process(french_doc) - -print('---') -print('french example') -print('---') -print('tokens of first French sentence') -for tok in french_doc.sentences[0].tokens: - print(tok.word + '\t' + tok.lemma + '\t' + tok.pos) -print('---') -print('dependency parse of first French sentence') -for dep_edge in french_doc.sentences[0].dependencies: - print((dep_edge[0].word, dep_edge[1], dep_edge[2].word)) |