Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'stanza/pipeline/tokenize_processor.py')
-rw-r--r-- stanza/pipeline/tokenize_processor.py | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/stanza/pipeline/tokenize_processor.py b/stanza/pipeline/tokenize_processor.py
index 79b17a54..3421f90b 100644
--- a/stanza/pipeline/tokenize_processor.py
+++ b/stanza/pipeline/tokenize_processor.py
@@ -82,7 +82,7 @@ class TokenizeProcessor(UDProcessor):
raw_text = '\n\n'.join(document) if isinstance(document, list) else document
# set up batches
- batches = DataLoader(self.config, input_text=raw_text, vocab=self.vocab, evaluation=True)
+ batches = DataLoader(self.config, input_text=raw_text, vocab=self.vocab, evaluation=True, dictionary=self.trainer.dictionary)
# get dict data
_, _, _, document = output_predictions(None, self.trainer, batches, self.vocab, None,
self.config.get('max_seqlen', TokenizeProcessor.MAX_SEQ_LENGTH_DEFAULT),