Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Bauer <horatio@gmail.com> 2022-11-06 03:55:14 +0300
committer John Bauer <horatio@gmail.com> 2022-11-06 03:55:14 +0300
commit 35637568473df11de95d0a6e35d0b41c1ea10e48 (patch)
tree 5c39466ecc69a8ebbf40f40dc59b4532a4168369
parent 435bc94aba934c1f3c3bb5e0188ea166a67be3df (diff)
Update error to ValueError (more appropriate) and log what the unexpected type was
-rw-r--r-- stanza/pipeline/tokenize_processor.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/stanza/pipeline/tokenize_processor.py b/stanza/pipeline/tokenize_processor.py
index 83e2f00f..1baa773f 100644
--- a/stanza/pipeline/tokenize_processor.py
+++ b/stanza/pipeline/tokenize_processor.py
@@ -65,8 +65,8 @@ class TokenizeProcessor(UDProcessor):
return raw_text, document
def process(self, document):
- assert isinstance(document, str) or isinstance(document, doc.Document) or (self.config.get('pretokenized') or self.config.get('no_ssplit', False)), \
- "If neither 'pretokenized' or 'no_ssplit' option is enabled, the input to the TokenizerProcessor must be a string or a Document object."
+ if not (isinstance(document, str) or isinstance(document, doc.Document) or (self.config.get('pretokenized') or self.config.get('no_ssplit', False))):
+ raise ValueError("If neither 'pretokenized' or 'no_ssplit' option is enabled, the input to the TokenizerProcessor must be a string or a Document object. Got %s" % str(type(document)))
if isinstance(document, doc.Document):
if self.config.get('pretokenized'):