diff options
author | John Bauer <horatio@gmail.com> | 2022-11-06 03:55:14 +0300 |
---|---|---|
committer | John Bauer <horatio@gmail.com> | 2022-11-06 03:55:14 +0300 |
commit | 35637568473df11de95d0a6e35d0b41c1ea10e48 (patch) | |
tree | 5c39466ecc69a8ebbf40f40dc59b4532a4168369 | |
parent | 435bc94aba934c1f3c3bb5e0188ea166a67be3df (diff) |
Update error to ValueError (more appropriate) and log what the unexpected type was
-rw-r--r-- | stanza/pipeline/tokenize_processor.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/stanza/pipeline/tokenize_processor.py b/stanza/pipeline/tokenize_processor.py
index 83e2f00f..1baa773f 100644
--- a/stanza/pipeline/tokenize_processor.py
+++ b/stanza/pipeline/tokenize_processor.py
@@ -65,8 +65,8 @@ class TokenizeProcessor(UDProcessor):
         return raw_text, document
 
     def process(self, document):
-        assert isinstance(document, str) or isinstance(document, doc.Document) or (self.config.get('pretokenized') or self.config.get('no_ssplit', False)), \
-            "If neither 'pretokenized' or 'no_ssplit' option is enabled, the input to the TokenizerProcessor must be a string or a Document object."
+        if not (isinstance(document, str) or isinstance(document, doc.Document) or (self.config.get('pretokenized') or self.config.get('no_ssplit', False))):
+            raise ValueError("If neither 'pretokenized' or 'no_ssplit' option is enabled, the input to the TokenizerProcessor must be a string or a Document object. Got %s" % str(type(document)))
 
         if isinstance(document, doc.Document):
             if self.config.get('pretokenized'):