Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-11-07 11:59:55 +0300
committer: John Bauer <horatio@gmail.com> 2022-11-07 11:59:55 +0300
commit: 4a0dcc1dcfb884d6b618b3fdc3f74139806d3dd9 (patch)
tree: ae74b76b73fe4aac1beafcbe6d9f34810cf06a7d
parent: 0508833ce6d986de05d0f8b002c64f21f11e7856 (diff)
Skip blank lines
-rw-r--r-- stanza/models/constituency/trainer.py 1
1 file changed, 1 insertion, 0 deletions
diff --git a/stanza/models/constituency/trainer.py b/stanza/models/constituency/trainer.py
index eec82619..09ef534c 100644
--- a/stanza/models/constituency/trainer.py
+++ b/stanza/models/constituency/trainer.py
@@ -240,6 +240,7 @@ def parse_text(args, model, retag_pipeline):
with open(args['tokenized_file'], encoding='utf-8') as fin:
lines = fin.readlines()
lines = [x.strip() for x in lines]
+ lines = [x for x in lines if x]
docs = [[word.replace("_", " ") for word in sentence.split()] for sentence in lines]
logger.info("Processing %d lines", len(docs))
treebank = []