Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-11-05 09:04:56 +0300
committer: John Bauer <horatio@gmail.com> 2022-11-05 09:04:56 +0300
commit: 1ab93b58cc978e744caae7f4343c7b275e5bf216 (patch)
tree: 8f0187f1e4cc8a511cf4d2a9173fed8b93ea513f
parent: 2f2e1e21c0258ec8f9c2c67f8a49c0a4b150d81f (diff)
A couple comments on how the NER training is organized
-rw-r--r-- stanza/models/ner_tagger.py 2
-rw-r--r-- stanza/utils/training/run_ner.py 2
2 files changed, 4 insertions, 0 deletions
diff --git a/stanza/models/ner_tagger.py b/stanza/models/ner_tagger.py
index 8e7c6e8e..57952e46 100644
--- a/stanza/models/ner_tagger.py
+++ b/stanza/models/ner_tagger.py
@@ -219,6 +219,8 @@ def train(args):
# LR scheduling
if args['lr_decay'] > 0:
+ # learning rate changes on plateau -- no improvement on model for patience number of epochs
+ # change is made as a factor of the learning rate decay
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(trainer.optimizer, mode='max', factor=args['lr_decay'], \
patience=args['patience'], verbose=True, min_lr=args['min_lr'])
else:
diff --git a/stanza/utils/training/run_ner.py b/stanza/utils/training/run_ner.py
index 6075e38d..26d3d222 100644
--- a/stanza/utils/training/run_ner.py
+++ b/stanza/utils/training/run_ner.py
@@ -71,6 +71,8 @@ def run_treebank(mode, paths, treebank, short_name,
dev_file = os.path.join(ner_dir, f"{short_name}.dev.json")
test_file = os.path.join(ner_dir, f"{short_name}.test.json")
+ # if any files are missing, try to rebuild the dataset
+ # if that still doesn't work, we have to throw an error
missing_file = [x for x in (train_file, dev_file, test_file) if not os.path.exists(x)]
if len(missing_file) > 0:
logger.warning(f"The data for {short_name} is missing or incomplete. Cannot find {missing_file} Attempting to rebuild...")