diff options
author | John Bauer <horatio@gmail.com> | 2022-11-05 09:04:56 +0300 |
---|---|---|
committer | John Bauer <horatio@gmail.com> | 2022-11-05 09:04:56 +0300 |
commit | 1ab93b58cc978e744caae7f4343c7b275e5bf216 (patch) | |
tree | 8f0187f1e4cc8a511cf4d2a9173fed8b93ea513f | |
parent | 2f2e1e21c0258ec8f9c2c67f8a49c0a4b150d81f (diff) |
A couple comments on how the NER training is organized
-rw-r--r-- | stanza/models/ner_tagger.py | 2 | ||||
-rw-r--r-- | stanza/utils/training/run_ner.py | 2 |
2 files changed, 4 insertions, 0 deletions
```diff
diff --git a/stanza/models/ner_tagger.py b/stanza/models/ner_tagger.py
index 8e7c6e8e..57952e46 100644
--- a/stanza/models/ner_tagger.py
+++ b/stanza/models/ner_tagger.py
@@ -219,6 +219,8 @@ def train(args):
 
     # LR scheduling
     if args['lr_decay'] > 0:
+        # learning rate changes on plateau -- no improvement on model for patience number of epochs
+        # change is made as a factor of the learning rate decay
         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(trainer.optimizer, mode='max', factor=args['lr_decay'], \
                 patience=args['patience'], verbose=True, min_lr=args['min_lr'])
     else:
diff --git a/stanza/utils/training/run_ner.py b/stanza/utils/training/run_ner.py
index 6075e38d..26d3d222 100644
--- a/stanza/utils/training/run_ner.py
+++ b/stanza/utils/training/run_ner.py
@@ -71,6 +71,8 @@ def run_treebank(mode, paths, treebank, short_name,
     dev_file = os.path.join(ner_dir, f"{short_name}.dev.json")
     test_file = os.path.join(ner_dir, f"{short_name}.test.json")
 
+    # if any files are missing, try to rebuild the dataset
+    # if that still doesn't work, we have to throw an error
     missing_file = [x for x in (train_file, dev_file, test_file) if not os.path.exists(x)]
     if len(missing_file) > 0:
         logger.warning(f"The data for {short_name} is missing or incomplete.  Cannot find {missing_file}  Attempting to rebuild...")
```