diff options
Diffstat (limited to 'stanza/utils/datasets/prepare_depparse_treebank.py')
-rw-r--r-- | stanza/utils/datasets/prepare_depparse_treebank.py | 4 |
1 file changed, 4 insertions, 0 deletions
diff --git a/stanza/utils/datasets/prepare_depparse_treebank.py b/stanza/utils/datasets/prepare_depparse_treebank.py index 2e99276f..e73ef31a 100644 --- a/stanza/utils/datasets/prepare_depparse_treebank.py +++ b/stanza/utils/datasets/prepare_depparse_treebank.py @@ -28,6 +28,8 @@ def add_specific_args(parser): help='Use gold tags for building the depparse data') parser.add_argument("--predicted", dest='tag_method', action='store_const', const=Tags.PREDICTED, help='Use predicted tags for building the depparse data') + parser.add_argument('--wordvec_pretrain_file', type=str, default=None, help='Exact name of the pretrain file to read') + def process_treebank(treebank, paths, args): if args.tag_method is Tags.GOLD: @@ -49,6 +51,8 @@ def process_treebank(treebank, paths, args): tagger_args = ["--eval_file", original, "--gold_file", original, "--output_file", retagged] + if args.wordvec_pretrain_file: + tagger_args.extend(["--wordvec_pretrain_file", args.wordvec_pretrain_file]) tagger_args = base_args + tagger_args logger.info("Running tagger to retag {} to {}\n Args: {}".format(original, retagged, tagger_args)) tagger.main(tagger_args) |