diff options
author | John Bauer <horatio@gmail.com> | 2022-09-08 01:15:57 +0300 |
---|---|---|
committer | John Bauer <horatio@gmail.com> | 2022-09-08 01:15:57 +0300 |
commit | e60c6aa390fca4d36eefa56c2d40ac8737568c0f (patch) | |
tree | 2f1240d75a5318c75e10da7e9503da8bec676926 | |
parent | 27f886e668f98c36ad2a7cd44ebd23a93ba4575f (diff) |
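Eliminate a redundant function call: remove the project_to_short_name wrapper, which only forwarded to treebank_to_short_name, and call treebank_to_short_name directly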
-rw-r--r-- | stanza/utils/datasets/common.py | 9 |
-rw-r--r-- | stanza/utils/datasets/prepare_depparse_treebank.py | 3 |
-rw-r--r-- | stanza/utils/datasets/prepare_mwt_treebank.py | 3 |
-rwxr-xr-x | stanza/utils/datasets/prepare_tokenizer_treebank.py | 5 |
-rw-r--r-- | stanza/utils/training/run_ete.py | 5 |
5 files changed, 10 insertions, 15 deletions
diff --git a/stanza/utils/datasets/common.py b/stanza/utils/datasets/common.py index a17db993..efdbb8cf 100644 --- a/stanza/utils/datasets/common.py +++ b/stanza/utils/datasets/common.py @@ -6,19 +6,10 @@ import os import sys import stanza.utils.default_paths as default_paths -from stanza.models.common.constant import treebank_to_short_name from stanza.models.common.short_name_to_treebank import canonical_treebank_name logger = logging.getLogger('stanza') -def project_to_short_name(treebank): - """ - Project either a treebank or a short name to a short name - - TODO: see if treebank_to_short_name can incorporate this - """ - return treebank_to_short_name(treebank) - def find_treebank_dataset_file(treebank, udbase_dir, dataset, extension, fail=False): """ For a given treebank, dataset, extension, look for the exact filename to use. diff --git a/stanza/utils/datasets/prepare_depparse_treebank.py b/stanza/utils/datasets/prepare_depparse_treebank.py index 3152bfae..d452ac2c 100644 --- a/stanza/utils/datasets/prepare_depparse_treebank.py +++ b/stanza/utils/datasets/prepare_depparse_treebank.py @@ -14,6 +14,7 @@ import logging import os from stanza.models import tagger +from stanza.models.common.constant import treebank_to_short_name from stanza.resources.common import download, DEFAULT_MODEL_DIR from stanza.resources.prepare_resources import default_charlms, pos_charlms import stanza.utils.datasets.common as common @@ -67,7 +68,7 @@ def process_treebank(treebank, paths, args) -> None: if args.tag_method is Tags.GOLD: prepare_tokenizer_treebank.copy_conllu_treebank(treebank, paths, paths["DEPPARSE_DATA_DIR"]) elif args.tag_method is Tags.PREDICTED: - short_name = common.project_to_short_name(treebank) + short_name = treebank_to_short_name(treebank) short_language, dataset = short_name.split("_") # fmt: off diff --git a/stanza/utils/datasets/prepare_mwt_treebank.py b/stanza/utils/datasets/prepare_mwt_treebank.py index 80881465..6d2c3e50 100644 --- 
a/stanza/utils/datasets/prepare_mwt_treebank.py +++ b/stanza/utils/datasets/prepare_mwt_treebank.py @@ -14,6 +14,7 @@ import os import shutil import tempfile +from stanza.models.common.constant import treebank_to_short_name import stanza.utils.datasets.common as common import stanza.utils.datasets.prepare_tokenizer_treebank as prepare_tokenizer_treebank @@ -25,7 +26,7 @@ def copy_conllu(tokenizer_dir, mwt_dir, short_name, dataset, particle): shutil.copyfile(input_conllu_tokenizer, input_conllu_mwt) def process_treebank(treebank, paths, args): - short_name = common.project_to_short_name(treebank) + short_name = treebank_to_short_name(treebank) mwt_dir = paths["MWT_DATA_DIR"] os.makedirs(mwt_dir, exist_ok=True) diff --git a/stanza/utils/datasets/prepare_tokenizer_treebank.py b/stanza/utils/datasets/prepare_tokenizer_treebank.py index d3b815f8..d03b81ac 100755 --- a/stanza/utils/datasets/prepare_tokenizer_treebank.py +++ b/stanza/utils/datasets/prepare_tokenizer_treebank.py @@ -31,6 +31,7 @@ import tempfile from collections import Counter +from stanza.models.common.constant import treebank_to_short_name import stanza.utils.datasets.common as common import stanza.utils.datasets.prepare_tokenizer_data as prepare_tokenizer_data import stanza.utils.datasets.tokenization.convert_my_alt as convert_my_alt @@ -56,7 +57,7 @@ def copy_conllu_treebank(treebank, paths, dest_dir, postprocess=None, augment=Tr """ os.makedirs(dest_dir, exist_ok=True) - short_name = common.project_to_short_name(treebank) + short_name = treebank_to_short_name(treebank) short_language = short_name.split("_")[0] with tempfile.TemporaryDirectory() as tokenizer_dir: @@ -1156,7 +1157,7 @@ def process_treebank(treebank, paths, args): tokenizer_dir = paths["TOKENIZE_DATA_DIR"] handparsed_dir = paths["HANDPARSED_DIR"] - short_name = common.project_to_short_name(treebank) + short_name = treebank_to_short_name(treebank) short_language = short_name.split("_")[0] os.makedirs(tokenizer_dir, exist_ok=True) diff 
--git a/stanza/utils/training/run_ete.py b/stanza/utils/training/run_ete.py index d4cc8a25..c4d146e3 100644 --- a/stanza/utils/training/run_ete.py +++ b/stanza/utils/training/run_ete.py @@ -28,9 +28,10 @@ from stanza.models import parser from stanza.models import tagger from stanza.models import tokenizer +from stanza.models.common.constant import treebank_to_short_name + from stanza.resources.prepare_resources import default_charlms, pos_charlms -from stanza.utils.datasets.common import project_to_short_name from stanza.utils.training import common from stanza.utils.training.common import Mode, build_charlm_args, choose_charlm from stanza.utils.training.run_lemma import check_lemmas @@ -65,7 +66,7 @@ def run_ete(paths, dataset, short_name, command_args, extra_args): # value of command_args.save_output if command_args and command_args.test_data: - test_short_name = project_to_short_name(command_args.test_data) + test_short_name = treebank_to_short_name(command_args.test_data) else: test_short_name = short_name |