Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-09-08 01:15:57 +0300
committer: John Bauer <horatio@gmail.com> 2022-09-08 01:15:57 +0300
commit: e60c6aa390fca4d36eefa56c2d40ac8737568c0f (patch)
tree: 2f1240d75a5318c75e10da7e9503da8bec676926
parent: 27f886e668f98c36ad2a7cd44ebd23a93ba4575f (diff)
Eliminate a redundant function call
-rw-r--r-- stanza/utils/datasets/common.py | 9
-rw-r--r-- stanza/utils/datasets/prepare_depparse_treebank.py | 3
-rw-r--r-- stanza/utils/datasets/prepare_mwt_treebank.py | 3
-rwxr-xr-x stanza/utils/datasets/prepare_tokenizer_treebank.py | 5
-rw-r--r-- stanza/utils/training/run_ete.py | 5
5 files changed, 10 insertions, 15 deletions
diff --git a/stanza/utils/datasets/common.py b/stanza/utils/datasets/common.py
index a17db993..efdbb8cf 100644
--- a/stanza/utils/datasets/common.py
+++ b/stanza/utils/datasets/common.py
@@ -6,19 +6,10 @@ import os
import sys
import stanza.utils.default_paths as default_paths
-from stanza.models.common.constant import treebank_to_short_name
from stanza.models.common.short_name_to_treebank import canonical_treebank_name
logger = logging.getLogger('stanza')
-def project_to_short_name(treebank):
- """
- Project either a treebank or a short name to a short name
-
- TODO: see if treebank_to_short_name can incorporate this
- """
- return treebank_to_short_name(treebank)
-
def find_treebank_dataset_file(treebank, udbase_dir, dataset, extension, fail=False):
"""
For a given treebank, dataset, extension, look for the exact filename to use.
diff --git a/stanza/utils/datasets/prepare_depparse_treebank.py b/stanza/utils/datasets/prepare_depparse_treebank.py
index 3152bfae..d452ac2c 100644
--- a/stanza/utils/datasets/prepare_depparse_treebank.py
+++ b/stanza/utils/datasets/prepare_depparse_treebank.py
@@ -14,6 +14,7 @@ import logging
import os
from stanza.models import tagger
+from stanza.models.common.constant import treebank_to_short_name
from stanza.resources.common import download, DEFAULT_MODEL_DIR
from stanza.resources.prepare_resources import default_charlms, pos_charlms
import stanza.utils.datasets.common as common
@@ -67,7 +68,7 @@ def process_treebank(treebank, paths, args) -> None:
if args.tag_method is Tags.GOLD:
prepare_tokenizer_treebank.copy_conllu_treebank(treebank, paths, paths["DEPPARSE_DATA_DIR"])
elif args.tag_method is Tags.PREDICTED:
- short_name = common.project_to_short_name(treebank)
+ short_name = treebank_to_short_name(treebank)
short_language, dataset = short_name.split("_")
# fmt: off
diff --git a/stanza/utils/datasets/prepare_mwt_treebank.py b/stanza/utils/datasets/prepare_mwt_treebank.py
index 80881465..6d2c3e50 100644
--- a/stanza/utils/datasets/prepare_mwt_treebank.py
+++ b/stanza/utils/datasets/prepare_mwt_treebank.py
@@ -14,6 +14,7 @@ import os
import shutil
import tempfile
+from stanza.models.common.constant import treebank_to_short_name
import stanza.utils.datasets.common as common
import stanza.utils.datasets.prepare_tokenizer_treebank as prepare_tokenizer_treebank
@@ -25,7 +26,7 @@ def copy_conllu(tokenizer_dir, mwt_dir, short_name, dataset, particle):
shutil.copyfile(input_conllu_tokenizer, input_conllu_mwt)
def process_treebank(treebank, paths, args):
- short_name = common.project_to_short_name(treebank)
+ short_name = treebank_to_short_name(treebank)
mwt_dir = paths["MWT_DATA_DIR"]
os.makedirs(mwt_dir, exist_ok=True)
diff --git a/stanza/utils/datasets/prepare_tokenizer_treebank.py b/stanza/utils/datasets/prepare_tokenizer_treebank.py
index d3b815f8..d03b81ac 100755
--- a/stanza/utils/datasets/prepare_tokenizer_treebank.py
+++ b/stanza/utils/datasets/prepare_tokenizer_treebank.py
@@ -31,6 +31,7 @@ import tempfile
from collections import Counter
+from stanza.models.common.constant import treebank_to_short_name
import stanza.utils.datasets.common as common
import stanza.utils.datasets.prepare_tokenizer_data as prepare_tokenizer_data
import stanza.utils.datasets.tokenization.convert_my_alt as convert_my_alt
@@ -56,7 +57,7 @@ def copy_conllu_treebank(treebank, paths, dest_dir, postprocess=None, augment=Tr
"""
os.makedirs(dest_dir, exist_ok=True)
- short_name = common.project_to_short_name(treebank)
+ short_name = treebank_to_short_name(treebank)
short_language = short_name.split("_")[0]
with tempfile.TemporaryDirectory() as tokenizer_dir:
@@ -1156,7 +1157,7 @@ def process_treebank(treebank, paths, args):
tokenizer_dir = paths["TOKENIZE_DATA_DIR"]
handparsed_dir = paths["HANDPARSED_DIR"]
- short_name = common.project_to_short_name(treebank)
+ short_name = treebank_to_short_name(treebank)
short_language = short_name.split("_")[0]
os.makedirs(tokenizer_dir, exist_ok=True)
diff --git a/stanza/utils/training/run_ete.py b/stanza/utils/training/run_ete.py
index d4cc8a25..c4d146e3 100644
--- a/stanza/utils/training/run_ete.py
+++ b/stanza/utils/training/run_ete.py
@@ -28,9 +28,10 @@ from stanza.models import parser
from stanza.models import tagger
from stanza.models import tokenizer
+from stanza.models.common.constant import treebank_to_short_name
+
from stanza.resources.prepare_resources import default_charlms, pos_charlms
-from stanza.utils.datasets.common import project_to_short_name
from stanza.utils.training import common
from stanza.utils.training.common import Mode, build_charlm_args, choose_charlm
from stanza.utils.training.run_lemma import check_lemmas
@@ -65,7 +66,7 @@ def run_ete(paths, dataset, short_name, command_args, extra_args):
# value of command_args.save_output
if command_args and command_args.test_data:
- test_short_name = project_to_short_name(command_args.test_data)
+ test_short_name = treebank_to_short_name(command_args.test_data)
else:
test_short_name = short_name