Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com>, 2022-11-11 07:39:46 +0300
committer: John Bauer <horatio@gmail.com>, 2022-11-11 07:39:46 +0300
commit: 080b714426d8be12a0bcf39d5e6020be23dfb33d (patch)
tree: 55aec95e7aa1c626b862df7ac837f7c6f8979ea6
parent: 15c7d1ff5f9e28eb673c17f1606a1d5a2d465aca (diff)
Turn the split dataset name for vlsp22 into a -x-y format. Makes it easier to use different sizes.
-rw-r--r-- stanza/utils/datasets/constituency/prepare_con_dataset.py | 11
1 file changed, 7 insertions, 4 deletions
diff --git a/stanza/utils/datasets/constituency/prepare_con_dataset.py b/stanza/utils/datasets/constituency/prepare_con_dataset.py
index 92fe559e..9e267361 100644
--- a/stanza/utils/datasets/constituency/prepare_con_dataset.py
+++ b/stanza/utils/datasets/constituency/prepare_con_dataset.py
@@ -194,15 +194,18 @@ def process_vlsp22(paths, dataset_name, *args):
dev_size = 1.0 / args.n_splits
train_size = 1.0 - dev_size
for rotation in range(args.n_splits):
- rotation_name = "%s-%d" % (dataset_name, rotation)
+ rotation_name = "%s-%d-%d" % (dataset_name, rotation, args.n_splits)
vtb_split.split_files(tmp_output_path, paths["CONSTITUENCY_DATA_DIR"], rotation_name, train_size=train_size, dev_size=dev_size, rotation=(rotation, args.n_splits))
_, _, test_file = vtb_split.create_paths(paths["CONSTITUENCY_DATA_DIR"], rotation_name)
+ with open(test_file, "w"):
+ # create an empty test file - currently we don't have actual test data for VLSP 21
+ pass
else:
vtb_split.split_files(tmp_output_path, paths["CONSTITUENCY_DATA_DIR"], dataset_name, train_size=0.9, dev_size=0.1)
_, _, test_file = vtb_split.create_paths(paths["CONSTITUENCY_DATA_DIR"], dataset_name)
- with open(test_file, "w"):
- # create an empty test file - currently we don't have actual test data for VLSP 21
- pass
+ with open(test_file, "w"):
+ # create an empty test file - currently we don't have actual test data for VLSP 21
+ pass
def process_arboretum(paths, dataset_name, *args):
"""