Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'stanza/utils/training/run_ete.py')
-rw-r--r-- stanza/utils/training/run_ete.py 8
1 files changed, 2 insertions, 6 deletions
diff --git a/stanza/utils/training/run_ete.py b/stanza/utils/training/run_ete.py
index ea90960b..87c2a84d 100644
--- a/stanza/utils/training/run_ete.py
+++ b/stanza/utils/training/run_ete.py
@@ -66,12 +66,8 @@ def run_ete(paths, dataset, short_name, command_args, extra_args):
# TOKENIZE step
# the raw data to process starts in tokenize_dir
# retokenize it using the saved model
- if short_language == 'vi':
- tokenizer_type = "--json_file"
- tokenizer_file = f"{tokenize_dir}/{test_short_name}-ud-{dataset}.json"
- else:
- tokenizer_type = "--txt_file"
- tokenizer_file = f"{tokenize_dir}/{test_short_name}.{dataset}.txt"
+ tokenizer_type = "--txt_file"
+ tokenizer_file = f"{tokenize_dir}/{test_short_name}.{dataset}.txt"
tokenizer_output = f"{ete_dir}/{short_name}.{dataset}.tokenizer.conllu"