diff options
Diffstat (limited to 'stanza/utils/training/run_ete.py')
-rw-r--r-- | stanza/utils/training/run_ete.py | 8 |
1 file changed, 2 insertions, 6 deletions
diff --git a/stanza/utils/training/run_ete.py b/stanza/utils/training/run_ete.py
index ea90960b..87c2a84d 100644
--- a/stanza/utils/training/run_ete.py
+++ b/stanza/utils/training/run_ete.py
@@ -66,12 +66,8 @@ def run_ete(paths, dataset, short_name, command_args, extra_args):
     # TOKENIZE step
     # the raw data to process starts in tokenize_dir
     # retokenize it using the saved model
-    if short_language == 'vi':
-        tokenizer_type = "--json_file"
-        tokenizer_file = f"{tokenize_dir}/{test_short_name}-ud-{dataset}.json"
-    else:
-        tokenizer_type = "--txt_file"
-        tokenizer_file = f"{tokenize_dir}/{test_short_name}.{dataset}.txt"
+    tokenizer_type = "--txt_file"
+    tokenizer_file = f"{tokenize_dir}/{test_short_name}.{dataset}.txt"
     tokenizer_output = f"{ete_dir}/{short_name}.{dataset}.tokenizer.conllu"