From 27f886e668f98c36ad2a7cd44ebd23a93ba4575f Mon Sep 17 00:00:00 2001 From: John Bauer Date: Wed, 7 Sep 2022 13:02:09 -0700 Subject: Update a comment on a sentence being eliminated in constituency VIT --- stanza/utils/datasets/constituency/convert_it_vit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stanza/utils/datasets/constituency/convert_it_vit.py b/stanza/utils/datasets/constituency/convert_it_vit.py index 305ce029..18bb0b76 100644 --- a/stanza/utils/datasets/constituency/convert_it_vit.py +++ b/stanza/utils/datasets/constituency/convert_it_vit.py @@ -542,10 +542,10 @@ def update_tree(original_tree, dep_sentence, con_id, dep_id, mwt_map, tsurgeon_p # train set: # 858: missing close parens in the UD conversion +# 1169: 'che', 'poi', 'tutti', 'i', 'Paesi', 'ue', '.' -> 'per', 'tutti', 'i', 'paesi', 'Ue', '.' # 2375: the problem is inconsistent treatment of s_p_a_ # 05052: the heuristic to fill in a missing "si" doesn't work because there's # already another "si" immediately after -# 07683: weird token # # test set: # 09764: weird punct at end -- cgit v1.2.3