diff options
author | John Bauer <horatio@gmail.com> | 2022-09-07 23:02:09 +0300 |
---|---|---|
committer | John Bauer <horatio@gmail.com> | 2022-09-08 00:10:27 +0300 |
commit | 27f886e668f98c36ad2a7cd44ebd23a93ba4575f (patch) | |
tree | 2c9839f435956951fb1c5dabbeaf6df49d561479 | |
parent | ef617803b2cd5c124393e1468c6ad0e367c4fdc7 (diff) |
Update a comment on a sentence being eliminated in constituency VIT
-rw-r--r-- | stanza/utils/datasets/constituency/convert_it_vit.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/stanza/utils/datasets/constituency/convert_it_vit.py b/stanza/utils/datasets/constituency/convert_it_vit.py
index 305ce029..18bb0b76 100644
--- a/stanza/utils/datasets/constituency/convert_it_vit.py
+++ b/stanza/utils/datasets/constituency/convert_it_vit.py
@@ -542,10 +542,10 @@ def update_tree(original_tree, dep_sentence, con_id, dep_id, mwt_map, tsurgeon_p
 
 # train set:
 # 858: missing close parens in the UD conversion
+# 1169: 'che', 'poi', 'tutti', 'i', 'Paesi', 'ue', '.' -> 'per', 'tutti', 'i', 'paesi', 'Ue', '.'
 # 2375: the problem is inconsistent treatment of s_p_a_
 # 05052: the heuristic to fill in a missing "si" doesn't work because there's
 # already another "si" immediately after
-# 07683: weird token
 #
 # test set:
 # 09764: weird punct at end
```