From 4dca146a8c34393c4b3887c76b3d4261e39f2029 Mon Sep 17 00:00:00 2001
From: John Bauer <horatio@gmail.com>
Date: Fri, 9 Sep 2022 23:31:52 -0700
Subject: Add some doc and update dev_sentence -> dep_sentence to reflect where
 the variable comes from

---
 stanza/utils/datasets/constituency/convert_it_vit.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/stanza/utils/datasets/constituency/convert_it_vit.py b/stanza/utils/datasets/constituency/convert_it_vit.py
index 7c4ca0bc..20ba9814 100644
--- a/stanza/utils/datasets/constituency/convert_it_vit.py
+++ b/stanza/utils/datasets/constituency/convert_it_vit.py
@@ -434,7 +434,13 @@ def get_mwt(*dep_datasets):
                 mwt_map[token.text] = expansion
     return mwt_map
 
-def update_mwts_and_special_cases(original_tree, dev_sentence, mwt_map, tsurgeon_processor):
+def update_mwts_and_special_cases(original_tree, dep_sentence, mwt_map, tsurgeon_processor):
+    """
+    Replace MWT structures with their UD equivalents, along with some other minor tsurgeon-based edits
+
+    original_tree: the tree as read from VIT
+    dep_sentence: the UD dependency dataset version of this sentence
+    """
     updated_tree = original_tree
 
     operations = []
@@ -468,7 +474,7 @@ def update_mwts_and_special_cases(original_tree, dev_sentence, mwt_map, tsurgeon
 
     # now assemble a bunch of regex to split and otherwise manipulate
     # the MWT in the trees
-    for token in dev_sentence.tokens:
+    for token in dep_sentence.tokens:
         if len(token.words) == 1:
             continue
         if token.text in mwt_map:
-- 
cgit v1.2.3