Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Bauer <horatio@gmail.com>2022-08-25 08:40:13 +0300
committerJohn Bauer <horatio@gmail.com>2022-08-25 08:40:13 +0300
commit6a97370402a8510c1d48f1836c1412aa13a51942 (patch)
treececf1c0294828684afda47c0e4f908568909e332
parentc2933d060afe507e30f504677fd01c370575a313 (diff)
pointless refactoringupdated_eval
-rwxr-xr-xstanza/utils/conll18_ud_eval.py82
1 file changed, 45 insertions, 37 deletions
diff --git a/stanza/utils/conll18_ud_eval.py b/stanza/utils/conll18_ud_eval.py
index 1bdbddf8..946e02d7 100755
--- a/stanza/utils/conll18_ud_eval.py
+++ b/stanza/utils/conll18_ud_eval.py
@@ -156,7 +156,7 @@ def process_enhanced_deps(deps) :
return edeps
# Load given CoNLL-U file into internal representation
-def load_conllu(file,treebank_type):
+def load_conllu(file, treebank_type):
# Internal representation classes
class UDRepresentation:
def __init__(self):
@@ -249,7 +249,7 @@ def load_conllu(file,treebank_type):
# ignore rel>rel dependencies, and instead append the original hd/rel edge
# note that this also ignores other extensions (like adding lemma's)
# note that this sometimes introduces duplicates (if orig hd/rel was already included in DEPS)
- if (treebank_type['no_gapping']) : # enhancement 1
+ if treebank_type.get('no_gapping', False) : # enhancement 1
processed_deps = []
for (parent,steps) in enhanced_deps :
if len(steps) > 1 :
@@ -262,7 +262,7 @@ def load_conllu(file,treebank_type):
enhanced_deps = processed_deps
# for a given conj node, any rel other than conj in DEPS can be ignored
- if treebank_type['no_shared_parents_in_coordination'] : # enhancement 2
+ if treebank_type.get('no_shared_parents_in_coordination', False) : # enhancement 2
for (hd,steps) in enhanced_deps :
if len(steps) == 1 and steps[0].startswith('conj') :
enhanced_deps = [(hd,steps)]
@@ -270,7 +270,7 @@ def load_conllu(file,treebank_type):
# deprels not matching ud_hd/ud_dep are spurious.
# czech/pud estonian/ewt syntagrus finnish/pud
# TO DO: treebanks that do not mark xcomp and relcl subjects
- if treebank_type['no_shared_dependents_in_coordination'] : # enhancement 3
+ if treebank_type.get('no_shared_dependents_in_coordination', False) : # enhancement 3
processed_deps = []
for (hd,steps) in enhanced_deps :
duplicate = 0
@@ -284,7 +284,7 @@ def load_conllu(file,treebank_type):
# if treebank does not have control relations: subjects of xcomp parents in system are to be skipped
# note that rel is actually a path sometimes rel1>rel2 in theory rel2 could be subj?
# from lassy-small: 7:conj:en>nsubj:pass|7:conj:en>nsubj:xsubj (7,['conj:en','nsubj:xsubj'])
- if (treebank_type['no_control']) : # enhancement 4
+ if treebank_type.get('no_control', False) : # enhancement 4
processed_deps = []
for (parent,steps) in enhanced_deps :
include = 1
@@ -296,7 +296,7 @@ def load_conllu(file,treebank_type):
processed_deps.append((parent,steps))
enhanced_deps = processed_deps
- if (treebank_type['no_external_arguments_of_relative_clauses']) : # enhancement 5
+ if treebank_type.get('no_external_arguments_of_relative_clauses', False) : # enhancement 5
processed_deps = []
for (parent,steps) in enhanced_deps :
if (steps[0] == 'ref') :
@@ -311,7 +311,7 @@ def load_conllu(file,treebank_type):
enhanced_deps = processed_deps
# treebanks where no lemma info has been added
- if treebank_type['no_case_info'] : # enhancement number 6
+ if treebank_type.get('no_case_info', False) : # enhancement number 6
processed_deps = []
for (hd,steps) in enhanced_deps :
processed_steps = []
@@ -632,7 +632,10 @@ def evaluate(gold_ud, system_ud):
}
-def load_conllu_file(path,treebank_type):
+def load_conllu_file(path, treebank_type=None):
+ if treebank_type is None:
+ treebank_type = {}
+
_file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {}))
return load_conllu(_file,treebank_type)
@@ -647,45 +650,30 @@ def evaluate_wrapper(args):
treebank_type['no_case_info'] = 1 if '6' in enhancements else 0
# Load CoNLL-U files
- gold_ud = load_conllu_file(args.gold_file,treebank_type)
- system_ud = load_conllu_file(args.system_file,treebank_type)
+ gold_ud = load_conllu_file(args.gold_file, treebank_type)
+ system_ud = load_conllu_file(args.system_file, treebank_type)
return evaluate(gold_ud, system_ud)
-def main():
- # Parse arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("gold_file", type=str,
- help="Name of the CoNLL-U file with the gold data.")
- parser.add_argument("system_file", type=str,
- help="Name of the CoNLL-U file with the predicted data.")
- parser.add_argument("--verbose", "-v", default=False, action="store_true",
- help="Print all metrics.")
- parser.add_argument("--counts", "-c", default=False, action="store_true",
- help="Print raw counts of correct/gold/system/aligned words instead of prec/rec/F1 for all metrics.")
- parser.add_argument("--enhancements", type=str, default='0',
- help="Level of enhancements in the gold data (see guidelines) 0=all (default), 1=no gapping, 2=no shared parents, 3=no shared dependents 4=no control, 5=no external arguments, 6=no lemma info, combinations: 12=both 1 and 2 apply, etc.")
- args = parser.parse_args()
-
- # Evaluate
- evaluation = evaluate_wrapper(args)
+def build_evaluation_table(evaluation, verbose, counts):
+ text = []
# Print the evaluation
if not args.verbose and not args.counts:
- print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1))
- print("ELAS F1 Score: {:.2f}".format(100 * evaluation["ELAS"].f1))
- print("EULAS F1 Score: {:.2f}".format(100 * evaluation["EULAS"].f1))
+ text.append("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1))
+ text.append("ELAS F1 Score: {:.2f}".format(100 * evaluation["ELAS"].f1))
+ text.append("EULAS F1 Score: {:.2f}".format(100 * evaluation["EULAS"].f1))
- print("MLAS Score: {:.2f}".format(100 * evaluation["MLAS"].f1))
- print("BLEX Score: {:.2f}".format(100 * evaluation["BLEX"].f1))
+ text.append("MLAS Score: {:.2f}".format(100 * evaluation["MLAS"].f1))
+ text.append("BLEX Score: {:.2f}".format(100 * evaluation["BLEX"].f1))
else:
if args.counts:
- print("Metric | Correct | Gold | Predicted | Aligned")
+ text.append("Metric | Correct | Gold | Predicted | Aligned")
else:
- print("Metric | Precision | Recall | F1 Score | AligndAcc")
- print("-----------+-----------+-----------+-----------+-----------")
+ text.append("Metric | Precision | Recall | F1 Score | AligndAcc")
+ text.append("-----------+-----------+-----------+-----------+-----------")
for metric in["Tokens", "Sentences", "Words", "UPOS", "XPOS", "UFeats", "AllTags", "Lemmas", "UAS", "LAS", "ELAS", "EULAS", "CLAS", "MLAS", "BLEX"]:
if args.counts:
- print("{:11}|{:10} |{:10} |{:10} |{:10}".format(
+ text.append("{:11}|{:10} |{:10} |{:10} |{:10}".format(
metric,
evaluation[metric].correct,
evaluation[metric].gold_total,
@@ -693,7 +681,7 @@ def main():
evaluation[metric].aligned_total or (evaluation[metric].correct if metric == "Words" else "")
))
else:
- print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format(
+ text.append("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format(
metric,
100 * evaluation[metric].precision,
100 * evaluation[metric].recall,
@@ -701,6 +689,26 @@ def main():
"{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else ""
))
+def main():
+ # Parse arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument("gold_file", type=str,
+ help="Name of the CoNLL-U file with the gold data.")
+ parser.add_argument("system_file", type=str,
+ help="Name of the CoNLL-U file with the predicted data.")
+ parser.add_argument("--verbose", "-v", default=False, action="store_true",
+ help="Print all metrics.")
+ parser.add_argument("--counts", "-c", default=False, action="store_true",
+ help="Print raw counts of correct/gold/system/aligned words instead of prec/rec/F1 for all metrics.")
+ parser.add_argument("--enhancements", type=str, default='0',
+ help="Level of enhancements in the gold data (see guidelines) 0=all (default), 1=no gapping, 2=no shared parents, 3=no shared dependents 4=no control, 5=no external arguments, 6=no lemma info, combinations: 12=both 1 and 2 apply, etc.")
+ args = parser.parse_args()
+
+ # Evaluate
+ evaluation = evaluate_wrapper(args)
+ results = build_evaluation_table(evaluation, args.verbose, args.counts)
+ print(results)
+
if __name__ == "__main__":
main()